/*
 * html_entities.js
 *
 * A JavaScript version of the `HTML::Entities' Perl module. It may be used to
 * encode and decode HTML entities.
 *
 * Provenance: added by commit 1a056533d37a8ea86534d4ef67ad277d14bb9374
 * ("Added `html_entities.js'") by Florian Forster, Tue, 12 Sep 2006
 * 16:00:00 +0200; patch generated with git 2.11.0.
 */

/*
 * Parallel lookup tables: entityList[i] is the entity name (without the
 * leading `&' and the trailing `;') of the character stored in charList[i].
 * Both are globals so that other scripts can read or extend them.
 */
var entityList = [];
var charList = [];

/* Fill both tables from one list of [name, character] pairs so that a name
 * and its character can never get out of step. */
(function (pairs)
{
  for (var i = 0; i < pairs.length; i++)
  {
    entityList.push (pairs[i][0]);
    charList.push (pairs[i][1]);
  }
}) ([
  /* Really important entities */
  ["amp", "&"], /* Kept as the first entry, as in the original table. */
  ["gt", ">"],
  ["lt", "<"],
  ["quot", "\""],
  ["apos", "'"],

  /* RFC 1866, section 14 */
  ["nbsp", "\u00a0"],
  ["iexcl", "\u00a1"],
  ["cent", "\u00a2"],
  ["pound", "\u00a3"],
  ["curren", "\u00a4"],
  ["yen", "\u00a5"],
  ["brvbar", "\u00a6"],
  ["sect", "\u00a7"],
  ["uml", "\u00a8"],
  ["copy", "\u00a9"],
  ["ordf", "\u00aa"],
  ["laquo", "\u00ab"],
  ["not", "\u00ac"],
  ["shy", "\u00ad"],
  ["reg", "\u00ae"],
  ["macr", "\u00af"],
  ["deg", "\u00b0"],
  ["plusmn", "\u00b1"],
  ["sup2", "\u00b2"],
  ["sup3", "\u00b3"],
  ["acute", "\u00b4"],
  ["micro", "\u00b5"],
  ["para", "\u00b6"],
  ["middot", "\u00b7"],
  ["cedil", "\u00b8"],
  ["sup1", "\u00b9"],
  ["ordm", "\u00ba"],
  ["raquo", "\u00bb"],
  ["frac14", "\u00bc"],
  ["frac12", "\u00bd"],
  ["frac34", "\u00be"],
  ["iquest", "\u00bf"],
  ["Agrave", "\u00c0"],
  ["Aacute", "\u00c1"],
  ["Acirc", "\u00c2"],
  ["Atilde", "\u00c3"],
  ["Auml", "\u00c4"],
  ["Aring", "\u00c5"],
  ["AElig", "\u00c6"],
  ["Ccedil", "\u00c7"],
  ["Egrave", "\u00c8"],
  ["Eacute", "\u00c9"],
  ["Ecirc", "\u00ca"],
  ["Euml", "\u00cb"],
  ["Igrave", "\u00cc"],
  ["Iacute", "\u00cd"],
  ["Icirc", "\u00ce"],
  ["Iuml", "\u00cf"],
  ["ETH", "\u00d0"],
  ["Ntilde", "\u00d1"],
  ["Ograve", "\u00d2"],
  ["Oacute", "\u00d3"],
  ["Ocirc", "\u00d4"],
  ["Otilde", "\u00d5"],
  ["Ouml", "\u00d6"],
  ["times", "\u00d7"],
  ["Oslash", "\u00d8"],
  ["Ugrave", "\u00d9"],
  ["Uacute", "\u00da"],
  ["Ucirc", "\u00db"],
  ["Uuml", "\u00dc"],
  ["Yacute", "\u00dd"],
  ["THORN", "\u00de"],
  ["szlig", "\u00df"],
  ["agrave", "\u00e0"],
  ["aacute", "\u00e1"],
  ["acirc", "\u00e2"],
  ["atilde", "\u00e3"],
  ["auml", "\u00e4"],
  ["aring", "\u00e5"],
  ["aelig", "\u00e6"],
  ["ccedil", "\u00e7"],
  ["egrave", "\u00e8"],
  ["eacute", "\u00e9"],
  ["ecirc", "\u00ea"],
  ["euml", "\u00eb"],
  ["igrave", "\u00ec"],
  ["iacute", "\u00ed"],
  ["icirc", "\u00ee"],
  ["iuml", "\u00ef"],
  ["eth", "\u00f0"],
  ["ntilde", "\u00f1"],
  ["ograve", "\u00f2"],
  ["oacute", "\u00f3"],
  ["ocirc", "\u00f4"],
  ["otilde", "\u00f5"],
  ["ouml", "\u00f6"],
  ["divide", "\u00f7"],
  ["oslash", "\u00f8"],
  ["ugrave", "\u00f9"],
  ["uacute", "\u00fa"],
  ["ucirc", "\u00fb"],
  ["uuml", "\u00fc"],
  ["yacute", "\u00fd"],
  ["thorn", "\u00fe"],
  ["yuml", "\u00ff"],

  /* Characters outside Latin-1 (Unicode code points above U+00FF) */
  ["OElig", "\u0152"],
  ["oelig", "\u0153"],
  ["Scaron", "\u0160"],
  ["scaron", "\u0161"],
  ["Yuml", "\u0178"],
  ["fnof", "\u0192"],
  ["circ", "\u02c6"],
  ["tilde", "\u02dc"],
  ["Alpha", "\u0391"],
  ["Beta", "\u0392"],
  ["Gamma", "\u0393"],
  ["Delta", "\u0394"],
  ["Epsilon", "\u0395"],
  ["Zeta", "\u0396"],
  ["Eta", "\u0397"],
  ["Theta", "\u0398"],
  ["Iota", "\u0399"],
  ["Kappa", "\u039a"],
  ["Lambda", "\u039b"],
  ["Mu", "\u039c"],
  ["Nu", "\u039d"],
  ["Xi", "\u039e"],
  ["Omicron", "\u039f"],
  ["Pi", "\u03a0"],
  ["Rho", "\u03a1"],
  ["Sigma", "\u03a3"],
  ["Tau", "\u03a4"],
  ["Upsilon", "\u03a5"],
  ["Phi", "\u03a6"],
  ["Chi", "\u03a7"],
  ["Psi", "\u03a8"],
  ["Omega", "\u03a9"],
  ["alpha", "\u03b1"],
  ["beta", "\u03b2"],
  ["gamma", "\u03b3"],
  ["delta", "\u03b4"],
  ["epsilon", "\u03b5"],
  ["zeta", "\u03b6"],
  ["eta", "\u03b7"],
  ["theta", "\u03b8"],
  ["iota", "\u03b9"],
  ["kappa", "\u03ba"],
  ["lambda", "\u03bb"],
  ["mu", "\u03bc"],
  ["nu", "\u03bd"],
  ["xi", "\u03be"],
  ["omicron", "\u03bf"],
  ["pi", "\u03c0"],
  ["rho", "\u03c1"],
  ["sigmaf", "\u03c2"],
  ["sigma", "\u03c3"],
  ["tau", "\u03c4"],
  ["upsilon", "\u03c5"],
  ["phi", "\u03c6"],
  ["chi", "\u03c7"],
  ["psi", "\u03c8"],
  ["omega", "\u03c9"],
  ["thetasym", "\u03d1"],
  ["upsih", "\u03d2"],
  ["piv", "\u03d6"],
  ["ensp", "\u2002"],
  ["emsp", "\u2003"],
  ["thinsp", "\u2009"],
  ["zwnj", "\u200c"],
  ["zwj", "\u200d"],
  ["lrm", "\u200e"],
  ["rlm", "\u200f"],
  ["ndash", "\u2013"],
  ["mdash", "\u2014"],
  ["lsquo", "\u2018"],
  ["rsquo", "\u2019"],
  ["sbquo", "\u201a"],
  ["ldquo", "\u201c"],
  ["rdquo", "\u201d"],
  ["bdquo", "\u201e"],
  ["dagger", "\u2020"],
  ["Dagger", "\u2021"],
  ["bull", "\u2022"],
  ["hellip", "\u2026"],
  ["permil", "\u2030"],
  ["prime", "\u2032"],
  ["Prime", "\u2033"],
  ["lsaquo", "\u2039"],
  ["rsaquo", "\u203a"],
  ["oline", "\u203e"],
  ["frasl", "\u2044"],
  ["euro", "\u20ac"],
  ["image", "\u2111"],
  ["weierp", "\u2118"],
  ["real", "\u211c"],
  ["trade", "\u2122"],
  ["alefsym", "\u2135"],
  ["larr", "\u2190"],
  ["uarr", "\u2191"],
  ["rarr", "\u2192"],
  ["darr", "\u2193"],
  ["harr", "\u2194"],
  ["crarr", "\u21b5"],
  ["lArr", "\u21d0"],
  ["uArr", "\u21d1"],
  ["rArr", "\u21d2"],
  ["dArr", "\u21d3"],
  ["hArr", "\u21d4"],
  ["forall", "\u2200"],
  ["part", "\u2202"],
  ["exist", "\u2203"],
  ["empty", "\u2205"],
  ["nabla", "\u2207"],
  ["isin", "\u2208"],
  ["notin", "\u2209"],
  ["ni", "\u220b"],
  ["prod", "\u220f"],
  ["sum", "\u2211"],
  ["minus", "\u2212"],
  ["lowast", "\u2217"],
  ["radic", "\u221a"],
  ["prop", "\u221d"],
  ["infin", "\u221e"],
  ["ang", "\u2220"],
  ["and", "\u2227"],
  ["or", "\u2228"],
  ["cap", "\u2229"],
  ["cup", "\u222a"],
  ["int", "\u222b"],
  ["there4", "\u2234"],
  ["sim", "\u223c"],
  ["cong", "\u2245"],
  ["asymp", "\u2248"],
  ["ne", "\u2260"],
  ["equiv", "\u2261"],
  ["le", "\u2264"],
  ["ge", "\u2265"],
  ["sub", "\u2282"],
  ["sup", "\u2283"],
  ["nsub", "\u2284"],
  ["sube", "\u2286"],
  ["supe", "\u2287"],
  ["oplus", "\u2295"],
  ["otimes", "\u2297"],
  ["perp", "\u22a5"],
  ["sdot", "\u22c5"],
  ["lceil", "\u2308"],
  ["rceil", "\u2309"],
  ["lfloor", "\u230a"],
  ["rfloor", "\u230b"],
  ["lang", "\u2329"],
  ["rang", "\u232a"],
  ["loz", "\u25ca"],
  ["spades", "\u2660"],
  ["clubs", "\u2663"],
  ["hearts", "\u2665"],
  ["diams", "\u2666"]
]);

/*
 * encode_entities (str)
 *
 * Replaces every occurrence of a character listed in `charList' with the
 * matching `&name;' reference from `entityList'.
 *
 * str:     string to encode.
 * Returns: the encoded string; characters not in the table are copied as is.
 *
 * The whole string is rewritten in a single pass. Replacing one entity after
 * the other in a loop never terminates for `&', because the replacement
 * `&amp;' contains a `&' itself.
 */
function encode_entities (str)
{
  var count = Math.min (entityList.length, charList.length);
  var entityFor = {};
  var alternatives = [];

  for (var i = 0; i < count; i++)
  {
    var mchar = charList[i];

    /* An empty string would match at every position. For characters listed
     * more than once the first table entry wins. */
    if ((mchar === "") || Object.prototype.hasOwnProperty.call (entityFor, mchar))
      continue;

    entityFor[mchar] = entityList[i];
    /* Table entries are literal text, so quote regex metacharacters. */
    alternatives.push (mchar.replace (/[.*+?^${}()|[\]\\]/g, "\\$&"));
  }

  if (alternatives.length === 0)
    return (str);

  return (str.replace (new RegExp (alternatives.join ("|"), "g"),
      function (match)
      {
        return ("&" + entityFor[match] + ";");
      }));
}

/*
 * decode_entities (str)
 *
 * Replaces every `&name;' reference whose name is listed in `entityList' with
 * the matching character from `charList'.
 *
 * str:     string to decode.
 * Returns: the decoded string. Unknown names and numeric references such as
 *          `&#38;' are left untouched, because only alphanumeric names are
 *          matched.
 *
 * Each reference is decoded exactly once, so `&amp;lt;' yields the text
 * `&lt;' rather than `<'.
 */
function decode_entities (str)
{
  /* Cheap exit for strings without anything that looks like an entity. */
  if (!/&[A-Za-z0-9]+;/.test (str))
    return (str);

  var count = Math.min (entityList.length, charList.length);
  var charFor = {};

  for (var i = 0; i < count; i++)
  {
    /* For names listed more than once the first table entry wins. */
    if (!Object.prototype.hasOwnProperty.call (charFor, entityList[i]))
      charFor[entityList[i]] = charList[i];
  }

  return (str.replace (/&([A-Za-z0-9]+);/g,
      function (match, name)
      {
        if (Object.prototype.hasOwnProperty.call (charFor, name))
          return (charFor[name]);
        return (match);
      }));
}