diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-21 09:38:35 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-21 09:38:35 +0000 |
commit | 6ff9f6e32f422de0ef7bc428037fe6ec896d0ead (patch) | |
tree | 16947b078598cc1307528c5a26907e16acc4adeb | |
parent | 323fbbf6a49a376683ddc1644e6dc33a29a068ac (diff) |
Utils.decode_html_entities: get rid of commented table and implement latin-1 (and then some) decoding
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 312 |
1 files changed, 64 insertions, 248 deletions
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index 11526370..e251178e 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -44,254 +44,70 @@ rescue LoadError 'gt' => '>', 'hellip' => '…', 'nbsp' => ' ', -=begin - # extras codes, for future use... - 'zwnj' => '‌', - 'aring' => '\xe5', - 'gt' => '>', - 'yen' => '\xa5', - 'ograve' => '\xf2', - 'Chi' => 'Χ', - 'bull' => '•', - 'Egrave' => '\xc8', - 'Ntilde' => '\xd1', - 'upsih' => 'ϒ', - 'Yacute' => '\xdd', - 'asymp' => '≈', - 'radic' => '√', - 'otimes' => '⊗', - 'nabla' => '∇', - 'aelig' => '\xe6', - 'oelig' => 'œ', - 'equiv' => '≡', - 'Psi' => 'Ψ', - 'auml' => '\xe4', - 'circ' => 'ˆ', - 'Acirc' => '\xc2', - 'Epsilon' => 'Ε', - 'Yuml' => 'Ÿ', - 'Eta' => 'Η', - 'Icirc' => '\xce', - 'Upsilon' => 'Υ', - 'ndash' => '–', - 'there4' => '∴', - 'Prime' => '″', - 'prime' => '′', - 'psi' => 'ψ', - 'Kappa' => 'Κ', - 'rsaquo' => '›', - 'Tau' => 'Τ', - 'darr' => '↓', - 'ocirc' => '\xf4', - 'lrm' => '‎', - 'zwj' => '‍', - 'cedil' => '\xb8', - 'Ecirc' => '\xca', - 'not' => '\xac', - 'AElig' => '\xc6', - 'oslash' => '\xf8', - 'acute' => '\xb4', - 'lceil' => '⌈', - 'shy' => '\xad', - 'rdquo' => '”', - 'ge' => '≥', - 'Igrave' => '\xcc', - 'Ograve' => '\xd2', - 'euro' => '€', - 'dArr' => '⇓', - 'sdot' => '⋅', - 'nbsp' => '\xa0', - 'lfloor' => '⌊', - 'lArr' => '⇐', - 'Auml' => '\xc4', - 'larr' => '←', - 'Atilde' => '\xc3', - 'Otilde' => '\xd5', - 'szlig' => '\xdf', - 'clubs' => '♣', - 'diams' => '♦', - 'agrave' => '\xe0', - 'Ocirc' => '\xd4', - 'Iota' => 'Ι', - 'Theta' => 'Θ', - 'Pi' => 'Π', - 'OElig' => 'Œ', - 'Scaron' => 'Š', - 'frac14' => '\xbc', - 'egrave' => '\xe8', - 'sub' => '⊂', - 'iexcl' => '\xa1', - 'frac12' => '\xbd', - 'sbquo' => '‚', - 'ordf' => '\xaa', - 'sum' => '∑', - 'prop' => '∝', - 'Uuml' => '\xdc', - 'ntilde' => '\xf1', - 'sup' => '⊃', - 'theta' => 'θ', - 'prod' => '∏', - 'nsub' => '⊄', - 'hArr' => '⇔', - 'rlm' => '‏', - 'THORN' => '\xde', - 'infin' => '∞', - 'yuml' => '\xff', - 'Mu' => 'Μ', - 'le' => '≤', - 'Eacute' => '\xc9', - 'thinsp' => ' ', - 'ecirc' => '\xea', - 'bdquo' => '„', - 'Sigma' => 'Σ', - 'fnof' => 'ƒ', - 'Aring' => '\xc5', - 'tilde' => '˜', - 'frac34' => '\xbe', - 'emsp' => ' ', - 'mdash' => '—', - 'uarr' => '↑', - 'permil' => '‰', - 'Ugrave' => '\xd9', - 'rarr' => '→', - 'Agrave' => '\xc0', - 'chi' => 'χ', - 'forall' => '∀', - 'eth' => '\xf0', - 'rceil' => '⌉', - 'iuml' => '\xef', - 'gamma' => 'γ', - 'lambda' => 'λ', - 'harr' => '↔', - 'rang' => '〉', - 'xi' => 'ξ', - 'dagger' => '†', - 'divide' => '\xf7', - 'Ouml' => '\xd6', - 'image' => 'ℑ', - 'alefsym' => 'ℵ', - 'igrave' => '\xec', - 'otilde' => '\xf5', - 'Oacute' => '\xd3', - 'sube' => '⊆', - 'alpha' => 'α', - 'frasl' => '⁄', - 'ETH' => '\xd0', - 'lowast' => '∗', - 'Nu' => 'Ν', - 'plusmn' => '\xb1', - 'Euml' => '\xcb', - 'real' => 'ℜ', - 'sup1' => '\xb9', - 'sup2' => '\xb2', - 'sup3' => '\xb3', - 'Oslash' => '\xd8', - 'Aacute' => '\xc1', - 'cent' => '\xa2', - 'oline' => '‾', - 'Beta' => 'Β', - 'perp' => '⊥', - 'Delta' => 'Δ', - 'loz' => '◊', - 'pi' => 'π', - 'iota' => 'ι', - 'empty' => '∅', - 'euml' => '\xeb', - 'brvbar' => '\xa6', - 'iacute' => '\xed', - 'para' => '\xb6', - 'micro' => '\xb5', - 'cup' => '∪', - 'weierp' => '℘', - 'uuml' => '\xfc', - 'part' => '∂', - 'icirc' => '\xee', - 'delta' => 'δ', - 'omicron' => 'ο', - 'upsilon' => 'υ', - 'Iuml' => '\xcf', - 'Lambda' => 'Λ', - 'Xi' => 'Ξ', - 'kappa' => 'κ', - 'ccedil' => '\xe7', - 'Ucirc' => '\xdb', - 'cap' => '∩', - 'mu' => 'μ', - 'scaron' => 'š', - 'lsquo' => '‘', - 'isin' => '∈', - 'Zeta' => 'Ζ', - 'supe' => '⊇', - 'deg' => '\xb0', - 'and' => '∧', - 'tau' => 'τ', - 'pound' => '\xa3', - 'hellip' => '…', - 'curren' => '\xa4', - 'int' => '∫', - 'ucirc' => '\xfb', - 'rfloor' => '⌋', - 'ensp' => ' ', - 'crarr' => '↵', - 'ugrave' => '\xf9', - 'notin' => '∉', - 'exist' => '∃', - 'uArr' => '⇑', - 'cong' => '≅', - 'Dagger' => '‡', - 'oplus' => '⊕', - 'times' => '\xd7', - 'atilde' => '\xe3', - 'piv' => 'ϖ', - 'ni' => '∋', - 'Phi' => 'Φ', - 'lsaquo' => '‹', - 'Uacute' => '\xda', - 'Omicron' => 'Ο', - 'ang' => '∠', - 'ne' => '≠', - 'iquest' => '\xbf', - 'eta' => 'η', - 'yacute' => '\xfd', - 'Rho' => 'Ρ', - 'uacute' => '\xfa', - 'Alpha' => 'Α', - 'zeta' => 'ζ', - 'Omega' => 'Ω', - 'nu' => 'ν', - 'sim' => '∼', - 'sect' => '\xa7', - 'phi' => 'φ', - 'sigmaf' => 'ς', - 'macr' => '\xaf', - 'minus' => '−', - 'Ccedil' => '\xc7', - 'ordm' => '\xba', - 'epsilon' => 'ε', - 'beta' => 'β', - 'rArr' => '⇒', - 'rho' => 'ρ', - 'aacute' => '\xe1', - 'eacute' => '\xe9', - 'omega' => 'ω', - 'middot' => '\xb7', - 'Gamma' => 'Γ', - 'Iacute' => '\xcd', - 'lang' => '〈', - 'spades' => '♠', - 'rsquo' => '’', - 'uml' => '\xa8', - 'thorn' => '\xfe', - 'ouml' => '\xf6', - 'thetasym' => 'ϑ', - 'or' => '∨', - 'raquo' => '\xbb', - 'acirc' => '\xe2', - 'ldquo' => '“', - 'hearts' => '♥', - 'sigma' => 'σ', - 'oacute' => '\xf3', -=end + 'Agrave' => 'À', + 'Aacute' => 'Á', + 'Acirc' => 'Â', + 'Atilde' => 'Ã', + 'Auml' => 'Ä', + 'Aring' => 'Å', + 'AElig' => 'Æ', + 'OElig' => 'Œ', + 'Ccedil' => 'Ç', + 'Egrave' => 'È', + 'Eacute' => 'É', + 'Ecirc' => 'Ê', + 'Euml' => 'Ë', + 'Igrave' => 'Ì', + 'Iacute' => 'Í', + 'Icirc' => 'Î', + 'Iuml' => 'Ï', + 'ETH' => 'Ð', + 'Ntilde' => 'Ñ', + 'Ograve' => 'Ò', + 'Oacute' => 'Ó', + 'Ocirc' => 'Ô', + 'Otilde' => 'Õ', + 'Ouml' => 'Ö', + 'Oslash' => 'Ø', + 'Ugrave' => 'Ù', + 'Uacute' => 'Ú', + 'Ucirc' => 'Û', + 'Uuml' => 'Ü', + 'Yacute' => 'Ý', + 'THORN' => 'Þ', + 'szlig' => 'ß', + 'agrave' => 'à', + 'aacute' => 'á', + 'acirc' => 'â', + 'atilde' => 'ã', + 'auml' => 'ä', + 'aring' => 'å', + 'aelig' => 'æ', + 'oelig' => 'œ', + 'ccedil' => 'ç', + 'egrave' => 'è', + 'eacute' => 'é', + 'ecirc' => 'ê', + 'euml' => 'ë', + 'igrave' => 'ì', + 'iacute' => 'í', + 'icirc' => 'î', + 'iuml' => 'ï', + 'eth' => 'ð', + 'ntilde' => 'ñ', + 'ograve' => 'ò', + 'oacute' => 'ó', + 'ocirc' => 'ô', + 'otilde' => 'õ', + 'ouml' => 'ö', + 'oslash' => 'ø', + 'ugrave' => 'ù', + 'uacute' => 'ú', + 'ucirc' => 'û', + 'uuml' => 'ü', + 'yacute' => 'ý', + 'thorn' => 'þ', + 'yuml' => 'ÿ' } end end |