summaryrefslogtreecommitdiff
path: root/lib/rbot
diff options
context:
space:
mode:
authorGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2006-10-14 13:11:02 +0000
committerGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2006-10-14 13:11:02 +0000
commit8678df0ab320408813a4e33c6d049157bd20a7f1 (patch)
treee51e9b2348e1505ee4624f03927d09559f320f62 /lib/rbot
parent6cbe66cdd40ef0bc0d25ba37c22bb7e08404a09f (diff)
Create Utils.decode_html_entities, inspired from equivalent code in the url plugin, can now be used by everybody. Can make use of the htmlentities ruby package, if found
Diffstat (limited to 'lib/rbot')
-rw-r--r--lib/rbot/utils.rb287
1 files changed, 287 insertions, 0 deletions
diff --git a/lib/rbot/utils.rb b/lib/rbot/utils.rb
index 75277c51..eabe5c17 100644
--- a/lib/rbot/utils.rb
+++ b/lib/rbot/utils.rb
@@ -1,5 +1,275 @@
require 'net/http'
require 'uri'
+# Try to load the optional 'htmlentities' package. The global flag
+# $we_have_html_entities_decoder records whether it is available; when it is
+# not, UNESCAPE_TABLE is defined as a small fallback table mapping entity
+# names (and '#'-prefixed decimal codes) to plain-text replacements.
+begin
+ require 'htmlentities'
+ # Kernel#require returns false when the library was already loaded, so its
+ # return value cannot serve as the availability flag: getting here without
+ # a LoadError is what shows the decoder can be used.
+ $we_have_html_entities_decoder = true
+rescue LoadError
+ $we_have_html_entities_decoder = false
+ UNESCAPE_TABLE = {
+ 'raquo' => '>>',
+ 'quot' => '"',
+ 'apos' => '\'',
+ 'micro' => 'u',
+ 'copy' => '(c)',
+ 'trade' => '(tm)',
+ 'reg' => '(R)',
+ '#174' => '(R)',
+ '#8220' => '"',
+ '#8221' => '"',
+ '#8212' => '--',
+ '#39' => '\'',
+ # Everything between =begin and =end below is disabled.
+ # NOTE(review): before enabling it, note that the values are single-quoted,
+ # so an entry like '\xe5' is the literal backslash sequence rather than one
+ # byte, and that 'micro' and 'raquo' would override the entries above.
+=begin
+ # extra codes, for future use...
+ 'zwnj' => '&#8204;',
+ 'aring' => '\xe5',
+ 'gt' => '>',
+ 'yen' => '\xa5',
+ 'ograve' => '\xf2',
+ 'Chi' => '&#935;',
+ 'bull' => '&#8226;',
+ 'Egrave' => '\xc8',
+ 'Ntilde' => '\xd1',
+ 'upsih' => '&#978;',
+ 'Yacute' => '\xdd',
+ 'asymp' => '&#8776;',
+ 'radic' => '&#8730;',
+ 'otimes' => '&#8855;',
+ 'nabla' => '&#8711;',
+ 'aelig' => '\xe6',
+ 'oelig' => '&#339;',
+ 'equiv' => '&#8801;',
+ 'Psi' => '&#936;',
+ 'auml' => '\xe4',
+ 'circ' => '&#710;',
+ 'Acirc' => '\xc2',
+ 'Epsilon' => '&#917;',
+ 'Yuml' => '&#376;',
+ 'Eta' => '&#919;',
+ 'lt' => '<',
+ 'Icirc' => '\xce',
+ 'Upsilon' => '&#933;',
+ 'ndash' => '&#8211;',
+ 'there4' => '&#8756;',
+ 'Prime' => '&#8243;',
+ 'prime' => '&#8242;',
+ 'psi' => '&#968;',
+ 'Kappa' => '&#922;',
+ 'rsaquo' => '&#8250;',
+ 'Tau' => '&#932;',
+ 'darr' => '&#8595;',
+ 'ocirc' => '\xf4',
+ 'lrm' => '&#8206;',
+ 'zwj' => '&#8205;',
+ 'cedil' => '\xb8',
+ 'Ecirc' => '\xca',
+ 'not' => '\xac',
+ 'amp' => '&',
+ 'AElig' => '\xc6',
+ 'oslash' => '\xf8',
+ 'acute' => '\xb4',
+ 'lceil' => '&#8968;',
+ 'laquo' => '\xab',
+ 'shy' => '\xad',
+ 'rdquo' => '&#8221;',
+ 'ge' => '&#8805;',
+ 'Igrave' => '\xcc',
+ 'Ograve' => '\xd2',
+ 'euro' => '&#8364;',
+ 'dArr' => '&#8659;',
+ 'sdot' => '&#8901;',
+ 'nbsp' => '\xa0',
+ 'lfloor' => '&#8970;',
+ 'lArr' => '&#8656;',
+ 'Auml' => '\xc4',
+ 'larr' => '&#8592;',
+ 'Atilde' => '\xc3',
+ 'Otilde' => '\xd5',
+ 'szlig' => '\xdf',
+ 'clubs' => '&#9827;',
+ 'diams' => '&#9830;',
+ 'agrave' => '\xe0',
+ 'Ocirc' => '\xd4',
+ 'Iota' => '&#921;',
+ 'Theta' => '&#920;',
+ 'Pi' => '&#928;',
+ 'OElig' => '&#338;',
+ 'Scaron' => '&#352;',
+ 'frac14' => '\xbc',
+ 'egrave' => '\xe8',
+ 'sub' => '&#8834;',
+ 'iexcl' => '\xa1',
+ 'frac12' => '\xbd',
+ 'sbquo' => '&#8218;',
+ 'ordf' => '\xaa',
+ 'sum' => '&#8721;',
+ 'prop' => '&#8733;',
+ 'Uuml' => '\xdc',
+ 'ntilde' => '\xf1',
+ 'sup' => '&#8835;',
+ 'theta' => '&#952;',
+ 'prod' => '&#8719;',
+ 'nsub' => '&#8836;',
+ 'hArr' => '&#8660;',
+ 'rlm' => '&#8207;',
+ 'THORN' => '\xde',
+ 'infin' => '&#8734;',
+ 'yuml' => '\xff',
+ 'Mu' => '&#924;',
+ 'le' => '&#8804;',
+ 'Eacute' => '\xc9',
+ 'thinsp' => '&#8201;',
+ 'ecirc' => '\xea',
+ 'bdquo' => '&#8222;',
+ 'Sigma' => '&#931;',
+ 'fnof' => '&#402;',
+ 'Aring' => '\xc5',
+ 'tilde' => '&#732;',
+ 'frac34' => '\xbe',
+ 'emsp' => '&#8195;',
+ 'mdash' => '&#8212;',
+ 'uarr' => '&#8593;',
+ 'permil' => '&#8240;',
+ 'Ugrave' => '\xd9',
+ 'rarr' => '&#8594;',
+ 'Agrave' => '\xc0',
+ 'chi' => '&#967;',
+ 'forall' => '&#8704;',
+ 'eth' => '\xf0',
+ 'rceil' => '&#8969;',
+ 'iuml' => '\xef',
+ 'gamma' => '&#947;',
+ 'lambda' => '&#955;',
+ 'harr' => '&#8596;',
+ 'rang' => '&#9002;',
+ 'xi' => '&#958;',
+ 'dagger' => '&#8224;',
+ 'divide' => '\xf7',
+ 'Ouml' => '\xd6',
+ 'image' => '&#8465;',
+ 'alefsym' => '&#8501;',
+ 'igrave' => '\xec',
+ 'otilde' => '\xf5',
+ 'Oacute' => '\xd3',
+ 'sube' => '&#8838;',
+ 'alpha' => '&#945;',
+ 'frasl' => '&#8260;',
+ 'ETH' => '\xd0',
+ 'lowast' => '&#8727;',
+ 'Nu' => '&#925;',
+ 'plusmn' => '\xb1',
+ 'Euml' => '\xcb',
+ 'real' => '&#8476;',
+ 'sup1' => '\xb9',
+ 'sup2' => '\xb2',
+ 'sup3' => '\xb3',
+ 'Oslash' => '\xd8',
+ 'Aacute' => '\xc1',
+ 'cent' => '\xa2',
+ 'oline' => '&#8254;',
+ 'Beta' => '&#914;',
+ 'perp' => '&#8869;',
+ 'Delta' => '&#916;',
+ 'loz' => '&#9674;',
+ 'pi' => '&#960;',
+ 'iota' => '&#953;',
+ 'empty' => '&#8709;',
+ 'euml' => '\xeb',
+ 'brvbar' => '\xa6',
+ 'iacute' => '\xed',
+ 'para' => '\xb6',
+ 'micro' => '\xb5',
+ 'cup' => '&#8746;',
+ 'weierp' => '&#8472;',
+ 'uuml' => '\xfc',
+ 'part' => '&#8706;',
+ 'icirc' => '\xee',
+ 'delta' => '&#948;',
+ 'omicron' => '&#959;',
+ 'upsilon' => '&#965;',
+ 'Iuml' => '\xcf',
+ 'Lambda' => '&#923;',
+ 'Xi' => '&#926;',
+ 'kappa' => '&#954;',
+ 'ccedil' => '\xe7',
+ 'Ucirc' => '\xdb',
+ 'cap' => '&#8745;',
+ 'mu' => '&#956;',
+ 'scaron' => '&#353;',
+ 'lsquo' => '&#8216;',
+ 'isin' => '&#8712;',
+ 'Zeta' => '&#918;',
+ 'supe' => '&#8839;',
+ 'deg' => '\xb0',
+ 'and' => '&#8743;',
+ 'tau' => '&#964;',
+ 'pound' => '\xa3',
+ 'hellip' => '&#8230;',
+ 'curren' => '\xa4',
+ 'int' => '&#8747;',
+ 'ucirc' => '\xfb',
+ 'rfloor' => '&#8971;',
+ 'ensp' => '&#8194;',
+ 'crarr' => '&#8629;',
+ 'ugrave' => '\xf9',
+ 'notin' => '&#8713;',
+ 'exist' => '&#8707;',
+ 'uArr' => '&#8657;',
+ 'cong' => '&#8773;',
+ 'Dagger' => '&#8225;',
+ 'oplus' => '&#8853;',
+ 'times' => '\xd7',
+ 'atilde' => '\xe3',
+ 'piv' => '&#982;',
+ 'ni' => '&#8715;',
+ 'Phi' => '&#934;',
+ 'lsaquo' => '&#8249;',
+ 'Uacute' => '\xda',
+ 'Omicron' => '&#927;',
+ 'ang' => '&#8736;',
+ 'ne' => '&#8800;',
+ 'iquest' => '\xbf',
+ 'eta' => '&#951;',
+ 'yacute' => '\xfd',
+ 'Rho' => '&#929;',
+ 'uacute' => '\xfa',
+ 'Alpha' => '&#913;',
+ 'zeta' => '&#950;',
+ 'Omega' => '&#937;',
+ 'nu' => '&#957;',
+ 'sim' => '&#8764;',
+ 'sect' => '\xa7',
+ 'phi' => '&#966;',
+ 'sigmaf' => '&#962;',
+ 'macr' => '\xaf',
+ 'minus' => '&#8722;',
+ 'Ccedil' => '\xc7',
+ 'ordm' => '\xba',
+ 'epsilon' => '&#949;',
+ 'beta' => '&#946;',
+ 'rArr' => '&#8658;',
+ 'rho' => '&#961;',
+ 'aacute' => '\xe1',
+ 'eacute' => '\xe9',
+ 'omega' => '&#969;',
+ 'middot' => '\xb7',
+ 'Gamma' => '&#915;',
+ 'Iacute' => '\xcd',
+ 'lang' => '&#9001;',
+ 'spades' => '&#9824;',
+ 'rsquo' => '&#8217;',
+ 'uml' => '\xa8',
+ 'thorn' => '\xfe',
+ 'ouml' => '\xf6',
+ 'thetasym' => '&#977;',
+ 'or' => '&#8744;',
+ 'raquo' => '\xbb',
+ 'acirc' => '\xe2',
+ 'ldquo' => '&#8220;',
+ 'hearts' => '&#9829;',
+ 'sigma' => '&#963;',
+ 'oacute' => '\xf3',
+=end
+ }
+end
module Irc
@@ -79,5 +349,22 @@ module Irc
return nil
end
end
+
+ # Decode the HTML entities found in +str+ into plain text.
+ #
+ # When the htmlentities package is loaded, decoding is delegated to
+ # HTMLEntities.decode_entities. Otherwise every well-formed character
+ # reference (&name; &#123; or &#x7B;) is looked up in UNESCAPE_TABLE and
+ # replaced by its entry, or by '*' when the table has no entry for it.
+ # The fallback path returns a new string built by gsub.
+ def Utils.decode_html_entities(str)
+ # The second test covers the case where the library was loaded although
+ # the global flag was left false.
+ if $we_have_html_entities_decoder || defined?(HTMLEntities)
+ return HTMLEntities.decode_entities(str)
+ else
+ # Match only well-formed references, so that a bare ampersand followed
+ # later by a semicolon ("AT&T is big; really") is left untouched.
+ str.gsub(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/) {
+ symbol = $1
+ case symbol
+ when /\A#[xX](.+)/
+ # hexadecimal reference: the table is keyed by decimal code
+ symbol = "##{$1.hex}"
+ when /\A#(.+)/
+ # remove the 0-padding from decimal references
+ symbol = "##{$1.to_i}"
+ end
+
+ # output the symbol's irc-translated character, or a * if it's unknown
+ UNESCAPE_TABLE[symbol] || '*'
+ }
+ end
+ end
end
end