diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-18 06:15:45 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-18 06:15:45 +0000 |
commit | 2da3a85740963a5dc4e9390115e13139f97511e2 (patch) | |
tree | 9b8df767c92c1ab1d406e1bad9d832b0b19df801 /lib | |
parent | 6b57387fd524539e831fc434f626659d7d07d61c (diff) |
HTML processing refactoring: HTML title extraction is now a String method
Diffstat (limited to 'lib')
-rw-r--r-- | lib/rbot/core/utils/extends.rb | 14 | ++++++++++++++ |
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 5 | ++++- |
2 files changed, 18 insertions, 1 deletions
```diff
diff --git a/lib/rbot/core/utils/extends.rb b/lib/rbot/core/utils/extends.rb
index e0c781b1..0b07257a 100644
--- a/lib/rbot/core/utils/extends.rb
+++ b/lib/rbot/core/utils/extends.rb
@@ -178,6 +178,20 @@ class ::String
   def riphtml
     self.gsub(/<[^>]+>/, '').gsub(/&amp;/,'&').gsub(/&quot;/,'"').gsub(/&lt;/,'<').gsub(/&gt;/,'>').gsub(/&ellip;/,'...').gsub(/&apos;/, "'").gsub("\n",'')
   end
+
+  # This method tries to find an HTML title in the string,
+  # and returns it if found
+  def get_html_title
+    return unless Irc::Utils::TITLE_REGEX.match(self)
+    $1
+  end
+
+  # This method returns the IRC-formatted version of an
+  # HTML title found in the string
+  def ircify_html_title
+    return unless Irc::Utils::TITLE_REGEX.match(self)
+    $1.ircify_html
+  end
 end
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index a4f071a2..0b10b52f 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -317,7 +317,10 @@ rescue LoadError
   else
     module ::Irc
       module Utils
-        # Define some regular expressions to be used by first_html_par
+        # Some regular expressions to manage HTML data
+
+        # Title
+        TITLE_REGEX = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
         # H1, H2, etc
         HX_REGEX = /<h(\d)(?:\s+[^>]*)?>(.*?)<\/h\1>/im
```