From 2da3a85740963a5dc4e9390115e13139f97511e2 Mon Sep 17 00:00:00 2001
From: Giuseppe Bilotta
Date: Tue, 18 Sep 2007 06:15:45 +0000
Subject: HTML processing refactoring: HTML title extraction is now a String method

---
 lib/rbot/core/utils/extends.rb | 14 ++++++++++++++
 lib/rbot/core/utils/utils.rb   |  5 ++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'lib/rbot/core/utils')

diff --git a/lib/rbot/core/utils/extends.rb b/lib/rbot/core/utils/extends.rb
index e0c781b1..0b07257a 100644
--- a/lib/rbot/core/utils/extends.rb
+++ b/lib/rbot/core/utils/extends.rb
@@ -178,6 +178,20 @@ class ::String
   def riphtml
     self.gsub(/<[^>]+>/, '').gsub(/&amp;/,'&').gsub(/&quot;/,'"').gsub(/&lt;/,'<').gsub(/&gt;/,'>').gsub(/&ellip;/,'...').gsub(/&apos;/, "'").gsub("\n",'')
   end
+
+  # This method tries to find an HTML title in the string,
+  # and returns it if found
+  def get_html_title
+    return unless Irc::Utils::TITLE_REGEX.match(self)
+    $1
+  end
+
+  # This method returns the IRC-formatted version of an
+  # HTML title found in the string
+  def ircify_html_title
+    return unless Irc::Utils::TITLE_REGEX.match(self)
+    $1.ircify_html
+  end
 end
 
 
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index a4f071a2..0b10b52f 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -317,7 +317,10 @@ rescue LoadError
   else
     module ::Irc
       module Utils
-        # Define some regular expressions to be used by first_html_par
+        # Some regular expressions to manage HTML data
+
+        # Title
+        TITLE_REGEX = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
 
         # H1, H2, etc
         HX_REGEX = /<h(\d)(?:\s+[^>]*)?>(.*?)<\/h\1>/im
-- 
cgit v1.2.3