diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-02-06 15:08:25 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-02-06 15:08:25 +0000 |
commit | 059f917a709673d1d88f7056b45e86916de29ad4 (patch) | |
tree | c8e88dfb8611cf0132d36065fcbd9ae935eb6846 /lib/rbot | |
parent | 64689811ca5ee4e190d1837463675c68f9a094ff (diff) |
Move code to find and ircify first par from search plugin to utils
Diffstat (limited to 'lib/rbot')
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 28 |
1 files changed, 28 insertions, 0 deletions
# Try to grab and IRCify the first HTML par (<p> tag) in the given string.
# If possible, grab the one after the first h1 heading.
#
# The text following the first <h1>...</h1> is scanned first; if that yields
# nothing (or there is no h1 at all), the whole document is scanned from the
# start. Paragraphs whose ircified form is empty are skipped.
#
# xml:: String holding the HTML document to search
#
# Returns whatever String#ircify_html (defined outside this method) produces
# for the first suitable paragraph, or an empty String if none is found.
def Utils.ircify_first_html_par(xml)
  par_rx = /<p(?:\s+[^>]*)?>.*?<\/p>/im

  header = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im)
  debug "Found header: #{header[1].inspect}" if header

  # Preferred scope first (text after the heading), then the whole document
  scopes = header ? [header.post_match, xml] : [xml]

  scopes.each do |scope|
    rest = scope
    # Walk the paragraphs in document order. Explicit MatchData is used
    # instead of the implicit $' so the scan position is always visible.
    while (par = rest.match(par_rx))
      txt = par[0].ircify_html
      return txt unless txt.empty?
      rest = par.post_match
    end
  end

  # No paragraph with non-empty ircified text anywhere in the document
  String.new
end