diff options
Diffstat (limited to 'lib/rbot')
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 28 |
1 files changed, 28 insertions, 0 deletions
# Try to grab and IRCify the first HTML paragraph (<p> tag) in the given
# string. If possible, grab the first one that follows the first <h1>
# heading.
#
# xml is the markup to search; nil is treated as containing no paragraphs.
#
# Paragraphs for which ircify_html returns an empty string are skipped.
# When there is no <h1> heading, or no paragraph after it yields non-empty
# text, the whole document is searched again from its beginning.
#
# Returns the result of ircify_html for the chosen paragraph, or an empty
# String when no suitable paragraph is found.
def Utils.ircify_first_html_par(xml)
  return String.new if xml.nil?

  par_rx = /<p(?:\s+[^>]*)?>.*?<\/p>/im

  # Search areas in order of preference: the text following the first <h1>
  # heading (when there is one), then the document as a whole.
  areas = []
  header = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im)
  if header
    debug "Found header: #{header[1].inspect}"
    areas << header.post_match
  end
  areas << xml

  areas.each do |rest|
    # Walk the paragraphs one at a time. The remaining text is taken from
    # the explicit MatchData instead of the implicit $' global, so other
    # regexp operations cannot change what is scanned next.
    while (par = par_rx.match(rest))
      txt = par[0].ircify_html
      return txt unless txt.empty?
      rest = par.post_match
    end
  end

  String.new
end