summaryrefslogtreecommitdiff
path: root/lib/rbot
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbot')
-rw-r--r--lib/rbot/core/utils/utils.rb28
1 files changed, 28 insertions, 0 deletions
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index fc89e1c3..52375334 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -415,5 +415,33 @@ module ::Irc
}
end
end
+
+ # Try to grab and IRCify the first HTML paragraph (<p> element) in the string +xml+, preferring the first one after
+ # the first <h1> heading that converts to non-empty text; returns that text, or an empty String if none is found
+ def Utils.ircify_first_html_par(xml)
+ header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im) # first <h1> element; case-insensitive, may span lines
+ txt = String.new # result; stays empty until some paragraph converts to non-empty text
+ if header_found
+ debug "Found header: #{header_found[1].inspect}"
+ while txt.empty? # from here on header_found is reused to hold the text still to be scanned
+ header_found = $'
+ candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] # next <p>...</p> in the remaining text, or nil
+ break unless candidate # no paragraph left after the heading; txt stays empty
+ txt = candidate.ircify_html # ircify_html is defined elsewhere; presumably HTML -> IRC text (confirm)
+ end
+ end
+ # If we haven't found a first par yet, try to get it from the whole
+ # document
+ if txt.empty? # also reached when there was no <h1> at all
+ header_found = xml # restart the scan from the beginning of the input
+ while txt.empty? # keep going until a paragraph yields non-empty text
+ candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] # next <p>...</p> in the remaining text, or nil
+ break unless candidate # document has no (more) paragraphs
+ txt = candidate.ircify_html # same conversion as in the after-heading scan
+ header_found = $'
+ end
+ end
+ return txt # empty String when no paragraph produced any text
+ end
end
end