diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-18 23:40:44 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-18 23:40:44 +0000 |
commit | 93876c8804eb04a1ee7a43943d6a07c34ff6f0fc (patch) | |
tree | c73f4dbfd92120ebcbdcc35596aa250e18f9e4f9 | |
parent | 9ec5b9bab1b41cd2869b583d28afcffbb429d702 (diff) |
first_html_par: after-paragraph matches should prefer divs and spans to other elements
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 17 |
1 files changed, 14 insertions, 3 deletions
```diff
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index 0582cd4b..1b6a0ce9 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -307,7 +307,8 @@ begin
   require 'hpricot'
   module ::Irc
     module Utils
-      AFTER_PAR_PATH = /^(?:div|span|td|tr|tbody|table)$/
+      AFTER_PAR_PATH = /^(?:div|span)$/
+      AFTER_PAR_EX = /^(?:td|tr|tbody|table)$/
       AFTER_PAR_CLASS = /body|message|text/i
     end
   end
@@ -556,13 +557,23 @@ module ::Irc
       # 'message' or 'text' in their class to mark actual text. Since we want
       # the class match to be partial and case insensitive, we collect
       # the common elements that may have this class and then filter out those
-      # we don't need
+      # we don't need. If no divs or spans are found, we'll accept additional
+      # elements too (td, tr, tbody, table).
       if by_span.nil?
         by_span = Hpricot::Elements[]
+        extra = Hpricot::Elements[]
         doc.search("*") { |el|
           next if el.bogusetag?
-          by_span.push el if el.pathname =~ AFTER_PAR_PATH and (el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS)
+          case el.pathname
+          when AFTER_PAR_PATH
+            by_span.push el if el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS
+          when AFTER_PAR_EX
+            extra.push el if el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS
+          end
         }
+        if by_span.empty? and not extra.empty?
+          by_span.concat extra
+        end
         debug "other \#1: found: #{by_span.pretty_inspect}"
       end
```