summary | refs | log | tree | commit | diff
path: root/lib/rbot
diff options
context:
space:
mode:
author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-09-18 17:31:24 +0000
committer: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-09-18 17:31:24 +0000
commit: adb212bdfc678af04fa438b42ec06047a13a8f2c (patch)
tree: 4d7f0be9c6eb9e5e5cbef2ffc3e6d921b0064452 /lib/rbot
parent: 663a2b1553d400bca97c4490c82822d93dcf0a24 (diff)
first_html_par: build lists 'manually' when using Hpricot
Hpricot selectors (like doc/"css path") don't return elements in their natural (depth-first) document order. To collect matches in that order, walk the tree from the root of the document with custom searches instead of using the selectors.
Diffstat (limited to 'lib/rbot')
-rw-r--r-- lib/rbot/core/utils/utils.rb | 32
1 files changed, 16 insertions, 16 deletions
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index 32b05700..9b678def 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -498,25 +498,26 @@ module ::Irc
txt = String.new
- h = %w{h1 h2 h3 h4 h5 h6}
- p = %w{p}
- ar = []
- h.each { |hx|
- p.each { |px|
- ar << "#{hx}~#{px}"
- }
- }
- h_p_css = ar.join("|")
- debug "css search: #{h_p_css}"
-
pre_h = pars = by_span = nil
while true
debug "Minimum number of spaces: #{min_spaces}"
# Initial attempt: <p> that follows <h\d>
- pre_h = doc/h_p_css if pre_h.nil?
- debug "Hx: found: #{pre_h.pretty_inspect}"
+ if pre_h.nil?
+ pre_h = Hpricot::Elements[]
+ found_h = false
+ doc.root.search("*") { |e|
+ case e.pathname
+ when /^h\d/
+ found_h = true
+ when 'p'
+ pre_h << e if found_h
+ end
+ }
+ debug "Hx: found: #{pre_h.pretty_inspect}"
+ end
+
pre_h.each { |p|
debug p
txt = p.to_html.ircify_html
@@ -551,9 +552,8 @@ module ::Irc
# we don't need
if by_span.nil?
by_span = Hpricot::Elements[]
- pre_pars = doc/"div|span|td|tr|tbody|table"
- pre_pars.each { |el|
- by_span.push el if el[:class] =~ /body|message|text/i
+ doc.root.each("*") { |el|
+ by_span.push el if el.pathname =~ /^(?:div|span|td|tr|tbody|table)$/ and el[:class] =~ /body|message|text/i
}
debug "other \#1: found: #{by_span.pretty_inspect}"
end