summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-02-04 23:44:56 +0000
committer Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-02-04 23:44:56 +0000
commit eb161b03f7b2012e1f33834220b9400848497173 (patch)
tree f06436bc55ceeab886bd240bbd03b35df3eb9872
parent 609c06621ad4829afb88fcc31d7f74ce99b969bf (diff)
More search.rb first_par fixups
-rw-r--r-- data/rbot/plugins/search.rb 21
1 files changed, 13 insertions, 8 deletions
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index f5bab421..3e1066f7 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -115,6 +115,9 @@ class SearchPlugin < Plugin
while first_pars > 0 and urls.length > 0
url.replace(urls.shift)
idx += 1
+
+ # FIXME what happens if some big file is returned? We should share
+ # code with the url plugin to only retrieve partial file content!
xml = @bot.httputil.get_cached(url)
if xml.nil?
debug "Unable to retrieve #{url}"
@@ -127,26 +130,28 @@ class SearchPlugin < Plugin
debug "Found header: #{header_found[1].inspect}"
while txt.empty?
header_found = $'
- candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+ candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
break unless candidate
- txt.replace candidate
+ txt.replace candidate.ircify_html
end
end
# If we haven't found a first par yet, try to get it from the whole
# document
if txt.empty?
- txt = xml[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+ header_found = xml
while txt.empty?
- header_found = $'
- candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+ candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
break unless candidate
- txt.replace candidate
+ txt.replace candidate.ircify_html
+ header_found = $'
end
end
- # Nothing yet, give up
+ # Nothing yet, try title
if txt.empty?
debug "No first par found\n#{xml}"
- next
+ # FIXME only do this if the 'url' plugin is loaded
+ txt.replace @bot.plugins['url'].get_title_from_html(xml)
+ next if txt.empty?
end
m.reply "[#{idx}] #{txt}".omissis_after(400)
first_pars -=1