From eb161b03f7b2012e1f33834220b9400848497173 Mon Sep 17 00:00:00 2001
From: Giuseppe Bilotta
Date: Sun, 4 Feb 2007 23:44:56 +0000
Subject: More search.rb first_par fixups

---
 data/rbot/plugins/search.rb | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'data')

diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index f5bab421..3e1066f7 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -115,6 +115,9 @@ class SearchPlugin < Plugin
     while first_pars > 0 and urls.length > 0
       url.replace(urls.shift)
       idx += 1
+
+      # FIXME what happens if some big file is returned? We should share
+      # code with the url plugin to only retrieve partial file content!
       xml = @bot.httputil.get_cached(url)
       if xml.nil?
         debug "Unable to retrieve #{url}"
@@ -127,26 +130,28 @@ class SearchPlugin < Plugin
         debug "Found header: #{header_found[1].inspect}"
         while txt.empty?
           header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
           break unless candidate
-          txt.replace candidate
+          txt.replace candidate.ircify_html
         end
       end
       # If we haven't found a first par yet, try to get it from the whole
       # document
       if txt.empty?
-        txt = xml[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+        header_found = xml
         while txt.empty?
-          header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
           break unless candidate
-          txt.replace candidate
+          txt.replace candidate.ircify_html
+          header_found = $'
         end
       end
-      # Nothing yet, give up
+      # Nothing yet, try title
       if txt.empty?
         debug "No first par found\n#{xml}"
-        next
+        # FIXME only do this if the 'url' plugin is loaded
+        txt.replace @bot.plugins['url'].get_title_from_html(xml)
+        next if txt.empty?
       end
       m.reply "[#{idx}] #{txt}".omissis_after(400)
       first_pars -=1
-- 
cgit v1.2.3