More search.rb first_par fixups

author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-02-04 23:44:56 +0000
committer: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-02-04 23:44:56 +0000
commit: eb161b03f7b2012e1f33834220b9400848497173 (patch)
tree: f06436bc55ceeab886bd240bbd03b35df3eb9872 /data
parent: 609c06621ad4829afb88fcc31d7f74ce99b969bf (diff)
1 file changed, 13 insertions, 8 deletions
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index f5bab421..3e1066f7 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -115,6 +115,9 @@ class SearchPlugin < Plugin
     while first_pars > 0 and urls.length > 0
       url.replace(urls.shift)
       idx += 1
+
+      # FIXME what happens if some big file is returned? We should share
+      # code with the url plugin to only retrieve partial file content!
       xml = @bot.httputil.get_cached(url)
       if xml.nil?
         debug "Unable to retrieve #{url}"
@@ -127,26 +130,28 @@ class SearchPlugin < Plugin
         debug "Found header: #{header_found[1].inspect}"
         while txt.empty? 
           header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
           break unless candidate
-          txt.replace candidate
+          txt.replace candidate.ircify_html
         end
       end
       # If we haven't found a first par yet, try to get it from the whole
       # document
       if txt.empty?
-        txt = xml[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+	header_found = xml
         while txt.empty? 
-          header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
           break unless candidate
-          txt.replace candidate
+          txt.replace candidate.ircify_html
+          header_found = $'
         end
       end
-      # Nothing yet, give up
+      # Nothing yet, try title
       if txt.empty?
         debug "No first par found\n#{xml}"
-        next
+	# FIXME only do this if the 'url' plugin is loaded
+	txt.replace @bot.plugins['url'].get_title_from_html(xml)
+        next if txt.empty?
       end
       m.reply "[#{idx}] #{txt}".omissis_after(400)
       first_pars -=1
author	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>	2007-02-04 23:44:56 +0000
committer	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>	2007-02-04 23:44:56 +0000
commit	eb161b03f7b2012e1f33834220b9400848497173 (patch)
tree	f06436bc55ceeab886bd240bbd03b35df3eb9872 /data
parent	609c06621ad4829afb88fcc31d7f74ce99b969bf (diff)