diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2012-09-10 06:33:32 +0200 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2012-09-10 06:33:32 +0200 |
commit | fa639cb4885f63e887493afbd4e0dbacbe4a0e99 (patch) | |
tree | eaacacfb38a5979da7cd95468c3fc7cae68c9510 /data | |
parent | 211e189e0308ae15a993012e82773204f36da783 (diff) |
search: fix google calc scraping
Diffstat (limited to 'data')
-rw-r--r-- | data/rbot/plugins/search.rb | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index 0a439708..0e80a2d8 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -18,7 +18,7 @@
 GOOGLE_SEARCH = "http://www.google.com/search?oe=UTF-8&q="
 GOOGLE_WAP_SEARCH = "http://www.google.com/m/search?hl=en&q="
 GOOGLE_WAP_LINK = /"r">(?:<div[^>]*>)?<a href="([^"]+)"[^>]*>(.*?)<\/a>/im
-GOOGLE_CALC_RESULT = %r{<img src=/images/calc_img\.gif(?: width=40 height=30 alt="")?>.*?<h[1-6] class=r[^>]*><b>(.+?)</b>}
+GOOGLE_CALC_RESULT = %r{<h[1-6] class="r" [^>]*>(.+?)</h}
 GOOGLE_COUNT_RESULT = %r{<font size=-1>Results <b>1<\/b> - <b>10<\/b> of about <b>(.*)<\/b> for}
 GOOGLE_DEF_RESULT = %r{onebox_result">\s*(.*?)\s*<br/>\s*(.*?)<table}
 GOOGLE_TIME_RESULT = %r{alt="Clock"></td><td valign=[^>]+>(.+?)<(br|/td)>}
@@ -202,7 +202,7 @@ class SearchPlugin < Plugin
 debug "#{html.size} bytes of html recieved"
 debug html
-candidates = html.match(/font-weight:bold">(.*?)<\/(?:span|div)>/)
+candidates = html.match(GOOGLE_CALC_RESULT)
 debug "candidates: #{candidates.inspect}"
 if candidates.nil?
```