From a6ac5d31a3c783eeb54b49ae04587f083eb8c6a1 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Tue, 13 Mar 2007 23:18:33 +0000 Subject: imdb plugin: update to the new site layout, use rbot headers --- data/rbot/plugins/imdb.rb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/data/rbot/plugins/imdb.rb b/data/rbot/plugins/imdb.rb index 6cbdaf53..5e4cc65a 100644 --- a/data/rbot/plugins/imdb.rb +++ b/data/rbot/plugins/imdb.rb @@ -6,8 +6,11 @@ # Author:: Arnaud Cornet # Copyright:: (C) 2005 Arnaud Cornet # License:: MIT license +# +# Notes by Giuseppe Bilotta: +# TODO return more than one match (configurable) +# TODO why do we use CGI.unescapeHTML? shall we rely on the rbot methods? -require 'net/http' require 'cgi' require 'uri/common' @@ -21,7 +24,7 @@ class Imdb @http = @bot.httputil.get_proxy(URI.parse("http://us.imdb.com/find?q=#{str}")) @http.start begin - resp, data = @http.get("/find?q=#{str}", "User-Agent" => "Mozilla/5.0") + resp, data = @http.get("/find?q=#{str}", @bot.httputil.headers) rescue Net::ProtoRetriableError => detail head = detail.data if head.code == "301" or head.code == "302" @@ -29,7 +32,7 @@ class Imdb end end if resp.code == "200" - m = /]*)>([^<]*)<\/a>/.match(resp.body) + m = /]*)>([^<]*)<\/a>/.match(resp.body) if m url = m[1] title = m[2] @@ -47,14 +50,13 @@ class Imdb debug "IMDB: search returned NIL" return nil end - resp, data = @http.get(sr, "User-Agent" => - "Mozilla/5.0 (compatible; Konqueror/3.1; Linux)") + resp, data = @http.get(sr, @bot.httputil.headers) if resp.code == "200" m = /<title>([^<]*)<\/title>/.match(resp.body) return nil if !m title = CGI.unescapeHTML(m[1]) - m = /<b>([0-9.]+)\/10<\/b> \(([0-9,]+) votes?\)/.match(resp.body) + m = /<b>([0-9.]+)\/10<\/b>\n?\r?\s+<small>\(<a href="ratings">([0-9,]+) votes?<\/a>\)<\/small>/.match(resp.body) return nil if !m score = m[1] votes = m[2] -- cgit v1.2.3