summary | refs | log | tree | commit | diff
path: root/data
diff options
context:
space:
mode:
author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-03-13 23:18:33 +0000
committer: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-03-13 23:18:33 +0000
commit: a6ac5d31a3c783eeb54b49ae04587f083eb8c6a1 (patch)
tree: c9176ba76b2235daa74459ada929a84f5927be73 /data
parent: 65a6d64f368b9bdd1cfe1ce8d781f5ecdcb3aab6 (diff)
imdb plugin: update to the new site layout, use rbot headers
Diffstat (limited to 'data')
-rw-r--r-- data/rbot/plugins/imdb.rb | 14
1 files changed, 8 insertions, 6 deletions
diff --git a/data/rbot/plugins/imdb.rb b/data/rbot/plugins/imdb.rb
index 6cbdaf53..5e4cc65a 100644
--- a/data/rbot/plugins/imdb.rb
+++ b/data/rbot/plugins/imdb.rb
@@ -6,8 +6,11 @@
# Author:: Arnaud Cornet <arnaud.cornet@gmail.com>
# Copyright:: (C) 2005 Arnaud Cornet
# License:: MIT license
+#
+# Notes by Giuseppe Bilotta:
+# TODO return more than one match (configurable)
+# TODO why do we use CGI.unescapeHTML? shall we rely on the rbot methods?
-require 'net/http'
require 'cgi'
require 'uri/common'
@@ -21,7 +24,7 @@ class Imdb
@http = @bot.httputil.get_proxy(URI.parse("http://us.imdb.com/find?q=#{str}"))
@http.start
begin
- resp, data = @http.get("/find?q=#{str}", "User-Agent" => "Mozilla/5.0")
+ resp, data = @http.get("/find?q=#{str}", @bot.httputil.headers)
rescue Net::ProtoRetriableError => detail
head = detail.data
if head.code == "301" or head.code == "302"
@@ -29,7 +32,7 @@ class Imdb
end
end
if resp.code == "200"
- m = /<a href="(\/title\/tt[0-9]+\/?)[^"]*"(:?[^>]*)>([^<]*)<\/a>/.match(resp.body)
+ m = /<a href="(\/title\/tt[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/.match(resp.body)
if m
url = m[1]
title = m[2]
@@ -47,14 +50,13 @@ class Imdb
debug "IMDB: search returned NIL"
return nil
end
- resp, data = @http.get(sr, "User-Agent" =>
- "Mozilla/5.0 (compatible; Konqueror/3.1; Linux)")
+ resp, data = @http.get(sr, @bot.httputil.headers)
if resp.code == "200"
m = /<title>([^<]*)<\/title>/.match(resp.body)
return nil if !m
title = CGI.unescapeHTML(m[1])
- m = /<b>([0-9.]+)\/10<\/b> \(([0-9,]+) votes?\)/.match(resp.body)
+ m = /<b>([0-9.]+)\/10<\/b>\n?\r?\s+<small>\(<a href="ratings">([0-9,]+) votes?<\/a>\)<\/small>/.match(resp.body)
return nil if !m
score = m[1]
votes = m[2]