author    Dmitry Kim <dmitry point kim at gmail point com>    2007-03-30 23:44:02 +0000
committer Dmitry Kim <dmitry point kim at gmail point com>    2007-03-30 23:44:02 +0000
commit    b11c3c4042b03e36639370002ecf86c44f7ddde4 (patch)
tree      05a35024a2d56c7e3d313317376a17cb7c41a99f
parent    b73d6c7dc6554e1c6eb6abce68350ed2c13191b8 (diff)
*** (httputil) major rework, new caching implementation, unified request processing
  + (httputil) post support, partial request support, other features
  - (httputil) removed partial_body() and get_cached() [merged into get()]
  * (plugins/, utils) minimal changes to accommodate the new http_utils
  * (utils, ircbot) moved utils initialization into utils.rb
  * (tube.rb) (partially) accommodate upstream site layout changes
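
The plugin churn below is almost entirely mechanical: the old get_cached(uri, read_timeout, open_timeout) positional style gives way to a single options hash, get() now caches by default, and callers pass plain URL strings instead of pre-parsed URI objects. A minimal before/after sketch (the URL is a placeholder):

    # old API: positional timeouts, explicit URI objects, caching via a separate method
    xml = @bot.httputil.get_cached(URI.parse(url), 60, 60)

    # new API: one get() that caches by default and takes an options hash
    xml = @bot.httputil.get(url,
                            :read_timeout => 60,
                            :open_timeout => 60,
                            :cache => false) # opt out where freshness matters
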
-rw-r--r--  data/rbot/plugins/bash.rb            |   7
-rw-r--r--  data/rbot/plugins/dict.rb            |  12
-rw-r--r--  data/rbot/plugins/digg.rb            |   2
-rw-r--r--  data/rbot/plugins/fish.rb            |  51
-rw-r--r--  data/rbot/plugins/freshmeat.rb       |   4
-rw-r--r--  data/rbot/plugins/games/azgame.rb    |  10
-rw-r--r--  data/rbot/plugins/games/quiz.rb      |   2
-rw-r--r--  data/rbot/plugins/grouphug.rb        |   9
-rw-r--r--  data/rbot/plugins/imdb.rb            |  27
-rw-r--r--  data/rbot/plugins/rss.rb             |   4
-rw-r--r--  data/rbot/plugins/search.rb          |   2
-rw-r--r--  data/rbot/plugins/slashdot.rb        |   4
-rw-r--r--  data/rbot/plugins/threat.rb          |   2
-rw-r--r--  data/rbot/plugins/tube.rb            |  64
-rw-r--r--  data/rbot/plugins/urban.rb           |  21
-rw-r--r--  data/rbot/plugins/url.rb             |  85
-rw-r--r--  data/rbot/plugins/weather.rb         |   4
-rw-r--r--  data/rbot/plugins/wserver.rb         |  38
-rw-r--r--  lib/rbot/core/utils/httputil.rb      | 611
-rw-r--r--  lib/rbot/core/utils/utils.rb         |   5
-rw-r--r--  lib/rbot/ircbot.rb                   |   5
21 files changed, 458 insertions, 511 deletions
diff --git a/data/rbot/plugins/bash.rb b/data/rbot/plugins/bash.rb
index 6f954a16..2a5bedad 100644
--- a/data/rbot/plugins/bash.rb
+++ b/data/rbot/plugins/bash.rb
@@ -25,11 +25,12 @@ class BashPlugin < Plugin
def bash(m, id=0)
if(id != 0)
- xml = @bot.httputil.get URI.parse("http://bash.org/xml/?" + id + "&num=1")
+ xml = @bot.httputil.get("http://bash.org/xml/?" + id + "&num=1")
elsif(id == "latest")
- xml = @bot.httputil.get URI.parse("http://bash.org/xml/?latest&num=1")
+ xml = @bot.httputil.get("http://bash.org/xml/?latest&num=1")
else
- xml = @bot.httputil.get URI.parse("http://bash.org/xml/?random&num=1")
+ xml = @bot.httputil.get("http://bash.org/xml/?random&num=1",
+ :cache => false)
end
unless xml
m.reply "bash.org rss parse failed"
diff --git a/data/rbot/plugins/dict.rb b/data/rbot/plugins/dict.rb
index 504a9d7b..d486e97b 100644
--- a/data/rbot/plugins/dict.rb
+++ b/data/rbot/plugins/dict.rb
@@ -57,9 +57,10 @@ class DictPlugin < Plugin
word = params[:word].downcase
url = @dmwapurl % URI.escape(word)
- xml = @bot.httputil.get_cached(url)
+ xml = nil
+ info = @bot.httputil.get_response(url) rescue nil
+ xml = info.body if info
if xml.nil?
- info = @bot.httputil.last_response
info = info ? " (#{info.code} - #{info.message})" : ""
return false if justcheck
m.reply "An error occurred while looking for #{word}#{info}"
@@ -108,7 +109,7 @@ class DictPlugin < Plugin
word = params[:word].join
[word, word + "_1"].each { |check|
url = @oxurl % URI.escape(check)
- h = @bot.httputil.head(url)
+ h = @bot.httputil.head(url, :max_redir => 5)
if h
m.reply("#{word} found: #{url}") unless justcheck
return true
@@ -128,10 +129,11 @@ class DictPlugin < Plugin
word = params[:word].to_s.downcase
url = @chambersurl % URI.escape(word)
- xml = @bot.httputil.get_cached(url)
+ xml = nil
+ info = @bot.httputil.get_response(url) rescue nil
+ xml = info.body if info
case xml
when nil
- info = @bot.httputil.last_response
info = info ? " (#{info.code} - #{info.message})" : ""
return false if justcheck
m.reply "An error occurred while looking for #{word}#{info}"
diff --git a/data/rbot/plugins/digg.rb b/data/rbot/plugins/digg.rb
index 43b7c9f5..dc2e41cf 100644
--- a/data/rbot/plugins/digg.rb
+++ b/data/rbot/plugins/digg.rb
@@ -13,7 +13,7 @@ class DiggPlugin < Plugin
def digg(m, params)
max = params[:limit].to_i
debug "max is #{max}"
- xml = @bot.httputil.get_cached(URI.parse("http://digg.com/rss/index.xml"))
+ xml = @bot.httputil.get('http://digg.com/rss/index.xml')
unless xml
m.reply "digg news parse failed"
return
diff --git a/data/rbot/plugins/fish.rb b/data/rbot/plugins/fish.rb
index 8c115f90..d7dda52b 100644
--- a/data/rbot/plugins/fish.rb
+++ b/data/rbot/plugins/fish.rb
@@ -30,39 +30,40 @@ class BabelPlugin < Plugin
return
end
- http = @bot.httputil.get_proxy(URI.parse("http://babelfish.altavista.com"))
-
headers = {
- "content-type" => "application/x-www-form-urlencoded; charset=utf-8",
- 'accept-charset' => 'utf-8'
+ "content-type" => "application/x-www-form-urlencoded; charset=utf-8"
}
- http.start {|http|
- resp = http.post(query, data, headers)
-
- if (resp.code == "200")
- lines = Array.new
- resp.body.each_line do |l|
- lines.push l
+ begin
+ resp = @bot.httputil.get_response('http://babelfish.altavista.com'+query,
+ :method => :post,
+ :body => data,
+ :headers => headers)
+ rescue Exception => e
+ m.reply "http error: #{e.message}"
+ return
end
- l = lines.join(" ")
- debug "babelfish response: #{l}"
+ if (resp.code == "200")
+ lines = Array.new
+ resp.body.each_line { |l| lines.push l }
+
+ l = lines.join(" ")
+ debug "babelfish response: #{l}"
- if(l =~ /^\s+<td bgcolor=white class=s><div style=padding:10px;>(.*)<\/div>/)
- answer = $1
- # cache the answer
- if(answer.length > 0)
- @registry["#{trans_pair}/#{data_text}"] = answer
+ if(l =~ /^\s+<td bgcolor=white class=s><div style=padding:10px;>(.*)<\/div>/)
+ answer = $1
+ # cache the answer
+ if(answer.length > 0)
+ @registry["#{trans_pair}/#{data_text}"] = answer
+ end
+ m.reply answer
+ return
end
- m.reply answer
- return
+ m.reply "couldn't parse babelfish response html :("
+ else
+ m.reply "couldn't talk to babelfish :("
end
- m.reply "couldn't parse babelfish response html :("
- else
- m.reply "couldn't talk to babelfish :("
- end
- }
end
end
plugin = BabelPlugin.new
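
POST support is one of the new features: rather than driving a Net::HTTP object by hand, the plugin asks get_response for a :post with a :body and extra :headers. POSTs are never cached, and get_response raises on connection errors, so the call sits in a begin/rescue. Sketched:

    begin
      resp = @bot.httputil.get_response(url,
                                        :method => :post,
                                        :body => data,        # urlencoded form body
                                        :headers => headers)  # merged over the defaults
    rescue Exception => e
      m.reply "http error: #{e.message}"
      return
    end
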
diff --git a/data/rbot/plugins/freshmeat.rb b/data/rbot/plugins/freshmeat.rb
index c8f529cb..5a045123 100644
--- a/data/rbot/plugins/freshmeat.rb
+++ b/data/rbot/plugins/freshmeat.rb
@@ -12,7 +12,7 @@ class FreshmeatPlugin < Plugin
search = params[:search].to_s
max = 8 if max > 8
begin
- xml = @bot.httputil.get_cached(URI.parse("http://freshmeat.net/search-xml/?orderby=locate_projectname_full_DESC&q=#{URI.escape(search)}"))
+ xml = @bot.httputil.get("http://freshmeat.net/search-xml/?orderby=locate_projectname_full_DESC&q=#{URI.escape(search)}")
rescue URI::InvalidURIError, URI::BadURIError => e
m.reply "illegal search string #{search}"
return
@@ -59,7 +59,7 @@ class FreshmeatPlugin < Plugin
max = params[:limit].to_i
max = 8 if max > 8
begin
- xml = @bot.httputil.get(URI.parse("http://images.feedstermedia.com/feedcache/ostg/freshmeat/fm-releases-global.xml"))
+ xml = @bot.httputil.get('http://images.feedstermedia.com/feedcache/ostg/freshmeat/fm-releases-global.xml')
unless xml
m.reply "freshmeat news parse failed"
return
diff --git a/data/rbot/plugins/games/azgame.rb b/data/rbot/plugins/games/azgame.rb
index a6979830..f62232c0 100644
--- a/data/rbot/plugins/games/azgame.rb
+++ b/data/rbot/plugins/games/azgame.rb
@@ -351,7 +351,7 @@ class AzGamePlugin < Plugin
wc = @wordcache[:italian]
return true if wc.key?(word.to_sym)
rules = @rules[:italian]
- p = @bot.httputil.get_cached(rules[:wapurl] % word)
+ p = @bot.httputil.get(rules[:wapurl] % word)
if not p
error "could not connect!"
return false
@@ -404,11 +404,11 @@ class AzGamePlugin < Plugin
l = ('a'..'z').to_a[rand(26)]
debug "getting random word from dictionary, starting with letter #{l}"
first = rules[:url] % "lettera_#{l}_0_50"
- p = @bot.httputil.get_cached(first)
+ p = @bot.httputil.get(first)
max_page = p.match(/ \/ (\d+)<\/label>/)[1].to_i
pp = rand(max_page)+1
debug "getting random word from dictionary, starting with letter #{l}, page #{pp}"
- p = @bot.httputil.get_cached(first+"&pagina=#{pp}") if pp > 1
+ p = @bot.httputil.get(first+"&pagina=#{pp}") if pp > 1
lemmi = Array.new
good = rules[:good]
bad = rules[:bad]
@@ -446,7 +446,7 @@ class AzGamePlugin < Plugin
wc = @wordcache[:english]
return true if wc.key?(word.to_sym)
rules = @rules[:english]
- p = @bot.httputil.get_cached(rules[:url] % URI.escape(word))
+ p = @bot.httputil.get(rules[:url] % URI.escape(word))
if not p
error "could not connect!"
return false
@@ -497,7 +497,7 @@ class AzGamePlugin < Plugin
ll = ('a'..'z').to_a[rand(26)]
random = [l,ll].join('*') + '*'
debug "getting random word from dictionary, matching #{random}"
- p = @bot.httputil.get_cached(rules[:url] % URI.escape(random))
+ p = @bot.httputil.get(rules[:url] % URI.escape(random))
debug p
lemmi = Array.new
good = rules[:good]
diff --git a/data/rbot/plugins/games/quiz.rb b/data/rbot/plugins/games/quiz.rb
index 63383262..56b1bca6 100644
--- a/data/rbot/plugins/games/quiz.rb
+++ b/data/rbot/plugins/games/quiz.rb
@@ -203,7 +203,7 @@ class QuizPlugin < Plugin
if p =~ /^https?:\/\//
# Wiki data
begin
- serverdata = @bot.httputil.get_cached( URI.parse( p ) ) # "http://amarok.kde.org/amarokwiki/index.php/Rbot_Quiz"
+ serverdata = @bot.httputil.get(p) # "http://amarok.kde.org/amarokwiki/index.php/Rbot_Quiz"
serverdata = serverdata.split( "QUIZ DATA START\n" )[1]
serverdata = serverdata.split( "\nQUIZ DATA END" )[0]
serverdata = serverdata.gsub( /&nbsp;/, " " ).gsub( /&amp;/, "&" ).gsub( /&quot;/, "\"" )
diff --git a/data/rbot/plugins/grouphug.rb b/data/rbot/plugins/grouphug.rb
index 75093665..15735b9f 100644
--- a/data/rbot/plugins/grouphug.rb
+++ b/data/rbot/plugins/grouphug.rb
@@ -13,10 +13,15 @@ class GrouphugPlugin < Plugin
end
def confess(m, params)
+ opts = { :cache => false }
path = "random"
- path = "confessions/#{params[:num]}" if params[:num]
+ if params[:num]
+ path = "confessions/#{params[:num]}"
+ opts.delete(:cache)
+ end
+
begin
- data = @bot.httputil.get_cached(URI.parse("http://grouphug.us/#{path}"))
+ data = @bot.httputil.get("http://grouphug.us/#{path}", opts)
reg = Regexp.new( '(<td class="conf-text")(.*?)(<p>)(.*?)(</p>)', Regexp::MULTILINE )
confession = reg.match( data )[4].ircify_html
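
Per-request cache policy in action: the random confession must bypass the cache or the bot would keep repeating itself, while a numbered confession is stable and can use the (caching) default. Sketched:

    opts = { :cache => false }          # random page: always refetch
    opts.delete(:cache) if params[:num] # numbered page: default caching is fine
    data = @bot.httputil.get("http://grouphug.us/#{path}", opts)
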
diff --git a/data/rbot/plugins/imdb.rb b/data/rbot/plugins/imdb.rb
index 5e4cc65a..cfadd3c8 100644
--- a/data/rbot/plugins/imdb.rb
+++ b/data/rbot/plugins/imdb.rb
@@ -21,16 +21,16 @@ class Imdb
def search(rawstr)
str = URI.escape(rawstr)
- @http = @bot.httputil.get_proxy(URI.parse("http://us.imdb.com/find?q=#{str}"))
- @http.start
+ resp = nil
begin
- resp, data = @http.get("/find?q=#{str}", @bot.httputil.headers)
- rescue Net::ProtoRetriableError => detail
- head = detail.data
- if head.code == "301" or head.code == "302"
- return head['location'].gsub(/http:\/\/us.imdb.com/, "").gsub(/\?.*/, "")
- end
+ resp = @bot.httputil.get_response("http://us.imdb.com/find?q=#{str}",
+ :max_redir => -1)
+ rescue Exception => e
+ error e.message
+ warning e.backtrace.join("\n")
+ return nil
end
+
if resp.code == "200"
m = /<a href="(\/title\/tt[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/.match(resp.body)
if m
@@ -50,7 +50,16 @@ class Imdb
debug "IMDB: search returned NIL"
return nil
end
- resp, data = @http.get(sr, @bot.httputil.headers)
+ resp = nil
+ begin
+ resp = @bot.httputil.get_response('http://us.imdb.com' + sr,
+ :max_redir => -1)
+ rescue Exception => e
+ error e.message
+ warning e.backtrace.join("\n")
+ return nil
+ end
+
if resp.code == "200"
m = /<title>([^<]*)<\/title>/.match(resp.body)
return nil if !m
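
:max_redir => -1 means "hand me the redirect instead of following it", letting the plugin read Location itself rather than rescuing Net::ProtoRetriableError as before. Since get_response raises on connection errors, the call needs its own rescue. Sketched:

    begin
      resp = @bot.httputil.get_response("http://us.imdb.com/find?q=#{str}",
                                        :max_redir => -1) # don't follow redirects
    rescue Exception => e
      error e.message
      return nil
    end
    # a 30x response comes back as-is; resp['location'] holds the target
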
diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb
index 15b121b7..68a5271b 100644
--- a/data/rbot/plugins/rss.rb
+++ b/data/rbot/plugins/rss.rb
@@ -654,7 +654,9 @@ class RSSFeedsPlugin < Plugin
def fetchRss(feed, m=nil)
begin
# Use 60 sec timeout, cause the default is too low
- xml = @bot.httputil.get_cached(feed.url, 60, 60)
+ xml = @bot.httputil.get(feed.url,
+ :read_timeout => 60,
+ :open_timeout => 60)
rescue URI::InvalidURIError, URI::BadURIError => e
report_problem("invalid rss feed #{feed.url}", e, m)
return nil
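
The timeout knobs survive the rework as named options; the defaults set in get_proxy are 10s read / 5s open, so slow feeds get an explicit 60/60:

    xml = @bot.httputil.get(feed.url,
                            :read_timeout => 60, # default is 10
                            :open_timeout => 60) # default is 5
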
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index 221071cb..1884b38a 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -62,7 +62,7 @@ class SearchPlugin < Plugin
hits = params[:hits] || @bot.config['google.hits']
begin
- wml = @bot.httputil.get_cached(url)
+ wml = @bot.httputil.get(url)
rescue => e
m.reply "error googling for #{what}"
return
diff --git a/data/rbot/plugins/slashdot.rb b/data/rbot/plugins/slashdot.rb
index ef96a4b0..fa1338bd 100644
--- a/data/rbot/plugins/slashdot.rb
+++ b/data/rbot/plugins/slashdot.rb
@@ -12,7 +12,7 @@ class SlashdotPlugin < Plugin
search = params[:search].to_s
begin
- xml = @bot.httputil.get_cached(URI.parse("http://slashdot.org/search.pl?content_type=rss&query=#{URI.escape(search)}"))
+ xml = @bot.httputil.get("http://slashdot.org/search.pl?content_type=rss&query=#{URI.escape(search)}")
rescue URI::InvalidURIError, URI::BadURIError => e
m.reply "illegal search string #{search}"
return
@@ -53,7 +53,7 @@ class SlashdotPlugin < Plugin
debug params.inspect
max = params[:limit].to_i
debug "max is #{max}"
- xml = @bot.httputil.get_cached(URI.parse("http://slashdot.org/slashdot.xml"))
+ xml = @bot.httputil.get('http://slashdot.org/slashdot.xml')
unless xml
m.reply "slashdot news parse failed"
return
diff --git a/data/rbot/plugins/threat.rb b/data/rbot/plugins/threat.rb
index bcfc8698..dacd1058 100644
--- a/data/rbot/plugins/threat.rb
+++ b/data/rbot/plugins/threat.rb
@@ -24,7 +24,7 @@ class ThreatPlugin < Plugin
green = "\x0303" # low
black = "\x0301" # default
- page = @bot.httputil.get_cached("http://www.dhs.gov/index.shtm")
+ page = @bot.httputil.get("http://www.dhs.gov/index.shtm")
if page =~ /\"Current National Threat Level is (.*?)\"/
state = $1
diff --git a/data/rbot/plugins/tube.rb b/data/rbot/plugins/tube.rb
index 0a9feb2f..16562274 100644
--- a/data/rbot/plugins/tube.rb
+++ b/data/rbot/plugins/tube.rb
@@ -7,56 +7,48 @@ require 'uri/common'
class TubePlugin < Plugin
include REXML
def help(plugin, topic="")
- "tube [district|circle|metropolitan|central|jubilee|bakerloo|waterloo_city|hammersmith_city|victoria|eastlondon|northern|piccadilly] => display tube service status for the specified line(Docklands Light Railway is not currently supported), tube stations => list tube stations (not lines) with problems"
+ "tube [district|circle|metropolitan|central|jubilee|bakerloo|waterlooandcity|hammersmithandcity|victoria|eastlondon|northern|piccadilly] => display tube service status for the specified line(Docklands Light Railway is not currently supported)" # , tube stations => list tube stations (not lines) with problems"
end
def tube(m, params)
line = params[:line]
- begin
- tube_page = @bot.httputil.get_cached(URI.parse("http://www.tfl.gov.uk/tfl/service_rt_tube.shtml"), 1, 1)
- rescue URI::InvalidURIError, URI::BadURIError => e
- m.reply "Cannot contact Tube Service Status page"
- return
- end
- unless tube_page
- m.reply "Cannot contact Tube Service Status page"
- return
- end
- next_line = false
- tube_page.each_line {|l|
- next if l == "\r\n"
- next if l == "\n"
- if (next_line)
- if (l =~ /^<tr valign=top> <td>\s*(.*)<\/td><\/tr>/i)
- m.reply $1.split(/<[^>]+>|&nbsp;/i).join(" ")
- return
- else
- m.reply "There are problems on the #{line} line, but I didn't understand the page format. You should check out http://www.tfl.gov.uk/tfl/service_rt_tube.shtml for more details."
- return
- end
+ tube_page = @bot.httputil.get('http://www.tfl.gov.uk/tfl/livetravelnews/realtime/tube/default.html')
+ unless tube_page
+ m.reply "Cannot contact Tube Service Status page"
+ return
end
- next_line = true if (l =~ /class="#{line}"/i)
+ next_line = false
+ tube_page.each_line {|l|
+ next if l == "\r\n"
+ next if l == "\n"
+ if (next_line)
+ if (l =~ /^<p>\s*(.*)<\/p>/i)
+ m.reply $1.split(/<[^>]+>|&nbsp;/i).join(" ")
+ return
+ elsif l =~ /ul|h3|"message"/
+ next
+ else
+ m.reply "There are problems on the #{line} line, but I didn't understand the page format. You should check out http://www.tfl.gov.uk/tfl/livetravelnews/realtime/tube/default.html for more details."
+ return
+ end
+ end
+ next_line = true if (l =~ /li class="#{line}"/i)
}
- m.reply "No Problems on the #{line} line."
+ m.reply "No Problems on the #{line} line."
end
def check_stations(m, params)
- begin
- tube_page = @bot.httputil.get_cached(URI.parse("http://www.tfl.gov.uk/tfl/service_rt_tube.shtml"))
- rescue URI::InvalidURIError, URI::BadURIError => e
- m.reply "Cannot contact Tube Service Status page"
- return
- end
+ tube_page = @bot.httputil.get('http://www.tfl.gov.uk/tfl/service_rt_tube.shtml')
unless tube_page
m.reply "Cannot contact Tube Service Status page"
return
end
stations_array = Array.new
tube_page.each_line {|l|
- if (l =~ /<tr valign=top> <td valign="middle" class="Station"><b>(.*)<\/b><\/td><\/tr>\s*/i)
- stations_array.push $1
- end
- }
+ if (l =~ /<tr valign=top> <td valign="middle" class="Station"><b>(.*)<\/b><\/td><\/tr>\s*/i)
+ stations_array.push $1
+ end
+ }
if stations_array.empty?
m.reply "There are no station-specific announcements"
return
@@ -67,5 +59,5 @@ class TubePlugin < Plugin
end
end
plugin = TubePlugin.new
-plugin.map 'tube stations', :action => 'check_stations'
+# plugin.map 'tube stations', :action => 'check_stations'
plugin.map 'tube :line'
diff --git a/data/rbot/plugins/urban.rb b/data/rbot/plugins/urban.rb
index c77a2e45..0b75febe 100644
--- a/data/rbot/plugins/urban.rb
+++ b/data/rbot/plugins/urban.rb
@@ -11,14 +11,15 @@ class UrbanPlugin < Plugin
n = params[:n].nil? ? 1 : params[:n].to_i rescue 1
if words.empty?
- uri = URI.parse( "http://www.urbandictionary.com/random.php" )
- @bot.httputil.head(uri) { |redir|
- words = URI.unescape(redir.match(/define.php\?term=(.*)$/)[1]) rescue nil
- }
+ resp = @bot.httputil.head('http://www.urbandictionary.com/random.php',
+ :max_redir => -1)
+ if resp.code == "302" && (loc = resp['location'])
+ words = URI.unescape(loc.match(/define.php\?term=(.*)$/)[1]) rescue nil
+ end
end
# we give a very high 'skip' because this will allow us to get the number of definitions by retrieving the previous definition
- uri = URI.parse("http://www.urbanwap.com/search.php?term=#{URI.escape words}&skip=65536")
- page = @bot.httputil.get_cached(uri)
+ uri = "http://www.urbanwap.com/search.php?term=#{URI.escape words}&skip=65536"
+ page = @bot.httputil.get(uri)
if page.nil?
m.reply "Couldn't retrieve an urban dictionary definition of #{words}"
return
@@ -38,8 +39,8 @@ class UrbanPlugin < Plugin
n = numdefs
end
if n < numdefs
- uri = URI.parse("http://www.urbanwap.com/search.php?term=#{URI.escape words}&skip=#{n-1}")
- page = @bot.httputil.get_cached(uri)
+ uri = "http://www.urbanwap.com/search.php?term=#{URI.escape words}&skip=#{n-1}"
+ page = @bot.httputil.get(uri)
if page.nil?
case n % 10
when 1
@@ -77,7 +78,7 @@ class UrbanPlugin < Plugin
end
def uotd(m, params)
- home = @bot.httputil.get_cached("http://www.urbanwap.com/")
+ home = @bot.httputil.get("http://www.urbanwap.com/")
if home.nil?
m.reply "Couldn't get the urban dictionary word of the day"
return
@@ -85,7 +86,7 @@ class UrbanPlugin < Plugin
home.match(/Word of the Day: <a href="(.*?)">.*?<\/a>/)
wotd = $1
debug "Urban word of the day: #{wotd}"
- page = @bot.httputil.get_cached(wotd)
+ page = @bot.httputil.get(wotd)
if page.nil?
m.reply "Couldn't get the urban dictionary word of the day"
else
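
head() is now a thin wrapper that forces :method => :head (and, per get_response, :max_redir => -1), so a redirect comes back as the response itself and the Location header can be mined directly:

    resp = @bot.httputil.head('http://www.urbandictionary.com/random.php',
                              :max_redir => -1)
    if resp.code == "302" && (loc = resp['location'])
      words = URI.unescape(loc.match(/define.php\?term=(.*)$/)[1]) rescue nil
    end
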
diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb
index 23a32638..95ccb5d9 100644
--- a/data/rbot/plugins/url.rb
+++ b/data/rbot/plugins/url.rb
@@ -40,59 +40,48 @@ class UrlPlugin < Plugin
title = nil
begin
- @bot.httputil.get_response(url) { |response|
- case response
- when Net::HTTPSuccess
- extra = String.new
-
- if response['content-type'] =~ /^text\//
-
- title = String.new
-
- # since the content is 'text/*' and is small enough to
- # be a webpage, retrieve the title from the page
- debug "+ getting #{url.request_uri}"
-
- # we act differently depending on whether we want the first par or not:
- # in the first case we download the initial part and the parse it; in the second
- # case we only download as much as we need to find the title
- if @bot.config['url.first_par']
- partial = response.partial_body(@bot.config['http.info_bytes'])
- first_par = Utils.ircify_first_html_par(partial)
- extra << "\n#{LINK_INFO} text: #{first_par}" unless first_par.empty?
- title = get_title_from_html(partial)
- if title
- return "title: #{title}#{extra}"
- end
- else
- response.partial_body(@bot.config['http.info_bytes']) { |part|
- title = get_title_from_html(part)
- return "title: #{title}" if title
- }
- end
- # if nothing was found, provide more basic info
- end
+ range = @bot.config['http.info_bytes']
+ response = @bot.httputil.get_response(url, :range => "bytes=0-#{range}")
+ if response.code != "206" && response.code != "200"
+ return "Error getting link (#{response.code} - #{response.message})"
+ end
+ extra = String.new
- debug response.to_hash.inspect
- unless @bot.config['url.titles_only']
- # content doesn't have title, just display info.
- size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
- size = size ? ", size: #{size} bytes" : ""
- return "type: #{response['content-type']}#{size}#{extra}"
- end
- when Net::HTTPResponse
- return "Error getting link (#{response.code} - #{response.message})"
+ if response['content-type'] =~ /^text\//
+
+ body = response.body.slice(0, range)
+ title = String.new
+
+ # since the content is 'text/*' and is small enough to
+ # be a webpage, retrieve the title from the page
+ debug "+ getting #{url.request_uri}"
+
+ # we act differently depending on whether we want the first par or not:
+ # in the first case we download the initial part and the parse it; in the second
+ # case we only download as much as we need to find the title
+ if @bot.config['url.first_par']
+ first_par = Utils.ircify_first_html_par(body)
+ extra << "\n#{LINK_INFO} text: #{first_par}" unless first_par.empty?
+ title = get_title_from_html(body)
+ return "title: #{title}#{extra}" if title
else
- raise response
+ title = get_title_from_html(body)
+ return "title: #{title}" if title
end
- }
- rescue Object => e
- if e.class <= StandardError
- error e.inspect
- debug e.backtrace.join("\n")
+
+ # if nothing was found, provide more basic info
end
- msg = e.respond_to?(:message) ? e.message : e.to_s
+ debug response.to_hash.inspect
+ unless @bot.config['url.titles_only']
+ # content doesn't have title, just display info.
+ size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
+ size = size ? ", size: #{size} bytes" : ""
+ return "type: #{response['content-type']}#{size}#{extra}"
+ end
+ rescue Exception => e
+ error e.inspect
+ debug e.backtrace.join("\n")
return "Error connecting to site (#{e.message})"
end
end
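
The removed partial_body() is superseded by HTTP range requests: the plugin asks the server for just the first http.info_bytes bytes via the :range option, then slices the body anyway, because a server that ignores Range replies 200 with the full entity instead of 206. Sketched:

    range = @bot.config['http.info_bytes']
    response = @bot.httputil.get_response(url, :range => "bytes=0-#{range}")
    # 206 Partial Content: the server honored the range
    # 200 OK: the full body came back, so trim it client-side
    body = response.body.slice(0, range)
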
diff --git a/data/rbot/plugins/weather.rb b/data/rbot/plugins/weather.rb
index 88194e90..cdabf9c1 100644
--- a/data/rbot/plugins/weather.rb
+++ b/data/rbot/plugins/weather.rb
@@ -165,7 +165,7 @@ class WeatherPlugin < Plugin
def wu_station(m, where, units)
begin
- xml = @bot.httputil.get_cached(@wu_station_url % [units, URI.escape(where)])
+ xml = @bot.httputil.get(@wu_station_url % [units, URI.escape(where)])
case xml
when nil
m.reply "couldn't retrieve weather information, sorry"
@@ -187,7 +187,7 @@ class WeatherPlugin < Plugin
def wu_weather(m, where, units)
begin
- xml = @bot.httputil.get_cached(@wu_url % [units, URI.escape(where)])
+ xml = @bot.httputil.get(@wu_url % [units, URI.escape(where)])
case xml
when nil
m.reply "couldn't retrieve weather information, sorry"
diff --git a/data/rbot/plugins/wserver.rb b/data/rbot/plugins/wserver.rb
index 1baa0d90..fdb4207d 100644
--- a/data/rbot/plugins/wserver.rb
+++ b/data/rbot/plugins/wserver.rb
@@ -29,30 +29,26 @@ class WserverPlugin < Plugin
return
end
- http = @bot.httputil.get_proxy(uri)
- http.open_timeout = 5
- http.start {|http|
- resp = http.head('/')
- server = resp['Server']
- if(server && server.length > 0)
- m.reply "#{uri.host} is running #{server}"
- else
- m.reply "couldn't tell what #{uri.host} is running"
- end
-
- if(resp.code == "302" || resp.code == "301")
- newloc = resp['location']
- newuri = URI.parse(newloc)
- # detect and ignore incorrect redirects (to relative paths etc)
- if (newuri.host != nil)
- if(uri.host != newuri.host)
- m.reply "#{uri.host} redirects to #{newuri.scheme}://#{newuri.host}"
- raise resp['location']
- end
+ resp = @bot.httputil.head(uri)
+ server = resp['Server']
+ if(server && server.length > 0)
+ m.reply "#{uri.host} is running #{server}"
+ else
+ m.reply "couldn't tell what #{uri.host} is running"
+ end
+
+ if(resp.code == "302" || resp.code == "301")
+ newloc = resp['location']
+ newuri = URI.parse(newloc)
+ # detect and ignore incorrect redirects (to relative paths etc)
+ if (newuri.host != nil)
+ if(uri.host != newuri.host)
+ m.reply "#{uri.host} redirects to #{newuri.scheme}://#{newuri.host}"
+ raise resp['location']
end
end
- }
+ end
rescue TimeoutError => err
m.reply "timed out connecting to #{uri.host}:#{uri.port} :("
return
diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb
index aebd1e81..4ce8dcc3 100644
--- a/lib/rbot/core/utils/httputil.rb
+++ b/lib/rbot/core/utils/httputil.rb
@@ -5,10 +5,11 @@
#
# Author:: Tom Gilbert <tom@linuxbrit.co.uk>
# Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
+# Author:: Dmitry "jsn" Kim <dmitry point kim at gmail point com>
#
# Copyright:: (C) 2002-2005 Tom Gilbert
# Copyright:: (C) 2006 Tom Gilbert, Giuseppe Bilotta
-# Copyright:: (C) 2006,2007 Giuseppe Bilotta
+# Copyright:: (C) 2007 Giuseppe Bilotta, Dmitry Kim
require 'resolv'
require 'net/http'
@@ -19,25 +20,6 @@ rescue LoadError => e
error "Secured HTTP connections will fail"
end
-module ::Net
- class HTTPResponse
- # Read chunks from the body until we have at least _size_ bytes, yielding
- # the partial text at each chunk. Return the partial body.
- def partial_body(size=0, &block)
-
- partial = String.new
-
- self.read_body { |chunk|
- partial << chunk
- yield partial if block_given?
- break if size and size > 0 and partial.length >= size
- }
-
- return partial
- end
- end
-end
-
Net::HTTP.version_1_2
module ::Irc
@@ -79,16 +61,113 @@ class HttpUtil
:default => 8192,
:desc => "How many bytes to download from a web page to find some information. Set to 0 to let the bot download the whole page.")
+ class CachedObject
+ attr_accessor :response, :last_used, :first_used, :count, :expires, :date
+
+ def self.maybe_new(resp)
+ debug "maybe new #{resp}"
+ return nil unless Net::HTTPOK === resp ||
+ Net::HTTPMovedPermanently === resp ||
+ Net::HTTPFound === resp ||
+ Net::HTTPPartialContent === resp
+
+ cc = resp['cache-control']
+ return nil if cc && (cc =~ /no-cache/i)
+
+ date = Time.now
+ if d = resp['date']
+ date = Time.httpdate(d)
+ end
+
+ return nil if resp['expires'] && (Time.httpdate(resp['expires']) < date)
+
+ debug "creating cache obj"
+
+ self.new(resp)
+ end
+
+ def use
+ now = Time.now
+ @first_used = now if @count == 0
+ @last_used = now
+ @count += 1
+ end
+
+ def expired?
+ debug "checking expired?"
+ if cc = self.response['cache-control'] && cc =~ /must-revalidate/
+ return true
+ end
+ return self.expires < Time.now
+ end
+
+ def setup_headers(hdr)
+ hdr['if-modified-since'] = self.date.rfc2822
+
+ debug "ims == #{hdr['if-modified-since']}"
+
+ if etag = self.response['etag']
+ hdr['if-none-match'] = etag
+ debug "etag: #{etag}"
+ end
+ end
+
+ def revalidate(resp = self.response)
+ @count = 0
+ self.use
+ self.date = resp.key?('date') ? Time.httpdate(resp['date']) : Time.now
+
+ cc = resp['cache-control']
+ if cc && (cc =~ /max-age=(\d+)/)
+ self.expires = self.date + $1.to_i
+ elsif resp.key?('expires')
+ self.expires = Time.httpdate(resp['expires'])
+ elsif lm = resp['last-modified']
+ delta = self.date - Time.httpdate(lm)
+ delta = 10 if delta <= 0
+ delta /= 5
+ self.expires = self.date + delta
+ else
+ self.expires = self.date + 300
+ end
+ # self.expires = Time.now + 10 # DEBUG
+ debug "expires on #{self.expires}"
+
+ return true
+ end
+
+ private
+ def initialize(resp)
+ @response = resp
+ begin
+ self.revalidate
+ self.response.body
+ rescue Exception => e
+ error e.message
+ error e.backtrace.join("\n")
+ raise e
+ end
+ end
+ end
+
def initialize(bot)
@bot = bot
@cache = Hash.new
@headers = {
- 'User-Agent' => "rbot http util #{$version} (http://linuxbrit.co.uk/rbot/)",
+ 'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8',
+ 'User-Agent' =>
+ "rbot http util #{$version} (http://linuxbrit.co.uk/rbot/)"
+ }
+ debug "starting http cache cleanup timer"
+ @timer = @bot.timer.add(300) {
+ self.remove_stale_cache unless @bot.config['http.no_expire_cache']
}
- @last_response = nil
+ end
+
+ def cleanup
+ debug 'stopping http cache cleanup timer'
+ @bot.timer.remove(@timer)
end
- attr_reader :last_response
- attr_reader :headers
# if http_proxy_include or http_proxy_exclude are set, then examine the
# uri to see if this is a proxied uri
@@ -139,7 +218,13 @@ class HttpUtil
# proxying based on the bot's proxy configuration.
# This will include per-url proxy configuration based on the bot config
# +http_proxy_include/exclude+ options.
- def get_proxy(uri)
+
+ def get_proxy(uri, options = {})
+ opts = {
+ :read_timeout => 10,
+ :open_timeout => 5
+ }.merge(options)
+
proxy = nil
proxy_host = nil
proxy_port = nil
@@ -166,363 +251,227 @@ class HttpUtil
h = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_port)
h.use_ssl = true if uri.scheme == "https"
+
+ h.read_timeout = opts[:read_timeout]
+ h.open_timeout = opts[:open_timeout]
return h
end
- # uri:: uri to query (Uri object)
- # readtimeout:: timeout for reading the response
- # opentimeout:: timeout for opening the connection
- #
- # simple get request, returns (if possible) response body following redirs
- # and caching if requested
- # if a block is given, it yields the urls it gets redirected to
- # TODO we really need something to implement proper caching
- def get(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"], cache=false)
- if uri_or_str.kind_of?(URI)
- uri = uri_or_str
- else
- uri = URI.parse(uri_or_str.to_s)
- end
- debug "Getting #{uri}"
-
- proxy = get_proxy(uri)
- proxy.open_timeout = opentimeout
- proxy.read_timeout = readtimeout
-
- begin
- proxy.start() {|http|
- yield uri.request_uri() if block_given?
- req = Net::HTTP::Get.new(uri.request_uri(), @headers)
- if uri.user and uri.password
- req.basic_auth(uri.user, uri.password)
- end
- resp = http.request(req)
- case resp
- when Net::HTTPSuccess
- if cache
- debug "Caching #{uri.to_s}"
- cache_response(uri.to_s, resp)
- end
- return resp.body
- when Net::HTTPRedirection
- if resp.key?('location')
- new_loc = URI.join(uri, resp['location'])
- debug "Redirecting #{uri} to #{new_loc}"
- yield new_loc if block_given?
- if max_redir > 0
- # If cache is an Array, we assume get was called by get_cached
- # because of a cache miss and that the first value of the Array
- # was the noexpire value. Since the cache miss might have been
- # caused by a redirection, we want to try get_cached again
- # TODO FIXME look at Python's httplib2 for a most likely
- # better way to handle all this mess
- if cache.kind_of?(Array)
- return get_cached(new_loc, readtimeout, opentimeout, max_redir-1, cache[0])
- else
- return get(new_loc, readtimeout, opentimeout, max_redir-1, cache)
- end
- else
- warning "Max redirection reached, not going to #{new_loc}"
- end
- else
- warning "Unknown HTTP redirection #{resp.inspect}"
- end
- else
- debug "HttpUtil.get return code #{resp.code} #{resp.body}"
+ def handle_response(uri, resp, opts, &block)
+ if Net::HTTPRedirection === resp && opts[:max_redir] >= 0
+ if resp.key?('location')
+ raise 'Too many redirections' if opts[:max_redir] <= 0
+ yield resp if opts[:yield] == :all && block_given?
+ loc = resp['location']
+ new_loc = URI.join(uri.to_s, loc) rescue URI.parse(loc)
+ new_opts = opts.dup
+ new_opts[:max_redir] -= 1
+ case opts[:method].to_s.downcase.intern
+ when :post, :"net::http::post"
+ new_opts[:method] = :get
end
- @last_response = resp
- return nil
- }
- rescue StandardError, Timeout::Error => e
- error "HttpUtil.get exception: #{e.inspect}, while trying to get #{uri}"
- debug e.backtrace.join("\n")
+ debug "following the redirect to #{new_loc}"
+ return get_response(new_loc, new_opts, &block)
+ else
+ warning ":| redirect w/o location?"
+ end
end
- @last_response = nil
- return nil
- end
-
- # just like the above, but only gets the head
- def head(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"])
- if uri_or_str.kind_of?(URI)
- uri = uri_or_str
+ if block_given?
+ yield(resp)
else
- uri = URI.parse(uri_or_str.to_s)
+ resp.body
end
- proxy = get_proxy(uri)
- proxy.open_timeout = opentimeout
- proxy.read_timeout = readtimeout
-
- begin
- proxy.start() {|http|
- yield uri.request_uri() if block_given?
- req = Net::HTTP::Head.new(uri.request_uri(), @headers)
- if uri.user and uri.password
- req.basic_auth(uri.user, uri.password)
- end
- resp = http.request(req)
- case resp
- when Net::HTTPSuccess
- return resp
- when Net::HTTPRedirection
- debug "Redirecting #{uri} to #{resp['location']}"
- yield resp['location'] if block_given?
- if max_redir > 0
- return head( URI.parse(resp['location']), readtimeout, opentimeout, max_redir-1)
- else
- warning "Max redirection reached, not going to #{resp['location']}"
- end
- else
- debug "HttpUtil.head return code #{resp.code}"
- end
- @last_response = resp
- return nil
- }
- rescue StandardError, Timeout::Error => e
- error "HttpUtil.head exception: #{e.inspect}, while trying to get #{uri}"
- debug e.backtrace.join("\n")
- end
- @last_response = nil
- return nil
+ return resp
end
# uri:: uri to query (Uri object or String)
# opts:: options. Currently used:
+ # :method:: request method [:get (default), :post or :head]
# :open_timeout:: open timeout for the proxy
# :read_timeout:: read timeout for the proxy
# :cache:: should we cache results?
+ # :yield:: if :final [default], call &block for the response object
+ # if :all, call &block for all intermediate redirects, too
+ # :max_redir:: how many redirects to follow before raising the exception
+ # if -1, don't follow redirects, just return them
+ # :range:: make a ranged request (usually GET). accepts a string
+ # for HTTP/1.1 "Range:" header (i.e. "bytes=0-1000")
+ # :body:: request body (usually for POST requests)
#
- # This method is used to get responses following redirections.
+ # Generic http transaction method
#
- # It will return either a Net::HTTPResponse or an error.
+ # It will return a HTTP::Response object or raise an exception
#
- # If a block is given, it will yield the response or error instead of
- # returning it
- #
- def get_response(uri_or_str, opts={}, &block)
- if uri_or_str.kind_of?(URI)
- uri = uri_or_str
- else
- uri = URI.parse(uri_or_str.to_s)
+ # If a block is given, it will yield the response (see :yield option)
+
+ def get_response(uri_or_s, options = {}, &block)
+ uri = uri_or_s.kind_of?(URI) ? uri_or_s : URI.parse(uri_or_s.to_s)
+ opts = {
+ :max_redir => @bot.config['http.max_redir'],
+ :yield => :final,
+ :cache => true,
+ :method => :GET
+ }.merge(options)
+
+ resp = nil
+ cached = nil
+
+ req_class = case opts[:method].to_s.downcase.intern
+ when :head, :"net::http::head"
+ opts[:max_redir] = -1
+ Net::HTTP::Head
+ when :get, :"net::http::get"
+ Net::HTTP::Get
+ when :post, :"net::http::post"
+ opts[:cache] = false
+ opts[:body] or raise 'post request w/o a body?'
+ warning "refusing to cache POST request" if options[:cache]
+ Net::HTTP::Post
+ else
+ warning "unsupported method #{opts[:method]}, doing GET"
+ Net::HTTP::Get
+ end
+
+ if req_class != Net::HTTP::Get && opts[:range]
+ warning "can't request ranges for #{req_class}"
+ opts.delete(:range)
end
- debug "Getting #{uri}"
- options = {
- :read_timeout => 10,
- :open_timeout => 5,
- :max_redir => @bot.config["http.max_redir"],
- :cache => false,
- :yield => :none
- }.merge(opts)
-
- cache = options[:cache]
+ cache_key = "#{opts[:range]}|#{req_class}|#{uri.to_s}"
- proxy = get_proxy(uri)
- proxy.open_timeout = options[:open_timeout]
- proxy.read_timeout = options[:read_timeout]
-
- begin
- proxy.start() {|http|
- req = Net::HTTP::Get.new(uri.request_uri(), @headers)
- if uri.user and uri.password
- req.basic_auth(uri.user, uri.password)
- end
- http.request(req) { |resp|
- case resp
- when Net::HTTPSuccess
- if cache
- debug "Caching #{uri.to_s}"
- cache_response(uri.to_s, resp)
- end
- when Net::HTTPRedirection
- if resp.key?('location')
- new_loc = URI.join(uri, resp['location']) rescue URI.parse(resp['location'])
- debug "Redirecting #{uri} to #{new_loc}"
- if options[:max_redir] > 0
- new_opts = options.dup
- new_opts[:max_redir] -= 1
- return get_response(new_loc, new_opts, &block)
- else
- raise "Too many redirections"
- end
- end
- end
- if block_given?
- yield resp
- else
- return resp
- end
- }
- }
- rescue StandardError, Timeout::Error => e
- error "HttpUtil.get_response exception: #{e.inspect}, while trying to get #{uri}"
- debug e.backtrace.join("\n")
- def e.body
- nil
- end
- if block_given?
- yield e
- else
- return e
+ if req_class != Net::HTTP::Get && req_class != Net::HTTP::Head
+ if opts[:cache]
+ warning "can't cache #{req_class.inspect} requests, working w/o cache"
+ opts[:cache] = false
end
end
- raise "This shouldn't happen"
- end
+ debug "get_response(#{uri}, #{opts.inspect})"
- def cache_response(k, resp)
- begin
- if resp.key?('pragma') and resp['pragma'] == 'no-cache'
- debug "Not caching #{k}, it has Pragma: no-cache"
- return
+ if opts[:cache] && cached = @cache[cache_key]
+ debug "got cached"
+ if !cached.expired?
+ debug "using cached"
+ cached.use
+ return handle_response(uri, cached.response, opts, &block)
end
- # TODO should we skip caching if neither last-modified nor etag are present?
- now = Time.new
- u = Hash.new
- u = Hash.new
- u[:body] = resp.body
- u[:last_modified] = nil
- u[:last_modified] = Time.httpdate(resp['date']) if resp.key?('date')
- u[:last_modified] = Time.httpdate(resp['last-modified']) if resp.key?('last-modified')
- u[:expires] = now
- u[:expires] = Time.httpdate(resp['expires']) if resp.key?('expires')
- u[:revalidate] = false
- if resp.key?('cache-control')
- # TODO max-age
- case resp['cache-control']
- when /no-cache|must-revalidate/
- u[:revalidate] = true
- end
+ end
+
+ headers = @headers.dup.merge(opts[:headers] || {})
+ headers['Range'] = opts[:range] if opts[:range]
+
+ cached.setup_headers(headers) if cached && (req_class == Net::HTTP::Get)
+ req = req_class.new(uri.request_uri, headers)
+ req.basic_auth(uri.user, uri.password) if uri.user && uri.password
+ req.body = opts[:body] if req_class == Net::HTTP::Post
+ debug "prepared request: #{req.to_hash.inspect}"
+
+ get_proxy(uri, opts).start do |http|
+ http.request(req) do |resp|
+ if Net::HTTPNotModified === resp
+ debug "not modified"
+ begin
+ cached.revalidate(resp)
+ rescue Exception => e
+ error e.message
+ error e.backtrace.join("\n")
+ end
+ debug "reusing cached"
+ resp = cached.response
+ elsif Net::HTTPServerError === resp || Net::HTTPClientError === resp
+ debug "http error, deleting cached obj" if cached
+ @cache.delete(cache_key)
+ elsif opts[:cache] && cached = CachedObject.maybe_new(resp) rescue nil
+ debug "storing to cache"
+ @cache[cache_key] = cached
+ end
+ return handle_response(uri, resp, opts, &block)
end
- u[:etag] = ""
- u[:etag] = resp['etag'] if resp.key?('etag')
- u[:count] = 1
- u[:first_use] = now
- u[:last_use] = now
- rescue => e
- error "Failed to cache #{k}/#{resp.to_hash.inspect}: #{e.inspect}"
- return
end
- @cache[k] = u
- debug "Cached #{k}/#{resp.to_hash.inspect}: #{u.inspect_no_body}"
- debug "#{@cache.size} pages (#{@cache.keys.join(', ')}) cached up to now"
end
- # For debugging purposes
- class ::Hash
- def inspect_no_body
- temp = self.dup
- temp.delete(:body)
- temp.inspect
+ # uri:: uri to query (Uri object)
+ #
+ # simple get request, returns (if possible) response body following redirs
+ # and caching if requested
+ def get(uri, opts = {}, &block)
+ begin
+ resp = get_response(uri, opts, &block)
+ raise "http error: #{resp}" unless Net::HTTPOK === resp ||
+ Net::HTTPPartialContent === resp
+ return resp.body
+ rescue Exception => e
+ error e.message
+ error e.backtrace.join("\n")
end
+ return nil
end
- def expired?(uri, readtimeout, opentimeout)
- k = uri.to_s
- debug "Checking cache validity for #{k}"
+ def head(uri, options = {}, &block)
+ opts = {:method => :head}.merge(options)
begin
- return true unless @cache.key?(k)
- u = @cache[k]
-
- # TODO we always revalidate for the time being
-
- if u[:etag].empty? and u[:last_modified].nil?
- # TODO max-age
- return true
- end
-
- proxy = get_proxy(uri)
- proxy.open_timeout = opentimeout
- proxy.read_timeout = readtimeout
-
- proxy.start() {|http|
- yield uri.request_uri() if block_given?
- headers = @headers.dup
- headers['If-None-Match'] = u[:etag] unless u[:etag].empty?
- headers['If-Modified-Since'] = u[:last_modified].rfc2822 if u[:last_modified]
- debug "Cache HEAD request headers: #{headers.inspect}"
- # FIXME TODO We might want to use a Get here
- # because if a 200 OK is returned we would get the new body
- # with one connection less ...
- req = Net::HTTP::Head.new(uri.request_uri(), headers)
- if uri.user and uri.password
- req.basic_auth(uri.user, uri.password)
- end
- resp = http.request(req)
- debug "Checking cache validity of #{u.inspect_no_body} against #{resp.inspect}/#{resp.to_hash.inspect}"
- case resp
- when Net::HTTPNotModified
- return false
- else
- return true
- end
- }
- rescue => e
- error "Failed to check cache validity for #{uri}: #{e.inspect}"
- return true
+ resp = get_response(uri, opts, &block)
+ raise "http error #{resp}" if Net::HTTPClientError === resp ||
+ Net::HTTPServerError == resp
+ return resp
+ rescue Exception => e
+ error e.message
+ error e.backtrace.join("\n")
end
+ return nil
end
- # gets a page from the cache if it's still (assumed to be) valid
- # TODO remove stale cached pages, except when called with noexpire=true
- def get_cached(uri_or_str, readtimeout=10, opentimeout=5,
- max_redir=@bot.config['http.max_redir'],
- noexpire=@bot.config['http.no_expire_cache'])
- if uri_or_str.kind_of?(URI)
- uri = uri_or_str
- else
- uri = URI.parse(uri_or_str.to_s)
- end
- debug "Getting cached #{uri}"
-
- if expired?(uri, readtimeout, opentimeout)
- debug "Cache expired"
- bod = get(uri, readtimeout, opentimeout, max_redir, [noexpire])
- bod.instance_variable_set(:@cached,false)
- else
- k = uri.to_s
- debug "Using cache"
- @cache[k][:count] += 1
- @cache[k][:last_use] = Time.now
- bod = @cache[k][:body]
- bod.instance_variable_set(:@cached,true)
- end
- unless noexpire
- remove_stale_cache
- end
- unless bod.respond_to?(:cached?)
- def bod.cached?
- return @cached
- end
+ def post(uri, data, options = {}, &block)
+ opts = {:method => :post, :body => data, :cache => false}.merge(options)
+ begin
+ resp = get_response(uri, opts, &block)
+ raise 'http error' unless Net::HTTPOK === resp
+ return resp
+ rescue Exception => e
+ error e.message
+ error e.backtrace.join("\n")
end
- return bod
+ return nil
end
- # We consider a page to be manually expired if it has no
- # etag and no last-modified and if any of the expiration
- # conditions are met (expire_time, max_cache_time, Expires)
- def manually_expired?(hash, time)
- auto = hash[:etag].empty? and hash[:last_modified].nil?
- # TODO max-age
- manual = (time - hash[:last_use] > @bot.config['http.expire_time']*60) or
- (time - hash[:first_use] > @bot.config['http.max_cache_time']*60) or
- (hash[:expires] < time)
- return (auto and manual)
+ def get_partial(uri, nbytes = @bot.config['http.info_bytes'], options = {}, &block)
+ opts = {:range => "bytes=0-#{nbytes}"}.merge(options)
+ return get(uri, opts, &block)
end
def remove_stale_cache
debug "Removing stale cache"
+ now = Time.new
+ max_last = @bot.config['http.expire_time'] * 60
+ max_first = @bot.config['http.max_cache_time'] * 60
debug "#{@cache.size} pages before"
begin
- now = Time.new
- @cache.reject! { |k, val|
- manually_expired?(val, now)
- }
+ @cache.reject! { |k, val|
+ (now - val.last_used > max_last) || (now - val.first_used > max_first)
+ }
rescue => e
error "Failed to remove stale cache: #{e.inspect}"
end
debug "#{@cache.size} pages after"
end
+
end
end
end
+
+class HttpUtilPlugin < CoreBotModule
+ def initialize(*a)
+ super(*a)
+ debug 'initializing httputil'
+ @bot.httputil = Irc::Utils::HttpUtil.new(@bot)
+ end
+
+ def cleanup
+ debug 'shutting down httputil'
+ @bot.httputil.cleanup
+ @bot.httputil = nil
+ end
+end
+
+HttpUtilPlugin.new
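
CachedObject#revalidate implements a conventional freshness heuristic: an explicit max-age wins, then an Expires header, then one-fifth of the document's age per Last-Modified, then a flat 300s. Worked through: Cache-Control: max-age=600 expires the entry 600s after the response date; a Last-Modified one hour old yields 3600/5 = 720s; no validators at all yields 300s. The same decision as a standalone sketch (freshness_lifetime is a hypothetical helper, not part of this patch):

    # returns the freshness lifetime in seconds for a Net::HTTPResponse
    def freshness_lifetime(resp, date = Time.now)
      if resp['cache-control'].to_s =~ /max-age=(\d+)/
        $1.to_i                               # explicit server policy wins
      elsif resp.key?('expires')
        Time.httpdate(resp['expires']) - date
      elsif lm = resp['last-modified']
        delta = date - Time.httpdate(lm)
        delta = 10 if delta <= 0              # clamp clock skew
        delta / 5                             # 20% of the document's age
      else
        300                                   # flat 5-minute fallback
      end
    end
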
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index 251e7986..717630e3 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -318,6 +318,7 @@ module ::Irc
end
def Utils.bot=(b)
+ debug "initializing utils"
@@bot = b
@@safe_save_dir = "#{@@bot.botclass}/safe_save"
end
@@ -523,7 +524,7 @@ module ::Irc
# FIXME what happens if some big file is returned? We should share
# code with the url plugin to only retrieve partial file content!
- xml = self.bot.httputil.get_cached(url)
+ xml = self.bot.httputil.get(url)
if xml.nil?
debug "Unable to retrieve #{url}"
next
@@ -549,3 +550,5 @@ module ::Irc
end
end
+
+Irc::Utils.bot = Irc::Plugins.manager.bot
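
The initialization that used to live in ircbot.rb now happens as a side effect of loading the core modules: utils.rb wires up Utils.bot at load time, and the new HttpUtilPlugin core module installs the httputil instance (hence the attr_accessor change in ircbot.rb below). In effect:

    # formerly in Bot#initialize:
    #   Utils.bot = self
    #   @httputil = Utils::HttpUtil.new(self)
    # now utils.rb runs, when @plugins.scan loads the core modules:
    Irc::Utils.bot = Irc::Plugins.manager.bot
    # and HttpUtilPlugin#initialize installs:
    #   @bot.httputil = Irc::Utils::HttpUtil.new(@bot)
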
diff --git a/lib/rbot/ircbot.rb b/lib/rbot/ircbot.rb
index 93d65200..54782f70 100644
--- a/lib/rbot/ircbot.rb
+++ b/lib/rbot/ircbot.rb
@@ -124,7 +124,7 @@ class Bot
# bot's httputil help object, for fetching resources via http. Sets up
# proxies etc as defined by the bot configuration/environment
- attr_reader :httputil
+ attr_accessor :httputil
# server we are connected to
# TODO multiserver
@@ -452,9 +452,6 @@ class Bot
@plugins.scan
- Utils.bot = self
- @httputil = Utils::HttpUtil.new(self)
-
# Channels where we are quiet
# Array of channels names where the bot should be quiet
# '*' means all channels