diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-04-11 18:15:15 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-04-11 18:15:15 +0000 |
commit | 053eb36e834429af37555e7506ef3c6162ea4648 (patch) | |
tree | fc5a0d8cd9fb9dc1741527acccc367839556c881 | |
parent | a9b32eae21a10254f67653c8ce92076300ba670b (diff) |
url plugin: revert to block get_response and partial_body to work around sites which return 400 on partial content
-rw-r--r-- | data/rbot/plugins/url.rb | 82 |
1 files changed, 41 insertions, 41 deletions
diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb index a1a325eb..0a5ef74e 100644 --- a/data/rbot/plugins/url.rb +++ b/data/rbot/plugins/url.rb @@ -36,56 +36,56 @@ class UrlPlugin < Plugin return if url.scheme !~ /https?/ title = nil + extra = String.new begin - range = @bot.config['http.info_bytes'] - response = @bot.httputil.get_response(url, :range => "bytes=0-#{range}") - if response.code != "206" && response.code != "200" - return "Error getting link (#{response.code} - #{response.message})" - end - extra = String.new - - if response['content-type'] =~ /^text\// - - body = response.body.slice(0, range) - title = String.new - - # since the content is 'text/*' and is small enough to - # be a webpage, retrieve the title from the page - debug "+ getting #{url.request_uri}" - - title = get_title_from_html(body) - if @bot.config['url.first_par'] - first_par = Utils.ircify_first_html_par(body, :strip => title) - extra << ", #{Bold}text#{Bold}: #{first_par}" unless first_par.empty? - return "#{Bold}title#{Bold}: #{title}#{extra}" if title - else - return "#{Bold}title#{Bold}: #{title}" if title - end - - # if nothing was found, provide more basic info - end + debug "+ getting #{url.request_uri}" + @bot.httputil.get_response(url) { |resp| + case resp + when Net::HTTPSuccess + + if resp['content-type'] =~ /^text\/|(?:x|ht)ml/ + # The page is text or HTML, so we can try finding a title and, if + # requested, the first par. + # + # We act differently depending on whether we want the first par or + # not: in the first case we download the initial part and the parse + # it; in the second case we only download as much as we need to find + # the title + # + if @bot.config['url.first_par'] + partial = resp.partial_body(@bot.config['http.info_bytes']) + title = get_title_from_html(partial) + first_par = Utils.ircify_first_html_par(partial, :strip => title) + extra << ", #{Bold}text#{Bold}: #{first_par}" unless first_par.empty? + return "#{Bold}title#{Bold}: #{title}#{extra}" if title + else + resp.partial_body(@bot.config['http.info_bytes']) { |part| + title = get_title_from_html(part) + return "#{Bold}title#{Bold}: #{title}" if title + } + end + # if nothing was found, provide more basic info, as for non-html pages + end - debug response.to_hash.inspect + debug resp.to_hash.inspect - enc = response['content-encoding'] + enc = resp['content-encoding'] - extra << ", #{Bold}encoding#{Bold}: #{enc}" if enc + extra << ", #{Bold}encoding#{Bold}: #{enc}" if enc - unless @bot.config['url.titles_only'] - # content doesn't have title, just display info. - size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil - if response.code == '206' - if response['content-range'] =~ /bytes\s*[^\/]+\/(\d+)/ - size = $1.to_s.reverse.scan(/\d{1,3}/).join(',').reverse + unless @bot.config['url.titles_only'] + # content doesn't have title, just display info. + size = resp['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil + size = size ? ", #{Bold}size#{Bold}: #{size} bytes" : "" + return "#{Bold}type#{Bold}: #{resp['content-type']}#{size}#{extra}" end + else + return "Error getting link (#{resp.code} - #{resp.message})" end - size = size ? ", #{Bold}size#{Bold}: #{size} bytes" : "" - return "#{Bold}type#{Bold}: #{response['content-type']}#{size}#{extra}" - end + } rescue Exception => e - error e.inspect - debug e.backtrace.join("\n") + error e return "Error connecting to site (#{e.message})" end end |