From 616ce7081f6edef31c60e3aed465de3a3a3d8fd2 Mon Sep 17 00:00:00 2001 From: Chris Gahan Date: Wed, 22 Feb 2006 16:17:05 +0000 Subject: Fixed some bugs (should stop showing # objects, handles MovedPermanently which is a different errorcode from a Redirect...) --- data/rbot/plugins/url.rb | 78 ++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 36 deletions(-) (limited to 'data') diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb index 1e72a3a1..821b0cc7 100644 --- a/data/rbot/plugins/url.rb +++ b/data/rbot/plugins/url.rb @@ -312,31 +312,31 @@ class UrlPlugin < Plugin title = title[0..255] if title.length > 255 "[Link Info] title: #{title}" end - - def read_data_from_response(response, amount) - - amount_read = 0 - chunks = [] - - response.read_body do |chunk| # read body now - - amount_read += chunk.length - - if amount_read > amount - amount_of_overflow = amount_read - amount - chunk = chunk[0...-amount_of_overflow] - end - - chunks << chunk - - break if amount_read >= amount - - end - - chunks.join('') - - end - + + def read_data_from_response(response, amount) + + amount_read = 0 + chunks = [] + + response.read_body do |chunk| # read body now + + amount_read += chunk.length + + if amount_read > amount + amount_of_overflow = amount_read - amount + chunk = chunk[0...-amount_of_overflow] + end + + chunks << chunk + + break if amount_read >= amount + + end + + chunks.join('') + + end + def get_title_for_url(uri_str, depth=10) # This god-awful mess is what the ruby http library has reduced me to. @@ -349,28 +349,30 @@ class UrlPlugin < Plugin puts "+ Getting #{uri_str}" url = URI.parse(uri_str) return if url.scheme !~ /https?/ + + title = nil puts "+ connecting to #{url.host}:#{url.port}" http = @bot.httputil.get_proxy(url) - title = http.start { |http| - url.path = '/' if url.path == '' - - http.request_get(url.path, "User-Agent" => "rbot-url_plugin/666.666") { |response| + http.start { |http| + url.path = '/' if url.path == '' + + http.request_get(url.path, "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)") { |response| case response - when Net::HTTPRedirection then + when Net::HTTPRedirection, Net::HTTPMovedPermanently then # call self recursively if this is a redirect redirect_to = response['location'] || './' puts "+ redirect location: #{redirect_to.inspect}" url = URI.join url.to_s, redirect_to puts "+ whee, redirecting to #{url.to_s}!" - title = get_title_for_url(url.to_s, depth-1) + return get_title_for_url(url.to_s, depth-1) when Net::HTTPSuccess then if response['content-type'] =~ /^text\// # since the content is 'text/*' and is small enough to # be a webpage, retrieve the title from the page - puts "+ getting #{url.request_uri}" - data = read_data_from_response(response, 50000) + puts "+ getting #{url.request_uri}" + data = read_data_from_response(response, 50000) return get_title_from_html(data) else # content doesn't have title, just display info. @@ -381,10 +383,14 @@ class UrlPlugin < Plugin return "[Link Info] Error getting link (#{response.code} - #{response.message})" when Net::HTTPServerError then return "[Link Info] Error getting link (#{response.code} - #{response.message})" - end # end of "case response" - + else + return nil + end # end of "case response" + } # end of request block - } # end of http start block + } # end of http start block + + return title rescue SocketError => e return "[Link Info] Error connecting to site (#{e.message})" -- cgit v1.2.3