diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-03-25 00:22:00 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-03-25 00:22:00 +0000 |
commit | b4d55669782d34c59688e7413402ab489bb0791e (patch) | |
tree | 47217c55a5778a088212a265b785220cb157ad94 | |
parent | 0423812d31e5c533468a7d4c284932bfec6fcceb (diff) |
url plugin: make the maximum amount of data retrieved when looking for a title customizable. Return standard header info if no title is found.
-rw-r--r-- | data/rbot/plugins/url.rb | 20 | ||||
-rw-r--r-- | lib/rbot/core/utils/httputil.rb | 7 |
2 files changed, 14 insertions, 13 deletions
diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb index 0d85d473..f9e64efb 100644 --- a/data/rbot/plugins/url.rb +++ b/data/rbot/plugins/url.rb @@ -47,20 +47,18 @@ class UrlPlugin < Plugin debug "+ getting #{url.request_uri}" # we look for the title in the first 4k bytes - # TODO make the amount of data configurable - response.partial_body(4096) { |part| + response.partial_body(@bot.config['http.info_bytes']) { |part| title = get_title_from_html(part) return title if title } - # if nothing was found, return nothing - return - else - unless @bot.config['url.titles_only'] - # content doesn't have title, just display info. - size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') - size = size ? ", size: #{size} bytes" : "" - return "type: #{response['content-type']}#{size}" - end + # if nothing was found, provide more basic info + end + debug response.to_hash.inspect + unless @bot.config['url.titles_only'] + # content doesn't have title, just display info. + size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil + size = size ? ", size: #{size} bytes" : "" + return "type: #{response['content-type']}#{size}" end when Net::HTTPResponse return "Error getting link (#{response.code} - #{response.message})" diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb index 6ca12d5b..904e1941 100644 --- a/lib/rbot/core/utils/httputil.rb +++ b/lib/rbot/core/utils/httputil.rb @@ -23,14 +23,14 @@ module ::Net class HTTPResponse # Read chunks from the body until we have at least _size_ bytes, yielding # the partial text at each chunk. Return the partial body. 
- def partial_body(size, &block) + def partial_body(size=0, &block) partial = String.new self.read_body { |chunk| partial << chunk yield partial - break if size and partial.length >= size + break if size and size > 0 and partial.length >= size } return partial @@ -75,6 +75,9 @@ class HttpUtil BotConfig.register BotConfigIntegerValue.new('http.no_expire_cache', :default => false, :desc => "Set this to true if you want the bot to never expire the cached pages") + BotConfig.register BotConfigIntegerValue.new('http.info_bytes', + :default => 4096, + :desc => "How many bytes to download from a web page to find some information. Set to 0 to let the bot download the whole page.") def initialize(bot) @bot = bot |