summary refs log tree commit diff
diff options
context:
space:
mode:
author Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-03-25 00:22:00 +0000
committer Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-03-25 00:22:00 +0000
commit b4d55669782d34c59688e7413402ab489bb0791e (patch)
tree 47217c55a5778a088212a265b785220cb157ad94
parent 0423812d31e5c533468a7d4c284932bfec6fcceb (diff)
url plugin: customizable max amount of data to retrieve when looking for a title; return standard header info if no title is found
-rw-r--r-- data/rbot/plugins/url.rb 20
-rw-r--r-- lib/rbot/core/utils/httputil.rb 7
2 files changed, 14 insertions, 13 deletions
diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb
index 0d85d473..f9e64efb 100644
--- a/data/rbot/plugins/url.rb
+++ b/data/rbot/plugins/url.rb
@@ -47,20 +47,18 @@ class UrlPlugin < Plugin
debug "+ getting #{url.request_uri}"
# we look for the title in the first 4k bytes
- # TODO make the amount of data configurable
- response.partial_body(4096) { |part|
+ response.partial_body(@bot.config['http.info_bytes']) { |part|
title = get_title_from_html(part)
return title if title
}
- # if nothing was found, return nothing
- return
- else
- unless @bot.config['url.titles_only']
- # content doesn't have title, just display info.
- size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
- size = size ? ", size: #{size} bytes" : ""
- return "type: #{response['content-type']}#{size}"
- end
+ # if nothing was found, provide more basic info
+ end
+ debug response.to_hash.inspect
+ unless @bot.config['url.titles_only']
+ # content doesn't have title, just display info.
+ size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
+ size = size ? ", size: #{size} bytes" : ""
+ return "type: #{response['content-type']}#{size}"
end
when Net::HTTPResponse
return "Error getting link (#{response.code} - #{response.message})"
diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb
index 6ca12d5b..904e1941 100644
--- a/lib/rbot/core/utils/httputil.rb
+++ b/lib/rbot/core/utils/httputil.rb
@@ -23,14 +23,14 @@ module ::Net
class HTTPResponse
# Read chunks from the body until we have at least _size_ bytes, yielding
# the partial text at each chunk. Return the partial body.
- def partial_body(size, &block)
+ def partial_body(size=0, &block)
partial = String.new
self.read_body { |chunk|
partial << chunk
yield partial
- break if size and partial.length >= size
+ break if size and size > 0 and partial.length >= size
}
return partial
@@ -75,6 +75,9 @@ class HttpUtil
BotConfig.register BotConfigIntegerValue.new('http.no_expire_cache',
:default => false,
:desc => "Set this to true if you want the bot to never expire the cached pages")
+ BotConfig.register BotConfigIntegerValue.new('http.info_bytes',
+ :default => 4096,
+ :desc => "How many bytes to download from a web page to find some information. Set to 0 to let the bot download the whole page.")
def initialize(bot)
@bot = bot