From 48fc730b582aebc7f2a8a57e07e1d27914de1e55 Mon Sep 17 00:00:00 2001
From: Chris Gahan <chris@ill-logic.com>
Date: Mon, 30 Jan 2006 21:45:54 +0000
Subject: A fix for the case where a crappy webserver labels a giant file (like
 a .rar) as text/plain and the bot tries to download it.

---
 data/rbot/plugins/url.rb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb
index f46cb205..2b5b468e 100644
--- a/data/rbot/plugins/url.rb
+++ b/data/rbot/plugins/url.rb
@@ -52,14 +52,14 @@ class UrlPlugin < Plugin
           puts "+ whee, redirecting to #{url.to_s}!"
           title = get_title_for_url(url.to_s)
         when Net::HTTPSuccess then
-          if head['content-type'] =~ /^text\//
-            # content is 'text/*'
-            # retrieve the title from the page
+          if head['content-type'] =~ /^text\// and (not head['content-length'] or head['content-length'].to_i < 400000)
+            # since the content is 'text/*' and is small enough to
+            # be a webpage, retrieve the title from the page
             puts "+ getting #{url.request_uri}"
             response = http.request_get(url.request_uri)
             return get_title_from_html(response.body)
           else
-            # content isn't 'text/*'... display info about the file.
+            # content isn't 'text/*' or is too large to fetch for a title; just display info about the file.
             size = head['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
             #lastmod = head['last-modified']
             return "[Link Info] type: #{head['content-type']}#{size ? ", size: #{size} bytes" : ""}"
-- 
cgit v1.2.3