summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Kim <dmitry point kim at gmail point com>2007-04-26 22:56:14 +0000
committerDmitry Kim <dmitry point kim at gmail point com>2007-04-26 22:56:14 +0000
commitc9298e20540b63bed9bf994a523750fa8d1d7c42 (patch)
tree8aa331c6c7ea74a9fdeef259da17f41eab157c8c
parenta2e8fbf19bf49b75d1f1edee82880e0f2643aa5d (diff)
+ (httputil) bruteforce truncated chars when guessing the encoding
-rw-r--r--lib/rbot/core/utils/httputil.rb20
1 files changed, 14 insertions, 6 deletions
diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb
index 3c949686..448c8da1 100644
--- a/lib/rbot/core/utils/httputil.rb
+++ b/lib/rbot/core/utils/httputil.rb
@@ -61,13 +61,21 @@ module ::Net
def body_to_utf(str)
charsets = self.body_charset(str) or return str
- charsets.reverse_each { |charset|
- begin
- return Iconv.iconv('utf-8//ignore', charset, str).first
- rescue
- debug "conversion failed for #{charset}"
+ charsets.reverse_each do |charset|
+ # XXX: this one is really ugly, but i don't know how to make it better
+ # -jsn
+
+ 0.upto(5) do |off|
+ begin
+ debug "trying #{charset} / offset #{off}"
+ return Iconv.iconv('utf-8//ignore',
+ charset,
+ str.slice(0 .. (-1 - off))).first
+ rescue
+ debug "conversion failed for #{charset} / offset #{off}"
+ end
end
- }
+ end
return str
end