diff options
author | Dmitry Kim <dmitry point kim at gmail point com> | 2007-04-26 22:56:14 +0000 |
---|---|---|
committer | Dmitry Kim <dmitry point kim at gmail point com> | 2007-04-26 22:56:14 +0000 |
commit | c9298e20540b63bed9bf994a523750fa8d1d7c42 (patch) | |
tree | 8aa331c6c7ea74a9fdeef259da17f41eab157c8c | |
parent | a2e8fbf19bf49b75d1f1edee82880e0f2643aa5d (diff) |
+ (httputil) bruteforce truncated chars when guessing the encoding
-rw-r--r-- | lib/rbot/core/utils/httputil.rb | 20 |
1 file changed, 14 insertions, 6 deletions
```diff
diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb
index 3c949686..448c8da1 100644
--- a/lib/rbot/core/utils/httputil.rb
+++ b/lib/rbot/core/utils/httputil.rb
@@ -61,13 +61,21 @@ module ::Net
     def body_to_utf(str)
       charsets = self.body_charset(str) or return str
 
-      charsets.reverse_each { |charset|
-        begin
-          return Iconv.iconv('utf-8//ignore', charset, str).first
-        rescue
-          debug "conversion failed for #{charset}"
+      charsets.reverse_each do |charset|
+        # XXX: this one is really ugly, but i don't know how to make it better
+        # -jsn
+
+        0.upto(5) do |off|
+          begin
+            debug "trying #{charset} / offset #{off}"
+            return Iconv.iconv('utf-8//ignore',
+                               charset,
+                               str.slice(0 .. (-1 - off))).first
+          rescue
+            debug "conversion failed for #{charset} / offset #{off}"
+          end
         end
-      }
+      end
       return str
     end
 
```