diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-07-07 23:14:24 +0200 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-07-07 23:17:40 +0200 |
commit | 61a22b729532316193612eac4f948ff11aa57f50 (patch) | |
tree | 440fac36aa27f62c70b28e354eff9f4b97a75d3b /lib | |
parent | be2e4072dcc4a2324067a571096592bd075c63e4 (diff) |
httputil: don't fail when b0rked servers put the charset in the content-encoding
Diffstat (limited to 'lib')
-rw-r--r-- | lib/rbot/core/utils/httputil.rb | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb index 9df8fc2c..ae16416d 100644 --- a/lib/rbot/core/utils/httputil.rb +++ b/lib/rbot/core/utils/httputil.rb @@ -109,7 +109,16 @@ module ::Net # TODO # debug "full inflation failed (#{e}), trying to recover as much as possible" end + when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i + # B0rked servers (Freshmeat being one of them) sometimes return the charset + # in the content-encoding; in this case we assume that the document has + # a standard content-encoding + old_hsh = self.to_hash + self['content-type']= self['content-type']+"; charset="+method.downcase + warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}" + return str else + debug self.to_hash raise "Unhandled content encoding #{method}" end end |