summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matthias Hecker <apoc@sixserv.org> 2013-12-06 04:28:25 +0000
committer Matthias Hecker <apoc@sixserv.org> 2013-12-06 04:28:25 +0000
commit be96e7c59beff09af772e6a82431bb31dfb39b02 (patch)
tree fb97c2b7538e1e86f40218aad087d56674d2f348
parent 91d35a361431873f406bc13e39018894347ffa28 (diff)
httputil: fixes encoding issues with 1.9+
(incompatible with 1.8)
-rw-r--r-- lib/rbot/core/utils/httputil.rb 33
1 files changed, 14 insertions, 19 deletions
diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb
index be5c085a..1b4c3526 100644
--- a/lib/rbot/core/utils/httputil.rb
+++ b/lib/rbot/core/utils/httputil.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
#-- vim:sw=2:et
#++
#
@@ -10,12 +11,6 @@
require 'resolv'
require 'net/http'
require 'cgi'
-begin
- require 'iconv'
-rescue LoadError => e
- error "Couldn't load 'iconv': #{e}"
- error "Non-UTF-8 webpages will not be properly supported"
-end
begin
require 'net/https'
@@ -72,23 +67,22 @@ module ::Net
def body_to_utf(str)
charsets = self.body_charset(str) or return str
- return str unless defined? Iconv
charsets.reverse_each do |charset|
- # XXX: this one is really ugly, but i don't know how to make it better
- # -jsn
-
- 0.upto(5) do |off|
- begin
- debug "trying #{charset} / offset #{off}"
- return Iconv.iconv('utf-8//ignore',
- charset,
- str.slice(0 .. (-1 - off))).first
- rescue
- debug "conversion failed for #{charset} / offset #{off}"
+ begin
+ debug "try decoding using #{charset}"
+ str.force_encoding(charset)
+ tmp = str.encode(Encoding::UTF_8)
+ if tmp
+ str = tmp
+ break
end
+ rescue
+ error 'failed to use encoding'
+ error $!
end
end
+
return str
end
@@ -105,7 +99,8 @@ module ::Net
# If we can't unpack the whole stream (e.g. because we're doing a
# partial read
debug "full gunzipping failed (#{e}), trying to recover as much as possible"
- ret = ""
+ ret = ''
+ ret.force_encoding(Encoding::ASCII_8BIT)
begin
Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
ret << byte