From e935773b3e115d2d33e6d32f488578c650428ed2 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Tue, 6 Feb 2007 11:27:38 +0000 Subject: Create an utils subdir in core, which will store all utility files that can be reloaded without quitting the bot, and move httputil and utils to it --- lib/rbot/core/utils/httputil.rb | 404 ++++++++++++++++++++++++++++++++++++++ lib/rbot/core/utils/utils.rb | 419 ++++++++++++++++++++++++++++++++++++++++ lib/rbot/httputil.rb | 404 -------------------------------------- lib/rbot/ircbot.rb | 12 +- lib/rbot/utils.rb | 419 ---------------------------------------- 5 files changed, 830 insertions(+), 828 deletions(-) create mode 100644 lib/rbot/core/utils/httputil.rb create mode 100644 lib/rbot/core/utils/utils.rb delete mode 100644 lib/rbot/httputil.rb delete mode 100644 lib/rbot/utils.rb (limited to 'lib/rbot') diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb new file mode 100644 index 00000000..42b7fdb8 --- /dev/null +++ b/lib/rbot/core/utils/httputil.rb @@ -0,0 +1,404 @@ +module ::Irc +module Utils + +require 'resolv' +require 'net/http' +require 'net/https' +Net::HTTP.version_1_2 + +# class for making http requests easier (mainly for plugins to use) +# this class can check the bot proxy configuration to determine if a proxy +# needs to be used, which includes support for per-url proxy configuration. 
# Helper class that makes HTTP requests easier (mainly for plugins to use).
#
# It reads the bot's http.* configuration keys (registered below) to decide
# whether a request must go through a proxy, with per-host include/exclude
# lists, follows redirections up to a configurable limit, and keeps an
# in-memory cache of fetched pages which is revalidated with conditional
# HEAD requests.
class HttpUtil
    BotConfig.register BotConfigBooleanValue.new('http.use_proxy',
      :default => false, :desc => "should a proxy be used for HTTP requests?")
    BotConfig.register BotConfigStringValue.new('http.proxy_uri', :default => false,
      :desc => "Proxy server to use for HTTP requests (URI, e.g http://proxy.host:port)")
    BotConfig.register BotConfigStringValue.new('http.proxy_user',
      :default => nil,
      :desc => "User for authenticating with the http proxy (if required)")
    BotConfig.register BotConfigStringValue.new('http.proxy_pass',
      :default => nil,
      :desc => "Password for authenticating with the http proxy (if required)")
    BotConfig.register BotConfigArrayValue.new('http.proxy_include',
      :default => [],
      :desc => "List of regexps to check against a URI's hostname/ip to see if we should use the proxy to access this URI. All URIs are proxied by default if the proxy is set, so this is only required to re-include URIs that might have been excluded by the exclude list. e.g. exclude /.*\.foo\.com/, include bar\.foo\.com")
    BotConfig.register BotConfigArrayValue.new('http.proxy_exclude',
      :default => [],
      :desc => "List of regexps to check against a URI's hostname/ip to see if we should use avoid the proxy to access this URI and access it directly")
    BotConfig.register BotConfigIntegerValue.new('http.max_redir',
      :default => 5,
      :desc => "Maximum number of redirections to be used when getting a document")
    BotConfig.register BotConfigIntegerValue.new('http.expire_time',
      :default => 60,
      :desc => "After how many minutes since last use a cached document is considered to be expired")
    BotConfig.register BotConfigIntegerValue.new('http.max_cache_time',
      :default => 60*24,
      :desc => "After how many minutes since first use a cached document is considered to be expired")
    # This is an on/off switch (default false, "set this to true"), so it is
    # registered as a boolean value rather than an integer one.
    BotConfig.register BotConfigBooleanValue.new('http.no_expire_cache',
      :default => false,
      :desc => "Set this to true if you want the bot to never expire the cached pages")

  # bot:: the bot instance; this class only reads its +config+
  def initialize(bot)
    @bot = bot
    @cache = Hash.new
    @headers = {
      'User-Agent' => "rbot http util #{$version} (http://linuxbrit.co.uk/rbot/)",
    }
    @last_response = nil
  end

  # The response object of the most recent request performed by #get or
  # #head, or nil if that request raised an exception.
  attr_reader :last_response
  # The default headers sent with every request
  attr_reader :headers

  # uri:: the URI that is going to be requested
  #
  # if http.proxy_include or http.proxy_exclude are set, then examine the
  # uri to see if this is a proxied uri
  # the in/excludes are a list of regexps, and each regexp is checked against
  # the server name, and its IP addresses. The include list is checked last,
  # so it overrides the exclude list.
  def proxy_required(uri)
    use_proxy = true
    excludes = @bot.config["http.proxy_exclude"]
    includes = @bot.config["http.proxy_include"]
    return use_proxy if excludes.empty? && includes.empty?

    list = [uri.host]
    begin
      list.concat Resolv.getaddresses(uri.host)
    rescue StandardError => err
      # best effort: we can still match against the host name alone
      warning "couldn't resolve host #{uri.host}"
    end

    use_proxy = false if matches_any?(excludes, list)
    use_proxy = true if matches_any?(includes, list)
    debug "using proxy for uri #{uri}?: #{use_proxy}"
    return use_proxy
  end

  # uri:: Uri to create a proxy for
  #
  # return a net/http Proxy object, which is configured correctly for
  # proxying based on the bot's proxy configuration.
  # This will include per-url proxy configuration based on the bot config
  # +http_proxy_include/exclude+ options.
  def get_proxy(uri)
    proxy = nil
    proxy_host = nil
    proxy_port = nil
    proxy_user = nil
    proxy_pass = nil

    if @bot.config["http.use_proxy"]
      # the configured proxy takes precedence over the environment one
      proxy = parse_proxy_uri(ENV['http_proxy']) if ENV['http_proxy']
      if @bot.config["http.proxy_uri"]
        proxy = parse_proxy_uri(@bot.config["http.proxy_uri"])
      end
      if proxy
        debug "proxy is set to #{proxy.host} port #{proxy.port}"
        if proxy_required(uri)
          proxy_host = proxy.host
          proxy_port = proxy.port
          proxy_user = @bot.config["http.proxy_user"]
          proxy_pass = @bot.config["http.proxy_pass"]
        end
      end
    end

    # The last argument is the proxy password (it used to be, mistakenly,
    # the proxy port, which broke authenticated proxies).
    h = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_pass)
    h.use_ssl = true if uri.scheme == "https"
    return h
  end

  # uri_or_str:: uri to query (URI object, or anything whose to_s is a URI)
  # readtimeout:: timeout for reading the response
  # opentimeout:: timeout for opening the connection
  # max_redir:: how many redirections we are still allowed to follow
  # cache:: if true, cache a successful response. If it is an Array we assume
  #         we were called by get_cached (see below)
  #
  # simple get request, returns (if possible) response body following redirs
  # and caching if requested; returns nil on failure, in which case
  # last_response holds the offending response (or nil after an exception)
  # if a block is given, it yields the urls it gets redirected to
  # TODO we really need something to implement proper caching
  def get(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"], cache=false)
    uri = to_uri(uri_or_str)
    debug "Getting #{uri}"

    proxy = get_proxy(uri)
    proxy.open_timeout = opentimeout
    proxy.read_timeout = readtimeout

    begin
      proxy.start() {|http|
        yield uri.request_uri() if block_given?
        req = Net::HTTP::Get.new(uri.request_uri(), @headers)
        if uri.user and uri.password
          req.basic_auth(uri.user, uri.password)
        end
        resp = http.request(req)
        # record the response right away, so that last_response is never
        # left pointing at the response of some earlier request
        @last_response = resp
        case resp
        when Net::HTTPSuccess
          if cache
            debug "Caching #{uri.to_s}"
            cache_response(uri.to_s, resp)
          end
          return resp.body
        when Net::HTTPRedirection
          if resp.key?('location')
            new_loc = URI.join(uri, resp['location'])
            debug "Redirecting #{uri} to #{new_loc}"
            yield new_loc if block_given?
            if max_redir > 0
              # If cache is an Array, we assume get was called by get_cached
              # because of a cache miss and that the first value of the Array
              # was the noexpire value. Since the cache miss might have been
              # caused by a redirection, we want to try get_cached again
              # TODO FIXME look at Python's httplib2 for a most likely
              # better way to handle all this mess
              if cache.kind_of?(Array)
                return get_cached(new_loc, readtimeout, opentimeout, max_redir-1, cache[0])
              else
                return get(new_loc, readtimeout, opentimeout, max_redir-1, cache)
              end
            else
              warning "Max redirection reached, not going to #{new_loc}"
            end
          else
            warning "Unknown HTTP redirection #{resp.inspect}"
          end
        else
          debug "HttpUtil.get return code #{resp.code} #{resp.body}"
        end
        return nil
      }
    rescue StandardError, Timeout::Error => e
      error "HttpUtil.get exception: #{e.inspect}, while trying to get #{uri}"
      debug e.backtrace.join("\n")
    end
    @last_response = nil
    return nil
  end

  # just like the above, but only gets the head: returns the response object
  # (not the body) on success, nil otherwise
  def head(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"])
    uri = to_uri(uri_or_str)

    proxy = get_proxy(uri)
    proxy.open_timeout = opentimeout
    proxy.read_timeout = readtimeout

    begin
      proxy.start() {|http|
        yield uri.request_uri() if block_given?
        req = Net::HTTP::Head.new(uri.request_uri(), @headers)
        if uri.user and uri.password
          req.basic_auth(uri.user, uri.password)
        end
        resp = http.request(req)
        @last_response = resp
        case resp
        when Net::HTTPSuccess
          return resp
        when Net::HTTPRedirection
          if resp.key?('location')
            debug "Redirecting #{uri} to #{resp['location']}"
            yield resp['location'] if block_given?
            if max_redir > 0
              # resolve against the current uri, like get does, so that
              # relative Location headers are followed too
              new_loc = URI.join(uri.to_s, resp['location'])
              return head(new_loc, readtimeout, opentimeout, max_redir-1)
            else
              warning "Max redirection reached, not going to #{resp['location']}"
            end
          else
            warning "Unknown HTTP redirection #{resp.inspect}"
          end
        else
          debug "HttpUtil.head return code #{resp.code}"
        end
        return nil
      }
    rescue StandardError, Timeout::Error => e
      error "HttpUtil.head exception: #{e.inspect}, while trying to get #{uri}"
      debug e.backtrace.join("\n")
    end
    @last_response = nil
    return nil
  end

  # k:: cache key (the uri, as a string)
  # resp:: the successful response to be cached
  #
  # stores the body of _resp_ in the cache together with the validators
  # (etag, last-modified, expires) and usage statistics needed to decide
  # later on if the entry is still good. Responses marked with
  # Pragma: no-cache are not stored.
  def cache_response(k, resp)
    begin
      if resp.key?('pragma') and resp['pragma'] == 'no-cache'
        debug "Not caching #{k}, it has Pragma: no-cache"
        return
      end
      # TODO should we skip caching if neither last-modified nor etag are present?
      now = Time.new
      u = Hash.new
      u[:body] = resp.body
      # Last-Modified wins over Date when both are present
      u[:last_modified] = nil
      u[:last_modified] = Time.httpdate(resp['date']) if resp.key?('date')
      u[:last_modified] = Time.httpdate(resp['last-modified']) if resp.key?('last-modified')
      u[:expires] = now
      u[:expires] = Time.httpdate(resp['expires']) if resp.key?('expires')
      u[:revalidate] = false
      if resp.key?('cache-control')
        # TODO max-age
        case resp['cache-control']
        when /no-cache|must-revalidate/
          u[:revalidate] = true
        end
      end
      u[:etag] = ""
      u[:etag] = resp['etag'] if resp.key?('etag')
      u[:count] = 1
      u[:first_use] = now
      u[:last_use] = now
    rescue => e
      error "Failed to cache #{k}/#{resp.to_hash.inspect}: #{e.inspect}"
      return
    end
    @cache[k] = u
    debug "Cached #{k}/#{resp.to_hash.inspect}: #{u.inspect_no_body}"
    debug "#{@cache.size} pages (#{@cache.keys.join(', ')}) cached up to now"
  end

  # For debugging purposes
  class ::Hash
    # like inspect, but leaving out the (potentially huge) :body entry
    def inspect_no_body
      temp = self.dup
      temp.delete(:body)
      temp.inspect
    end
  end

  # uri:: URI object of the page to check
  # readtimeout:: timeout for reading the response
  # opentimeout:: timeout for opening the connection
  #
  # returns false only if the page is cached and the server confirms, with a
  # 304 Not Modified to a conditional HEAD request, that the cached copy is
  # still current; returns true in any other case, errors included
  def expired?(uri, readtimeout, opentimeout)
    k = uri.to_s
    debug "Checking cache validity for #{k}"
    begin
      return true unless @cache.key?(k)
      u = @cache[k]

      # TODO we always revalidate for the time being

      if u[:etag].empty? && u[:last_modified].nil?
        # nothing to revalidate against
        # TODO max-age
        return true
      end

      proxy = get_proxy(uri)
      proxy.open_timeout = opentimeout
      proxy.read_timeout = readtimeout

      proxy.start() {|http|
        yield uri.request_uri() if block_given?
        headers = @headers.dup
        headers['If-None-Match'] = u[:etag] unless u[:etag].empty?
        # HTTP wants HTTP-date format (... GMT) here, not the RFC 2822 one
        headers['If-Modified-Since'] = u[:last_modified].httpdate if u[:last_modified]
        debug "Cache HEAD request headers: #{headers.inspect}"
        # FIXME TODO We might want to use a Get here
        # because if a 200 OK is returned we would get the new body
        # with one connection less ...
        req = Net::HTTP::Head.new(uri.request_uri(), headers)
        if uri.user and uri.password
          req.basic_auth(uri.user, uri.password)
        end
        resp = http.request(req)
        debug "Checking cache validity of #{u.inspect_no_body} against #{resp.inspect}/#{resp.to_hash.inspect}"
        case resp
        when Net::HTTPNotModified
          return false
        else
          return true
        end
      }
    rescue StandardError, Timeout::Error => e
      error "Failed to check cache validity for #{uri}: #{e.inspect}"
      return true
    end
  end

  # gets a page from the cache if it's still (assumed to be) valid, fetching
  # (and caching) it otherwise. The returned body answers to cached?, which
  # tells if it came from the cache. Returns nil if the page could not be
  # retrieved.
  # Stale cached pages are removed, except when called with noexpire=true
  def get_cached(uri_or_str, readtimeout=10, opentimeout=5,
                 max_redir=@bot.config['http.max_redir'],
                 noexpire=@bot.config['http.no_expire_cache'])
    uri = to_uri(uri_or_str)
    debug "Getting cached #{uri}"

    if expired?(uri, readtimeout, opentimeout)
      debug "Cache expired"
      bod = get(uri, readtimeout, opentimeout, max_redir, [noexpire])
      # A body that already answers to cached? was handed back by a nested
      # get_cached (after a redirection), which flagged it accurately.
      # A failed fetch gives nil, which must not be tagged at all.
      unless bod.nil? || bod.respond_to?(:cached?)
        bod.instance_variable_set(:@cached,false)
      end
    else
      k = uri.to_s
      debug "Using cache"
      @cache[k][:count] += 1
      @cache[k][:last_use] = Time.now
      bod = @cache[k][:body]
      bod.instance_variable_set(:@cached,true)
    end
    unless noexpire
      remove_stale_cache
    end
    return nil if bod.nil?
    unless bod.respond_to?(:cached?)
      def bod.cached?
        return @cached
      end
    end
    return bod
  end

  # hash:: a cache entry, as built by cache_response
  # time:: the time to check against (usually now)
  #
  # We consider a page to be manually expired if it has no
  # etag and no last-modified and if any of the expiration
  # conditions are met (expire_time, max_cache_time, Expires)
  def manually_expired?(hash, time)
    # && and || are needed here: with 'and'/'or' the assignment binds
    # tighter, and only the first operand would get stored
    auto = hash[:etag].empty? && hash[:last_modified].nil?
    # TODO max-age
    manual = (time - hash[:last_use] > @bot.config['http.expire_time']*60) ||
             (time - hash[:first_use] > @bot.config['http.max_cache_time']*60) ||
             (hash[:expires] < time)
    return (auto && manual)
  end

  # drops from the cache all the pages which are manually_expired?
  def remove_stale_cache
    debug "Removing stale cache"
    debug "#{@cache.size} pages before"
    begin
      now = Time.new
      @cache.reject! { |k, val|
        manually_expired?(val, now)
      }
    rescue => e
      error "Failed to remove stale cache: #{e.inspect}"
    end
    debug "#{@cache.size} pages after"
  end

  private

  # returns _uri_or_str_ itself if it is a URI, its string form parsed otherwise
  def to_uri(uri_or_str)
    return uri_or_str if uri_or_str.kind_of?(URI)
    URI.parse(uri_or_str.to_s)
  end

  # parses a proxy location, returning nil if it cannot be parsed
  def parse_proxy_uri(str)
    URI.parse(str)
  rescue StandardError
    nil
  end

  # true if any of the regexp sources in _patterns_ matches any item of _list_
  def matches_any?(patterns, list)
    regexps = patterns.collect { |r| Regexp.new(r) }
    regexps.any? { |re| list.any? { |item| re.match(item) } }
  end
end
+ 'zwnj' => '‌', + 'aring' => '\xe5', + 'gt' => '>', + 'yen' => '\xa5', + 'ograve' => '\xf2', + 'Chi' => 'Χ', + 'bull' => '•', + 'Egrave' => '\xc8', + 'Ntilde' => '\xd1', + 'upsih' => 'ϒ', + 'Yacute' => '\xdd', + 'asymp' => '≈', + 'radic' => '√', + 'otimes' => '⊗', + 'nabla' => '∇', + 'aelig' => '\xe6', + 'oelig' => 'œ', + 'equiv' => '≡', + 'Psi' => 'Ψ', + 'auml' => '\xe4', + 'circ' => 'ˆ', + 'Acirc' => '\xc2', + 'Epsilon' => 'Ε', + 'Yuml' => 'Ÿ', + 'Eta' => 'Η', + 'Icirc' => '\xce', + 'Upsilon' => 'Υ', + 'ndash' => '–', + 'there4' => '∴', + 'Prime' => '″', + 'prime' => '′', + 'psi' => 'ψ', + 'Kappa' => 'Κ', + 'rsaquo' => '›', + 'Tau' => 'Τ', + 'darr' => '↓', + 'ocirc' => '\xf4', + 'lrm' => '‎', + 'zwj' => '‍', + 'cedil' => '\xb8', + 'Ecirc' => '\xca', + 'not' => '\xac', + 'AElig' => '\xc6', + 'oslash' => '\xf8', + 'acute' => '\xb4', + 'lceil' => '⌈', + 'shy' => '\xad', + 'rdquo' => '”', + 'ge' => '≥', + 'Igrave' => '\xcc', + 'Ograve' => '\xd2', + 'euro' => '€', + 'dArr' => '⇓', + 'sdot' => '⋅', + 'nbsp' => '\xa0', + 'lfloor' => '⌊', + 'lArr' => '⇐', + 'Auml' => '\xc4', + 'larr' => '←', + 'Atilde' => '\xc3', + 'Otilde' => '\xd5', + 'szlig' => '\xdf', + 'clubs' => '♣', + 'diams' => '♦', + 'agrave' => '\xe0', + 'Ocirc' => '\xd4', + 'Iota' => 'Ι', + 'Theta' => 'Θ', + 'Pi' => 'Π', + 'OElig' => 'Œ', + 'Scaron' => 'Š', + 'frac14' => '\xbc', + 'egrave' => '\xe8', + 'sub' => '⊂', + 'iexcl' => '\xa1', + 'frac12' => '\xbd', + 'sbquo' => '‚', + 'ordf' => '\xaa', + 'sum' => '∑', + 'prop' => '∝', + 'Uuml' => '\xdc', + 'ntilde' => '\xf1', + 'sup' => '⊃', + 'theta' => 'θ', + 'prod' => '∏', + 'nsub' => '⊄', + 'hArr' => '⇔', + 'rlm' => '‏', + 'THORN' => '\xde', + 'infin' => '∞', + 'yuml' => '\xff', + 'Mu' => 'Μ', + 'le' => '≤', + 'Eacute' => '\xc9', + 'thinsp' => ' ', + 'ecirc' => '\xea', + 'bdquo' => '„', + 'Sigma' => 'Σ', + 'fnof' => 'ƒ', + 'Aring' => '\xc5', + 'tilde' => '˜', + 'frac34' => '\xbe', + 'emsp' => ' ', + 'mdash' => '—', + 'uarr' => '↑', + 'permil' => '‰', + 'Ugrave' 
=> '\xd9', + 'rarr' => '→', + 'Agrave' => '\xc0', + 'chi' => 'χ', + 'forall' => '∀', + 'eth' => '\xf0', + 'rceil' => '⌉', + 'iuml' => '\xef', + 'gamma' => 'γ', + 'lambda' => 'λ', + 'harr' => '↔', + 'rang' => '〉', + 'xi' => 'ξ', + 'dagger' => '†', + 'divide' => '\xf7', + 'Ouml' => '\xd6', + 'image' => 'ℑ', + 'alefsym' => 'ℵ', + 'igrave' => '\xec', + 'otilde' => '\xf5', + 'Oacute' => '\xd3', + 'sube' => '⊆', + 'alpha' => 'α', + 'frasl' => '⁄', + 'ETH' => '\xd0', + 'lowast' => '∗', + 'Nu' => 'Ν', + 'plusmn' => '\xb1', + 'Euml' => '\xcb', + 'real' => 'ℜ', + 'sup1' => '\xb9', + 'sup2' => '\xb2', + 'sup3' => '\xb3', + 'Oslash' => '\xd8', + 'Aacute' => '\xc1', + 'cent' => '\xa2', + 'oline' => '‾', + 'Beta' => 'Β', + 'perp' => '⊥', + 'Delta' => 'Δ', + 'loz' => '◊', + 'pi' => 'π', + 'iota' => 'ι', + 'empty' => '∅', + 'euml' => '\xeb', + 'brvbar' => '\xa6', + 'iacute' => '\xed', + 'para' => '\xb6', + 'micro' => '\xb5', + 'cup' => '∪', + 'weierp' => '℘', + 'uuml' => '\xfc', + 'part' => '∂', + 'icirc' => '\xee', + 'delta' => 'δ', + 'omicron' => 'ο', + 'upsilon' => 'υ', + 'Iuml' => '\xcf', + 'Lambda' => 'Λ', + 'Xi' => 'Ξ', + 'kappa' => 'κ', + 'ccedil' => '\xe7', + 'Ucirc' => '\xdb', + 'cap' => '∩', + 'mu' => 'μ', + 'scaron' => 'š', + 'lsquo' => '‘', + 'isin' => '∈', + 'Zeta' => 'Ζ', + 'supe' => '⊇', + 'deg' => '\xb0', + 'and' => '∧', + 'tau' => 'τ', + 'pound' => '\xa3', + 'hellip' => '…', + 'curren' => '\xa4', + 'int' => '∫', + 'ucirc' => '\xfb', + 'rfloor' => '⌋', + 'ensp' => ' ', + 'crarr' => '↵', + 'ugrave' => '\xf9', + 'notin' => '∉', + 'exist' => '∃', + 'uArr' => '⇑', + 'cong' => '≅', + 'Dagger' => '‡', + 'oplus' => '⊕', + 'times' => '\xd7', + 'atilde' => '\xe3', + 'piv' => 'ϖ', + 'ni' => '∋', + 'Phi' => 'Φ', + 'lsaquo' => '‹', + 'Uacute' => '\xda', + 'Omicron' => 'Ο', + 'ang' => '∠', + 'ne' => '≠', + 'iquest' => '\xbf', + 'eta' => 'η', + 'yacute' => '\xfd', + 'Rho' => 'Ρ', + 'uacute' => '\xfa', + 'Alpha' => 'Α', + 'zeta' => 'ζ', + 'Omega' => 'Ω', + 'nu' => 'ν', + 'sim' => 
module ::Irc

  # miscellaneous useful functions
  module Utils
    SEC_PER_MIN = 60
    SEC_PER_HR = SEC_PER_MIN * 60
    SEC_PER_DAY = SEC_PER_HR * 24
    SEC_PER_MNTH = SEC_PER_DAY * 30
    SEC_PER_YR = SEC_PER_MNTH * 12

    # array:: the Array the formatted quantity is appended to
    # var:: the quantity
    # string:: name of the unit, singular
    # plural:: name of the unit, plural
    #
    # appends "1 <string>" or "<var> <plural>" to _array_
    def Utils.secs_to_string_case(array, var, string, plural)
      case var
      when 1
        array << "1 #{string}"
      else
        array << "#{var} #{plural}"
      end
    end

    # turn a number of seconds into a human readable string, e.g
    # 2 days, 3 hours, 18 minutes and 10 seconds
    # Units which would be zero are left out; zero seconds gives "0 seconds"
    def Utils.secs_to_string(secs)
      ret = []
      [
        [SEC_PER_YR, "year"],
        [SEC_PER_MNTH, "month"],
        [SEC_PER_DAY, "day"],
        [SEC_PER_HR, "hour"],
        [SEC_PER_MIN, "minute"]
      ].each do |unit, name|
        count, secs = secs.divmod unit
        secs_to_string_case(ret, count, name, "#{name}s") if count > 0
      end
      secs_to_string_case(ret, secs, "second", "seconds") if secs > 0 || ret.empty?
      case ret.length
      when 0
        raise "Empty ret array!"
      when 1
        # not ret.to_s: since Ruby 1.9 Array#to_s is the same as inspect
        return ret.first
      else
        return [ret[0, ret.length-1].join(", ") , ret[-1]].join(" and ")
      end
    end


    # command:: the program to run
    # args:: its arguments; no shell is involved
    #
    # runs _command_ in a forked child and returns what it wrote to its
    # standard output and standard error
    def Utils.safe_exec(command, *args)
      IO.popen("-") {|pipe|
        if(pipe)
          # parent: collect the output of the child
          # NOTE(review): readlines keeps the line terminators, so joining
          # with "\n" doubles them on multi-line output; left as it is
          # because callers may rely on it
          return pipe.readlines.join("\n")
        else
          # child
          begin
            # Assigning to $stderr would only change the Ruby variable;
            # reopen redirects the actual descriptor, which is what the
            # exec'd program inherits
            $stderr.reopen($stdout)
            exec(command, *args)
          rescue Exception => e
            # Catching everything is deliberate: nothing must propagate
            # out of the forked child
            puts "exec of #{command} led to exception: #{e.inspect}"
            Kernel::exit! 0
          end
          puts "exec of #{command} failed"
          Kernel::exit! 0
        end
      }
    end


    @@safe_save_dir = nil
    # str:: path of the directory where safe_save creates its temporary files
    def Utils.set_safe_save_dir(str)
      @@safe_save_dir = str.dup
    end

    # file:: path of the file to be written
    #
    # yields a temporary file, opened in binary mode in the safe save
    # directory, and renames it to _file_ once the block is done, so that
    # _file_ is never left half written. If anything fails the temporary
    # file is removed and the exception is raised again.
    # Raises if set_safe_save_dir was not called first.
    def Utils.safe_save(file)
      raise 'No safe save directory defined!' if @@safe_save_dir.nil?
      basename = File.basename(file)
      temp = Tempfile.new(basename,@@safe_save_dir)
      saved = false
      begin
        temp.binmode
        yield temp if block_given?
        temp.close
        result = File.rename(temp.path, file)
        saved = true
        result
      ensure
        # do not leave stray temporary files around on failure
        temp.close! unless saved
      end
    end


    # uristr:: the uri to get, as a String
    # readtimeout:: timeout for reading the response
    # opentimeout:: timeout for opening the connection
    #
    # returns a string containing the result of an HTTP GET on the uri, or
    # nil if the request fails or the response code is not 200.
    # The proxy in the http_proxy environment variable is used, if set.
    def Utils.http_get(uristr, readtimeout=8, opentimeout=4)

      # ruby 1.7 or better needed for this (or 1.6 and debian unstable)
      Net::HTTP.version_1_2
      # (so we support the 1_1 api anyway, avoids problems)

      uri = URI.parse uristr
      query = uri.path
      # a uri such as http://host has no path, but a request needs one
      query = "/" if query.nil? || query.empty?
      if uri.query
        query += "?#{uri.query}"
      end

      proxy_host = nil
      proxy_port = nil
      if(ENV['http_proxy'] && proxy_uri = URI.parse(ENV['http_proxy']))
        proxy_host = proxy_uri.host
        proxy_port = proxy_uri.port
      end

      begin
        http = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port)
        http.open_timeout = opentimeout
        http.read_timeout = readtimeout

        http.start {|conn|
          resp = conn.get(query)
          if resp.code == "200"
            return resp.body
          end
        }
        return nil
      rescue StandardError, Timeout::Error => e
        # cheesy for now
        error "Utils.http_get exception: #{e.inspect}, while trying to get #{uristr}"
        return nil
      end
    end

    # str:: the text to decode
    #
    # returns a copy of _str_ with the HTML entities replaced. The
    # htmlentities library is used if it was loaded; otherwise named and
    # numeric (decimal or hexadecimal) entities are looked up in
    # UNESCAPE_TABLE, and unknown ones are replaced by a *
    def Utils.decode_html_entities(str)
      if $we_have_html_entities_decoder
        return HTMLEntities.decode_entities(str)
      else
        # only match things that look like an entity, so that plain text
        # between a stray & and a later ; is left alone
        str.gsub(/&(#?\w+);/) {
          symbol = $1
          # normalize numeric entities to unpadded decimal, which is the
          # form used by the keys of the table
          if symbol =~ /\A#[xX]([0-9a-fA-F]+)\z/
            symbol = "##{$1.hex}"
          elsif symbol =~ /\A#(\d+)\z/
            symbol = "##{$1.to_i}"
          end

          # output the symbol's irc-translated character, or a * if it's unknown
          UNESCAPE_TABLE[symbol] || '*'
        }
      end
    end
  end
end
exclude /.*\.foo\.com/, include bar\.foo\.com") - BotConfig.register BotConfigArrayValue.new('http.proxy_exclude', - :default => [], - :desc => "List of regexps to check against a URI's hostname/ip to see if we should use avoid the proxy to access this URI and access it directly") - BotConfig.register BotConfigIntegerValue.new('http.max_redir', - :default => 5, - :desc => "Maximum number of redirections to be used when getting a document") - BotConfig.register BotConfigIntegerValue.new('http.expire_time', - :default => 60, - :desc => "After how many minutes since last use a cached document is considered to be expired") - BotConfig.register BotConfigIntegerValue.new('http.max_cache_time', - :default => 60*24, - :desc => "After how many minutes since first use a cached document is considered to be expired") - BotConfig.register BotConfigIntegerValue.new('http.no_expire_cache', - :default => false, - :desc => "Set this to true if you want the bot to never expire the cached pages") - - def initialize(bot) - @bot = bot - @cache = Hash.new - @headers = { - 'User-Agent' => "rbot http util #{$version} (http://linuxbrit.co.uk/rbot/)", - } - @last_response = nil - end - attr_reader :last_response - attr_reader :headers - - # if http_proxy_include or http_proxy_exclude are set, then examine the - # uri to see if this is a proxied uri - # the in/excludes are a list of regexps, and each regexp is checked against - # the server name, and its IP addresses - def proxy_required(uri) - use_proxy = true - if @bot.config["http.proxy_exclude"].empty? && @bot.config["http.proxy_include"].empty? - return use_proxy - end - - list = [uri.host] - begin - list.concat Resolv.getaddresses(uri.host) - rescue StandardError => err - warning "couldn't resolve host uri.host" - end - - unless @bot.config["http.proxy_exclude"].empty? 
- re = @bot.config["http.proxy_exclude"].collect{|r| Regexp.new(r)} - re.each do |r| - list.each do |item| - if r.match(item) - use_proxy = false - break - end - end - end - end - unless @bot.config["http.proxy_include"].empty? - re = @bot.config["http.proxy_include"].collect{|r| Regexp.new(r)} - re.each do |r| - list.each do |item| - if r.match(item) - use_proxy = true - break - end - end - end - end - debug "using proxy for uri #{uri}?: #{use_proxy}" - return use_proxy - end - - # uri:: Uri to create a proxy for - # - # return a net/http Proxy object, which is configured correctly for - # proxying based on the bot's proxy configuration. - # This will include per-url proxy configuration based on the bot config - # +http_proxy_include/exclude+ options. - def get_proxy(uri) - proxy = nil - proxy_host = nil - proxy_port = nil - proxy_user = nil - proxy_pass = nil - - if @bot.config["http.use_proxy"] - if (ENV['http_proxy']) - proxy = URI.parse ENV['http_proxy'] rescue nil - end - if (@bot.config["http.proxy_uri"]) - proxy = URI.parse @bot.config["http.proxy_uri"] rescue nil - end - if proxy - debug "proxy is set to #{proxy.host} port #{proxy.port}" - if proxy_required(uri) - proxy_host = proxy.host - proxy_port = proxy.port - proxy_user = @bot.config["http.proxy_user"] - proxy_pass = @bot.config["http.proxy_pass"] - end - end - end - - h = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_port) - h.use_ssl = true if uri.scheme == "https" - return h - end - - # uri:: uri to query (Uri object) - # readtimeout:: timeout for reading the response - # opentimeout:: timeout for opening the connection - # - # simple get request, returns (if possible) response body following redirs - # and caching if requested - # if a block is given, it yields the urls it gets redirected to - # TODO we really need something to implement proper caching - def get(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"], cache=false) - if 
uri_or_str.kind_of?(URI) - uri = uri_or_str - else - uri = URI.parse(uri_or_str.to_s) - end - debug "Getting #{uri}" - - proxy = get_proxy(uri) - proxy.open_timeout = opentimeout - proxy.read_timeout = readtimeout - - begin - proxy.start() {|http| - yield uri.request_uri() if block_given? - req = Net::HTTP::Get.new(uri.request_uri(), @headers) - if uri.user and uri.password - req.basic_auth(uri.user, uri.password) - end - resp = http.request(req) - case resp - when Net::HTTPSuccess - if cache - debug "Caching #{uri.to_s}" - cache_response(uri.to_s, resp) - end - return resp.body - when Net::HTTPRedirection - if resp.key?('location') - new_loc = URI.join(uri, resp['location']) - debug "Redirecting #{uri} to #{new_loc}" - yield new_loc if block_given? - if max_redir > 0 - # If cache is an Array, we assume get was called by get_cached - # because of a cache miss and that the first value of the Array - # was the noexpire value. Since the cache miss might have been - # caused by a redirection, we want to try get_cached again - # TODO FIXME look at Python's httplib2 for a most likely - # better way to handle all this mess - if cache.kind_of?(Array) - return get_cached(new_loc, readtimeout, opentimeout, max_redir-1, cache[0]) - else - return get(new_loc, readtimeout, opentimeout, max_redir-1, cache) - end - else - warning "Max redirection reached, not going to #{new_loc}" - end - else - warning "Unknown HTTP redirection #{resp.inspect}" - end - else - debug "HttpUtil.get return code #{resp.code} #{resp.body}" - end - @last_response = resp - return nil - } - rescue StandardError, Timeout::Error => e - error "HttpUtil.get exception: #{e.inspect}, while trying to get #{uri}" - debug e.backtrace.join("\n") - end - @last_response = nil - return nil - end - - # just like the above, but only gets the head - def head(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"]) - if uri_or_str.kind_of?(URI) - uri = uri_or_str - else - uri = 
URI.parse(uri_or_str.to_s) - end - - proxy = get_proxy(uri) - proxy.open_timeout = opentimeout - proxy.read_timeout = readtimeout - - begin - proxy.start() {|http| - yield uri.request_uri() if block_given? - req = Net::HTTP::Head.new(uri.request_uri(), @headers) - if uri.user and uri.password - req.basic_auth(uri.user, uri.password) - end - resp = http.request(req) - case resp - when Net::HTTPSuccess - return resp - when Net::HTTPRedirection - debug "Redirecting #{uri} to #{resp['location']}" - yield resp['location'] if block_given? - if max_redir > 0 - return head( URI.parse(resp['location']), readtimeout, opentimeout, max_redir-1) - else - warning "Max redirection reached, not going to #{resp['location']}" - end - else - debug "HttpUtil.head return code #{resp.code}" - end - @last_response = resp - return nil - } - rescue StandardError, Timeout::Error => e - error "HttpUtil.head exception: #{e.inspect}, while trying to get #{uri}" - debug e.backtrace.join("\n") - end - @last_response = nil - return nil - end - - def cache_response(k, resp) - begin - if resp.key?('pragma') and resp['pragma'] == 'no-cache' - debug "Not caching #{k}, it has Pragma: no-cache" - return - end - # TODO should we skip caching if neither last-modified nor etag are present? 
- now = Time.new - u = Hash.new - u = Hash.new - u[:body] = resp.body - u[:last_modified] = nil - u[:last_modified] = Time.httpdate(resp['date']) if resp.key?('date') - u[:last_modified] = Time.httpdate(resp['last-modified']) if resp.key?('last-modified') - u[:expires] = now - u[:expires] = Time.httpdate(resp['expires']) if resp.key?('expires') - u[:revalidate] = false - if resp.key?('cache-control') - # TODO max-age - case resp['cache-control'] - when /no-cache|must-revalidate/ - u[:revalidate] = true - end - end - u[:etag] = "" - u[:etag] = resp['etag'] if resp.key?('etag') - u[:count] = 1 - u[:first_use] = now - u[:last_use] = now - rescue => e - error "Failed to cache #{k}/#{resp.to_hash.inspect}: #{e.inspect}" - return - end - @cache[k] = u - debug "Cached #{k}/#{resp.to_hash.inspect}: #{u.inspect_no_body}" - debug "#{@cache.size} pages (#{@cache.keys.join(', ')}) cached up to now" - end - - # For debugging purposes - class ::Hash - def inspect_no_body - temp = self.dup - temp.delete(:body) - temp.inspect - end - end - - def expired?(uri, readtimeout, opentimeout) - k = uri.to_s - debug "Checking cache validity for #{k}" - begin - return true unless @cache.key?(k) - u = @cache[k] - - # TODO we always revalidate for the time being - - if u[:etag].empty? and u[:last_modified].nil? - # TODO max-age - return true - end - - proxy = get_proxy(uri) - proxy.open_timeout = opentimeout - proxy.read_timeout = readtimeout - - proxy.start() {|http| - yield uri.request_uri() if block_given? - headers = @headers.dup - headers['If-None-Match'] = u[:etag] unless u[:etag].empty? - headers['If-Modified-Since'] = u[:last_modified].rfc2822 if u[:last_modified] - debug "Cache HEAD request headers: #{headers.inspect}" - # FIXME TODO We might want to use a Get here - # because if a 200 OK is returned we would get the new body - # with one connection less ... 
- req = Net::HTTP::Head.new(uri.request_uri(), headers) - if uri.user and uri.password - req.basic_auth(uri.user, uri.password) - end - resp = http.request(req) - debug "Checking cache validity of #{u.inspect_no_body} against #{resp.inspect}/#{resp.to_hash.inspect}" - case resp - when Net::HTTPNotModified - return false - else - return true - end - } - rescue => e - error "Failed to check cache validity for #{uri}: #{e.inspect}" - return true - end - end - - # gets a page from the cache if it's still (assumed to be) valid - # TODO remove stale cached pages, except when called with noexpire=true - def get_cached(uri_or_str, readtimeout=10, opentimeout=5, - max_redir=@bot.config['http.max_redir'], - noexpire=@bot.config['http.no_expire_cache']) - if uri_or_str.kind_of?(URI) - uri = uri_or_str - else - uri = URI.parse(uri_or_str.to_s) - end - debug "Getting cached #{uri}" - - if expired?(uri, readtimeout, opentimeout) - debug "Cache expired" - bod = get(uri, readtimeout, opentimeout, max_redir, [noexpire]) - bod.instance_variable_set(:@cached,false) - else - k = uri.to_s - debug "Using cache" - @cache[k][:count] += 1 - @cache[k][:last_use] = Time.now - bod = @cache[k][:body] - bod.instance_variable_set(:@cached,true) - end - unless noexpire - remove_stale_cache - end - unless bod.respond_to?(:cached?) - def bod.cached? - return @cached - end - end - return bod - end - - # We consider a page to be manually expired if it has no - # etag and no last-modified and if any of the expiration - # conditions are met (expire_time, max_cache_time, Expires) - def manually_expired?(hash, time) - auto = hash[:etag].empty? and hash[:last_modified].nil? 
- # TODO max-age - manual = (time - hash[:last_use] > @bot.config['http.expire_time']*60) or - (time - hash[:first_use] > @bot.config['http.max_cache_time']*60) or - (hash[:expires] < time) - return (auto and manual) - end - - def remove_stale_cache - debug "Removing stale cache" - debug "#{@cache.size} pages before" - begin - now = Time.new - @cache.reject! { |k, val| - manually_expired?(val, now) - } - rescue => e - error "Failed to remove stale cache: #{e.inspect}" - end - debug "#{@cache.size} pages after" - end -end -end -end diff --git a/lib/rbot/ircbot.rb b/lib/rbot/ircbot.rb index c744ca51..54513ab4 100644 --- a/lib/rbot/ircbot.rb +++ b/lib/rbot/ircbot.rb @@ -69,7 +69,7 @@ $interrupted = 0 # these first require 'rbot/rbotconfig' require 'rbot/config' -require 'rbot/utils' +# require 'rbot/utils' require 'rbot/irc' require 'rbot/rfc2812' @@ -82,7 +82,7 @@ require 'rbot/message' require 'rbot/language' require 'rbot/dbhash' require 'rbot/registry' -require 'rbot/httputil' +# require 'rbot/httputil' module Irc @@ -279,7 +279,6 @@ class IrcBot Dir.mkdir("#{botclass}/logs") unless File.exist?("#{botclass}/logs") Dir.mkdir("#{botclass}/registry") unless File.exist?("#{botclass}/registry") Dir.mkdir("#{botclass}/safe_save") unless File.exist?("#{botclass}/safe_save") - Utils.set_safe_save_dir("#{botclass}/safe_save") # Time at which the last PING was sent @last_ping = nil @@ -364,8 +363,6 @@ class IrcBot @logs = Hash.new - @httputil = Utils::HttpUtil.new(self) - @plugins = nil @lang = Language::Language.new(self, @config['core.language']) @@ -385,11 +382,16 @@ class IrcBot Dir.mkdir("#{botclass}/plugins") unless File.exist?("#{botclass}/plugins") @plugins = Plugins::pluginmanager @plugins.bot_associate(self) + @plugins.add_botmodule_dir(Config::coredir + "/utils") @plugins.add_botmodule_dir(Config::coredir) @plugins.add_botmodule_dir("#{botclass}/plugins") @plugins.add_botmodule_dir(Config::datadir + "/plugins") @plugins.scan + 
Utils.set_safe_save_dir("#{botclass}/safe_save") + @httputil = Utils::HttpUtil.new(self) + + @socket = IrcSocket.new(@config['server.name'], @config['server.port'], @config['server.bindhost'], @config['server.sendq_delay'], @config['server.sendq_burst'], :ssl => @config['server.ssl']) @client = IrcClient.new myself.nick = @config['irc.nick'] diff --git a/lib/rbot/utils.rb b/lib/rbot/utils.rb deleted file mode 100644 index 557ca738..00000000 --- a/lib/rbot/utils.rb +++ /dev/null @@ -1,419 +0,0 @@ -require 'net/http' -require 'uri' -require 'tempfile' - -begin - $we_have_html_entities_decoder = require 'htmlentities' -rescue LoadError - $we_have_html_entities_decoder = false - module Irc - module Utils - UNESCAPE_TABLE = { - 'laquo' => '<<', - 'raquo' => '>>', - 'quot' => '"', - 'apos' => '\'', - 'micro' => 'u', - 'copy' => '(c)', - 'trade' => '(tm)', - 'reg' => '(R)', - '#174' => '(R)', - '#8220' => '"', - '#8221' => '"', - '#8212' => '--', - '#39' => '\'', - 'amp' => '&', - 'lt' => '<', - 'gt' => '>', - 'hellip' => '...', - 'nbsp' => ' ', -=begin - # extras codes, for future use... 
- 'zwnj' => '‌', - 'aring' => '\xe5', - 'gt' => '>', - 'yen' => '\xa5', - 'ograve' => '\xf2', - 'Chi' => 'Χ', - 'bull' => '•', - 'Egrave' => '\xc8', - 'Ntilde' => '\xd1', - 'upsih' => 'ϒ', - 'Yacute' => '\xdd', - 'asymp' => '≈', - 'radic' => '√', - 'otimes' => '⊗', - 'nabla' => '∇', - 'aelig' => '\xe6', - 'oelig' => 'œ', - 'equiv' => '≡', - 'Psi' => 'Ψ', - 'auml' => '\xe4', - 'circ' => 'ˆ', - 'Acirc' => '\xc2', - 'Epsilon' => 'Ε', - 'Yuml' => 'Ÿ', - 'Eta' => 'Η', - 'Icirc' => '\xce', - 'Upsilon' => 'Υ', - 'ndash' => '–', - 'there4' => '∴', - 'Prime' => '″', - 'prime' => '′', - 'psi' => 'ψ', - 'Kappa' => 'Κ', - 'rsaquo' => '›', - 'Tau' => 'Τ', - 'darr' => '↓', - 'ocirc' => '\xf4', - 'lrm' => '‎', - 'zwj' => '‍', - 'cedil' => '\xb8', - 'Ecirc' => '\xca', - 'not' => '\xac', - 'AElig' => '\xc6', - 'oslash' => '\xf8', - 'acute' => '\xb4', - 'lceil' => '⌈', - 'shy' => '\xad', - 'rdquo' => '”', - 'ge' => '≥', - 'Igrave' => '\xcc', - 'Ograve' => '\xd2', - 'euro' => '€', - 'dArr' => '⇓', - 'sdot' => '⋅', - 'nbsp' => '\xa0', - 'lfloor' => '⌊', - 'lArr' => '⇐', - 'Auml' => '\xc4', - 'larr' => '←', - 'Atilde' => '\xc3', - 'Otilde' => '\xd5', - 'szlig' => '\xdf', - 'clubs' => '♣', - 'diams' => '♦', - 'agrave' => '\xe0', - 'Ocirc' => '\xd4', - 'Iota' => 'Ι', - 'Theta' => 'Θ', - 'Pi' => 'Π', - 'OElig' => 'Œ', - 'Scaron' => 'Š', - 'frac14' => '\xbc', - 'egrave' => '\xe8', - 'sub' => '⊂', - 'iexcl' => '\xa1', - 'frac12' => '\xbd', - 'sbquo' => '‚', - 'ordf' => '\xaa', - 'sum' => '∑', - 'prop' => '∝', - 'Uuml' => '\xdc', - 'ntilde' => '\xf1', - 'sup' => '⊃', - 'theta' => 'θ', - 'prod' => '∏', - 'nsub' => '⊄', - 'hArr' => '⇔', - 'rlm' => '‏', - 'THORN' => '\xde', - 'infin' => '∞', - 'yuml' => '\xff', - 'Mu' => 'Μ', - 'le' => '≤', - 'Eacute' => '\xc9', - 'thinsp' => ' ', - 'ecirc' => '\xea', - 'bdquo' => '„', - 'Sigma' => 'Σ', - 'fnof' => 'ƒ', - 'Aring' => '\xc5', - 'tilde' => '˜', - 'frac34' => '\xbe', - 'emsp' => ' ', - 'mdash' => '—', - 'uarr' => '↑', - 'permil' => '‰', - 'Ugrave' 
=> '\xd9', - 'rarr' => '→', - 'Agrave' => '\xc0', - 'chi' => 'χ', - 'forall' => '∀', - 'eth' => '\xf0', - 'rceil' => '⌉', - 'iuml' => '\xef', - 'gamma' => 'γ', - 'lambda' => 'λ', - 'harr' => '↔', - 'rang' => '〉', - 'xi' => 'ξ', - 'dagger' => '†', - 'divide' => '\xf7', - 'Ouml' => '\xd6', - 'image' => 'ℑ', - 'alefsym' => 'ℵ', - 'igrave' => '\xec', - 'otilde' => '\xf5', - 'Oacute' => '\xd3', - 'sube' => '⊆', - 'alpha' => 'α', - 'frasl' => '⁄', - 'ETH' => '\xd0', - 'lowast' => '∗', - 'Nu' => 'Ν', - 'plusmn' => '\xb1', - 'Euml' => '\xcb', - 'real' => 'ℜ', - 'sup1' => '\xb9', - 'sup2' => '\xb2', - 'sup3' => '\xb3', - 'Oslash' => '\xd8', - 'Aacute' => '\xc1', - 'cent' => '\xa2', - 'oline' => '‾', - 'Beta' => 'Β', - 'perp' => '⊥', - 'Delta' => 'Δ', - 'loz' => '◊', - 'pi' => 'π', - 'iota' => 'ι', - 'empty' => '∅', - 'euml' => '\xeb', - 'brvbar' => '\xa6', - 'iacute' => '\xed', - 'para' => '\xb6', - 'micro' => '\xb5', - 'cup' => '∪', - 'weierp' => '℘', - 'uuml' => '\xfc', - 'part' => '∂', - 'icirc' => '\xee', - 'delta' => 'δ', - 'omicron' => 'ο', - 'upsilon' => 'υ', - 'Iuml' => '\xcf', - 'Lambda' => 'Λ', - 'Xi' => 'Ξ', - 'kappa' => 'κ', - 'ccedil' => '\xe7', - 'Ucirc' => '\xdb', - 'cap' => '∩', - 'mu' => 'μ', - 'scaron' => 'š', - 'lsquo' => '‘', - 'isin' => '∈', - 'Zeta' => 'Ζ', - 'supe' => '⊇', - 'deg' => '\xb0', - 'and' => '∧', - 'tau' => 'τ', - 'pound' => '\xa3', - 'hellip' => '…', - 'curren' => '\xa4', - 'int' => '∫', - 'ucirc' => '\xfb', - 'rfloor' => '⌋', - 'ensp' => ' ', - 'crarr' => '↵', - 'ugrave' => '\xf9', - 'notin' => '∉', - 'exist' => '∃', - 'uArr' => '⇑', - 'cong' => '≅', - 'Dagger' => '‡', - 'oplus' => '⊕', - 'times' => '\xd7', - 'atilde' => '\xe3', - 'piv' => 'ϖ', - 'ni' => '∋', - 'Phi' => 'Φ', - 'lsaquo' => '‹', - 'Uacute' => '\xda', - 'Omicron' => 'Ο', - 'ang' => '∠', - 'ne' => '≠', - 'iquest' => '\xbf', - 'eta' => 'η', - 'yacute' => '\xfd', - 'Rho' => 'Ρ', - 'uacute' => '\xfa', - 'Alpha' => 'Α', - 'zeta' => 'ζ', - 'Omega' => 'Ω', - 'nu' => 'ν', - 'sim' => 
'∼', - 'sect' => '\xa7', - 'phi' => 'φ', - 'sigmaf' => 'ς', - 'macr' => '\xaf', - 'minus' => '−', - 'Ccedil' => '\xc7', - 'ordm' => '\xba', - 'epsilon' => 'ε', - 'beta' => 'β', - 'rArr' => '⇒', - 'rho' => 'ρ', - 'aacute' => '\xe1', - 'eacute' => '\xe9', - 'omega' => 'ω', - 'middot' => '\xb7', - 'Gamma' => 'Γ', - 'Iacute' => '\xcd', - 'lang' => '〈', - 'spades' => '♠', - 'rsquo' => '’', - 'uml' => '\xa8', - 'thorn' => '\xfe', - 'ouml' => '\xf6', - 'thetasym' => 'ϑ', - 'or' => '∨', - 'raquo' => '\xbb', - 'acirc' => '\xe2', - 'ldquo' => '“', - 'hearts' => '♥', - 'sigma' => 'σ', - 'oacute' => '\xf3', -=end - } - end - end -end - - -module Irc - - # miscellaneous useful functions - module Utils - SEC_PER_MIN = 60 - SEC_PER_HR = SEC_PER_MIN * 60 - SEC_PER_DAY = SEC_PER_HR * 24 - SEC_PER_MNTH = SEC_PER_DAY * 30 - SEC_PER_YR = SEC_PER_MNTH * 12 - - def Utils.secs_to_string_case(array, var, string, plural) - case var - when 1 - array << "1 #{string}" - else - array << "#{var} #{plural}" - end - end - - # turn a number of seconds into a human readable string, e.g - # 2 days, 3 hours, 18 minutes, 10 seconds - def Utils.secs_to_string(secs) - ret = [] - years, secs = secs.divmod SEC_PER_YR - secs_to_string_case(ret, years, "year", "years") if years > 0 - months, secs = secs.divmod SEC_PER_MNTH - secs_to_string_case(ret, months, "month", "months") if months > 0 - days, secs = secs.divmod SEC_PER_DAY - secs_to_string_case(ret, days, "day", "days") if days > 0 - hours, secs = secs.divmod SEC_PER_HR - secs_to_string_case(ret, hours, "hour", "hours") if hours > 0 - mins, secs = secs.divmod SEC_PER_MIN - secs_to_string_case(ret, mins, "minute", "minutes") if mins > 0 - secs_to_string_case(ret, secs, "second", "seconds") if secs > 0 or ret.empty? - case ret.length - when 0 - raise "Empty ret array!" 
- when 1 - return ret.to_s - else - return [ret[0, ret.length-1].join(", ") , ret[-1]].join(" and ") - end - end - - - def Utils.safe_exec(command, *args) - IO.popen("-") {|p| - if(p) - return p.readlines.join("\n") - else - begin - $stderr = $stdout - exec(command, *args) - rescue Exception => e - puts "exec of #{command} led to exception: #{e.inspect}" - Kernel::exit! 0 - end - puts "exec of #{command} failed" - Kernel::exit! 0 - end - } - end - - - @@safe_save_dir = nil - def Utils.set_safe_save_dir(str) - @@safe_save_dir = str.dup - end - - def Utils.safe_save(file) - raise 'No safe save directory defined!' if @@safe_save_dir.nil? - basename = File.basename(file) - temp = Tempfile.new(basename,@@safe_save_dir) - temp.binmode - yield temp if block_given? - temp.close - File.rename(temp.path, file) - end - - - # returns a string containing the result of an HTTP GET on the uri - def Utils.http_get(uristr, readtimeout=8, opentimeout=4) - - # ruby 1.7 or better needed for this (or 1.6 and debian unstable) - Net::HTTP.version_1_2 - # (so we support the 1_1 api anyway, avoids problems) - - uri = URI.parse uristr - query = uri.path - if uri.query - query += "?#{uri.query}" - end - - proxy_host = nil - proxy_port = nil - if(ENV['http_proxy'] && proxy_uri = URI.parse(ENV['http_proxy'])) - proxy_host = proxy_uri.host - proxy_port = proxy_uri.port - end - - begin - http = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port) - http.open_timeout = opentimeout - http.read_timeout = readtimeout - - http.start {|http| - resp = http.get(query) - if resp.code == "200" - return resp.body - end - } - rescue => e - # cheesy for now - error "Utils.http_get exception: #{e.inspect}, while trying to get #{uristr}" - return nil - end - end - - def Utils.decode_html_entities(str) - if $we_have_html_entities_decoder - return HTMLEntities.decode_entities(str) - else - str.gsub(/(&(.+?);)/) { - symbol = $2 - # remove the 0-paddng from unicode integers - if symbol =~ /#(.+)/ - symbol = 
"##{$1.to_i.to_s}" - end - - # output the symbol's irc-translated character, or a * if it's unknown - UNESCAPE_TABLE[symbol] || '*' - } - end - end - end -end -- cgit v1.2.3