From 367087a817dd7e7b0c2c03a8172972dbc31bedb2 Mon Sep 17 00:00:00 2001 From: Dmitry Kim Date: Sat, 24 Nov 2007 12:25:44 +0000 Subject: * (plugins/urban) route around urbanwap acting smaert wrt IP addresses (back to html scraping) --- data/rbot/plugins/urban.rb | 111 ++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 66 deletions(-) (limited to 'data') diff --git a/data/rbot/plugins/urban.rb b/data/rbot/plugins/urban.rb index 0c7d9838..21eb374c 100644 --- a/data/rbot/plugins/urban.rb +++ b/data/rbot/plugins/urban.rb @@ -1,13 +1,50 @@ class UrbanPlugin < Plugin + URBAN = 'http://www.urbandictionary.com/define.php?term=' def help( plugin, topic="") "urban [word] [n]: give the [n]th definition of [word] from urbandictionary.com. urbanday: give the word-of-the-day at urban" end + def get_def(m, word, n = nil) + n = n.to_i if n + u = URBAN + CGI.escape(word) + u += '&skip=' + n.to_s if n + s = @bot.httputil.get(u) + + notfound = s.match %r{
.*? isn't defined} + + if s.sub!(%r{
(\d+)\s*definition.*$}m, '') + total = $1.to_i + else + total = 1 + end + + n = total if n && n > total + + rv = Array.new + + s.scan(%r{]*>(\d+)\..*?(?:)?([^>]+)(?:)?.*?
.*?

(.+?)

.*?

300) + " " + + "#{a4}".ircify_html(:limit => 100) + ) unless (n && n != a1.to_i) || rv.size >= 3 + end + + if notfound + if rv.empty? + m.reply "#{word} not found" + else + m.reply "#{word} not found. maybe you mean:" + rv.each { |s| m.reply s } + end + else + rv.each { |s| m.reply s } + end + end + def urban(m, params) words = params[:words].to_s - n = params[:n].nil? ? 1 : params[:n].to_i rescue 1 - if words.empty? resp = @bot.httputil.head('http://www.urbandictionary.com/random.php', :max_redir => -1, @@ -16,81 +53,23 @@ class UrbanPlugin < Plugin words = URI.unescape(loc.match(/define.php\?term=(.*)$/)[1]) rescue nil end end - # we give a very high 'skip' because this will allow us to get the number of definitions by retrieving the previous definition - uri = "http://www.urbanwap.com/search.php?term=#{CGI.escape words}&skip=65536" - page = @bot.httputil.get(uri) - if page.nil? - m.reply "Couldn't retrieve an urban dictionary definition of #{words}" - return - end - if page =~ / is undefined<\/card><\/wml>/ - m.reply "There is no urban dictionary definition of #{words}" - return - end - if page =~ /&skip=(\d+)">prev<\/a>/ - numdefs = $1.to_i + 1 - else - numdefs = 1 - end - n = numdefs + n + 1 if n < 0 - if n > numdefs - m.reply "Urban dictionary only has #{numdefs} definitions for '#{words}'" - n = numdefs - end - if n < numdefs - uri = "http://www.urbanwap.com/search.php?term=#{CGI.escape words}&skip=#{n-1}" - page = @bot.httputil.get(uri) - if page.nil? - case n % 10 - when 1 - ord = 'st' - when 2 - ord = 'nd' - when 3 - ord = 'rd' - else - ord = 'th' - end - m.reply "Couldn't retrieve the #{n}#{ord} urban dictionary definition of #{words}" - return - end - end - m.reply "#{get_def(page)} (#{n}/#{numdefs})" - end - - def get_def(text) - # Start by removing the prev/home/next links - t = text.gsub(/(?:]*>prev<\/a> )?]*>home<\/a>(?: ]*>next<\/a>)?/,'') - # Close up paragraphs - t.gsub!(/<\/?p>/, ' ') - t.gsub!("\n", ' ') - # Reverse headings - t.gsub!(/<\/?b>/,"#{Reverse}") - # Enbolden links - t.gsub!(/<\/?a(?: [^>]*)?>/,"#{Bold}") - # Reverse examples - t.gsub!(/<\/?(?:i|em)>/,"#{Underline}") - # Clear anything else - t.gsub!(/<.*?>/, '') - - Utils.decode_html_entities t.strip + get_def(m, words, params[:n]) end def uotd(m, params) - home = @bot.httputil.get("http://www.urbanwap.com/") + home = @bot.httputil.get("http://www.urbandictionary.com/daily.php") if home.nil? m.reply "Couldn't get the urban dictionary word of the day" return end - home.match(/Word of the Day: .*?<\/a>/) + home.match(%r{href="/define.php\?term=.*?">(.*?)<}) wotd = $1 debug "Urban word of the day: #{wotd}" - page = @bot.httputil.get(wotd) - if page.nil? + if !wotd m.reply "Couldn't get the urban dictionary word of the day" - else - m.reply get_def(page) + return end + get_def(m, wotd, 1) end end -- cgit v1.2.3