#-- vim:sw=2:et
#++
#
# :title: Dictionary lookup plugin for rbot
#
# Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
# Copyright:: (C) 2006-2007 Giuseppe Bilotta
# License:: GPL v2
#
# Provides a link to the definition of a word in one of the supported
# dictionaries. Currently available are
#   * the Oxford dictionary for (British) English
#   * the De Mauro/Paravia dictionary for Italian
#   * the Chambers dictionary for English (accepts both US and UK)
#   * the Littré dictionary for French
#
# Other plugins can use this one to check if a given word is valid in Italian,
# English or French by using the is_italian?, is_british?, is_english? and
# is_french? methods
#
# TODO: cache results and reuse them if get_cached returns a cache copy

# Matches one <anchor> entry of a De Mauro WAP result page. Captures, as used
# by DictPlugin#demauro: (1) the lemma text, (2) the text following " - "
# (shown next to the lemma in the reply), (3) the numeric ID used to build the
# lemma URL.
DEMAURO_LEMMA = /<anchor>(.*?)(?: - (.*?))<go href="lemma.php\?ID=(\d+)"\/><\/anchor>/
# Matches one entry paragraph of a Chambers result page. Captures: (1) the
# content of the "hwd" span (presumably the headword), (2) the content of the
# "psa" span, (3) the remainder of the paragraph.
CHAMBERS_LEMMA = /<p><span class="hwd">(.*?)<\/span> <span class="psa">(.*?)<\/span>(.*?)<\/p>/

# rbot plugin that looks words up in several online dictionaries (De
# Mauro/Paravia, Concise Oxford, Chambers 21st Century, Littré) and replies
# with a link to the definition and, where available, an excerpt of it.
#
# Every lookup method doubles as a validity check: when called with
# :justcheck => true it sends no reply at all (the message may then be nil)
# and returns true or false instead.
class DictPlugin < Plugin
  Config.register Config::IntegerValue.new('dict.hits',
    :default => 3,
    :desc => "Number of hits to return from a dictionary lookup")
  Config.register Config::IntegerValue.new('dict.first_par',
    :default => 0,
    :desc => "When set to n > 0, the bot will return the first paragraph from the first n dictionary hits")

  # :htmlinfo filter for De Mauro/Paravia definition pages.
  #
  # s:: the data handed to the filter; s[:text] is read as the page HTML
  #
  # Returns nil when the page is not recognized or the lemma ID cannot be
  # determined, otherwise a hash with the page :title and the :content taken
  # from the corresponding WAP page.
  def demauro_filter(s)
    # check if it's a page we can handle
    loc = Utils.check_location(s, @dmurlrx)
    # the location might be not good, but we might still be able to handle the
    # page
    if !loc and s[:text] !~ /<!-- Il dizionario della lingua italiana Paravia: /
      debug "not our business"
      return
    end
    # we want to grab the content from the WAP page, since it's in a much
    # cleaner HTML, so first try to get the word ID
    if s[:text] !~ %r{<li><a href="(\d+)" title="vai al lemma precedente" accesskey="p">lemma precedente</a></li>}
      return
    end
    # the page only links to the previous lemma, so the current one is the
    # captured ID plus one
    id = $1.to_i + 1
    title = s[:text].ircify_html_title
    content = @bot.filter(:htmlinfo, URI.parse(@dmwaplemma % id))[:content]
    # drop the leading "<word> - " prefix from the content
    return {:title => title, :content => content.sub(/^\S+\s+-\s+/,'')}
  end

  # Sets up the URL templates of the supported dictionaries and registers
  # #demauro_filter as an :htmlinfo filter.
  def initialize
    super
    @dmurl = "http://www.demauroparavia.it/"
    @dmurlrx = %r{http://(?:www\.)?demauroparavia\.it/(\d+)}
    @dmwapurl = "http://wap.demauroparavia.it/index.php?lemma=%s"
    @dmwaplemma = "http://wap.demauroparavia.it/lemma.php?ID=%s"
    @oxurl = "http://www.askoxford.com/concise_oed/%s"
    @chambersurl = "http://www.chambersharrap.co.uk/chambers/features/chref/chref.py/main?query=%s&title=21st"
    @littreurl = "http://francois.gannaz.free.fr/Littre/xmlittre.php?requete=%s"

    @bot.register_filter(:demauro, :htmlinfo) { |s| demauro_filter(s) }
  end


  # Returns the help string for +topic+ (one of the dictionary commands), or
  # a generic usage line for any other topic. +plugin+ is not used.
  def help(plugin, topic="")
    case topic
    when "demauro"
      return "demauro <word> => provides a link to the definition of <word> from the De Mauro/Paravia dictionary"
    when "oxford"
      return "oxford <word> => provides a link to the definition of <word> (it can also be an expression) from the Concise Oxford dictionary"
    when "chambers"
      return "chambers <word> => provides a link to the definition of <word> (it can also be an expression) from the Chambers 21st Century Dictionary"
    when "littre"
      return "littre <word> => provides a link to the definition of <word> (it can also be an expression) from the Littré online dictionary"
    end
    return "<dictionary> <word>: check for <word> on <dictionary> where <dictionary> can be one of: demauro, oxford, chambers, littre"
  end

  # Looks up params[:word] in the De Mauro/Paravia dictionary.
  #
  # m::      the message to reply to (may be nil when only checking)
  # params:: :word is the word to look for; a true :justcheck turns the call
  #          into a silent validity check
  #
  # When checking, returns true if one of the returned lemmas contains the
  # word and false otherwise. When not checking, replies with up to
  # 'dict.hits' numbered hits (flagged as "Similar words" when none of them
  # contains the word) and, if 'dict.first_par' is positive, asks
  # Utils.get_first_pars to post the first paragraphs of the hits.
  def demauro(m, params)
    justcheck = params[:justcheck]

    word = params[:word].to_s.downcase
    url = @dmwapurl % CGI.escape(word)
    info, xml = fetch_page(url)
    return fetch_failed(m, word, info, justcheck) if xml.nil?

    if xml =~ /Non ho trovato occorrenze per/
      return false if justcheck
      m.reply "Nothing found for #{word}"
      return
    end

    entries = xml.scan(DEMAURO_LEMMA)
    # a page we could not extract any lemma from counts as "nothing found"
    # rather than blowing up further down
    if entries.empty?
      return false if justcheck
      m.reply "Nothing found for #{word}"
      return
    end

    text = word
    lemmas = entries.map { |ar| ar[0] }
    # grep always returns an array, so emptiness (not truthiness) tells
    # whether any lemma matched; the word is escaped because it is user input
    if lemmas.grep(/\b#{Regexp.escape(word)}\b/).empty?
      return false if justcheck
      text += " not found. Similar words"
    end
    return true if justcheck

    text += ": "
    hits = @bot.config['dict.hits']
    urls = []
    n = 0
    text += entries[0...hits].map { |ar|
      n += 1
      urls << @dmwaplemma % ar[2]
      "#{n}. #{Bold}#{ar[0]}#{Bold} - #{ar[1].gsub(/<\/?em>/,'')}: #{@dmurl}#{ar[2]}"
    }.join(" | ")
    m.reply text

    first_pars = @bot.config['dict.first_par']

    return unless first_pars > 0

    Utils.get_first_pars urls, first_pars, :message => m,
      :strip => /^.+?\s+-\s+/

  end

  # Returns true if +word+ is found by #demauro, false otherwise.
  def is_italian?(word)
    return demauro(nil, :word => word, :justcheck => true)
  end


  # Looks up params[:word] in the Concise Oxford dictionary.
  #
  # m::      the message to reply to (may be nil when only checking)
  # params:: :word is either a single string or an array of words (joined
  #          without separator to build the page name); a true :british
  #          requests the UK view; a true :justcheck turns the call into a
  #          silent validity check
  #
  # Both "<word>" and "<word>_1" pages are tried. Returns true as soon as
  # one of them carries the expected heading (after replying with the link
  # and the text following the heading, unless checking). Otherwise returns
  # false when checking, or replies that the word was not found.
  def oxford(m, params)
    justcheck = params[:justcheck]

    # the command map passes the words as an array, is_british? as a string
    word = Array(params[:word]).join
    heading = %r!<h2>#{Regexp.escape(word)}(?:<sup>1</sup>)?</h2>!
    [word, word + "_1"].each { |check|
      url = @oxurl % CGI.escape(check)
      url << "?view=uk" if params[:british]
      h = @bot.httputil.get(url, :max_redir => 5)
      found = h && h.match(heading)
      next unless found
      # a validity check must not touch the (possibly nil) message
      return true if justcheck
      m.reply("#{word} : #{url}")
      defn = found.post_match
      m.reply("#{Bold}%s#{Bold}: %s" % [word, defn.ircify_html(:nbsp => :space)], :overlong => :truncate)
      return true
    }
    return false if justcheck
    m.reply "#{word} not found"
  end

  # Returns true if +word+ is found by #oxford in the UK view, false
  # otherwise.
  def is_british?(word)
    return oxford(nil, :word => word, :justcheck => true, :british => true)
  end


  # Looks up params[:word] in the Chambers 21st Century Dictionary.
  #
  # m::      the message to reply to (may be nil when only checking)
  # params:: :word is the word or expression to look for; a true :justcheck
  #          turns the call into a silent validity check
  #
  # When checking, returns true unless the fetch failed or the page reports
  # no (exact) match. When not checking, replies with the link followed by up
  # to 'dict.hits' entries, or with an explanation of the failure.
  def chambers(m, params)
    justcheck = params[:justcheck]

    word = params[:word].to_s.downcase
    url = @chambersurl % CGI.escape(word)
    info, xml = fetch_page(url)
    case xml
    when nil
      return fetch_failed(m, word, info, justcheck)
    when /Sorry, no entries for <b>.*?<\/b> were found./
      return false if justcheck
      m.reply "Nothing found for #{word}"
      return
    when /No exact matches for <b>.*?<\/b>, but the following may be helpful./
      return false if justcheck
      m.reply "Nothing found for #{word}, but see #{url} for possible suggestions"
      return
    end
    # Else, we have a hit
    return true if justcheck
    m.reply "#{word}: #{url}"
    hits = @bot.config['dict.hits']
    xml.scan(CHAMBERS_LEMMA)[0...hits].each { |ar|
      m.reply(("#{Bold}%s#{Bold} #{Underline}%s#{Underline}%s" % ar).ircify_html, :overlong => :truncate)
    }
  end

  # Returns true if +word+ is found by #chambers, false otherwise.
  def is_english?(word)
    return chambers(nil, :word => word, :justcheck => true)
  end

  # Looks up params[:word] in the Littré online dictionary.
  #
  # m::      the message to reply to (may be nil when only checking)
  # params:: :word is the word or expression to look for; a true :justcheck
  #          turns the call into a silent validity check
  #
  # When checking, returns true unless the fetch failed or the page reports
  # an error for the word. When not checking, replies with the entry head,
  # link and header followed by up to 'dict.hits' numbered variants; if the
  # page reports an error but still shows an entry, that entry is presented
  # as the assumed intended word.
  def littre(m, params)
    justcheck = params[:justcheck]

    word = params[:word].to_s.downcase
    url = @littreurl % CGI.escape(word)
    info, xml = fetch_page(url)
    return fetch_failed(m, word, info, justcheck) if xml.nil?

    found = xml.match(/<div class="entree">(.*?)<\/div>/)
    head = found && found[1]

    if xml =~ /Erreur : le mot <STRONG>.*?<\/STRONG> n'a pas./
      return false if justcheck
      unless head
        m.reply "Nothing found for #{word}"
        return
      end
      m.reply "Nothing found for #{word}, I'll assume you meant #{head}"
    end
    return true if justcheck

    found = xml.match(/<div class="entete">(.*?)<\/div>/m)
    entete = found && found[1]
    # fall back to the searched word when the page carries no entry head;
    # rendering of the header stays best-effort
    m.reply "#{head || word}: #{url} : #{entete.ircify_html rescue nil}"
    hits = @bot.config['dict.hits']
    entries = xml.scan(/<span class="variante">(.*?)<\!--variante-->/m)
    entries[0...hits].each_with_index { |ar, i|
      m.reply(("#{Bold}#{i + 1}#{Bold} %s" % ar).ircify_html, :overlong => :truncate)
    }
  end

  # Returns true if +word+ is found by #littre, false otherwise.
  def is_french?(word)
    return littre(nil, :word => word, :justcheck => true)
  end

  private

  # Fetches +url+ and returns the pair [response, body]. Both are nil when
  # the request raised: errors from the HTTP layer are deliberately swallowed
  # so that a failed lookup is reported instead of aborting the command.
  def fetch_page(url)
    info = @bot.httputil.get_response(url) rescue nil
    return info, (info ? info.body : nil)
  end

  # Handles a lookup whose page could not be retrieved: returns false for a
  # validity check, otherwise replies with an error message (including the
  # HTTP code and message when a response is available) and returns nil.
  def fetch_failed(m, word, info, justcheck)
    return false if justcheck
    status = info ? " (#{info.code} - #{info.message})" : ""
    m.reply "An error occurred while looking for #{word}#{status}"
    return nil
  end

end

# Instantiate the plugin and register one threaded command per dictionary.
plugin = DictPlugin.new

# De Mauro uses the ':word' template ...
plugin.map 'demauro :word', :action => 'demauro', :thread => true

# ... while the other dictionaries use the '*word' template (their help text
# states that the argument can also be an expression).
%w[oxford chambers littre].each do |dict|
  plugin.map "#{dict} *word", :action => dict, :thread => true
end