summaryrefslogtreecommitdiff
path: root/data/rbot/plugins/dict.rb
blob: 098d4ebb639425a5797c6dcd174a60ee4a09e0a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#-- vim:sw=2:et
#++
#
# :title: Dictionary lookup plugin for rbot
#
# Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
# Copyright:: (C) 2006-2007 Giuseppe Bilotta
# License:: GPL v2
#
# Provides a link to the definition of a word in one of the supported
# dictionaries. Currently available are
#   * the Oxford dictionary for (British) English
#   * the De Mauro/Paravia dictionary for Italian
#   * the Chambers dictionary for English (accepts both US and UK)
#   * the Littré dictionary for French
#
# Other plugins can use this one to check whether a given word is valid in
# Italian, British English, English or French by calling the is_italian?,
# is_british?, is_english? and is_french? methods respectively.
#
# TODO: cache results and reuse them if get_cached returns a cache copy

# Matches one hit in the De Mauro WAP search results, i.e. an
# <anchor>...<go href="lemma.php?ID=N"/></anchor> element.
# Captures: (1) the anchor text before the " - " separator, (2) the text after
# it, (3) the numeric ID from the link. The " - " part is mandatory (its
# non-capturing group carries no quantifier). DictPlugin#demauro shows (1) in
# bold as the headword, (2) as its description, and appends (3) to the lemma
# URLs.
DEMAURO_LEMMA = /<anchor>(.*?)(?: - (.*?))<go href="lemma.php\?ID=(\d+)"\/><\/anchor>/
# Matches one entry paragraph in a Chambers result page.
# Captures: (1) the content of the "hwd" span, (2) the content of the "psa"
# span, (3) the remainder of the paragraph. DictPlugin#chambers renders them
# as bold, underlined and plain text respectively.
CHAMBERS_LEMMA = /<p><span class="hwd">(.*?)<\/span> <span class="psa">(.*?)<\/span>(.*?)<\/p>/

# Dictionary lookup plugin. Each dictionary has an action method taking the
# triggering message and the mapped parameters, plus an is_<language>?
# predicate that runs the same lookup with :justcheck set so that nothing is
# sent to IRC and only true/false is returned.
class DictPlugin < Plugin
  Config.register Config::IntegerValue.new('dict.hits',
    :default => 3,
    :desc => "Number of hits to return from a dictionary lookup")
  Config.register Config::IntegerValue.new('dict.first_par',
    :default => 0,
    :desc => "When set to n > 0, the bot will return the first paragraph from the first n dictionary hits")

  # :htmlinfo filter for De Mauro/Paravia pages.
  #
  # s:: the filter input; s[:text] is read here and s itself is passed to
  #     Utils.check_location together with the De Mauro URL pattern.
  #
  # Returns nil when the page is not recognised or carries no "previous
  # lemma" link, otherwise a Hash with the :title of the page and the
  # :content taken from the WAP version of the same lemma.
  def demauro_filter(s)
    # check if it's a page we can handle
    loc = Utils.check_location(s, @dmurlrx)
    page = s[:text]
    # the location might be not good, but we might still be able to handle the
    # page
    if !loc && page !~ /<!-- Il dizionario della lingua italiana Paravia: /
      debug "not our business"
      return
    end
    # we want to grab the content from the WAP page, since it's in a much
    # cleaner HTML, so first try to get the word ID: it is the ID found in the
    # "previous lemma" link, plus one
    previous = page.match(%r{<li><a href="(\d+)" title="vai al lemma precedente" accesskey="p">lemma precedente</a></li>})
    return unless previous
    id = previous[1].to_i + 1
    title = page.ircify_html_title
    content = @bot.filter(:htmlinfo, URI.parse(@dmwaplemma % id))[:content]
    # drop the leading "<word> - " from the WAP content
    return {:title => title, :content => content.sub(/^\S+\s+-\s+/,'')}
  end

  # Sets up the URL templates of the supported dictionaries and registers
  # demauro_filter as the :demauro filter in the :htmlinfo group.
  def initialize
    super
    @dmurl = "http://www.demauroparavia.it/"
    @dmurlrx = %r{http://(?:www\.)?demauroparavia\.it/(\d+)}
    @dmwapurl = "http://wap.demauroparavia.it/index.php?lemma=%s"
    @dmwaplemma = "http://wap.demauroparavia.it/lemma.php?ID=%s"
    @oxurl = "http://www.askoxford.com/concise_oed/%s"
    @chambersurl = "http://www.chambersharrap.co.uk/chambers/features/chref/chref.py/main?query=%s&title=21st"
    @littreurl = "http://francois.gannaz.free.fr/Littre/xmlittre.php?requete=%s"

    @bot.register_filter(:demauro, :htmlinfo) { |s| demauro_filter(s) }
  end


  # Returns the help text for the given topic (one of the dictionary names),
  # or the generic usage line for any other topic.
  def help(plugin, topic="")
    case topic
    when "demauro"
      "demauro <word> => provides a link to the definition of <word> from the De Mauro/Paravia dictionary"
    when "oxford"
      "oxford <word> => provides a link to the definition of <word> (it can also be an expression) from the Concise Oxford dictionary"
    when "chambers"
      "chambers <word> => provides a link to the definition of <word> (it can also be an expression) from the Chambers 21st Century Dictionary"
    when "littre"
      "littre <word> => provides a link to the definition of <word> (it can also be an expression) from the Littré online dictionary"
    else
      "<dictionary> <word>: check for <word> on <dictionary> where <dictionary> can be one of: demauro, oxford, chambers, littre"
    end
  end

  # Looks up params[:word] in the De Mauro WAP search.
  #
  # m::      message to reply to; may be nil when params[:justcheck] is set
  # params:: :word (String) and optionally :justcheck
  #
  # With :justcheck, returns true only when one of the returned lemmas
  # contains the word as a whole word, false otherwise, and never replies.
  # Without it, replies with up to 'dict.hits' numbered hits and, when
  # 'dict.first_par' is positive, asks Utils.get_first_pars for the first
  # paragraphs of the corresponding WAP pages.
  def demauro(m, params)
    justcheck = params[:justcheck]

    word = params[:word].downcase
    url = @dmwapurl % CGI.escape(word)
    info, xml = dict_fetch(url)
    if xml.nil?
      return false if justcheck
      m.reply "An error occurred while looking for #{word}#{dict_error_details(info)}"
      return
    end
    entries = xml.scan(DEMAURO_LEMMA)
    # A page without any parsable lemma is handled like an explicit
    # "no occurrences" page; the old code raised NoMethodError on it.
    if entries.empty? || xml =~ /Non ho trovato occorrenze per/
      return false if justcheck
      m.reply "Nothing found for #{word}"
      return
    end
    text = word
    # Array#grep always returns a (truthy) Array, so the match has to be
    # tested with any?; the word is escaped because it is user input.
    exact = /\b#{Regexp.escape(word)}\b/
    unless entries.any? { |ar| ar[0] =~ exact }
      return false if justcheck
      text += " not found. Similar words"
    end
    return true if justcheck
    text += ": "
    n = 0
    urls = []
    hits = @bot.config['dict.hits']
    text += entries[0...hits].map { |ar|
      n += 1
      urls << @dmwaplemma % ar[2]
      "#{n}. #{Bold}#{ar[0]}#{Bold} - #{ar[1].gsub(/<\/?em>/,'')}: #{@dmurl}#{ar[2]}"
    }.join(" | ")
    m.reply text

    first_pars = @bot.config['dict.first_par']

    return unless first_pars > 0

    Utils.get_first_pars urls, first_pars, :message => m,
      :strip => /^.+?\s+-\s+/

  end

  # true when the De Mauro lookup finds +word+, false otherwise.
  def is_italian?(word)
    return demauro(nil, :word => word, :justcheck => true)
  end


  # Looks up params[:word] in the Concise Oxford dictionary, trying both
  # "<word>" and "<word>_1" as page names.
  #
  # m::      message to reply to; may be nil when params[:justcheck] is set
  # params:: :word (an Array of words as mapped by '*word', or a plain String
  #          as passed by is_british?), optionally :justcheck and :british
  #
  # Returns true when a definition is found. When nothing is found, returns
  # false with :justcheck and otherwise replies "<word> not found".
  def oxford(m, params)
    justcheck = params[:justcheck]

    # Array() lets the String handed over by is_british? through; the pieces
    # are concatenated without a separator exactly as before.
    word = Array(params[:word]).join
    # the word is user input, so it must not be interpreted as a pattern
    definition_rx = /<h2>#{Regexp.escape(word)}<\/h2>(.*)Perform/m
    [word, word + "_1"].each { |check|
      url = @oxurl % CGI.escape(check)
      url << "?view=uk" if params[:british]
      h = @bot.httputil.get(url, :max_redir => 5)
      next unless h
      found = h.match(definition_rx)
      next unless found
      # no message to reply to when we are only checking
      return true if justcheck
      m.reply("#{word} : #{url}")
      m.reply("#{Bold}%s#{Bold}: %s" % [word, found[1].ircify_html(:nbsp => :space)], :overlong => :truncate)
      return true
    }
    return false if justcheck
    m.reply "#{word} not found"
  end

  # true when the Oxford lookup (UK view) finds +word+, false otherwise.
  def is_british?(word)
    return oxford(nil, :word => word, :justcheck => true, :british => true)
  end


  # Looks up params[:word] in the Chambers 21st Century Dictionary.
  #
  # m::      message to reply to; may be nil when params[:justcheck] is set
  # params:: :word and optionally :justcheck
  #
  # With :justcheck, returns true for a hit and false for an error, a "no
  # entries" page or a "no exact matches" page. Without it, replies with the
  # result URL followed by up to 'dict.hits' entries.
  def chambers(m, params)
    justcheck = params[:justcheck]

    word = params[:word].to_s.downcase
    url = @chambersurl % CGI.escape(word)
    info, xml = dict_fetch(url)
    case xml
    when nil
      return false if justcheck
      m.reply "An error occurred while looking for #{word}#{dict_error_details(info)}"
      return
    when /Sorry, no entries for <b>.*?<\/b> were found./
      return false if justcheck
      m.reply "Nothing found for #{word}"
      return
    when /No exact matches for <b>.*?<\/b>, but the following may be helpful./
      return false if justcheck
      m.reply "Nothing found for #{word}, but see #{url} for possible suggestions"
      return
    end
    # Else, we have a hit
    return true if justcheck
    m.reply "#{word}: #{url}"
    entries = xml.scan(CHAMBERS_LEMMA)
    hits = @bot.config['dict.hits']
    entries[0...hits].each { |ar|
      m.reply(("#{Bold}%s#{Bold} #{Underline}%s#{Underline}%s" % ar).ircify_html, :overlong => :truncate)
    }
  end

  # true when the Chambers lookup finds +word+, false otherwise.
  def is_english?(word)
    return chambers(nil, :word => word, :justcheck => true)
  end

  # Looks up params[:word] in the Littré online dictionary.
  #
  # m::      message to reply to; may be nil when params[:justcheck] is set
  # params:: :word and optionally :justcheck
  #
  # With :justcheck, returns false for an error or an "Erreur" page and true
  # otherwise. Without it, replies with the entry header and up to
  # 'dict.hits' numbered variants; on an "Erreur" page that still carries an
  # entry, that entry is shown after a notice.
  def littre(m, params)
    justcheck = params[:justcheck]

    word = params[:word].to_s.downcase
    url = @littreurl % CGI.escape(word)
    info, xml = dict_fetch(url)
    # headword of the entry the page actually shows, if any
    head = xml && xml[/<div class="entree">(.*?)<\/div>/, 1]
    case xml
    when nil
      return false if justcheck
      m.reply "An error occurred while looking for #{word}#{dict_error_details(info)}"
      return
    when /Erreur : le mot <STRONG>.*?<\/STRONG> n'a pas./
      return false if justcheck
      unless head
        m.reply "Nothing found for #{word}"
        return
      end
      m.reply "Nothing found for #{word}, I'll assume you meant #{head}"
    end
    return true if justcheck
    entete = xml[/<div class="entete">(.*?)<\/div>/m, 1]
    summary = entete && entete.ircify_html
    # fall back to the requested word so the reply never starts with ": "
    m.reply "#{head || word}: #{url} : #{summary}"
    entries = xml.scan(/<span class="variante">(.*?)<\!--variante-->/m)
    hits = @bot.config['dict.hits']
    n = 0
    entries[0...hits].each { |ar|
      n += 1
      m.reply(("#{Bold}#{n}#{Bold} %s" % ar).ircify_html, :overlong => :truncate)
    }
  end

  # true when the Littré lookup finds +word+, false otherwise.
  def is_french?(word)
    return littre(nil, :word => word, :justcheck => true)
  end

  private

  # Requests +url+ and returns [response, body]. Both are nil when the
  # request raised a StandardError.
  def dict_fetch(url)
    info = begin
      @bot.httputil.get_response(url)
    rescue StandardError
      nil
    end
    [info, info ? info.body : nil]
  end

  # Returns " (<code> - <message>)" for a response object, or an empty
  # string when there is no response at all.
  def dict_error_details(info)
    info ? " (#{info.code} - #{info.message})" : ""
  end

end

# Instantiate the plugin and route every "<dictionary> ..." command to the
# action of the same name, each running in its own thread. demauro takes a
# single word, the other dictionaries take the whole rest of the line.
plugin = DictPlugin.new
[
  ['demauro',  'demauro :word'],
  ['oxford',   'oxford *word'],
  ['chambers', 'chambers *word'],
  ['littre',   'littre *word']
].each do |action, template|
  plugin.map template, :action => action, :thread => true
end