diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-03-17 01:49:13 +0100 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-03-17 01:49:13 +0100 |
commit | e974d55bd3acfabde77f4e19b305d5445ea6df90 (patch) | |
tree | a5f43cb1c610fb16624396a30ba8a81daa6413be /data | |
parent | 9f0d842ed98fd6fceb4ccb505d42441231528827 (diff) |
dict plugin: provide htmlinfo filter for demauro
Diffstat (limited to 'data')
-rw-r--r-- | data/rbot/plugins/dict.rb | 23 |
1 files changed, 23 insertions, 0 deletions
```diff
diff --git a/data/rbot/plugins/dict.rb b/data/rbot/plugins/dict.rb
index 5176591a..edadd3c7 100644
--- a/data/rbot/plugins/dict.rb
+++ b/data/rbot/plugins/dict.rb
@@ -31,14 +31,37 @@ class DictPlugin < Plugin
     :default => 0,
     :desc => "When set to n > 0, the bot will return the first paragraph from the first n dictionary hits")
 
+  def demauro_filter(s)
+    # check if it's a page we can handle
+    loc = Utils.check_location(s, @dmurlrx)
+    # the location might be not good, but we might still be able to handle the
+    # page
+    if !loc and s[:text] !~ /<!-- Il dizionario della lingua italiana Paravia: /
+      debug "not our business"
+      return
+    end
+    # we want to grab the content from the WAP page, since it's in a much
+    # cleaner HTML, so first try to get the word ID
+    if s[:text] !~ %r{<li><a href="(\d+)" title="vai al lemma precedente" accesskey="p">lemma precedente</a></li>}
+      return
+    end
+    id = $1.to_i + 1
+    title = s[:text].ircify_html_title
+    content = @bot.filter(:htmlinfo, URI.parse(@dmwaplemma % id))[:content]
+    return {:title => title, :content => content.sub(/^\S+\s+-\s+/,'')}
+  end
+
   def initialize
     super
     @dmurl = "http://www.demauroparavia.it/"
+    @dmurlrx = %r{http://(?:www\.)?demauroparavia\.it/(\d+)}
     @dmwapurl = "http://wap.demauroparavia.it/index.php?lemma=%s"
     @dmwaplemma = "http://wap.demauroparavia.it/lemma.php?ID=%s"
     @oxurl = "http://www.askoxford.com/concise_oed/%s"
     @chambersurl = "http://www.chambersharrap.co.uk/chambers/features/chref/chref.py/main?query=%s&title=21st"
     @littreurl = "http://francois.gannaz.free.fr/Littre/xmlittre.php?requete=%s"
+
+    @bot.register_filter(:demauro, :htmlinfo) { |s| demauro_filter(s) }
   end
 
 
```