diff options
Diffstat (limited to 'data')
-rw-r--r-- | data/rbot/plugins/translator.rb | 285 |
1 files changed, 285 insertions, 0 deletions
diff --git a/data/rbot/plugins/translator.rb b/data/rbot/plugins/translator.rb new file mode 100644 index 00000000..ea1b8988 --- /dev/null +++ b/data/rbot/plugins/translator.rb @@ -0,0 +1,285 @@ +#-- vim:sw=2:et +#++ +# +# :title: Translator plugin for rbot +# +# Author:: Yaohan Chen <yaohan.chen@gmail.com> +# Copyright:: (C) 2007 Yaohan Chen +# License:: GPLv2 +# +# This plugin allows using rbot to translate text on a few translation services + +require 'set' +require 'timeout' + +# base class for implementing a translation service +# = Attributes +# direction:: supported translation directions, a hash where each key is a source +# language name, and each value is Set of target language names. The +# methods in the Direction module are convenient for initializing this +# attribute +class Translator + + class UnsupportedDirectionError < ArgumentError + end + + attr_reader :directions, :cache + + def initialize(directions, cache={}) + @directions = directions + @cache = cache + end + + + # whether the translator supports this direction + def support?(from, to) + from != to && @directions[from].include?(to) + end + + # this implements checking of languages and caching. subclasses should define the + # do_translate method to implement actual translation + def translate(text, from, to) + raise UnsupportedDirectionError unless support?(from, to) + @cache[[text, from, to]] ||= do_translate(text, from, to) + end + + module Direction + # given the set of supported languages, return a hash suitable for the directions + # attribute which includes any language to any other language + def self.all_to_all(languages) + directions = all_to_all(languages) + languages.each {|l| directions[l] = languages.to_set} + directions + end + + # a hash suitable for the directions attribute which includes any language from/to + # the given set of languages (center_languages) + def self.all_from_to(languages, center_languages) + directions = all_to_none(languages) + center_languages.each {|l| directions[l] = languages - [l]} + (languages - center_languages).each {|l| directions[l] = center_languages.to_set} + directions + end + + # get a hash from a list of pairs + def self.pairs(list_of_pairs) + languages = list_of_pairs.flatten.to_set + directions = all_to_none(languages) + list_of_pairs.each do |(from, to)| + directions[from] << to + end + directions + end + + # an empty hash with empty sets as default values + def self.all_to_none(languages) + Hash.new do |h, k| + # always return empty set when the key is non-existent, but put empty set in the + # hash only if the key is one of the languages + if languages.include? k + h[k] = Set.new + else + Set.new + end + end + end + end +end + + +class NiftyTranslator < Translator + def initialize(cache={}) + require 'mechanize' + super(Translator::Direction.all_from_to(%w[ja en zh_CN ko], %w[ja]), cache) + @form = WWW::Mechanize.new. + get('http://nifty.amikai.com/amitext/indexUTF8.jsp'). + forms.name('translateForm').first + end + + def do_translate(text, from, to) + @form.radiobuttons.name('langpair').value = "#{from},#{to}".upcase + @form.fields.name('sourceText').value = text + + @form.submit(@form.buttons.name('translate')). + forms.name('translateForm').fields.name('translatedText').value + end +end + + +class ExciteTranslator < Translator + + def initialize(cache={}) + require 'mechanize' + require 'iconv' + + super(Translator::Direction.all_from_to(%w[ja en zh_CN zh_TW ko], %w[ja]), cache) + + @forms = Hash.new do |h, k| + case k + when 'en' + h[k] = open_form('english') + when 'zh_CN', 'zh_TW' + # this way we don't need to fetch the same page twice + h['zh_CN'] = h['zh_TW'] = open_form('chinese') + when 'ko' + h[k] = open_form('korean') + end + end + end + + def open_form(name) + WWW::Mechanize.new.get("http://www.excite.co.jp/world/#{name}"). + forms.name('world').first + end + + def do_translate(text, from, to) + non_ja_language = from != 'ja' ? from : to + form = @forms[non_ja_language] + + if non_ja_language =~ /zh_(CN|TW)/ + form.fields.name('wb_lp').value = "#{from}#{to}".sub(/_(?:CN|TW)/, '').upcase + form.fields.name('big5').value = ($1 == 'TW' ? 'yes' : 'no') + else + # the en<->ja page is in Shift_JIS while other pages are UTF-8 + text = Iconv.iconv('Shift_JIS', 'UTF-8', text) if non_ja_language == 'en' + form.fields.name('wb_lp').value = "#{from}#{to}".upcase + end + form.fields.name('before').value = text + result = form.submit.forms.name('world').fields.name('after').value + # the en<->ja page is in Shift_JIS while other pages are UTF-8 + if non_ja_language == 'en' + Iconv.iconv('UTF-8', 'Shift_JIS', result) + else + result + end + + end +end + + +class GoogleTranslator < Translator + def initialize(cache={}) + require 'mechanize' + load_form! + language_pairs = @lang_list.options.map do |o| + # these options have values like "en|zh-CN"; map to things like ['en', 'zh_CN']. + o.value.split('|').map {|l| l.sub('-', '_')} + end + super(Translator::Direction.pairs(language_pairs), cache) + end + + def load_form! + agent = WWW::Mechanize.new + # without faking the user agent, Google Translate will serve non-UTF-8 text + agent.user_agent_alias = 'Linux Konqueror' + @form = agent.get('http://www.google.com/translate_t'). + forms.action('/translate_t').first + @lang_list = @form.fields.name('langpair') + end + + def do_translate(text, from, to) + load_form! + + @lang_list.value = "#{from}|#{to}".sub('_', '-') + @form.fields.name('text').value = text + @form.submit.parser.search('div#result_box').inner_html + end +end + + +class BabelfishTranslator < Translator + def initialize(cache) + require 'mechanize' + + @form = WWW::Mechanize.new.get('http://babelfish.altavista.com/babelfish/'). + forms.name('frmTrText').first + @lang_list = @form.fields.name('lp') + language_pairs = @lang_list.options.map {|o| o.value.split('_')}. + reject {|p| p.empty?} + super(Translator::Direction.pairs(language_pairs), cache) + end + + def translate(text, from, to) + super + if @form.fields.name('trtext').empty? + @form.add_field!('trtext', text) + else + @form.fields.name('trtext').value = text + end + @lang_list.value = "#{from}_#{to}" + @form.submit.parser.search("td.s/div[@style]").inner_html + end +end + +class TranslatorPlugin < Plugin + BotConfig.register BotConfigIntegerValue.new('translate.timeout', + :default => 30, :validate => Proc.new{|v| v > 0}, + :desc => _("Number of seconds to wait for the translation service before timeout")) + + def initialize + super + translator_classes = { + 'nifty' => NiftyTranslator, + 'excite' => ExciteTranslator, + 'google_translate' => GoogleTranslator, + 'babelfish' => BabelfishTranslator + } + + @translators = {} + + translator_classes.each_pair do |name, c| + begin + @translators[name] = c.new(@registry.sub_registry(name)) + map "#{name} :from :to *phrase", :action => :cmd_translate + rescue + warning _("Translator %{name} cannot be used: %{reason}") % + {:name => name, :reason => $!} + end + end + end + + def help(plugin, topic=nil) + if @translators.has_key?(topic) + _('Supported directions of translation for %{translator}: %{directions}') % { + :translator => topic, + :directions => @translators[topic].directions.map do |source, targets| + _('%{source} -> %{targets}') % + {:source => source, :targets => targets.to_a.join(', ')} + end.join(' | ') + } + else + _('Command: <translator> <from> <to> <phrase>, where <translator> is one of: %{translators}. Use help <translator> to look up supported from and to languages') % + {:translators => @translators.keys.join(', ')} + end + end + + def cmd_translate(m, params) + # get the first word of the command + translator = @translators[m.message[/\A(\w+)\s/, 1]] + from, to, phrase = params[:from], params[:to], params[:phrase].to_s + if translator + begin + if translator.support?(from, to) + translation = Timeout.timeout(@bot.config['translate.timeout']) do + translator.translate(phrase, from, to) + end + if translation.empty? + m.reply _('No translation returned') + else + m.reply translation + end + else + m.reply _("%{translator} doesn't support translating from %{source} to %{target}") % + {:source => from, :target => to} + end + rescue Timeout::Error + m.reply _('The translator timed out') + end + else + m.reply _('No translator called %{name}') % {:name => translator} + end + end +end + +plugin = TranslatorPlugin.new + |