mirror of
https://github.com/Oreolek/Togataltu.git
synced 2024-05-30 06:48:07 +03:00
Initial commit -- partially working
This commit is contained in:
commit
6058c05e18
20
console.rb
Executable file
20
console.rb
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env ruby
|
||||
#encoding: utf-8
|
||||
#Консольный интерфейс. Не правда ли, намного проще? :-)
|
||||
$rlyDEBUG = true
|
||||
$:.unshift File.dirname(__FILE__) #добавить текущую директорию к $LOAD_FILE - чтобы не вызывать ruby -I.
|
||||
require "source.rb"
|
||||
# Minimal console logger. It only implements the `<<` operator, which is the
# one method the translation classes call on their log object.
class Log
  # Prints +string+ on standard output, followed by a newline.
  # Returns nil (the result of `puts`).
  def <<(string)
    $stdout.puts(string)
  end
end
|
||||
# Console driver: feeds a sample poem through the translation pipeline and
# prints the result through the console logger.
@log = Log.new
# Alternative sample input (Esperanto), kept for manual experiments:
#$source = Source.new("Dankon pro la mondkreintoj\nkiuj per heroestimo\ndonos al ni pliegigon\nlaux verkitaj plimensiloj.",@log)
# Sample input: a stanza by Emily Dickinson with the stressed vowels marked.
# Note: "denied" is stressed on the second syllable, "offered" on the first.
poem = "I ásked no óther thing,\nNo óther was deníed.\nI óffered Béing for it;\nThe míghty mérchant smíled."
$source = Source.new(poem, @log)
#$source.find_rhymes()
$source.replace
# translate returns false when a word could not be translated; in that case
# there is nothing to arrange or print.
unless $source.translate == false
  $source.arrange
  @log << $source.print
end
|
120
main.rb
Executable file
120
main.rb
Executable file
|
@ -0,0 +1,120 @@
|
|||
#!/usr/bin/env ruby
|
||||
#encoding: utf-8
|
||||
begin
|
||||
require "wx"
|
||||
rescue LoadError
|
||||
require "rubygems"
|
||||
require "wx"
|
||||
end
|
||||
$:.unshift File.dirname(__FILE__)
|
||||
require "source.rb"
|
||||
include Wx
|
||||
$rlyDEBUG = false
|
||||
# Main application window: a menu bar, two multi-line text boxes (original
# text and translation), a start button and a log box.
#
# Event handlers are registered by method name. They talk to the global
# `$source` (the current Source object) and to `Program` (global options).
class MainFrame < Frame
  # Builds the menu, the widgets and the layout, and wires up event handlers.
  #
  # title - window title; also shown in the About box.
  def initialize(title)
    super( nil, :title => title, :size => [400, 300] )

    # --- menu bar ---
    menu = Wx::MenuBar.new
    file = Wx::Menu.new
    file.append( Wx::ID_OPEN, "&Открыть\tAlt-O", "Открыть файл источника" )
    # Fixed: Save used the same accelerator as Open (Alt-O).
    file.append( Wx::ID_SAVE, "&Сохранить\tAlt-S", "Сохранить файл перевода" )
    file.append( Wx::ID_CLOSE, "&Закрыть\tAlt-C", "Закрыть файлы" )
    menu.append( file, "&Файл" )
    program = Wx::Menu.new
    program.append( Wx::ID_EXIT, "В&ыход\tAlt-X", "Выход" )
    program.append( Wx::ID_ABOUT, "&О программе...\tF1", "Показать информацию о программе" )
    menu.append( program, "&Программа" )
    self.menu_bar = menu

    evt_menu( Wx::ID_EXIT, :on_quit )
    evt_menu( Wx::ID_ABOUT, :on_about )
    evt_menu( Wx::ID_OPEN, :on_open )
    evt_menu( Wx::ID_SAVE, :on_save )
    evt_menu( Wx::ID_CLOSE, :on_close )

    # --- widgets ---
    button_start = Wx::Button.new(self, -1, 'Начать перевод')
    @button_pause = Wx::Button.new(self, -1, 'Приостановить перевод')
    # Pausing does not work yet, so the button is kept out of the layout.
    # Hide it explicitly: a child that is not managed by a sizer would
    # otherwise be drawn at the frame's top-left corner.
    @button_pause.show(false)
    #StaticText.new(self,300,"Hello World")
    @original = Wx::TextCtrl.new(self, -1, 'Здесь быть оригиналу',Wx::DEFAULT_POSITION, Wx::DEFAULT_SIZE,Wx::TE_MULTILINE)
    @translation = Wx::TextCtrl.new(self, -1, 'Здесь быть переводу',Wx::DEFAULT_POSITION, Wx::DEFAULT_SIZE,Wx::TE_MULTILINE)
    @log = Wx::TextCtrl.new(self, -1, 'Здесь будет лог',Wx::DEFAULT_POSITION, Wx::DEFAULT_SIZE,Wx::TE_MULTILINE)

    # --- layout: text boxes side by side, then the buttons, then the log ---
    sizer_textctrls = BoxSizer.new(Wx::HORIZONTAL)
    sizer_textctrls.add(@original, 1, Wx::GROW|Wx::ALL, 2)
    sizer_textctrls.add(@translation, 1, Wx::GROW|Wx::ALL, 2)
    sizer_buttons = BoxSizer.new(Wx::HORIZONTAL)
    sizer_buttons.add(button_start, 0, Wx::ALL|Wx::ALIGN_CENTER_HORIZONTAL, 2)
    #sizer_buttons.add(@button_pause, 0, Wx::ALL|Wx::ALIGN_CENTER_HORIZONTAL, 2) # does not work, so it is not shown
    sizer = BoxSizer.new(Wx::VERTICAL)
    sizer.add(sizer_textctrls, 1, Wx::GROW|Wx::ALL, 2)
    sizer.add(sizer_buttons, 0, Wx::ALL|Wx::ALIGN_CENTER_HORIZONTAL, 2)
    sizer.add(@log, 0, Wx::GROW|Wx::ALL, 2)
    self.set_sizer(sizer)

    evt_button(button_start.get_id, :on_start)
    evt_button(@button_pause.get_id, :on_pause)

    # In debug mode the translation is run immediately, without user input.
    on_start if $rlyDEBUG

    @opendialog = Wx::FileDialog.new(nil,"Открыть файл")
    # Fixed: the save dialog was titled "Открыть файл" ("Open file").
    @savedialog = Wx::FileDialog.new(nil,"Сохранить файл",:style => Wx::FD_SAVE | Wx::FD_OVERWRITE_PROMPT)
  end

  # Menu handler: closes the window.
  def on_quit
    close
  end

  # Button handler: runs the whole pipeline and shows the result.
  #
  # NOTE(review): the original text box is always overwritten with the
  # built-in sample poem, so a file loaded through "Open" is ignored. This
  # looks like a debugging leftover; it is kept because the commit is marked
  # "partially working". Confirm before removing.
  def on_start
    @original.clear
    #@original<<"Dankon pro la mondkreintoj\nkiuj per heroestimo\ndonos al ni pliegigon\nlaux verkitaj plimensiloj.";
    @original<<"I ásked no óther thing,\nNo óther was deníed.\nI óffered Béing for it;\nThe míghty mérchant smíled."
    $source = Source.new(@original.value,@log)
    @log.clear
    $source.replace
    # Fixed: stop when translation failed (translate returns false), as the
    # console front end does, instead of arranging an incomplete translation.
    return if $source.translate == false
    $source.arrange
    @translation.clear
    @translation << $source.print
  end

  # Button handler: toggles the global pause flag and the button label.
  def on_pause
    if Program.paused == false
      Program.paused = true
      @log << "\nПеревод приостановлен."
      @button_pause.set_label('Продолжить перевод')
    else
      Program.paused = false
      @log << "\nПеревод возобновлён."
      @button_pause.set_label('Приостановить перевод')
    end
  end

  # Menu handler: shows the About box.
  def on_about
    Wx::about_box(
      :name => self.title,
      :version => "1.1",
      :description => "Автор - Александр Яковлев a.k.a. Oreolek"
    )
  end

  # Menu handler: loads a file chosen by the user into the original text box.
  def on_open
    if @opendialog.show_modal == Wx::ID_OK
      @original.load_file(@opendialog.filename)
      @log << "\nОткрыт файл: #{@opendialog.filename}"
    end
  end

  # Menu handler: clears both text boxes and resets the pause flag.
  def on_close
    @original.clear
    @translation.clear
    Program.paused = false
    @log << "\nФайл закрыт."
  end

  # Menu handler: saves the translation box to a file chosen by the user.
  def on_save
    if @savedialog.show_modal == Wx::ID_OK
      @translation.save_file(@savedialog.filename)
      @log << "\nФайл перевода сохранён в #{@savedialog.filename}."
    end
  end
end
|
||||
# Application object: creates the main window when the application starts.
class Translate < App
  # Startup hook. Builds the main frame and shows it unless the global debug
  # flag is set: when debugging there is no need to show the GUI, calling the
  # functions is enough.
  def on_init
    @frame = MainFrame.new("Переводчик стихов")
    @frame.show unless $rlyDEBUG
  end
end
|
||||
# Holder for run-time options shared across the GUI.
class Options
  # Whether translation is currently paused (true/false).
  attr_reader :paused
  attr_writer :paused

  # A new option set starts in the "not paused" state.
  def initialize
    self.paused = false
  end
end
|
||||
# Global option holder read and written by the frame's event handlers.
Program = Options.new
# Create the application object and hand control to its event loop.
application = Translate.new
application.main_loop
|
19
morphology.rb
Normal file
19
morphology.rb
Normal file
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/env ruby
|
||||
#encoding: utf-8
|
||||
require 'net/http'
|
||||
require 'uri'
|
||||
# Obtains the word forms of a (Russian) word by running the phpMorphy command
# line script `phpmorphy/cli.php` through the `php` interpreter.
class Morphology
  # log - object that responds to `<<`; stored for diagnostics.
  def initialize(log)
    @word = "".force_encoding("UTF-8")
    @log = log
  end

  # Returns the forms of +word+ as an Array of Strings, or false when no
  # forms could be obtained: the script reported "Error", exited with a
  # non-zero status, or could not be started at all.
  #
  # The script is expected to print the forms separated by ", ".
  # (An earlier implementation queried http://www.morphology.ru/ over HTTP;
  # it was replaced by the local script.)
  def process(word)
    # The command is passed as an argument array, so no shell is involved and
    # quotes, `$(...)` or backticks in +word+ cannot be interpreted as shell
    # syntax (the word may come from a remote translation service).
    res = IO.popen(["php", "phpmorphy/cli.php", word.to_s], &:read)
    # With the block form of popen, $? holds the child's exit status.
    return false unless $?.success?
    return false if res.match("Error")
    # strip: do not leave the output's trailing newline on the last form.
    res.strip.split(", ")
  rescue SystemCallError
    # `php` is missing or not executable: report "no forms", which callers
    # already handle, instead of crashing.
    false
  end
end
|
388
source.rb
Normal file
388
source.rb
Normal file
|
@ -0,0 +1,388 @@
|
|||
#!/usr/bin/env ruby
|
||||
#encoding: utf-8
|
||||
require "translator.rb"
|
||||
require "morphology.rb"
|
||||
class Source
|
||||
# Stores the source text and derives its stress pattern.
#
# text - the poem to translate; stressed vowels are marked either with an
#        accented letter ("á") or with a vowel followed by "´" ("a´").
# log  - object that responds to `<<`.
#
# @pattern receives one token per word and one line per source line:
# "!" for a stressed vowel, "-" for an unstressed one, words separated by a
# space. A word with exactly one vowel is always written as "!".
# The class-level caches of translations and word forms are reset.
def initialize(text, log)
  @text = text.encode("UTF-8")
  @log = log
  @translation = ""
  @pattern = ""
  @rhymed = Array.new
  @@translated_words = Hash.new
  @@forms = Hash.new
  stressed = {"a" => "á", "e" => "é", "i" => "í", "o" => "ó", "u" => "ú", "y" => "ý"}
  @text.each_line do |line|
    line.downcase.split.each do |raw_word|
      # Fold the "vowel + ´" notation into a single accented letter first.
      # Otherwise "o´" is counted as two vowels and yields "-!" instead of "!".
      # Only this local copy is changed; @text is normalized later by #replace.
      word = raw_word.gsub(/[aeiouy]´/) { |pair| stressed[pair[0]] }
      vowels = word.count("aeioyuáéíóúý´")
      if vowels == 1
        # One-syllable words are trivial: the only vowel carries the stress.
        @pattern << "! "
        next
      end
      word.each_char do |char|
        if char =~ /[áéíóúý´]/
          @pattern << "!"
        elsif char =~ /[aeioyu]/
          @pattern << "-"
        end
      end
      @pattern << " "
    end
    @pattern << "\n"
  end
end
|
||||
# Compares the endings of all lines pairwise and records candidate rhymes.
#
# For every line the last two words are taken (lower-case Latin letters and
# accented vowels only). Two lines are paired when the scan of their stressed
# vowels is equal; a line is therefore always paired with itself. For each
# pair a string is pushed to @rhymed:
#   "<line index> <other line index><number of scan matches><tail length>"
# where the tail is what follows the last stressed vowel of the first line's
# ending. Finally @rhymed is written to the log.
#
# Fixed: the original compared a MatchData with an Array (never equal) and
# then called `length` on nil, so it always raised; lines with fewer than two
# words (nil entries) raised as well.
# NOTE(review): the record format is kept as found; no consumer is visible.
def find_rhymes()
  ending = /([abcdefghijklmnopqrstuvwxyzáéíóúý]+\s[abcdefghijklmnopqrstuvwxyzáéíóúý]+)\W*$/
  stress = /([áéíóúý]).*([aeioy])*/
  # Last two words of every line; nil where a line has no such ending.
  # nil entries are kept so that indexes stay equal to line numbers.
  last_words = []
  @text.each_line { |line| last_words << line[ending, 1] }
  last_words.each_with_index do |word, word_index|
    next if word.nil?
    current_vowels = word.scan(stress)
    last_words.each_with_index do |word2, word2_index|
      next if word2.nil?
      vowels = word2.scan(stress)
      next unless vowels == current_vowels
      temp = word.gsub(/.*[áéíóúý]/, "")
      @rhymed.push("" + word_index.to_s + " " + word2_index.to_s + vowels.length.to_s + temp.length.to_s)
    end
  end
  @log << @rhymed.to_s
end
|
||||
# Normalizes alternative spellings in @text, in place: the elided article
# "l'" becomes "la", and a vowel followed by "´" becomes the accented letter.
# Returns the table of substitutions.
def replace()
  substitutions = {
    "l'" => "la",
    "o´" => "ó", "a´" => "á", "e´" => "é",
    "i´" => "í", "u´" => "ú", "y´" => "ý"
  }
  substitutions.each_pair { |spelling, normalized| @text.gsub!(spelling, normalized) }
end
|
||||
# Translates every whitespace-separated word of @text that is not in the
# translation cache yet.
#
# Translator#process answers with true (nothing to translate for this word),
# false (translation failed) or the translation itself.
# Returns false as soon as one word fails, after logging the failure;
# otherwise returns the list of words that were walked.
def translate()
  translator = Translator.new(@log)
  @text.split.each do |word|
    next unless @@translated_words[word].nil?
    outcome = translator.process(word)
    next if outcome == true
    if outcome == false
      @log << "Перевод не удался. Прекращение работы."
      return false
    end
    @@translated_words[word] = outcome #.force_encoding("UTF-8")
    #@log << "Полученный перевод: "+@@translated_words[word]
  end
end
|
||||
# Builds @translation from the cached word translations, trying to pick for
# each word a form whose stress signature equals a token of @pattern.
#
# For every translated source word and every pattern token, one word is
# appended to @translation: the translation itself when its signature equals
# the token, otherwise the last matching form reported by Morphology, or a
# random form so that the word is not lost. When no forms are available the
# plain translation is used. A space follows each source word and a newline
# each source line.
#
# Fixes relative to the previous version:
# * a leading loop over the source words computed nothing and was removed;
# * the loop variable `word` was overwritten with the chosen form, so later
#   pattern tokens looked up the translated word instead of the source word;
# * the morphology tool is now queried once per word, not once per token.
#
# NOTE(review): every word is still emitted once per pattern token of the
# whole poem, and the signature alphabet ('П', 'У', '-') differs from the
# pattern alphabet ('!', '-'). Both look unintended, but the intended pairing
# cannot be established from this file, so they are left unchanged.
# TODO (from the original author): obtain the forms of every word, mark the
# stress of each form, then search exhaustively against the pattern.
def arrange()
  morphology = Morphology.new(@log)
  @log << "Паттерн:\n"
  @log << @pattern
  @text.each_line do |line|
    line.split.each do |word|
      translated = @@translated_words[word]
      next if translated.nil?
      base_signature = stress_signature(translated.to_s)
      @pattern.split.each do |word_pattern|
        chosen = translated
        if word_pattern != base_signature
          @@forms[word] = morphology.process(translated) unless @@forms.key?(word)
          forms = @@forms[word]
          # `forms` is false when the morphology tool produced nothing.
          if forms && !forms.empty?
            matching = forms.select { |form| stress_signature(form) == word_pattern }
            # No form fits: take one at random so that the word is not lost.
            chosen = matching.last || forms[rand(forms.size)]
          end
        end
        @translation << chosen.to_s
      end
      @translation << " "
    end
    @translation << "\n"
  end
end

# Splits +text+ into pieces that each end just before the next vowel once a
# vowel has been seen, and joins the check_piece marks of the completed
# pieces.
# Fixed: a new piece used to start as an alias of the current character, so
# the vowel was doubled ("аа...").
# NOTE(review): the final piece is never checked, as in the original code.
def stress_signature(text)
  vowel = /[аеиоуыэюяё]/
  piece = ""
  result = ""
  text.each_char do |char|
    if char =~ vowel && piece =~ vowel
      # Only one vowel is allowed per piece: close the current one.
      result << check_piece(piece)
      piece = ""
    end
    piece << char
  end
  result
end
private :stress_signature
|
||||
# Stress check for Russian syllables.
#
# Three frequency tables, keyed by syllable. They are defined once as frozen
# constants; previously all three were rebuilt and mutated on every call.
# NOTE(review): the exact meaning of "acute_sec" versus "acute" is not stated
# anywhere in this file; the names are kept from the original.
PROBABILITY_ACUTE_SEC = {
  "все"=>0.14,
  "че"=>0.0675,
  "ква"=>0.0375,
  "ме"=>0.0275,
  "ми"=>0.025,
  "ви"=>0.0175,
  "ак"=>0.015,
  "сле"=>0.01,
  "на"=>0.01,
  "ки"=>0.01,
  "не"=>0.01,
  "ра"=>0.01,
}.freeze

PROBABILITY_ACUTE = {
  "ве"=>0.0255724374063771,
  "по"=>0.0209715386261502,
  "го"=>0.0206505456879949,
  "вы"=>0.017547613952493,
  "до"=>0.0117697410656966,
  "са"=>0.0114487481275412,
  "во"=>0.0111277551893858,
  "сто"=>0.0106997646051787,
  "ма"=>0.00802482345388401,
  "ко"=>0.00727583993152151,
  "ду"=>0.00706184463941793,
  "те"=>0.00674085170126257,
  "ме"=>0.006526856409159,
  "бо"=>0.006526856409159,
  "на"=>0.00641985876310721,
  "де"=>0.00588487053284828,
  "то"=>0.00588487053284828,
  "ра"=>0.00534988230258934,
  "за"=>0.00513588701048577,
  "мо"=>0.00513588701048577,
  "ка"=>0.00513588701048577,
  "це"=>0.00502888936443398,
  "па"=>0.00502888936443398,
  "ли"=>0.00481489407233041,
  "сте"=>0.00449390113417505,
  "ви"=>0.00449390113417505,
  "пра"=>0.00449390113417505,
  "пе"=>0.00449390113417505,
  "сло"=>0.00438690348812326,
  "про"=>0.00427990584207147,
  "но"=>0.0040659105499679,
  "ре"=>0.00395891290391611,
  "гла"=>0.00374491761181254,
  "ла"=>0.00363791996576075,
  "су"=>0.00363791996576075,
  "ле"=>0.00363791996576075,
  "зе"=>0.00363791996576075,
  "стра"=>0.00363791996576075,
  "со"=>0.00353092231970897,
  "зна"=>0.00353092231970897,
  "пи"=>0.00353092231970897,
  "се"=>0.00342392467365718,
  "тре"=>0.00342392467365718,
  "хо"=>0.00331692702760539,
  "при"=>0.00320992938155361,
  "ска"=>0.00320992938155361,
  "не"=>0.00320992938155361,
  "ро"=>0.00320992938155361,
  "чи"=>0.00299593408945003,
  "бе"=>0.00299593408945003,
  "ча"=>0.00299593408945003,
  "ва"=>0.00288893644339825,
  "кру"=>0.00267494115129467,
  "ми"=>0.00256794350524288,
  "пу"=>0.0024609458591911,
  "ты"=>0.00235394821313931,
  "ру"=>0.00213995292103574,
  "же"=>0.00213995292103574,
  "да"=>0.00203295527498395,
  "че"=>0.00203295527498395,
  "зме"=>0.00203295527498395,
  "ста"=>0.00192595762893216,
  "жа"=>0.00192595762893216,
  "ну"=>0.00192595762893216,
  "ку"=>0.00181895998288038,
  "ге"=>0.00181895998288038,
  "кра"=>0.00181895998288038,
  "си"=>0.00181895998288038,
  "тра"=>0.00181895998288038,
  "ба"=>0.00181895998288038,
  "ти"=>0.00181895998288038,
  "ха"=>0.00181895998288038,
  "гра"=>0.00171196233682859,
  "тру"=>0.00171196233682859,
  "та"=>0.0016049646907768,
  "бу"=>0.0016049646907768,
  "га"=>0.0016049646907768,
  "тро"=>0.0016049646907768,
  "чу"=>0.00149796704472502,
  "тю"=>0.00149796704472502,
  "хло"=>0.00149796704472502,
  "ни"=>0.00139096939867323,
  "му"=>0.00139096939867323,
  "ту"=>0.00139096939867323,
  "цве"=>0.00128397175262144,
  "ло"=>0.00128397175262144,
  "кла"=>0.00128397175262144,
  "зо"=>0.00128397175262144,
  "ке"=>0.00117697410656966,
  "фо"=>0.00106997646051787,
  "сме"=>0.00106997646051787,
  "мэ"=>0.00106997646051787,
  "ша"=>0.00106997646051787,
  "пла"=>0.00106997646051787,
  "све"=>0.00106997646051787,
  "ки"=>0.00106997646051787,
}.freeze

PROBABILITY_NO = {
  "от"=>0.0443044406056295,
  "дев"=>0.0436947464688548,
  "ятс"=>0.0409307997154761,
  "ов"=>0.0121938827354944,
  "ом"=>0.0120516207702469,
  "ат"=>0.0112386952545473,
  "цат"=>0.00945025912000813,
  "од"=>0.0088608881211259,
  "ал"=>0.00780408495071639,
  "ог"=>0.00766182298546896,
  "мин"=>0.00709277512447922,
  "ит"=>0.00646275784981201,
  "ан"=>0.00640178843613454,
  "ут"=>0.00597500254039224,
  "он"=>0.00534498526572503,
  "ор"=>0.00526369271415507,
  "ет"=>0.00481658368052027,
  "ен"=>0.00481658368052027,
  "ид"=>0.00449141347424042,
  "ок"=>0.00438979778477797,
  "ят"=>0.00434915150899299,
  "ер"=>0.00430850523320801,
  "ес"=>0.00414592013006808,
  "ин"=>0.00414592013006808,
  "ка"=>0.0041052738542831,
  "ни"=>0.00408495071639061,
  "ол"=>0.00400365816482065,
  "ил"=>0.00400365816482065,
  "ла"=>0.00363784168275582,
  "ый"=>0.00347525657961589,
  "ел"=>0.00339396402804593,
  "сто"=>0.00317040951122853,
  "ой"=>0.0030281475459811,
  "ос"=>0.00300782440808861,
  "ты"=>0.00292653185651865,
  "им"=>0.00284523930494868,
  "ев"=>0.00276394675337872,
  "сам"=>0.00272330047759374,
  "ик"=>0.00268265420180876,
  "пер"=>0.00268265420180876,
  "нач"=>0.00256071537445382,
  "ем"=>0.00254039223656133,
  "дес"=>0.00249974596077634,
  "ар"=>0.00235748399552891,
  "оп"=>0.00231683771974393,
  "ав"=>0.00229651458185144,
  "ам"=>0.00225586830606646,
  "ир"=>0.00225586830606646,
  "том"=>0.00213392947871151,
  "ак"=>0.00209328320292653,
  "ив"=>0.00207296006503404,
  "пол"=>0.00199166751346408,
  "об"=>0.00197134437557159,
  "чет"=>0.0019510212376791,
  "ед"=>0.00193069809978661,
  "тых"=>0.00186972868610914,
  "ис"=>0.00184940554821664,
  "ва"=>0.00178843613453917,
  "ад"=>0.00178843613453917,
  "ятн"=>0.00176811299664668,
  "ать"=>0.00174778985875419,
  "пят"=>0.00174778985875419,
  "ны"=>0.00164617416929174,
  "дор"=>0.00164617416929174,
  "сор"=>0.00162585103139925,
  "век"=>0.00160552789350676,
  "ли"=>0.00152423534193679,
  "ур"=>0.00152423534193679,
  "ах"=>0.0015039122040443,
  "ей"=>0.00144294279036683,
  "ич"=>0.00142261965247434,
  "ек"=>0.00134132710090438,
  "те"=>0.00134132710090438,
  "дом"=>0.00132100396301189,
  "ул"=>0.00132100396301189,
  "гор"=>0.0013006808251194,
  "етр"=>0.0013006808251194,
  "ян"=>0.00128035768722691,
  "та"=>0.00128035768722691,
  "ду"=>0.00128035768722691,
  "аб"=>0.00121938827354944,
  "ас"=>0.00121938827354944,
  "душ"=>0.00119906513565695,
  "на"=>0.00119906513565695,
  "ант"=>0.00119906513565695,
  "ма"=>0.00117874199776445,
  "сем"=>0.00115841885987196,
  "оз"=>0.00115841885987196,
  "две"=>0.00115841885987196,
  "ост"=>0.00115841885987196,
  "ры"=>0.00111777258408698,
  "за"=>0.00111777258408698,
  "ров"=>0.00111777258408698,
  "нац"=>0.00111777258408698,
  "ых"=>0.00109744944619449,
  "из"=>0.00109744944619449,
  "ком"=>0.00109744944619449,
  "ент"=>0.001077126308302,
  "ци"=>0.00105680317040951,
  "стран"=>0.00105680317040951,
  "тся"=>0.00103648003251702,
  "восм"=>0.00103648003251702,
  "гол"=>0.00103648003251702,
  "пар"=>0.00101615689462453,
}.freeze

# Classifies the syllable +piece+ with the three tables above.
#
# A syllable missing from a table counts as 0 there. When
# acute_sec + acute - no is positive, the result is 'П' if the acute_sec
# value is greater than the acute value and 'У' otherwise. In every other
# case the result is '-'.
def check_piece(piece)
  acute_sec = PROBABILITY_ACUTE_SEC.fetch(piece, 0)
  acute = PROBABILITY_ACUTE.fetch(piece, 0)
  none = PROBABILITY_NO.fetch(piece, 0)
  return '-' unless acute_sec + acute - none > 0
  acute_sec > acute ? 'П' : 'У'
end
|
||||
# Returns the translation text accumulated so far.
def print
  @translation
end
|
||||
end
|
118
translator.rb
Normal file
118
translator.rb
Normal file
|
@ -0,0 +1,118 @@
|
|||
#!/usr/bin/env ruby
|
||||
#encoding: utf-8
|
||||
require 'net/http'
|
||||
$use_internet = true # Интернет пока что даёт _намного_ лучшие результаты
|
||||
# Translates single English words into Russian.
#
# While the global `$use_internet` is true the MyMemory web service is used;
# on a connection failure the flag is cleared and the local dictionaries in
# the `dictionaries/` directory are used instead.
#
# Backends that used to be kept here as commented-out code:
# * Google Translate through the rtranslate gem. It was disabled because the
#   author's access was blocked. It also does not support Esperanto, and in
#   any other language the stress is not fixed. The old API key that sat in
#   the comment has been removed; keys do not belong in source code.
# * A per-letter Esperanto dictionary (`dictionary/<letter>.txt`) searched by
#   word root.
# * A byte-offset index (`Mueller/Mueller.hash`) used to seek into the
#   dictionary file.
class Translator
  # Offline normalization rules, applied in this order: strip stress marks,
  # undo a few English endings (admittedly crude hacks), drop punctuation and
  # articles. Obtaining the infinitive of the word would be far better here.
  OFFLINE_RULES = [
    [/ó/, "o"],
    [/á/, "a"],
    [/´/, ""],
    [/é/, "e"],
    [/í/, "i"],
    [/ú/, "u"],
    [/ý/, "y"],
    [/ied\b/, "y"],
    [/led\b/, "le"],
    [/ed\b/, ""],
    [/,/, ""],
    [/\./, ""],
    [/;/, ""],
    [/\bthe\b/, ""],
    [/\ba\b/, ""]
  ].freeze

  # log - object that responds to `<<`.
  def initialize(log)
    @translation = "".force_encoding("UTF-8")
    @log = log
  end

  # Translates +word+.
  #
  # Returns the translation (String), `true` when nothing is left to
  # translate after normalization (for example an article), or `false` when
  # no translation could be obtained. +word+ itself is never modified.
  def process(word)
    if $use_internet
      begin
        body = fetch_online(word)
      rescue SocketError, Timeout::Error
        @log << "Отсутствует соединение с Интернетом."
        $use_internet = false
        return process(word)
      end
      @translation = first_translated_word(body)
    else
      # Fixed: the argument used to be normalized in place, so the caller
      # ended up caching the translation under the normalized spelling.
      needle = OFFLINE_RULES.inject(word) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
      return true if needle.empty?
      found = lookup_in_dictionaries(needle)
      return found if found
      puts needle # echo the word that could not be found
      @translation = false
    end
    @translation
  end

  private

  # Requests the translation of +word+ from MyMemory and returns the raw
  # response body. The query is form-encoded: words contain accented letters
  # and punctuation, and the language pair contains "|".
  def fetch_online(word)
    query = URI.encode_www_form("q" => word, "langpair" => "en|ru", "of" => "tmx")
    request = Net::HTTP::Get.new("/api/get?" + query)
    response = Net::HTTP.start('mymemory.translated.net', 80) { |http| http.request(request) }
    response.body.to_s
  end

  # Extracts the Russian segment from a TMX response and returns its first
  # word. Only the first word is kept so that whole phrases are never used:
  # the service can return surprisingly poor phrases.
  # Returns false when the response has no Russian segment (this used to
  # raise NoMethodError on nil).
  def first_translated_word(body)
    match = body.match(/RU.*<seg>(.*)<\/seg>/m)
    return false if match.nil?
    # Net::HTTP hands the body over as binary data; tag the segment as UTF-8
    # so that it can be matched against Cyrillic patterns later.
    # NOTE(review): assumes the service answers in UTF-8 — confirm.
    segment = match[1].dup.force_encoding("UTF-8")
    return false unless segment.valid_encoding?
    segment = segment.gsub(/&\w*;/, "")
    segment.split.first || ""
  end

  # Searches every `*.txt` file in `dictionaries/` for a headword line (not
  # starting with a space) that begins with +needle+, case-insensitively.
  # Returns the text of the indented line that follows it. One variant is
  # enough, so the first hit wins. Returns nil when nothing is found.
  def lookup_in_dictionaries(needle)
    return nil unless File.directory?("dictionaries")
    # Escaped: the word is plain text, not a regular expression.
    headword = /^#{Regexp.escape(needle)}\b/i
    # Sorted, so that the dictionary that wins does not depend on the order
    # in which the file system lists the directory.
    Dir.entries("dictionaries").sort.each do |entry|
      next unless entry.match(/\.txt\z/)
      File.open("dictionaries/" + entry, File::RDONLY) do |dictionary|
        dictionary.each_line do |line|
          next if line.match(/^ /)
          next unless line.match(headword)
          # gets, not readline: a headword on the last line must not raise.
          definition = dictionary.gets
          hit = definition && definition[/^ (.*)/, 1]
          return hit if hit
        end
      end
    end
    nil
  end
end
|
Loading…
Reference in a new issue