Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Артем Максимов #14

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 201 additions & 0 deletions T.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
/**
* Created by tema on 12.10.16.
*/

var fs = require('fs');
var Transform = require('stream').Transform;
var StringDecoder = require('string_decoder').StringDecoder;

/**
 * Transform stream that transliterates text between Russian (Cyrillic) and
 * Latin script and wraps the result in a JSON envelope of the form
 * `{ "content" : "<text>"}`.
 *
 * The direction is auto-detected from the first chunk that contains letters:
 * whichever alphabet appears first in that chunk is taken as the source.
 * When `type` is set to 'base64' the stream instead emits the base64 encoding
 * of the raw input bytes, without any JSON envelope.
 */
class T extends Transform {

    /*
     * Review note (govorov, Oct 24, 2016), translated from the pull request:
     * the algorithm can be simplified considerably by storing the rules as an
     * array of replacement pairs:
     *
     *     [
     *         {'ш' : 'sh'},
     *         ...
     *         {'ф' : 'f'},
     *         ...
     *     ]
     *
     * Since the rules are the same in both directions, a single list is
     * enough. The array is needed to guarantee the processing order:
     * multi-letter replacements must be handled first, otherwise, for example,
     * `sh` is consumed as `s` and `h` and the parser never reaches `sh`.
     */

    /**
     * Cyrillic -> Latin table (one Cyrillic letter per key).
     * @returns {Object<string, string>} a fresh table object on every access
     */
    static get translitsRu() {
        return {
            'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'jo', 'ж': 'zh', 'з': 'z',
            'и': 'i', 'й': 'j', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o', 'п': 'p', 'р': 'r',
            'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'h', 'ц': 'c', 'ч': 'ch', 'ш': 'sh', 'щ': 'shh',
            'ъ': '#', 'ы': 'y', 'ь': '\'', 'э': 'je', 'ю': 'ju', 'я': 'ja',

            'А': 'A', 'Б': 'B', 'В': 'V', 'Г': 'G', 'Д': 'D', 'Е': 'E', 'Ё': 'Jo', 'Ж': 'Zh', 'З': 'Z',
            'И': 'I', 'Й': 'J', 'К': 'K', 'Л': 'L', 'М': 'M', 'Н': 'N', 'О': 'O', 'П': 'P', 'Р': 'R',
            'С': 'S', 'Т': 'T', 'У': 'U', 'Ф': 'F', 'Х': 'H', 'Ц': 'C', 'Ч': 'Ch', 'Ш': 'Sh', 'Щ': 'Shh',
            'Ъ': '#', 'Ы': 'Y', 'Ь': '\'', 'Э': 'Je', 'Ю': 'Ju', 'Я': 'Ja'
        };
    }

    /**
     * Latin -> Cyrillic table for single characters.
     * @returns {Object<string, string>} a fresh table object on every access
     */
    static get translitsEN() {
        return {
            'A': 'А', 'B': 'Б', 'C': 'Ц', 'D': 'Д', 'E': 'Е', 'F': 'Ф', 'G': 'Г', 'H': 'Х', 'I': 'И',
            'J': 'Й', 'K': 'К', 'L': 'Л', 'M': 'М', 'N': 'Н', 'O': 'О', 'P': 'П', 'Q': 'Я', 'R': 'Р',
            'S': 'С', 'T': 'Т', 'U': 'У', 'V': 'В', 'W': 'Щ', 'X': 'Х', 'Y': 'Ы', 'Z': 'З', '\'': 'ь',
            '#': 'ъ',

            'a': 'а', 'b': 'б', 'c': 'ц', 'd': 'д', 'e': 'е', 'f': 'ф', 'g': 'г', 'h': 'х', 'i': 'и',
            'j': 'й', 'k': 'к', 'l': 'л', 'm': 'м', 'n': 'н', 'o': 'о', 'p': 'п', 'q': 'я', 'r': 'р',
            's': 'с', 't': 'т', 'u': 'у', 'v': 'в', 'w': 'щ', 'x': 'х', 'y': 'ы', 'z': 'з'

        };
    }

    /**
     * Latin -> Cyrillic table for multi-letter sequences. These must win over
     * the single-letter rules of `translitsEN`.
     * @returns {Object<string, string>} a fresh table object on every access
     */
    static get additionalEn() {
        return {
            'Jo': 'Ё', 'Ju': 'Ю', 'Yo': 'Ё', 'Ch': 'Ч',
            'Ya': 'Я', 'Je': 'Э', 'Shh': 'Щ', 'Sh': 'Ш', 'Zh': 'Ж', 'Ja': 'Я',

            'JO': 'Ё', 'JU': 'Ю', 'YO': 'Ё', 'CH': 'Ч',
            'YA': 'Я', 'JE': 'Э', 'SHH': 'Щ', 'SH': 'Ш', 'ZH': 'Ж', 'JA': 'Я',

            'jo': 'ё', 'ju': 'ю', 'yo': 'ё', 'ch': 'ч',
            'ya': 'я', 'je': 'э', 'shh': 'щ', 'sh': 'ш', 'zh': 'ж', 'ja': 'я'
        };
    }

    /**
     * Replaces every occurrence of a table key in `str` with its value in a
     * single left-to-right pass; at each position the longest key wins.
     * @param {string} str - text to convert
     * @param {Object<string, string>} table - key -> replacement map
     * @returns {string} converted text
     */
    static replaceByTable(str, table) {
        const keys = Object.keys(table).sort((a, b) => b.length - a.length);
        if (keys.length === 0) {
            return str;
        }
        // Keys are matched literally, so escape anything special to RegExp.
        const pattern = keys
            .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
            .join('|');
        return str.replace(new RegExp(pattern, 'g'), (match) => table[match]);
    }

    /**
     * @param {Object} [options] - passed through to the `Transform` constructor
     */
    constructor(options) {
        super(options);
        // `multi` and `alreadyFlush` are not read by this class; they are kept
        // only so that existing code touching them keeps working.
        this.multi = false;
        this.type = 'utf8';
        this.defined = false;
        this.translits = undefined;
        this.lastChars = '';
        this.alreadyFlush = false;
        this.first = true;
        this.flushing = false;
        // Buffers an incomplete multi-byte UTF-8 sequence between chunks.
        this.utf8Decoder = new StringDecoder('utf8');
        // Bytes not yet base64-encoded (fewer than 3) carried between chunks.
        this.base64Tail = Buffer.alloc(0);
    }

    /**
     * Selects the output mode.
     * @param {string} type - 'base64' for base64 output; any other value
     *   selects transliteration into the JSON envelope
     */
    setType(type) {
        this.type = type;
    }

    /**
     * Splits off a trailing fragment of `str` that could still grow into one
     * of the `additionalEn` sequences once the next chunk arrives (e.g. a
     * final "s" or "sh", which may become "sh" / "shh"). The fragment is
     * stored in `this.lastChars`; `this.lastChars` is left untouched when
     * nothing has to be held back.
     * @param {string} str - text accumulated so far
     * @returns {string} the leading part of `str` that is safe to convert now
     */
    getStringAfterLastAdditionalChar(str) {
        const sequences = Object.keys(T.additionalEn);
        const longest = sequences.reduce((max, key) => Math.max(max, key.length), 0);

        for (let hold = Math.min(longest - 1, str.length); hold > 0; hold--) {
            const tail = str.slice(str.length - hold);
            const mayContinue = sequences.some(
                (key) => key.length > hold && key.startsWith(tail)
            );
            if (mayContinue) {
                this.lastChars = tail;
                return str.slice(0, str.length - hold);
            }
        }
        return str;
    }

    /**
     * Transliterates one piece of text and escapes it for embedding in the
     * JSON envelope. The first call also prepends the envelope opening
     * `{ "content" : "`.
     *
     * While not flushing, Latin input may have its last one or two characters
     * held back in `this.lastChars` until the following call.
     * @param {string} str - decoded text of the current chunk
     * @returns {string} converted, JSON-escaped text
     */
    translit(str) {
        if (!this.defined) {
            const notRus = isNoRussian(str);
            const notEn = isNoEnglish(str);
            const mixed = !notRus && !notEn;

            if ((notEn && !notRus) || (mixed && englishIndex(str) > russianIndex(str))) {
                this.translits = T.translitsRu;
                this.defined = true;
            } else if ((notRus && !notEn) || (mixed && englishIndex(str) < russianIndex(str))) {
                this.translits = T.translitsEN;
                this.defined = true;
            }
        }

        if (this.translits) {
            let table = this.translits;

            // Only the Latin -> Cyrillic table has a Latin 'B' key; the
            // multi-letter rules and the chunk carry-over apply to it alone.
            if (Object.prototype.hasOwnProperty.call(table, 'B')) {
                if (!this.flushing) {
                    const pending = this.lastChars + str;
                    this.lastChars = '';
                    str = this.getStringAfterLastAdditionalChar(pending);
                } else {
                    // On flush the caller passes the carried text itself.
                    this.lastChars = '';
                }
                table = Object.assign({}, T.additionalEn, table);
            }

            str = T.replaceByTable(str, table);
        }

        // Escape quotes, backslashes, newlines and other control characters
        // so the envelope stays valid JSON.
        str = JSON.stringify(str).slice(1, -1);

        if (this.first) {
            str = '{ "content" : "' + str;
            this.first = false;
        }

        return str;
    }

    /**
     * Emits whatever is still buffered and, in transliteration mode, closes
     * the JSON envelope.
     * @param {Function} callback - stream completion callback
     */
    _flush(callback) {
        this.flushing = true;

        if (this.type === 'base64') {
            if (this.base64Tail.length > 0) {
                this.push(this.base64Tail.toString('base64'));
            }
            this.base64Tail = Buffer.alloc(0);
        } else {
            const rest = this.lastChars + this.utf8Decoder.end();
            this.push(this.translit(rest) + '"}');
        }

        callback();
    }

    /**
     * Converts one input chunk.
     * @param {Buffer|string} chunk - input data
     * @param {string} encoding - encoding of `chunk` when it is a string
     * @param {Function} callback - stream completion callback
     */
    _transform(chunk, encoding, callback) {
        if (this.type === 'base64') {
            const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, encoding);
            const pending = this.base64Tail.length > 0
                ? Buffer.concat([this.base64Tail, bytes])
                : bytes;
            // Encode whole 3-byte groups only, so no padding appears mid-stream.
            const usable = pending.length - (pending.length % 3);
            this.base64Tail = pending.subarray(usable);
            if (usable > 0) {
                this.push(pending.subarray(0, usable).toString('base64'));
            }
        } else {
            const text = typeof chunk === 'string' ? chunk : this.utf8Decoder.write(chunk);
            const converted = this.translit(text);
            if (converted.length > 0) {
                this.push(converted);
            }
        }

        callback();
    }
}

/**
 * Tells whether a string is free of Russian (Cyrillic) letters.
 *
 * The character class lists the letters explicitly: the previous pattern
 * `[А-Я-Ё]` contained a literal `-`, so any hyphen was treated as a Russian
 * letter.
 *
 * @param {string} str - text to inspect
 * @returns {boolean} true when `str` contains no letter from А-Я, а-я, Ё, ё
 */
function isNoRussian(str) {
    return !/[А-Яа-яЁё]/.test(str);
}

/**
 * Tells whether a string is free of Latin letters.
 *
 * @param {string} str - text to inspect
 * @returns {boolean} true when `str` contains no letter A-Z in either case
 */
function isNoEnglish(str) {
    const hasLatinLetter = /[A-Z]/gi.test(str);
    return !hasLatinLetter;
}

/**
 * Finds the position of the first Latin letter in a string.
 *
 * @param {string} str - text to inspect
 * @returns {number} zero-based index of the first letter A-Z (either case),
 *   or -1 when there is none (previously this case threw a TypeError)
 */
function englishIndex(str) {
    const found = /[A-Za-z]/.exec(str);
    return found === null ? -1 : found.index;
}

/**
 * Finds the position of the first Russian (Cyrillic) letter in a string.
 *
 * The character class lists the letters explicitly: the previous pattern
 * `[А-Я-Ё]` contained a literal `-`, so a hyphen was reported as the first
 * Russian letter.
 *
 * @param {string} str - text to inspect
 * @returns {number} zero-based index of the first letter from А-Я, а-я, Ё, ё,
 *   or -1 when there is none (previously this case threw a TypeError)
 */
function russianIndex(str) {
    const found = /[А-Яа-яЁё]/.exec(str);
    return found === null ? -1 : found.index;
}


// The transliterating Transform stream class is the module's only export.
module.exports = T;

17 changes: 17 additions & 0 deletions files/file.big.from.en.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Towards the end of November, during a thaw, at nine o'clock one morning, a train on the Warsaw and Petersburg railway was approaching the latter city at full speed. The morning was so damp and misty that it was only with great difficulty that the day succeeded in breaking; and it was impossible to distinguish anything more than a few yards away from the carriage windows.

Some of the passengers by this particular train were returning from abroad; but the third-class carriages were the best filled, chiefly with insignificant persons of various occupations and degrees, picked up at the different stations nearer town. All of them seemed weary, and most of them had sleepy eyes and a shivering expression, while their complexions generally appeared to have taken on the colour of the fog outside.

When day dawned, two passengers in one of the third-class carriages found themselves opposite each other. Both were young fellows, both were rather poorly dressed, both had remarkable faces, and both were evidently anxious to start a conversation. If they had but known why, at this particular moment, they were both remarkable persons, they would undoubtedly have wondered at the strange chance which had set them down opposite to one another in a third-class carriage of the Warsaw Railway Company.

One of them was a young fellow of about twenty-seven, not tall, with black curling hair, and small, grey, fiery eyes.

His nose was broad and flat, and he had high cheek bones; his thin lips were constantly compressed into an impudent, ironical--it might almost be called a malicious--smile; but his forehead was high and well formed, and atoned for a good deal of the ugliness of the lower part of his face. A special feature of this physiognomy was its death-like pallor, which gave to the whole man an indescribably emaciated appearance in spite of his hard look, and at the same time a sort of passionate and suffering expression which did not harmonize with his impudent, sarcastic smile and keen, self-satisfied bearing. He wore a large fur--or rather astrachan--overcoat, which had kept him warm all night, while his neighbour had been obliged to bear the full severity of a Russian November night entirely unprepared. His wide sleeveless mantle with a large cape to it--the sort of cloak one sees upon travellers during the winter months in Switzerland or North Italy--was by no means adapted to the long cold journey through Russia, from Eydkuhnen to St. Petersburg.

The wearer of this cloak was a young fellow, also of about twenty-six or twenty-seven years of age, slightly above the middle height, very fair, with a thin, pointed and very light coloured beard; his eyes were large and blue, and had an intent look about them, yet that heavy expression which some people affirm to be a peculiarity, as well as evidence, of an epileptic subject. His face was decidedly a pleasant one for all that; refined, but quite colourless, except for the circumstance that at this moment it was blue with cold. He held a bundle made up of an old faded silk handkerchief that apparently contained all his travelling wardrobe, and wore thick shoes and gaiters, his whole appearance being very un-Russian.

His black-haired neighbour inspected these peculiarities, having nothing better to do, and at length remarked, with that rude enjoyment of the discomforts of others which the common classes so often show:

"Cold?"

"Very," said his neighbour, readily. "and this is a thaw, too. Fancy if it had been a hard frost! I never thought it would be so cold in the old country. I've grown quite out of the way of it."
Loading