-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdict.js
71 lines (61 loc) · 2.7 KB
/
dict.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env node
/**
* Module that provides dictionary facilities, partial matching etc.
*
* Created by Andrew Bridge 29/03/2015
*/
var lib = require('./common.js'); // Common functions
var progressUpdateRate = 499;
var ProgressBar = require('progress'); // Feedback helper module
var fs = require('fs'); // FileSystem
/**
 * Dictionary of known words backed by a JSON file on disk.
 *
 * Tries to load `fileName` as a JSON array of words. If the file does not
 * exist and both `wordArr` and `regCharSet` are supplied, the dictionary is
 * built from `wordArr` (see `createDict`) and written to `fileName`.
 *
 * Instance state:
 *   - `dict`     : array of words.
 *   - `dictText` : the JSON text of `dict`, used for substring searches.
 *
 * @param {string} fileName Path of the dictionary JSON file to load or create.
 * @param {string[]} [wordArr] Sample words used to build the dictionary when
 *     the file is missing.
 * @param {string} [regCharSet] Body of a regex character class (e.g. "a-z")
 *     naming the characters a dictionary word may contain.
 * @throws {Error} If the file cannot be read or parsed and the dictionary
 *     cannot be generated instead; the underlying error is exposed as `cause`.
 */
function Dict(fileName, wordArr, regCharSet) {
    try {
        this.dictText = fs.readFileSync(fileName, "utf8");
        this.dict = JSON.parse(this.dictText);
    } catch (e) {
        // Only a missing file is recoverable, and only when sample data was supplied.
        if (e.code === "ENOENT" && wordArr && regCharSet) {
            console.log("Creating dictionary from sample text...");
            var prog = new ProgressBar("Word :current of :total :bar :percent :eta", {total: wordArr.length, width: 10});
            this.dict = wordArr.reduce(this.createDict.bind(this, regCharSet, prog), []);
            this.dictText = JSON.stringify(this.dict);
            // Persist the exact text kept in memory so a later load sees the same string.
            fs.writeFileSync(fileName, this.dictText);
            console.log("New dictionary produced and saved.\n"+this.dict.length+" words.");
        } else {
            // Error() only takes a message portably; keep the original error reachable via `cause`.
            var loadErr = new Error("An error occurred loading the dictionary: "+ e.message);
            loadErr.cause = e;
            throw loadErr;
        }
    }
}

/**
 * Reducer callback that folds one sample word into the dictionary array.
 * Intended to be used as
 * `wordArr.reduce(this.createDict.bind(this, regCharSet, prog), [])`.
 *
 * The word is lower-cased, passed through `lib.charSetTrim` (presumably this
 * strips characters outside `regCharSet` from the word - confirm against
 * common.js), split on hyphens, and stripped of apostrophes. Each resulting
 * part is added when it consists solely of `regCharSet` characters, is not
 * already present, and is longer than one character (or is "a" / "i").
 *
 * @param {string} regCharSet Body of a regex character class of allowed characters.
 * @param {{tick: function(number)}} prog Progress bar to advance.
 * @param {string[]} dict Accumulator: the dictionary built so far (mutated).
 * @param {string} curVal The current sample word.
 * @param {number} ind Index of `curVal` in `arr`.
 * @param {string[]} arr The full array being reduced.
 * @returns {string[]} The same `dict` array, for the next reduce step.
 */
Dict.prototype.createDict = function(regCharSet, prog, dict, curVal, ind, arr) {
    // Advance the bar by exactly the number of words handled since the last
    // update, so the ticks add up to arr.length instead of overshooting it.
    var processed = ind + 1;
    if (processed % progressUpdateRate === 0) {
        prog.tick(progressUpdateRate);
    } else if (ind === arr.length - 1) {
        prog.tick(processed % progressUpdateRate);
    }

    // NOTE(review): this check runs on the word before it is lower-cased;
    // what it filters out depends on lib.charSetTrim - confirm the intent.
    if (lib.charSetTrim(curVal, "a-z").length !== 0) {
        var cleaned = lib.charSetTrim(curVal.toLowerCase(), regCharSet);
        // Built once per word rather than once per hyphen-separated part.
        var allowedOnly = new RegExp("^[" + regCharSet + "]+$");
        var parts = cleaned.split("-");
        for (var i = 0; i < parts.length; i++) {
            // Apostrophes are dropped so contractions are stored without punctuation.
            var candidate = parts[i].replace(/'/g, "");
            // Anything with a character outside the allowed set (e.g. accented
            // letters) is rejected so it does not skew results.
            if (!allowedOnly.test(candidate)) {
                continue;
            }
            // Single characters are not words, except "a" and "i".
            // Remove this exception if you want characters to count as words.
            var isWordLength = candidate.length > 1 || candidate === "a" || candidate === "i";
            if (isWordLength && dict.indexOf(candidate) === -1) {
                dict.push(candidate);
            }
        }
    }
    return dict;
};

/**
 * Tests whether `word` is an entry of the dictionary.
 *
 * @param {string} word Word to look up.
 * @returns {boolean} True when `word` equals a dictionary entry.
 */
Dict.prototype.checkFullMatch = function(word) {
    return (this.dict.indexOf(word) > -1);
};

/**
 * Tests whether `word` occurs anywhere in the dictionary's JSON text.
 * The search is a plain substring search over the serialized text.
 *
 * @param {string} word Fragment to search for.
 * @returns {boolean} True when the fragment occurs in the JSON text.
 */
Dict.prototype.checkPartialMatch = function(word) {
    return (this.dictText.indexOf(word) > -1);
};

/**
 * Tests whether the dictionary's JSON text contains `word` immediately after
 * a double quote, i.e. at the start of a serialized entry.
 *
 * @param {string} word Prefix to search for.
 * @returns {boolean} True when `"` followed by `word` occurs in the JSON text.
 */
Dict.prototype.checkStartMatch = function(word) {
    return (this.dictText.indexOf('"'+word) > -1);
};

/**
 * Tests whether the dictionary's JSON text contains `word` immediately before
 * a double quote, i.e. at the end of a serialized entry.
 *
 * @param {string} word Suffix to search for.
 * @returns {boolean} True when `word` followed by `"` occurs in the JSON text.
 */
Dict.prototype.checkEndMatch = function(word) {
    return (this.dictText.indexOf(word+'"') > -1);
};
module.exports = Dict;