Skip to content

Commit

Permalink
bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Anonyfox committed Jan 30, 2015
1 parent a56c71b commit 740ad04
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 11 deletions.
24 changes: 24 additions & 0 deletions .versions
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
anonyfox:tags@0.0.1
application-configuration@1.0.4
base64@1.0.2
binary-heap@1.0.2
callback-hook@1.0.2
check@1.0.4
coffeescript@1.0.5
ddp@1.0.14
ejson@1.0.5
follower-livedata@1.0.3
geojson-utils@1.0.2
id-map@1.0.2
json@1.0.2
local-test:anonyfox:tags@0.0.1
logging@1.0.6
meteor@1.1.4
minimongo@1.0.6
mongo@1.0.11
ordered-dict@1.0.2
random@1.0.2
retry@1.0.2
tinytest@1.0.4
tracker@1.0.5
underscore@1.0.2
5 changes: 0 additions & 5 deletions anonyfox:tags-tests.js

This file was deleted.

1 change: 0 additions & 1 deletion anonyfox:tags.js

This file was deleted.

14 changes: 10 additions & 4 deletions package.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Package.describe({
Package.onUse(function(api) {
api.versionsFrom('1.0');
// api.addFiles('anonyfox:tags.js');
api.use(['tinytest','coffeescript','underscore'],['client','server']);
api.use(['coffeescript','underscore'],['client','server']);
api.addFiles([
'stopwords/stopwords_de.coffee',
'stopwords/stopwords_en.coffee',
Expand All @@ -20,7 +20,13 @@ Package.onUse(function(api) {
});

// Test configuration: declares the packages the test run uses, loads the
// stopword lists, the stemmer, the tag extractor and the globals shim for
// both client and server, and finally registers the test file itself.
Package.onTest(function(api) {
  var targets = ['client','server'];
  var dependencies = ['tinytest','coffeescript','underscore'];
  var sources = [
    'stopwords/stopwords_de.coffee',
    'stopwords/stopwords_en.coffee',
    'porter-stemmer.js',
    'tags.coffee.md',
    'globals.js'
  ];

  api.use(dependencies, targets);
  // Hand over a fresh copy so each call receives its own target array,
  // exactly as the separate literals did before.
  api.addFiles(sources, targets.slice());
  // No target list given for the test file, same as before.
  api.addFiles('tests.coffee');
});
209 changes: 209 additions & 0 deletions porter-stemmer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// Porter stemmer in Javascript. Few comments, but it's easy to follow against
// the rules in the original paper, in
//
// Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, no. 3,
// pp 130-137,
//
// see also http://www.tartarus.org/~martin/PorterStemmer

// Release 1 by 'andargor', Jul 2004
// Release 2 (substantially revised) by Christopher McKenzie, Aug 2009
//
// CommonJS tweak by jedp

(function() {
  // Captured once so the export code below can fall back to it. In sloppy
  // mode this is the global object; in strict mode / ES modules it is
  // undefined, which is handled explicitly at the bottom of the file.
  var outerThis = this;

  // Step 2 of the algorithm: long suffix -> shorter replacement.
  var step2list = {
    "ational" : "ate",
    "tional" : "tion",
    "enci" : "ence",
    "anci" : "ance",
    "izer" : "ize",
    "bli" : "ble",
    "alli" : "al",
    "entli" : "ent",
    "eli" : "e",
    "ousli" : "ous",
    "ization" : "ize",
    "ation" : "ate",
    "ator" : "ate",
    "alism" : "al",
    "iveness" : "ive",
    "fulness" : "ful",
    "ousness" : "ous",
    "aliti" : "al",
    "iviti" : "ive",
    "biliti" : "ble",
    "logi" : "log"
  };

  // Step 3 of the algorithm: suffix -> replacement (often removal).
  var step3list = {
    "icate" : "ic",
    "ative" : "",
    "alize" : "al",
    "iciti" : "ic",
    "ical" : "ic",
    "ful" : "",
    "ness" : ""
  };

  // Character classes; the umlauts \u00e4 \u00f6 \u00fc are treated as vowels.
  var c = "[^aeiou\u00e4\u00f6\u00fc]"; // consonant
  var v = "[aeiouy\u00e4\u00f6\u00fc]"; // vowel
  var C = c + "[^aeiouy\u00e4\u00f6\u00fc]*"; // consonant sequence
  var V = v + "[aeiou\u00e4\u00f6\u00fc]*"; // vowel sequence

  // "Measure" patterns. They never change, so they are compiled once here
  // instead of on every stemmer() call. None of them uses the g/y flag, so
  // sharing the RegExp objects between calls carries no lastIndex state.
  var mgr0 = new RegExp("^(" + C + ")?" + V + C); // [C]VC... is m>0
  var meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$"); // [C]VC[V] is m=1
  var mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C); // [C]VCVC... is m>1
  var s_v = new RegExp("^(" + C + ")?" + v); // vowel in stem
  var cvc = new RegExp("^" + C + v + "[^aeiouwxy]$"); // consonant-vowel-consonant ending
  var doubleConsonant = /([^aeiouylsz])\1$/; // doubled final consonant other than l, s, z

  // Suffix patterns for the individual steps.
  var reStep1aEs = /^(.+?)(ss|i)es$/;
  var reStep1aS = /^(.+?)([^s])s$/;
  var reStep1bEed = /^(.+?)eed$/;
  var reStep1bEdIng = /^(.+?)(ed|ing)$/;
  var reStep1bFix = /(at|bl|iz)$/;
  var reStep1c = /^(.+?)y$/;
  var reStep2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var reStep3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var reStep4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var reStep4Ion = /^(.+?)(s|t)(ion)$/;
  var reStep5E = /^(.+?)e$/;
  var reStep5Ll = /ll$/;
  var reLastChar = /.$/;

  /**
   * Reduce a word to its stem by applying the suffix-stripping steps
   * 1a, 1b, 1c, 2, 3, 4 and 5 in order.
   *
   * The patterns only contain lower-case letters, so the caller is expected
   * to pass lower-case input. Words shorter than three characters are
   * returned unchanged.
   *
   * @param {string} w - the word to stem
   * @returns {string} the stemmed word
   */
  function stemmer(w) {
    var stem;
    var suffix;
    var firstch;
    var fp;

    if (w.length < 3) { return w; }

    // A leading "y" is temporarily upper-cased so that the vowel class
    // (which contains lower-case "y") does not match it; it is restored
    // just before returning.
    firstch = w.substr(0,1);
    if (firstch === "y") {
      w = firstch.toUpperCase() + w.substr(1);
    }

    // Step 1a
    if (reStep1aEs.test(w)) {
      w = w.replace(reStep1aEs,"$1$2");
    } else if (reStep1aS.test(w)) {
      w = w.replace(reStep1aS,"$1$2");
    }

    // Step 1b
    fp = reStep1bEed.exec(w);
    if (fp) {
      if (mgr0.test(fp[1])) {
        w = w.replace(reLastChar,"");
      }
    } else {
      fp = reStep1bEdIng.exec(w);
      if (fp) {
        stem = fp[1];
        if (s_v.test(stem)) {
          w = stem;
          if (reStep1bFix.test(w)) { w = w + "e"; }
          else if (doubleConsonant.test(w)) { w = w.replace(reLastChar,""); }
          else if (cvc.test(w)) { w = w + "e"; }
        }
      }
    }

    // Step 1c
    fp = reStep1c.exec(w);
    if (fp) {
      stem = fp[1];
      if (s_v.test(stem)) { w = stem + "i"; }
    }

    // Step 2
    fp = reStep2.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (mgr0.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3
    fp = reStep3.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (mgr0.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4 -- the "(s|t)ion" rule is only considered when none of the
    // plain suffixes matched, regardless of whether the measure test passed.
    fp = reStep4.exec(w);
    if (fp) {
      stem = fp[1];
      if (mgr1.test(stem)) {
        w = stem;
      }
    } else {
      fp = reStep4Ion.exec(w);
      if (fp) {
        stem = fp[1] + fp[2];
        if (mgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5
    fp = reStep5E.exec(w);
    if (fp) {
      stem = fp[1];
      if (mgr1.test(stem) || (meq1.test(stem) && !cvc.test(stem))) {
        w = stem;
      }
    }

    if (reStep5Ll.test(w) && mgr1.test(w)) {
      w = w.replace(reLastChar,"");
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = firstch.toLowerCase() + w.substr(1);
    }

    return w;
  }

  // memoize at the module level
  //
  // The cache has no prototype, so words such as "constructor", "toString"
  // or "__proto__" are ordinary keys and can never be answered with a member
  // inherited from Object.prototype.
  var memo = Object.create(null);

  /**
   * Caching wrapper around stemmer(): each distinct word is stemmed once and
   * later calls are served from the cache.
   *
   * @param {string} w - the word to stem
   * @returns {string} the stemmed word
   */
  var memoizingStemmer = function(w) {
    if (memo[w] === undefined) {
      memo[w] = stemmer(w);
    }
    return memo[w];
  };

  // Pick the object the two functions are attached to: CommonJS `exports`
  // first, then the browser `window`, then the enclosing `this`. When `this`
  // is undefined (strict mode / ES module) fall back to whichever global
  // object is available instead of throwing a TypeError.
  var target;
  if (typeof exports != 'undefined' && exports != null) {
    target = exports;
  } else if (typeof window != 'undefined' && window != null) {
    target = window;
  } else if (outerThis != null) {
    target = outerThis;
  } else if (typeof globalThis != 'undefined') {
    target = globalThis;
  } else if (typeof global != 'undefined') {
    target = global;
  } else if (typeof self != 'undefined') {
    target = self;
  }

  if (target) {
    target.stemmer = stemmer;
    target.memoizingStemmer = memoizingStemmer;
  }

})();
2 changes: 1 addition & 1 deletion tags.coffee.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ A simple function that cleans up an array of words.
tags = _.uniq _.compact _.filter list, (t) ->
(t.length > 1) and
(t.length < 30) and
($.trim(t.replace(/\d+/gi,"")).length > 1)
((t.replace(/\d+/gi,"")).trim().length > 1)
tags = _.map tags, (t) -> t.toLowerCase()
tags = _.without(tags, stopwords...)
if excludes?.length
Expand Down
11 changes: 11 additions & 0 deletions tests.coffee
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Runs Tags.findFrom over a short news paragraph and compares the result
# with the expected list of tags.
Tinytest.add 'example', (test) ->
  test.equal true, true

  sampleText = """
    Google spins out Project Tango from its Advanced Technology and Projects group -
    At the moment, it’s not quite clear what’s happening to Project Tango, Google’s ambitious plan to
    put 3D mapping technology inside your smartphone. Today Google’s Advanced Technology and Projects
    group (ATAP) announced that, “after two fast-paced years in ATAP, and many technical successes, the
    Tango team is transitioning from ATAP to a new home within Google.”
    """

  expectedTags = ["google","spins","project","tango","advanced","technology","group","moment","clear","happening","ambitious","plan","mapping","inside","smartphone","today","atap","announced","two","paced","years","technical","successes","team","transitioning","within"]

  foundTags = Tags.findFrom sampleText
  test.equal foundTags, expectedTags

0 comments on commit 740ad04

Please sign in to comment.