forked from Anonyfox/meteor-tags
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
255 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
anonyfox:tags@0.0.1 | ||
application-configuration@1.0.4 | ||
base64@1.0.2 | ||
binary-heap@1.0.2 | ||
callback-hook@1.0.2 | ||
check@1.0.4 | ||
coffeescript@1.0.5 | ||
ddp@1.0.14 | ||
ejson@1.0.5 | ||
follower-livedata@1.0.3 | ||
geojson-utils@1.0.2 | ||
id-map@1.0.2 | ||
json@1.0.2 | ||
local-test:anonyfox:tags@0.0.1 | ||
logging@1.0.6 | ||
meteor@1.1.4 | ||
minimongo@1.0.6 | ||
mongo@1.0.11 | ||
ordered-dict@1.0.2 | ||
random@1.0.2 | ||
retry@1.0.2 | ||
tinytest@1.0.4 | ||
tracker@1.0.5 | ||
underscore@1.0.2 |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
// Porter stemmer in Javascript. Few comments, but it's easy to follow against | ||
// the rules in the original paper, in | ||
// | ||
// Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, no. 3, | ||
// pp 130-137, | ||
// | ||
// see also http://www.tartarus.org/~martin/PorterStemmer | ||
|
||
// Release 1 by 'andargor', Jul 2004 | ||
// Release 2 (substantially revised) by Christopher McKenzie, Aug 2009 | ||
// | ||
// CommonJS tweak by jedp | ||
|
||
(function() {
  // Step 2 suffix table: matched suffix -> replacement. Applied only when the
  // stem left after removing the suffix has measure m > 0.
  var step2list = {
    "ational" : "ate",
    "tional" : "tion",
    "enci" : "ence",
    "anci" : "ance",
    "izer" : "ize",
    "bli" : "ble",
    "alli" : "al",
    "entli" : "ent",
    "eli" : "e",
    "ousli" : "ous",
    "ization" : "ize",
    "ation" : "ate",
    "ator" : "ate",
    "alism" : "al",
    "iveness" : "ive",
    "fulness" : "ful",
    "ousness" : "ous",
    "aliti" : "al",
    "iviti" : "ive",
    "biliti" : "ble",
    "logi" : "log"
  };

  // Step 3 suffix table: matched suffix -> replacement, same m > 0 condition.
  var step3list = {
    "icate" : "ic",
    "ative" : "",
    "alize" : "al",
    "iciti" : "ic",
    "ical" : "ic",
    "ful" : "",
    "ness" : ""
  };

  // Regex fragments. The character classes also list the umlauts
  // \u00e4 \u00f6 \u00fc as vowels. Note that "y" is matched by both `c` and
  // `v`; an upper-case "Y" only matches `c`.
  var c = "[^aeiou\u00e4\u00f6\u00fc]";          // consonant
  var v = "[aeiouy\u00e4\u00f6\u00fc]";          // vowel
  var C = c + "[^aeiouy\u00e4\u00f6\u00fc]*";    // consonant sequence
  var V = v + "[aeiou\u00e4\u00f6\u00fc]*";      // vowel sequence

  var mgr0 = "^(" + C + ")?" + V + C;               // [C]VC... is m>0
  var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";  // [C]VC[V] is m=1
  var mgr1 = "^(" + C + ")?" + V + C + V + C;       // [C]VCVC... is m>1
  var s_v = "^(" + C + ")?" + v;                   // vowel in stem

  // All patterns are compiled once here instead of on every stemmer() call.
  // None of them carries the g/y flag, so sharing them is stateless.
  var reMgr0 = new RegExp(mgr0);
  var reMeq1 = new RegExp(meq1);
  var reMgr1 = new RegExp(mgr1);
  var reVowelInStem = new RegExp(s_v);
  var reDoubleConsonant = new RegExp("([^aeiouylsz])\\1$");
  var reCvc = new RegExp("^" + C + v + "[^aeiouwxy]$");
  var reLastChar = /.$/;

  var reStep1aEs = /^(.+?)(ss|i)es$/;
  var reStep1aS = /^(.+?)([^s])s$/;
  var reStep1bEed = /^(.+?)eed$/;
  var reStep1bEdIng = /^(.+?)(ed|ing)$/;
  var reStep1bAddE = /(at|bl|iz)$/;
  var reStep1c = /^(.+?)y$/;
  var reStep2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var reStep3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var reStep4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var reStep4Ion = /^(.+?)(s|t)(ion)$/;
  var reStep5E = /^(.+?)e$/;
  var reStep5Ll = /ll$/;

  // Reduce a word to its Porter stem.
  //
  // w: the word to stem. The patterns only contain lower-case letters, so the
  //    caller is expected to pass lower-case input.
  // Returns the stemmed word; words shorter than three characters are
  // returned unchanged.
  function stemmer(w) {
    var stem;
    var firstch;
    var fp;

    if (w.length < 3) { return w; }

    // A leading "y" is temporarily upper-cased so that the vowel class `v`
    // does not match it; it is restored at the end.
    firstch = w.charAt(0);
    if (firstch === "y") {
      w = "Y" + w.slice(1);
    }

    // Step 1a
    if (reStep1aEs.test(w)) {
      w = w.replace(reStep1aEs, "$1$2");
    } else if (reStep1aS.test(w)) {
      w = w.replace(reStep1aS, "$1$2");
    }

    // Step 1b
    fp = reStep1bEed.exec(w);
    if (fp) {
      // "eed" -> "ee" when the stem before it has m > 0
      if (reMgr0.test(fp[1])) {
        w = w.replace(reLastChar, "");
      }
    } else {
      fp = reStep1bEdIng.exec(w);
      if (fp && reVowelInStem.test(fp[1])) {
        w = fp[1];
        if (reStep1bAddE.test(w)) {
          w = w + "e";
        } else if (reDoubleConsonant.test(w)) {
          w = w.replace(reLastChar, "");
        } else if (reCvc.test(w)) {
          w = w + "e";
        }
      }
    }

    // Step 1c
    fp = reStep1c.exec(w);
    if (fp && reVowelInStem.test(fp[1])) {
      w = fp[1] + "i";
    }

    // Step 2
    fp = reStep2.exec(w);
    if (fp && reMgr0.test(fp[1])) {
      w = fp[1] + step2list[fp[2]];
    }

    // Step 3
    fp = reStep3.exec(w);
    if (fp && reMgr0.test(fp[1])) {
      w = fp[1] + step3list[fp[2]];
    }

    // Step 4. The "(s|t)ion" rule is only tried when no suffix from the
    // first list matched at all, even if that match did not change the word.
    fp = reStep4.exec(w);
    if (fp) {
      if (reMgr1.test(fp[1])) {
        w = fp[1];
      }
    } else {
      fp = reStep4Ion.exec(w);
      if (fp) {
        stem = fp[1] + fp[2];
        if (reMgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5
    fp = reStep5E.exec(w);
    if (fp) {
      stem = fp[1];
      if (reMgr1.test(stem) || (reMeq1.test(stem) && !reCvc.test(stem))) {
        w = stem;
      }
    }

    if (reStep5Ll.test(w) && reMgr1.test(w)) {
      w = w.replace(reLastChar, "");
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = "y" + w.slice(1);
    }

    return w;
  }

  // memoize at the module level
  //
  // The cache has no prototype, so words such as "constructor" or "__proto__"
  // cannot be mistaken for already-cached entries.
  var memo = Object.create(null);

  // Same contract as stemmer(), but each distinct word is computed only once.
  var memoizingStemmer = function(w) {
    if (!(w in memo)) {
      memo[w] = stemmer(w);
    }
    return memo[w];
  };

  if (typeof exports !== 'undefined' && exports != null) {
    exports.stemmer = stemmer;
    exports.memoizingStemmer = memoizingStemmer;
  } else if (typeof window !== 'undefined' && window != null) {
    window.stemmer = stemmer;
    window.memoizingStemmer = memoizingStemmer;
  } else {
    // `this` is the global object in sloppy-mode scripts but undefined in
    // strict code and ES modules; fall back to globalThis when it exists.
    var root = this;
    if (root == null && typeof globalThis !== 'undefined') {
      root = globalThis;
    }
    root.stemmer = stemmer;
    root.memoizingStemmer = memoizingStemmer;
  }

})();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Runs Tags.findFrom on a short news blurb and compares the result with a
# fixed expected tag list.
Tinytest.add 'example', (test) ->
  # Trivial assertion kept from the original test.
  test.equal true, true
  # Block string: the common leading indentation is stripped by CoffeeScript,
  # so the text content is the same as if it started at column 0.
  str = """
  Google spins out Project Tango from its Advanced Technology and Projects group -
  At the moment, it’s not quite clear what’s happening to Project Tango, Google’s ambitious plan to
  put 3D mapping technology inside your smartphone. Today Google’s Advanced Technology and Projects
  group (ATAP) announced that, “after two fast-paced years in ATAP, and many technical successes, the
  Tango team is transitioning from ATAP to a new home within Google.”
  """
  tags = Tags.findFrom str
  test.equal tags, ["google","spins","project","tango","advanced","technology","group","moment","clear","happening","ambitious","plan","mapping","inside","smartphone","today","atap","announced","two","paced","years","technical","successes","team","transitioning","within"]