src-d · bzz · Jan 29, 2019 · Jan 29, 2019 · Jan 29, 2019 · Jan 29, 2019
diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go
@@ -1,3 +1,6 @@
+// Package tokenizer implements file tokenization used by the enry content
+// classifier. This package is an implementation detail of enry and should not
+// be imported by other packages.
 package tokenizer
 
 import (
@@ -8,6 +11,9 @@ import (
 
 const byteLimit = 100000
 
+// Tokenize returns language-agnostic lexical tokens from content. The tokens
+// returned should match what the Linguist library returns. At most the first
+// 100000 bytes (byteLimit) of content are tokenized.
 func Tokenize(content []byte) []string {
 	if len(content) > byteLimit {
 		content = content[:byteLimit]