diff --git a/build.gradle.kts b/build.gradle.kts
index cf5ed556..ac600654 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -2,14 +2,13 @@ import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
 
 plugins {
     `maven-publish`
-    id("idea")
     kotlin("jvm") version "1.4.32"
     kotlin("plugin.serialization") version "1.4.32"
     id("org.jetbrains.dokka") version "1.4.30"
 }
 
 group = "com.londogard"
-version = "1.0-beta"
+version = "1.0"
 
 repositories {
     mavenCentral()
@@ -27,26 +26,24 @@ dependencies {
     implementation("org.ejml:ejml-simple:0.40")
     implementation("org.ejml:ejml-kotlin:0.40")
 
-    // implementation("com.github.levyfan:sentencepiece-jni:v0.0.2")
     implementation("ai.djl.sentencepiece:sentencepiece:0.10.0")
     implementation("com.github.rholder:snowball-stemmer:1.3.0.581.1")
-
-    // https://mvnrepository.com/artifact/org.apache.commons/commons-compress
     implementation("org.apache.commons:commons-compress:1.20")
 
-    implementation("org.codehaus.plexus:plexus-archiver:4.2.4")
-
     testImplementation("org.amshove.kluent:kluent:$kluentVersion")
     testImplementation("org.jetbrains.kotlin:kotlin-test:1.4.32")
     testImplementation(kotlin("test-junit"))
+    implementation(kotlin("stdlib-jdk8"))
 }
 
 tasks.test {
     useJUnit()
 }
 
-tasks.withType<KotlinCompile> {
-    kotlinOptions.jvmTarget = "1.8"
+tasks.withType<KotlinCompile>().configureEach {
+    // Keep every Kotlin compilation (main and test) on the JVM 1.8 target that was configured before.
+    kotlinOptions.jvmTarget = "1.8"
+    kotlinOptions.useIR = true // opt in to the JVM IR compiler backend
 }
 
 publishing {
@@ -65,4 +62,9 @@ publishing {
             from(components["java"])
         }
     }
+}
+
+val compileTestKotlin: KotlinCompile by tasks
+compileTestKotlin.kotlinOptions {
+    jvmTarget = "1.8"
 }
\ No newline at end of file
diff --git a/src/main/kotlin/com/londogard/nlp/embeddings/BpeEmbeddings.kt b/src/main/kotlin/com/londogard/nlp/embeddings/BpeEmbeddings.kt
index 59f29343..b4c81f5a 100644
--- a/src/main/kotlin/com/londogard/nlp/embeddings/BpeEmbeddings.kt
+++ b/src/main/kotlin/com/londogard/nlp/embeddings/BpeEmbeddings.kt
@@ -23,10 +23,14 @@ class BpeEmbeddings(
             ?.avgNorm()
     }
 
+    /** True when every subword the tokenizer produces for [word] has an entry in [embeddings]. */
+    override fun contains(word: String): Boolean =
+        tokenizer.split(word).all { subword -> subword in embeddings }
+
     fun subwordVector(subword: String): SimpleMatrix? = embeddings[subword]
 
     companion object {
-        fun toTokenizer(filePath: Path): Tokenizer {
+        @JvmStatic fun toTokenizer(filePath: Path): Tokenizer {
             val rawNameTokens = filePath.fileName.toString().split('.')
 
             val languageSupport = LanguageSupport.valueOf(rawNameTokens.first())
diff --git a/src/main/kotlin/com/londogard/nlp/embeddings/EmbeddingLoader.kt b/src/main/kotlin/com/londogard/nlp/embeddings/EmbeddingLoader.kt
index 49cadc95..c0fd19e4 100644
--- a/src/main/kotlin/com/londogard/nlp/embeddings/EmbeddingLoader.kt
+++ b/src/main/kotlin/com/londogard/nlp/embeddings/EmbeddingLoader.kt
@@ -5,7 +5,6 @@ import com.londogard.nlp.utils.LanguageSupport
 import com.londogard.nlp.utils.useLines
 import org.ejml.simple.SimpleMatrix
 import java.nio.file.Path
-import kotlin.io.path.bufferedReader
 import kotlin.math.min
 
 object EmbeddingLoader {
@@ -24,7 +23,13 @@ object EmbeddingLoader {
         }
     }
 
-    // TODO inline fun <reified T: Embeddings> fromUrl(url: String): Map<String, SimpleMatrix> = TODO("")
+    /** Loads the embeddings stored at [path] as [T]; any type not listed below is built as [WordEmbeddings]. */
+    inline fun <reified T : Embeddings> fromFile(path: Path): T =
+        when (T::class) {
+            LightWordEmbeddings::class -> LightWordEmbeddings(path) as T
+            BpeEmbeddings::class -> BpeEmbeddings(path) as T
+            else -> WordEmbeddings(path) as T
+        }
 
     internal fun fromFile(path: Path,
                           delimiter: Char,
@@ -47,9 +52,4 @@ object EmbeddingLoader {
                     }
                     .toMap(LinkedHashMap(numLinesToUse)) // optimization by creating the full map directly
             }
-}
-
-fun main() {
-    val embeddings = EmbeddingLoader.fromLanguageOrNull<LightWordEmbeddings>(LanguageSupport.sv)
-    println(embeddings?.vector("Hej"))
 }
\ No newline at end of file
diff --git a/src/main/kotlin/com/londogard/nlp/embeddings/WordEmbeddings.kt b/src/main/kotlin/com/londogard/nlp/embeddings/WordEmbeddings.kt
index fe749ce9..f553a09a 100644
--- a/src/main/kotlin/com/londogard/nlp/embeddings/WordEmbeddings.kt
+++ b/src/main/kotlin/com/londogard/nlp/embeddings/WordEmbeddings.kt
@@ -89,7 +89,7 @@ class WordEmbeddings(
         /** Pretty print the list of words and their associated scores.
          * @param words List of (word, score) pairs to be printed.
          */
-        fun pprint(words: List<Pair<String, Double>>) {
+        @JvmStatic fun pprint(words: List<Pair<String, Double>>) {
             println("\n%50s${" ".repeat(7)}Cosine distance\n${"-".repeat(72)}".format("Word"))
             println(words.joinToString("\n") { (word, dist) -> "%50s${" ".repeat(7)}%15f".format(word, dist) })
         }
diff --git a/src/main/kotlin/com/londogard/nlp/embeddings/sentence/AverageSentenceEmbeddings.kt b/src/main/kotlin/com/londogard/nlp/embeddings/sentence/AverageSentenceEmbeddings.kt
index e8f8d4bb..68912a2d 100644
--- a/src/main/kotlin/com/londogard/nlp/embeddings/sentence/AverageSentenceEmbeddings.kt
+++ b/src/main/kotlin/com/londogard/nlp/embeddings/sentence/AverageSentenceEmbeddings.kt
@@ -1,6 +1,7 @@
 package com.londogard.nlp.embeddings.sentence
 
 import com.londogard.nlp.embeddings.Embeddings
+import com.londogard.nlp.utils.avgNorm
 import com.londogard.nlp.utils.normalize
 import org.ejml.simple.SimpleMatrix
 
@@ -11,7 +12,6 @@ class AverageSentenceEmbeddings(override val tokenEmbeddings: Embeddings): Sente
     override fun getSentenceEmbedding(sentence: List<String>): SimpleMatrix {
         return tokenEmbeddings
             .traverseVectors(sentence)
-            .reduce { acc, simpleMatrix -> acc + simpleMatrix }
-            .normalize()
+            .avgNorm()
     }
 }
\ No newline at end of file
diff --git a/src/main/kotlin/com/londogard/nlp/embeddings/sentence/USifSentenceEmbeddings.kt b/src/main/kotlin/com/londogard/nlp/embeddings/sentence/USifSentenceEmbeddings.kt
index 8f283065..19caf76a 100644
--- a/src/main/kotlin/com/londogard/nlp/embeddings/sentence/USifSentenceEmbeddings.kt
+++ b/src/main/kotlin/com/londogard/nlp/embeddings/sentence/USifSentenceEmbeddings.kt
@@ -12,7 +12,7 @@ import kotlin.math.pow
 class USifSentenceEmbeddings(
     override val tokenEmbeddings: Embeddings,
     private val wordProb: Map<String, Float>,
-    randomWalkLength: Int, // = n, ~11
+    randomWalkLength: Int = 11, // = n, ~11
     private val numCommonDiscourseVector: Int = 5 // = m, 0 should work. In practise max 5.
 ) : SentenceEmbeddings {
     private val vocabSize = wordProb.size.toFloat()
diff --git a/src/main/kotlin/com/londogard/nlp/stemmer/Stemmer.kt b/src/main/kotlin/com/londogard/nlp/stemmer/Stemmer.kt
index 54cc18ba..e104cc53 100644
--- a/src/main/kotlin/com/londogard/nlp/stemmer/Stemmer.kt
+++ b/src/main/kotlin/com/londogard/nlp/stemmer/Stemmer.kt
@@ -37,7 +37,7 @@ class Stemmer(language: LanguageSupport) {
         var cache: Pair<LanguageSupport, Stemmer>? = null
 
         // Default to PorterStemmer if not supported!
-        fun stem(word: String, language: LanguageSupport): String {
+        @JvmStatic fun stem(word: String, language: LanguageSupport): String {
             val cachedStemmer = cache
 
             return when (cachedStemmer?.first) {
diff --git a/src/main/kotlin/com/londogard/nlp/stopwords/Stopwords.kt b/src/main/kotlin/com/londogard/nlp/stopwords/Stopwords.kt
index 5ef38ef3..c50cc785 100644
--- a/src/main/kotlin/com/londogard/nlp/stopwords/Stopwords.kt
+++ b/src/main/kotlin/com/londogard/nlp/stopwords/Stopwords.kt
@@ -15,6 +15,7 @@ object Stopwords {
     fun isStopword(word: String, language: LanguageSupport): Boolean =
         stopwordsOrNull(language)?.contains(word) == true
 
+    @Throws(IllegalArgumentException::class)
     fun stopwords(language: LanguageSupport): Set<String> =
         stopwordsOrNull(language)
             ?: throw IllegalArgumentException("There exists not stopwords for language ${language.name}. Please try again with one of the supported languages.")
diff --git a/src/main/kotlin/com/londogard/nlp/structures/trie/Trie.kt b/src/main/kotlin/com/londogard/nlp/structures/trie/Trie.kt
index 047e594c..31eef4c7 100644
--- a/src/main/kotlin/com/londogard/nlp/structures/trie/Trie.kt
+++ b/src/main/kotlin/com/londogard/nlp/structures/trie/Trie.kt
@@ -64,20 +64,4 @@ fun findFirstMerger(trie: TrieNode, string: String): String? {
                 .firstOrNull()
         }
     }
-}
-
-fun main() {
-    val vocab = WordFrequencies.getAllWordFrequenciesOrNull(LanguageSupport.sv)?.toVocab() ?: emptyMap()
-    println(vocab.entries.sortedBy { it.value }.reversed().take(5))
-    val trie = Trie(vocab)
-    println(findFirstMerger(trie.rootNode.childNodes.entries.first().value, ""))
-
-    // could use foldRight (goes from other end..!)
-    // val reverseTrie = Trie(vocab.mapKeys { (key,_) -> key.reversed() })
-    // println(reverseTrie.rootNode.childNodes.map { it.key to it.value.count })
-
-    println(trie.rootNode.childNodes.getValue('ä').childNodes.map { it.key to it.value.count })
-    println(trie.rootNode.childNodes.map { it.key to it.value.count })
-    println(trie.rootNode.char)
-    println(trie.rootNode.count)
 }
\ No newline at end of file
diff --git a/src/main/kotlin/com/londogard/nlp/tokenizer/SentencePieceTokenizer.kt b/src/main/kotlin/com/londogard/nlp/tokenizer/SentencePieceTokenizer.kt
index 496d1881..37af1a85 100644
--- a/src/main/kotlin/com/londogard/nlp/tokenizer/SentencePieceTokenizer.kt
+++ b/src/main/kotlin/com/londogard/nlp/tokenizer/SentencePieceTokenizer.kt
@@ -17,12 +17,12 @@ class SentencePieceTokenizer(modelPath: Path, vocabPath: Path? = null): Tokenize
     override fun split(text: String): List<String> = sentencePieceTokenizer.tokenize(text)
 
     companion object {
-        fun fromLanguageSupportOrNull(languageSupport: LanguageSupport): SentencePieceTokenizer? =
+        const val beginningOfWord: Char = '▁' // character used as the beginning-of-word marker
+        @JvmStatic fun fromLanguageSupportOrNull(languageSupport: LanguageSupport): SentencePieceTokenizer? =
             fromLanguageSupportAndSizeOrNull(languageSupport, VocabSize.v10_000)
-
-        fun fromLanguageSupportAndSizeOrNull(languageSupport: LanguageSupport, vocabSize: VocabSize) =
+        @JvmStatic fun fromLanguageSupportAndSizeOrNull(languageSupport: LanguageSupport, vocabSize: VocabSize): SentencePieceTokenizer? =
             if (languageSupport.hasSentencePiece()) {
-                val (vocab, model) = DownloadHelper.getBpeVocabModel(languageSupport, vocabSize.size)
+                val (vocab, model) = DownloadHelper.getSentencePieceVocabModel(languageSupport, vocabSize.size) // pair is (vocab, model); the constructor takes (model, vocab)
                 SentencePieceTokenizer(model, vocab)
             } else null
     }
diff --git a/src/main/kotlin/com/londogard/nlp/utils/CompressionUtil.kt b/src/main/kotlin/com/londogard/nlp/utils/CompressionUtil.kt
index 98930f37..56093611 100644
--- a/src/main/kotlin/com/londogard/nlp/utils/CompressionUtil.kt
+++ b/src/main/kotlin/com/londogard/nlp/utils/CompressionUtil.kt
@@ -6,12 +6,12 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
 import java.io.BufferedInputStream
 import java.io.IOException
 import java.io.InputStream
-import java.io.OutputStream
 import java.nio.file.Files
 import java.nio.file.Path
 import java.nio.file.StandardCopyOption
 import java.util.zip.GZIPInputStream
 
+/** Thin helpers around Apache Commons Compress, exposed as functions on a singleton object. */
 object CompressionUtil {
     fun gunzip(path: Path): InputStream =
         path.toFile()
diff --git a/src/main/kotlin/com/londogard/nlp/utils/CustomExtensions.kt b/src/main/kotlin/com/londogard/nlp/utils/CustomExtensions.kt
index 1ff97c0d..6e5d86e6 100644
--- a/src/main/kotlin/com/londogard/nlp/utils/CustomExtensions.kt
+++ b/src/main/kotlin/com/londogard/nlp/utils/CustomExtensions.kt
@@ -5,6 +5,8 @@ import java.nio.file.Files
 import java.nio.file.Path
 import java.nio.file.attribute.FileAttribute
 
+/** Custom extensions for Path taken from Kotlin EXPERIMENTAL. */
+
 // Taken from Kotlin stdlib (EXPERIMENTAL)
 internal inline fun Path.readLines(charset: Charset = Charsets.UTF_8): List<String> =
     Files.readAllLines(this, charset)
diff --git a/src/main/kotlin/com/londogard/nlp/utils/DownloadHelper.kt b/src/main/kotlin/com/londogard/nlp/utils/DownloadHelper.kt
index 22aea6c2..3f795901 100644
--- a/src/main/kotlin/com/londogard/nlp/utils/DownloadHelper.kt
+++ b/src/main/kotlin/com/londogard/nlp/utils/DownloadHelper.kt
@@ -1,115 +1,79 @@
 package com.londogard.nlp.utils
 
 import com.londogard.nlp.embeddings.EmbeddingLoader.BpeDefaultEmbeddingDimension
-import com.londogard.nlp.tokenizer.toVocabSize
 import com.londogard.nlp.wordfreq.WordFrequencySize
 import java.net.URL
 import java.nio.file.Files
 import java.nio.file.Path
-import java.nio.file.Paths
 
 @PublishedApi
 internal object DownloadHelper {
-    private val rootPath: Path = Paths.get(System.getProperty("user.home")).resolve(".londogard")
-    private const val dataUrl: String = "https://raw.githubusercontent.com/londogard/londogard-nlp-toolkit/main/data"
-    private const val bpeUrl: String = "https://nlp.h-its.org/bpemb/"
-    private val stopwordPath: Path = rootPath.resolve("stopwords")
-    private val wordFrequencyPath: Path = rootPath.resolve("wordfreq")
-    private val embeddingPath: Path = rootPath.resolve("embeddings")
-    private val bpePath: Path = rootPath.resolve("bpe")
-
     fun getStopWords(language: LanguageSupport): Path {
-        val path = stopwordPath.resolve(language.name)
-        if (!Files.exists(path)) {
-            println("Language ${language.name} does not have stopwords locally. Will download (few KBs)...")
+        val fileInfo = UrlProvider.stopwords(language)
+        downloadFileIfMissing(fileInfo) // does nothing when fileInfo.path already exists
 
-            "$dataUrl/stopwords/${language.name}".saveTo(path)
+        return fileInfo.path
+    }
 
-            println("Download done! ${language.name} stopwords located at ${path.toAbsolutePath()}")
-        }
+    fun getWordFrequencies(language: LanguageSupport, size: WordFrequencySize = WordFrequencySize.Smallest): Path {
+        val fileInfo = UrlProvider.wordfreq(language, size)
+        downloadFileIfMissing(fileInfo) // does nothing when fileInfo.path already exists
 
-        return path
+        return fileInfo.path
     }
 
-    fun getWordFrequencies(language: LanguageSupport, size: WordFrequencySize = WordFrequencySize.Smallest): Path {
-        val filename = size.toFileName(language)
-        val path = wordFrequencyPath.resolve(filename)
-        if (!Files.exists(path)) {
-            println("Language ${language.name} does not have (${size.name}) word frequencies locally. Will download (few KBs)...")
+    private fun downloadFileIfMissing(fileInfo: FileInfo) { // saves fileInfo.toUrl() to fileInfo.path unless that path already exists
+        if (!Files.exists(fileInfo.path)) {
+            println("Downloading ${fileInfo.description} for ${fileInfo.language} as files don't exist locally.")
 
-            "$dataUrl/wordfreq/${filename}".saveTo(path)
+            fileInfo.toUrl().saveTo(fileInfo.path) // saveTo creates the parent directories before writing
 
-            println("Download done! ${language.name} (${size.name}) word frequencies located at ${path.toAbsolutePath()}")
+            println("Download completed! ${fileInfo.language} ${fileInfo.description} located at ${fileInfo.path.toAbsolutePath()}")
         }
-
-        return path
     }
 
     fun getWordEmbeddings(language: LanguageSupport): Path {
-        val filename = "cc.${language.name}.300.vec"
-        val path = embeddingPath.resolve(filename)
-        if (!Files.exists(path)) {
-            path.parent.createDirectories()
-            println("Language ${language.name} does not have word embeddings locally. Will download (could be GBs)...")
-            val url = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/$filename.gz"
+        val fileInfo = UrlProvider.fastText(language) // cache path of the unpacked .vec file plus the .gz download URL
+        if (!Files.exists(fileInfo.path)) {
             val tmpPath = Files.createTempFile("tmp", ".gz")
-
-            url.saveTo(tmpPath)
-            Files.newOutputStream(path).use { out ->
+            downloadFileIfMissing(fileInfo.copy(path = tmpPath.also { Files.deleteIfExists(it) })) // createTempFile already created the file; remove it or the "missing" check skips the download
+            Files.newOutputStream(fileInfo.path.also { Files.createDirectories(it.parent) }).use { out -> // the embeddings directory may not exist yet
                 CompressionUtil.gunzip(tmpPath).use { input -> input.copyTo(out) }
             }
             Files.deleteIfExists(tmpPath)
-
-            println("Download completed! ${language.name} word embeddings located at ${path.toAbsolutePath()}")
         }
-        return path
+        return fileInfo.path
     }
 
-    private fun getBpeBaseUrl(language: LanguageSupport, numMerges: Int): String =
-        "$bpeUrl/$language/$language.wiki.bpe.vs$numMerges"
-
     // TODO improve by `data class`
-    fun getBpeVocabModel(language: LanguageSupport, vocabSize: Int = 10_000): Pair<Path, Path> {
-        val baseUrl = getBpeBaseUrl(language, vocabSize)
-        val vocab = getBpeFile("$baseUrl.vocab")
-        val model = getBpeFile("$baseUrl.model")
-
-        return vocab to model
-    }
-
-    private fun getBpeFile(url: String): Path {
-        val filename = url.takeLastWhile { it != '/' }
-        val path = bpePath.resolve(filename)
-        if (!Files.exists(path)) {
-            println("Downloading BPE Model/Vocab/Embedding ($filename)")
-            url.saveTo(path)
-            println("Download completed! $filename located at ${path.parent}")
-        }
+    fun getSentencePieceVocabModel(language: LanguageSupport, vocabSize: Int = 10_000): Pair<Path, Path> {
+        val (vocab, model) = UrlProvider.sentencePiece(language, vocabSize) // FileInfo for the .vocab and the .model file
+        downloadFileIfMissing(vocab)
+        downloadFileIfMissing(model)
 
-        return path
+        return vocab.path to model.path // order matters: the pair is destructured as (vocab, model) by the tokenizer factory
     }
 
-    fun getBpeEmbeddings(language: LanguageSupport, vocabSize: Int = 10_000, dimensions: Int = BpeDefaultEmbeddingDimension): Path {
-        val filePath = bpePath.resolve("$language.wiki.bpe.vs$vocabSize.d$dimensions.w2v.txt")
-
-        return if (Files.exists(filePath)) {
-            filePath
-        } else {
-            val baseUrl = getBpeBaseUrl(language, vocabSize)
-            val embeddingsCompressed = getBpeFile("$baseUrl.d$dimensions.w2v.txt.tar.gz")
-            val tmpPath = CompressionUtil.uncompressTarGz(embeddingsCompressed)
-            embeddingsCompressed.toFile().deleteRecursively()
-            Files.move(tmpPath, filePath)
-            bpePath.resolve("data").toFile().deleteRecursively()
-
-            filePath
+    fun getBpeEmbeddings(
+        language: LanguageSupport,
+        vocabSize: Int = 10_000,
+        dimensions: Int = BpeDefaultEmbeddingDimension
+    ): Path {
+        val fileInfo = UrlProvider.bpeEmbedding(language, vocabSize, dimensions)
+        // Only download and unpack when the extracted embedding file is not cached yet.
+        if (!Files.exists(fileInfo.path)) {
+            val tmpPath = fileInfo.path.parent.resolve("${fileInfo.filename}.tar.gz") // archive is stored next to the target file
+            downloadFileIfMissing(fileInfo.copy(path = tmpPath))
+            CompressionUtil.uncompressTarGz(tmpPath) // NOTE(review): the returned path is ignored — confirm extraction ends up at fileInfo.path
+            Files.deleteIfExists(tmpPath) // the archive is no longer needed once unpacked
         }
+        return fileInfo.path
     }
 
-    private fun String.saveTo(path: Path) {
+    private fun URL.saveTo(path: Path) {
         Files.createDirectories(path.parent)
 
-        URL(this).openStream().use { input ->
+        openStream().use { input ->
             path.toFile().outputStream().use { output ->
                 input.copyTo(output)
             }
diff --git a/src/main/kotlin/com/londogard/nlp/utils/EjmlExtensions.kt b/src/main/kotlin/com/londogard/nlp/utils/EjmlExtensions.kt
index dfc00577..5f5b70bd 100644
--- a/src/main/kotlin/com/londogard/nlp/utils/EjmlExtensions.kt
+++ b/src/main/kotlin/com/londogard/nlp/utils/EjmlExtensions.kt
@@ -7,6 +7,10 @@ import org.ejml.dense.row.NormOps_FDRM
 import org.ejml.kotlin.*
 import org.ejml.simple.SimpleMatrix
 
+/**
+ * Custom extensions for EJML simplification in Kotlin. Some optimized for speed.
+ */
+
 /** Basic Retrieval */
 fun SimpleMatrix.getRow(index: Int): SimpleMatrix = extractVector(true, index)
 fun SimpleMatrix.getRows(rows: IntArray): SimpleMatrix =
diff --git a/src/main/kotlin/com/londogard/nlp/utils/FileInfo.kt b/src/main/kotlin/com/londogard/nlp/utils/FileInfo.kt
new file mode 100644
index 00000000..9090b8f2
--- /dev/null
+++ b/src/main/kotlin/com/londogard/nlp/utils/FileInfo.kt
@@ -0,0 +1,8 @@
+package com.londogard.nlp.utils
+
+import java.net.URL
+import java.nio.file.Path
+
+data class FileInfo(val filename: String, val path: Path, val url: String, val description: String, val language: LanguageSupport) { // one downloadable resource: local path plus source URL
+    fun toUrl(): URL = URL(url) // the java.net.URL constructor throws MalformedURLException if [url] cannot be parsed
+}
diff --git a/src/main/kotlin/com/londogard/nlp/utils/LanguageSupport.kt b/src/main/kotlin/com/londogard/nlp/utils/LanguageSupport.kt
index e2a45f9d..aa675278 100644
--- a/src/main/kotlin/com/londogard/nlp/utils/LanguageSupport.kt
+++ b/src/main/kotlin/com/londogard/nlp/utils/LanguageSupport.kt
@@ -3,7 +3,9 @@ package com.londogard.nlp.utils
 /**
  * All languages and their support.
  * For conversion from ISO-code, see https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
- * 'nb' = Norwegian Bokmål. But you can call 'no' too.
+ *
+ * 'nb' = Norwegian Bokmål. Using 'no' is recommended, as it is supported by the most
+ * features and is automatically mapped to 'nb' where required.
  */
 enum class LanguageSupport {
     ab, ace, ady, af, ak, als, am, an, ang, ar, arc, arz, `as`, ast, atj, av, ay, az, azb,
@@ -20,14 +22,14 @@ enum class LanguageSupport {
     ta, tcy, te, tet, tg, th, ti, tk, tl, tn, to, tpi, tr, ts, tt, tum, tw, ty, tyv, udm, ug, uk, ur, uz,
     ve, vec, vep, vi, vls, vo, wa, war, wo, wuu, xal, xh, xmf, yi, yo, za, zea, zh, zu;
 
-    // Supported through SnowballStemmer (http://snowball.tartarus.org/)
+    /** Validates if Stemmer is supported for LanguageSupport. Support via [SnowballStemmer](http://snowball.tartarus.org/) */
     fun hasStemmer(): Boolean = when (this) {
         sv, nl, en, fi, fr, de, hu, it, no, pt, ro, ru, es, tr -> true
         else -> false
     }
 
-    // Supported through FastText vectors (https://fasttext.cc/docs/en/crawl-vectors.html)
-    fun hasWordEmbeddings(): Boolean = when (this) { // TODO add fastText vector extraction
+    /** Validates if WordEmbedding (fastText) is downloadable for LanguageSupport. Support via [fastText](https://fasttext.cc/docs/en/crawl-vectors.html) */
+    fun hasWordEmbeddings(): Boolean = when (this) {
         en, ky, xmf, mwl, tt, vec, ml, pfl, ro, war, tk, mhr, sc, am, cv, `as`,
         nn, vo, az, ia, th, ka, gl, sco, co, mt, rm, bar, zh, pt, kk, fy, pms,
         mzn, ba, cy, li, et, fa, bg, sl, ast, `is`, ja, de, hif, nds, bcl, so, ceb,
@@ -41,14 +43,14 @@ enum class LanguageSupport {
         else -> false
     }
 
-    // Supported through NLTKs stopword lists (https://www.nltk.org/)
+    /** Validates if StopWord is supported for LanguageSupport. Support via NLTKs lists, [NLTK](https://www.nltk.org/) */
     fun hasStopWordSupport(): Boolean = when (this) {
         ar, az, da, de, el, en, es, fi, fr, hu, id, it,
         kk, ne, nl, no, pt, ro, ru, sl, sv, tg, tr -> true
         else -> false
     }
 
-    // Supported through wordfreq.py datasets (https://pypi.org/project/wordfreq/)
+    /** Validates if WordFrequencies is supported for LanguageSupport. Support via the [wordfreq.py](https://pypi.org/project/wordfreq/) datasets. */
     fun hasWordFrequencySupport(): Boolean = when (this) {
         ar, cs, de, en, es, fi, fr, it, ja, nl, pl, uk,
         pt, ru, zh, bg, bn, ca, da, el, fa, he, hi, ba, hr, rs, me,
@@ -56,13 +58,14 @@ enum class LanguageSupport {
         else -> false
     }
 
+    /** Returns the largest available word-frequency size ("large" or "small") for this language, or null if there is none. */
     fun largestWordFrequency(): String? = when (this) {
         ar, cs, de, en, es, fi, fr, it, ja, nl, pl, pt, ru, zh -> "large"
         bg, bn, ca, da, el, fa, he, hi, hu, id, ko, lv, mk, ms, nb, no, ro, sh, sv, tr, uk,ba, hr, rs, me -> "small"
         else -> null
     }
 
-    // Download custom model/vocab from here: https://nlp.h-its.org/bpemb/ or create your own.
+    /** Validates if SentencePiece exists pretrained for LanguageSupport. Support via [BPEmb](https://nlp.h-its.org/bpemb/) which are trained on Wikipedia.org. */
     fun hasSentencePiece(): Boolean = when (this) {
         nb, nah, bh, eml -> false
         else -> true
diff --git a/src/main/kotlin/com/londogard/nlp/utils/MapExtensions.kt b/src/main/kotlin/com/londogard/nlp/utils/MapExtensions.kt
index 2b7ec5c0..4a9da62e 100644
--- a/src/main/kotlin/com/londogard/nlp/utils/MapExtensions.kt
+++ b/src/main/kotlin/com/londogard/nlp/utils/MapExtensions.kt
@@ -2,6 +2,9 @@ package com.londogard.nlp.utils
 
 import kotlin.math.roundToInt
 
+/**
+ * Custom extension functions for Map(s).
+ */
 object MapExtensions {
     fun Map<String, Float>.toVocab(): Map<String, Int> {
         val min = this.values.minOrNull() ?: 1f
diff --git a/src/main/kotlin/com/londogard/nlp/utils/UrlProvider.kt b/src/main/kotlin/com/londogard/nlp/utils/UrlProvider.kt
new file mode 100644
index 00000000..4d024ece
--- /dev/null
+++ b/src/main/kotlin/com/londogard/nlp/utils/UrlProvider.kt
@@ -0,0 +1,53 @@
+package com.londogard.nlp.utils
+
+import com.londogard.nlp.embeddings.EmbeddingLoader
+import com.londogard.nlp.wordfreq.WordFrequencySize
+import java.net.URL
+import java.nio.file.Path
+import java.nio.file.Paths
+
+/** Builds the [FileInfo] (cache path under `<user.home>/.londogard` plus download URL) for each downloadable resource. */
+object UrlProvider {
+    private const val githubDataUrl: String = "https://raw.githubusercontent.com/londogard/londogard-nlp-toolkit/main/data"
+    // No trailing slash: getBpeBaseUrl joins with '/', so one here would produce "bpemb//<language>/...".
+    private const val bpeUrl: String = "https://nlp.h-its.org/bpemb"
+
+    private val rootPath: Path = Paths.get(System.getProperty("user.home")).resolve(".londogard")
+    private val stopwordPath: Path = rootPath.resolve("stopwords")
+    private val wordFrequencyPath: Path = rootPath.resolve("wordfreq")
+    private val embeddingPath: Path = rootPath.resolve("embeddings")
+    private val bpePath: Path = rootPath.resolve("bpe")
+
+    fun fastText(language: LanguageSupport): FileInfo {
+        val filename = "cc.${language.name}.300.vec"
+        val url = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/$filename.gz" // remote file is gzipped; the local name is not
+        return FileInfo(filename, embeddingPath.resolve(filename), url, "fastText (GB(s))", language)
+    }
+
+    fun bpeEmbedding(language: LanguageSupport, vocabSize: Int, dimensions: Int): FileInfo {
+        val filename = "$language.wiki.bpe.vs$vocabSize.d$dimensions.w2v.txt"
+        val url = "${getBpeBaseUrl(language, vocabSize)}.d$dimensions.w2v.txt.tar.gz" // remote file is a tar.gz archive
+        return FileInfo(filename, bpePath.resolve(filename), url, "bpemb embeddings (100KB - 45MB)", language)
+    }
+
+    fun sentencePiece(language: LanguageSupport, vocabSize: Int): Pair<FileInfo, FileInfo> {
+        val baseUrl = getBpeBaseUrl(language, vocabSize)
+        val baseFileName = baseUrl.takeLastWhile { char -> char != '/' } // last URL segment doubles as the local file stem
+        val vocabFilename = "$baseFileName.vocab"
+        val modelFilename = "$baseFileName.model"
+        val vocab = FileInfo(vocabFilename, bpePath.resolve(vocabFilename), "$baseUrl.vocab", "sentencepiece (bpemb) vocab (<10 KB)", language)
+        val model = FileInfo(modelFilename, bpePath.resolve(modelFilename), "$baseUrl.model", "sentencepiece (bpemb) model (<4 MB)", language)
+        return vocab to model // callers rely on the (vocab, model) order
+    }
+
+    fun stopwords(language: LanguageSupport): FileInfo =
+        FileInfo(language.toString(), stopwordPath.resolve(language.toString()), "$githubDataUrl/stopwords/$language", "stopwords (<2 KB)", language)
+
+    fun wordfreq(language: LanguageSupport, size: WordFrequencySize): FileInfo {
+        val filename = size.toFileName(language)
+        return FileInfo(filename, wordFrequencyPath.resolve(filename), "$githubDataUrl/wordfreq/$filename", "wordfreq (<3 MB)", language)
+    }
+
+    private fun getBpeBaseUrl(language: LanguageSupport, vocabSize: Int): String =
+        "$bpeUrl/$language/$language.wiki.bpe.vs$vocabSize"
+}
\ No newline at end of file
diff --git a/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencies.kt b/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencies.kt
index 15c45720..9a9182f5 100644
--- a/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencies.kt
+++ b/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencies.kt
@@ -5,7 +5,6 @@ import com.londogard.nlp.utils.DownloadHelper
 import com.londogard.nlp.utils.LanguageSupport
 import java.io.InputStream
 import java.nio.file.Path
-import java.util.zip.GZIPInputStream
 import kotlin.math.log10
 import kotlin.math.pow
 
diff --git a/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencySize.kt b/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencySize.kt
index 9d110d40..9322bba9 100644
--- a/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencySize.kt
+++ b/src/main/kotlin/com/londogard/nlp/wordfreq/WordFrequencySize.kt
@@ -7,7 +7,7 @@ enum class WordFrequencySize {
     Largest, Smallest;
 
     private fun stringify(languageSupport: LanguageSupport): String = when(languageSupport) {
-        nb -> no.toString()
+        no -> nb.toString()
         ba, hr, rs, cs, me -> sh.toString()
         else -> languageSupport.toString()
     }
diff --git a/src/test/kotlin/com/londogard/nlp/CompressionUtilTests.kt b/src/test/kotlin/com/londogard/nlp/CompressionUtilTests.kt
new file mode 100644
index 00000000..a92af87a
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/CompressionUtilTests.kt
@@ -0,0 +1,36 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.utils.CompressionUtil
+import org.amshove.kluent.shouldContain
+import org.amshove.kluent.shouldHaveSize
+import java.nio.file.Files
+import java.nio.file.Paths
+import kotlin.test.Test
+
+/** Checks that [CompressionUtil] unpacks the bundled `hej` fixtures to a single line reading "hej". */
+class CompressionUtilTests {
+    @Test
+    fun testGunzip() {
+        val lines = CompressionUtil.gunzip(Paths.get(requireNotNull(javaClass.getResource("/hej.txt.gz")).toURI())) // toURI: URL.path is percent-encoded and breaks on spaces
+            .bufferedReader()
+            .readLines()
+            .filter(String::isNotBlank)
+
+        lines shouldHaveSize 1
+        lines shouldContain "hej"
+    }
+
+    @Test
+    fun testUncompressTarGz() {
+        val lines = CompressionUtil.uncompressTarGz(
+            Paths.get(requireNotNull(javaClass.getResource("/hej.tar.gz")).toURI()), // fail loudly if the fixture is missing
+            Files.createTempDirectory("tmp")
+        )
+            .toFile()
+            .readLines()
+            .filter(String::isNotBlank)
+
+        lines shouldHaveSize 1
+        lines shouldContain "hej"
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/londogard/nlp/EmbeddingTest.kt b/src/test/kotlin/com/londogard/nlp/EmbeddingTest.kt
new file mode 100644
index 00000000..d1ed743e
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/EmbeddingTest.kt
@@ -0,0 +1,46 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.embeddings.BpeEmbeddings
+import com.londogard.nlp.embeddings.EmbeddingLoader
+import com.londogard.nlp.embeddings.LightWordEmbeddings
+import com.londogard.nlp.embeddings.WordEmbeddings
+import com.londogard.nlp.utils.LanguageSupport
+import org.amshove.kluent.shouldBe
+import org.amshove.kluent.shouldNotBe
+import java.nio.file.Path
+import kotlin.test.Test
+
+class EmbeddingTest {
+    /** Asserts that Swedish BPE embeddings load and resolve both a whole word and a single subword. */
+    @Test
+    fun testBpeEmb() {
+        val bpe = EmbeddingLoader.fromLanguageOrNull<BpeEmbeddings>(LanguageSupport.sv)
+        bpe shouldNotBe null
+        bpe?.vector("hej") shouldNotBe null
+        bpe?.subwordVector("h") shouldNotBe null
+    }
+
+    /** Asserts that with `maxWordCount = 1` adding "då" keeps the size at 1 and drops "hej". */
+    @Test
+    fun testLightWordEmbeddings() {
+        val light = LightWordEmbeddings(Path.of(javaClass.getResource("/sv_embeddings_cut.txt")!!.toURI()), maxWordCount = 1)
+        light.embeddings.size shouldBe 1
+        light.contains("hej") shouldBe true
+        light.addWords(setOf("då"))
+        light.embeddings.size shouldBe 1
+        light.contains("då") shouldBe true
+        light.contains("hej") shouldBe false
+        light.vector("då")?.numCols() shouldBe 3
+    }
+
+    /** Asserts that the fixture yields two words, that lookup is case-sensitive, and that vectors have 3 columns. */
+    @Test
+    fun testWordEmbeddings() {
+        val words = WordEmbeddings(Path.of(javaClass.getResource("/sv_embeddings_cut.txt")!!.toURI()))
+        words.embeddings.size shouldBe 2
+        words.contains("hej") shouldBe true
+        words.contains("då") shouldBe true
+        words.contains("Då") shouldBe false
+        words.vector("då")?.numCols() shouldBe 3
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/londogard/nlp/SentenceEmbeddingTests.kt b/src/test/kotlin/com/londogard/nlp/SentenceEmbeddingTests.kt
new file mode 100644
index 00000000..77150682
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/SentenceEmbeddingTests.kt
@@ -0,0 +1,56 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.embeddings.BpeEmbeddings
+import com.londogard.nlp.embeddings.EmbeddingLoader
+import com.londogard.nlp.embeddings.WordEmbeddings
+import com.londogard.nlp.embeddings.sentence.AverageSentenceEmbeddings
+import com.londogard.nlp.embeddings.sentence.USifSentenceEmbeddings
+import com.londogard.nlp.utils.LanguageSupport
+import com.londogard.nlp.utils.avgNorm
+import com.londogard.nlp.utils.normalize
+import com.londogard.nlp.wordfreq.WordFrequencies
+import org.amshove.kluent.shouldBe
+import org.amshove.kluent.shouldBeEqualTo
+import org.amshove.kluent.shouldNotBeEqualTo
+import java.nio.file.Path
+import kotlin.test.Test
+
+/** Tests for [USifSentenceEmbeddings] and [AverageSentenceEmbeddings]. */
+class SentenceEmbeddingTests {
+    /**
+     * uSIF over the `LanguageSupport.sv` BPE embeddings must yield 50 values whose first two differ,
+     * and must not render the same as the plain average embedding of the same tokens.
+     */
+    @Test
+    fun testUsifEmbeddings() {
+        val embeddings = checkNotNull(EmbeddingLoader.fromLanguageOrNull<BpeEmbeddings>(LanguageSupport.sv)) {
+            "could not load BPE embeddings for LanguageSupport.sv"
+        }
+        val usif = USifSentenceEmbeddings(embeddings, WordFrequencies.getAllWordFrequenciesOrNull(LanguageSupport.sv) ?: emptyMap())
+        val avgSentenceEmbedding = AverageSentenceEmbeddings(embeddings)
+        val embedding = usif.getSentenceEmbedding(listOf("hej", "där", "borta"))
+        val rawData = embedding.fdrm.data
+
+        rawData[0] shouldNotBeEqualTo rawData[1]
+        // Value equality: Kluent's shouldBe asserts reference identity, which only holds for small boxed ints.
+        rawData.size shouldBeEqualTo 50
+
+        usif.getSentenceEmbedding(listOf("hej", "då")).toString() shouldNotBeEqualTo avgSentenceEmbedding.getSentenceEmbedding(listOf("hej", "då")).toString()
+    }
+
+    /**
+     * The average sentence embedding must render identically to `avgNorm()` of the token vectors and,
+     * for a single token, to that token's normalized vector.
+     */
+    @Test
+    fun testAvgSentenceEmbeddings() {
+        val resource = checkNotNull(javaClass.getResource("/sv_embeddings_cut.txt")) {
+            "missing test resource /sv_embeddings_cut.txt"
+        }
+        val embeddings = WordEmbeddings(Path.of(resource.toURI()))
+        val avgSentenceEmbeddings = AverageSentenceEmbeddings(embeddings)
+
+        embeddings.traverseVectors(listOf("hej", "då")).avgNorm().toString() shouldBeEqualTo avgSentenceEmbeddings.getSentenceEmbedding(listOf("hej", "då")).toString()
+        embeddings.vector("hej")?.normalize()?.toString() shouldBeEqualTo avgSentenceEmbeddings.getSentenceEmbedding(listOf("hej")).toString()
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/londogard/nlp/StemmerTests.kt b/src/test/kotlin/com/londogard/nlp/StemmerTests.kt
new file mode 100644
index 00000000..df13e03c
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/StemmerTests.kt
@@ -0,0 +1,30 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.stemmer.Stemmer
+import com.londogard.nlp.utils.LanguageSupport
+import org.amshove.kluent.shouldBeEqualTo
+import kotlin.test.Test
+
+/** Checks stemming for `LanguageSupport.sv` through a [Stemmer] instance and through `Stemmer.stem`. */
+class StemmerTests {
+    /** Input word paired with the stem the tests expect for it. */
+    private val expectedStems = listOf("hej" to "hej", "katten" to "katt")
+
+    /** Stems each word with a [Stemmer] constructed for `LanguageSupport.sv`. */
+    @Test
+    fun testStemmer() {
+        val swedishStemmer = Stemmer(LanguageSupport.sv)
+
+        for ((word, stem) in expectedStems) {
+            swedishStemmer.stem(word) shouldBeEqualTo stem
+        }
+    }
+
+    /** Stems each word through `Stemmer.stem(word, language)` without creating an instance. */
+    @Test
+    fun testStemmerObject() {
+        for ((word, stem) in expectedStems) {
+            Stemmer.stem(word, LanguageSupport.sv) shouldBeEqualTo stem
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/londogard/nlp/StopwordTests.kt b/src/test/kotlin/com/londogard/nlp/StopwordTests.kt
new file mode 100644
index 00000000..3503a79d
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/StopwordTests.kt
@@ -0,0 +1,30 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.stopwords.Stopwords
+import com.londogard.nlp.utils.LanguageSupport
+import org.amshove.kluent.shouldBe
+import org.amshove.kluent.shouldContain
+import org.amshove.kluent.shouldNotBe
+import kotlin.test.Test
+
+/** Checks the stop-word data that [Stopwords] exposes for `LanguageSupport.tr`. */
+class StopwordTests {
+    /** The full list must be available and include "acaba". */
+    @Test
+    fun testFullStopwords() {
+        val loadedStopwords = Stopwords.stopwordsOrNull(LanguageSupport.tr)
+
+        loadedStopwords shouldNotBe null
+        loadedStopwords!! shouldContain "acaba"
+    }
+
+    /** Single-word lookup: "acaba" must be reported as a stop word, "d" must not. */
+    @Test
+    fun testStopwords() {
+        val acabaIsStopword = Stopwords.isStopword("acaba", LanguageSupport.tr)
+        acabaIsStopword shouldBe true
+
+        val dIsStopword = Stopwords.isStopword("d", LanguageSupport.tr)
+        dIsStopword shouldBe false
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/londogard/nlp/TokenizerTests.kt b/src/test/kotlin/com/londogard/nlp/TokenizerTests.kt
new file mode 100644
index 00000000..7379637b
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/TokenizerTests.kt
@@ -0,0 +1,50 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.tokenizer.CharTokenizer
+import com.londogard.nlp.tokenizer.SentencePieceTokenizer
+import com.londogard.nlp.tokenizer.SimpleTokenizer
+import com.londogard.nlp.tokenizer.VocabSize
+import com.londogard.nlp.utils.LanguageSupport
+import org.amshove.kluent.shouldBeEqualTo
+import kotlin.test.Test
+
+/** Checks the output of [CharTokenizer], [SimpleTokenizer] and [SentencePieceTokenizer]. */
+class TokenizerTests {
+    /** Each character of the input, a space included, must come back as its own token. */
+    @Test
+    fun testCharTokenizer() {
+        val charTokenizer = CharTokenizer()
+        val cases = listOf(
+            "abc" to listOf("a", "b", "c"),
+            "a bc" to listOf("a", " ", "b", "c")
+        )
+
+        for ((text, expected) in cases) {
+            charTokenizer.split(text) shouldBeEqualTo expected
+        }
+    }
+
+    /** A space separates tokens without appearing in the output; a comma becomes a token of its own. */
+    @Test
+    fun testSimpleTokenizer() {
+        val simpleTokenizer = SimpleTokenizer()
+        val cases = listOf(
+            "abc" to listOf("abc"),
+            "a bc" to listOf("a", "bc"),
+            "and, some" to listOf("and", ",", "some")
+        )
+
+        for ((text, expected) in cases) {
+            simpleTokenizer.split(text) shouldBeEqualTo expected
+        }
+    }
+
+    /** The `VocabSize.v1000` tokenizer for `LanguageSupport.sv` must split the sample into the listed pieces. */
+    @Test
+    fun testSentencePieceTokenizer() {
+        val sentencePiece = SentencePieceTokenizer.fromLanguageSupportAndSizeOrNull(LanguageSupport.sv, VocabSize.v1000)
+        val expectedPieces = listOf("▁h", "e", "j", "▁där", "▁b", "or", "ta", "?")
+
+        sentencePiece?.split("hej där borta?") shouldBeEqualTo expectedPieces
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/londogard/nlp/UrlProviderTest.kt b/src/test/kotlin/com/londogard/nlp/UrlProviderTest.kt
new file mode 100644
index 00000000..f4aaf052
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/UrlProviderTest.kt
@@ -0,0 +1,81 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.embeddings.EmbeddingLoader
+import com.londogard.nlp.tokenizer.VocabSize
+import com.londogard.nlp.utils.FileInfo
+import com.londogard.nlp.utils.LanguageSupport
+import com.londogard.nlp.utils.UrlProvider
+import com.londogard.nlp.wordfreq.WordFrequencySize
+import org.amshove.kluent.shouldBe
+import java.net.HttpURLConnection
+import kotlin.test.Test
+
+/**
+ * Network smoke tests: every [UrlProvider] entry point used here must return a location that
+ * answers an HTTP HEAD request with status 200. Running them requires network access.
+ */
+class UrlProviderTest {
+    @Test
+    fun testFastText() {
+        val fileInfo = UrlProvider.fastText(LanguageSupport.sv)
+        checkUrlWorks(fileInfo) shouldBe true
+    }
+
+    @Test
+    fun testSentencePiece() {
+        val (vocab, model) = UrlProvider.sentencePiece(LanguageSupport.sv, VocabSize.v1000.size)
+
+        checkUrlWorks(vocab) shouldBe true
+        checkUrlWorks(model) shouldBe true
+    }
+
+    @Test
+    fun testBpeEmbedding() {
+        val fileInfo = UrlProvider.bpeEmbedding(LanguageSupport.sv, VocabSize.v1000.size, EmbeddingLoader.BpeDefaultEmbeddingDimension)
+
+        checkUrlWorks(fileInfo) shouldBe true
+    }
+
+    @Test
+    fun testStopwords() {
+        val fileInfo = UrlProvider.stopwords(LanguageSupport.sv)
+
+        checkUrlWorks(fileInfo) shouldBe true
+    }
+
+    @Test
+    fun testWordFreq() {
+        val fileInfo = UrlProvider.wordfreq(LanguageSupport.sv, WordFrequencySize.Smallest)
+
+        checkUrlWorks(fileInfo) shouldBe true
+    }
+
+    /**
+     * Sends a HEAD request to the URL of [fileInfo] and reports whether the server answered 200.
+     *
+     * @param fileInfo the remote file whose URL is probed
+     * @return `true` when the response code equals [HttpURLConnection.HTTP_OK]
+     */
+    private fun checkUrlWorks(fileInfo: FileInfo): Boolean {
+        val url = fileInfo.toUrl()
+        println(fileInfo.url)
+        val connection = (url.openConnection() as HttpURLConnection).apply {
+            requestMethod = "HEAD"
+            // Without timeouts an unresponsive host would block the test run indefinitely.
+            connectTimeout = TIMEOUT_MILLIS
+            readTimeout = TIMEOUT_MILLIS
+        }
+
+        return try {
+            connection.responseCode == HttpURLConnection.HTTP_OK
+        } finally {
+            // Always release the connection, even when reading the status throws.
+            connection.disconnect()
+        }
+    }
+
+    private companion object {
+        /** Upper bound, in milliseconds, for connecting to and reading from a probed URL. */
+        const val TIMEOUT_MILLIS = 30_000
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/londogard/nlp/WordFrequencyTests.kt b/src/test/kotlin/com/londogard/nlp/WordFrequencyTests.kt
new file mode 100644
index 00000000..b9789fe9
--- /dev/null
+++ b/src/test/kotlin/com/londogard/nlp/WordFrequencyTests.kt
@@ -0,0 +1,35 @@
+package com.londogard.nlp
+
+import com.londogard.nlp.utils.LanguageSupport
+import com.londogard.nlp.wordfreq.WordFrequencies
+import org.amshove.kluent.shouldBe
+import org.amshove.kluent.shouldNotBe
+import kotlin.test.Test
+
+/** Checks that [WordFrequencies] can serve frequency data for `LanguageSupport.nb`. */
+class WordFrequencyTests {
+    /** The complete frequency map must load and have an entry for "er". */
+    @Test
+    fun testFullWordFreq() {
+        val frequencies = WordFrequencies.getAllWordFrequenciesOrNull(LanguageSupport.nb)
+
+        frequencies shouldNotBe null
+        frequencies?.containsKey("er") shouldBe true
+    }
+
+    /** Looking up the frequency of "er" on its own must produce a value. */
+    @Test
+    fun testSingleWordFreq() {
+        val frequency = WordFrequencies.wordFrequencyOrNull("er", LanguageSupport.nb)
+
+        frequency shouldNotBe null
+    }
+
+    /** Looking up the Zipf frequency of "er" must produce a value. */
+    @Test
+    fun testSingleZipf() {
+        val zipf = WordFrequencies.zipfFrequencyOrNull("er", LanguageSupport.nb)
+
+        zipf shouldNotBe null
+    }
+}
\ No newline at end of file
diff --git a/src/test/resources/hej.txt.gz b/src/test/resources/hej.txt.gz
new file mode 100644
index 00000000..86d21900
Binary files /dev/null and b/src/test/resources/hej.txt.gz differ
diff --git a/src/test/resources/sv_embeddings_cut.txt b/src/test/resources/sv_embeddings_cut.txt
new file mode 100644
index 00000000..64970a2c
--- /dev/null
+++ b/src/test/resources/sv_embeddings_cut.txt
@@ -0,0 +1,3 @@
+2 3
+hej 0 1 1
+då 0 2 2
\ No newline at end of file