-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
325 changed files
with
237,192 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
##### Afrikaans ##### | ||
|
||
Legend: 'low accuracy mode | high accuracy mode' | ||
|
||
>>> Accuracy on average: 64.00% | 78.63% | ||
|
||
>> Detection of 1000 single words (average length: 8 chars) | ||
Accuracy: 37.00% | 58.30% | ||
Erroneously classified as | ||
Dutch: 12.80% | 14.10% | ||
German: 2.70% | 2.30% | ||
Latin: 2.60% | 2.00% | ||
English: 2.10% | 1.90% | ||
Danish: 2.10% | 1.90% | ||
Bokmal: 2.70% | 1.60% | ||
Welsh: 0.70% | 1.10% | ||
Nynorsk: 1.50% | 1.00% | ||
Swedish: 1.50% | 0.90% | ||
Estonian: 1.20% | 0.90% | ||
Lithuanian: 1.30% | 0.70% | ||
Italian: 0.40% | 0.70% | ||
Zulu: 1.20% | 0.70% | ||
Tswana: 1.50% | 0.70% | ||
French: 1.40% | 0.60% | ||
Basque: 1.40% | 0.60% | ||
Ganda: 1.10% | 0.60% | ||
Oromo: 2.00% | 0.60% | ||
Turkish: 0.50% | 0.60% | ||
Sotho: 1.00% | 0.60% | ||
Swahili: 0.60% | 0.50% | ||
Portuguese: 0.90% | 0.50% | ||
Romanian: 0.90% | 0.50% | ||
Tsonga: 1.40% | 0.50% | ||
Esperanto: 1.00% | 0.50% | ||
Xhosa: 0.40% | 0.50% | ||
Latvian: 1.30% | 0.40% | ||
Finnish: 2.30% | 0.40% | ||
Yoruba: 0.70% | 0.40% | ||
Polish: 1.10% | 0.40% | ||
Shona: 0.80% | 0.40% | ||
Icelandic: 0.90% | 0.30% | ||
Malay: 0.80% | 0.30% | ||
Indonesian: 0.40% | 0.30% | ||
Irish: 0.40% | 0.30% | ||
Somali: 1.00% | 0.30% | ||
Maori: 0.60% | 0.20% | ||
Catalan: 0.40% | 0.20% | ||
Tagalog: 1.30% | 0.10% | ||
Slovak: 0.50% | 0.10% | ||
Spanish: 0.70% | 0.10% | ||
Bosnian: 0.10% | 0.10% | ||
Hungarian: 0.60% | 0.10% | ||
Croatian: 0.50% | 0.10% | ||
Vietnamese: 0.40% | 0.10% | ||
Azerbaijani: 0.40% | 0.00% | ||
Czech: 0.40% | 0.00% | ||
Albanian: 0.40% | 0.00% | ||
Slovene: 0.10% | 0.00% | ||
|
||
|
||
>> Detection of 1000 word pairs (average length: 15 chars) | ||
Accuracy: 62.20% | 80.80% | ||
Erroneously classified as | ||
Dutch: 13.30% | 11.00% | ||
English: 0.80% | 1.30% | ||
German: 2.70% | 1.10% | ||
Latin: 1.00% | 0.80% | ||
Danish: 1.30% | 0.70% | ||
Bokmal: 1.90% | 0.40% | ||
Estonian: 1.40% | 0.30% | ||
Sotho: 0.60% | 0.30% | ||
Yoruba: 0.80% | 0.30% | ||
Nynorsk: 0.40% | 0.30% | ||
Swedish: 1.40% | 0.20% | ||
Tsonga: 0.70% | 0.20% | ||
Finnish: 1.70% | 0.20% | ||
Ganda: 0.40% | 0.20% | ||
Italian: 0.40% | 0.20% | ||
Welsh: 0.70% | 0.20% | ||
Oromo: 0.40% | 0.20% | ||
Swahili: 0.00% | 0.10% | ||
Tagalog: 0.40% | 0.10% | ||
French: 0.50% | 0.10% | ||
Hungarian: 0.20% | 0.10% | ||
Portuguese: 0.80% | 0.10% | ||
Malay: 0.20% | 0.10% | ||
Turkish: 0.10% | 0.10% | ||
Esperanto: 0.50% | 0.10% | ||
Shona: 0.50% | 0.10% | ||
Tswana: 0.20% | 0.10% | ||
Catalan: 0.10% | 0.10% | ||
Bosnian: 0.10% | 0.10% | ||
Spanish: 0.10% | 0.10% | ||
Lithuanian: 0.70% | 0.00% | ||
Romanian: 0.40% | 0.00% | ||
Xhosa: 0.20% | 0.00% | ||
Maori: 0.30% | 0.00% | ||
Basque: 0.40% | 0.00% | ||
Indonesian: 0.30% | 0.00% | ||
Somali: 0.20% | 0.00% | ||
Azerbaijani: 0.20% | 0.00% | ||
Czech: 0.10% | 0.00% | ||
Albanian: 0.30% | 0.00% | ||
Latvian: 0.40% | 0.00% | ||
Polish: 0.40% | 0.00% | ||
Slovak: 0.10% | 0.00% | ||
Zulu: 0.10% | 0.00% | ||
Slovene: 0.10% | 0.00% | ||
|
||
|
||
>> Detection of 1000 sentences (average length: 101 chars) | ||
Accuracy: 92.80% | 96.80% | ||
Erroneously classified as | ||
Dutch: 5.10% | 2.60% | ||
German: 0.20% | 0.20% | ||
Sotho: 0.00% | 0.10% | ||
Latin: 0.10% | 0.10% | ||
Danish: 0.00% | 0.10% | ||
English: 0.20% | 0.10% | ||
Welsh: 0.20% | 0.00% | ||
Tswana: 0.10% | 0.00% | ||
Estonian: 0.40% | 0.00% | ||
Tsonga: 0.10% | 0.00% | ||
Bokmal: 0.20% | 0.00% | ||
Yoruba: 0.10% | 0.00% | ||
Catalan: 0.10% | 0.00% | ||
Ganda: 0.10% | 0.00% | ||
Oromo: 0.10% | 0.00% | ||
Finnish: 0.10% | 0.00% | ||
Hungarian: 0.10% | 0.00% | ||
|
||
|
||
>> Exact values: 64 37 62.2 92.80000000000001 78.63333333333334 58.3 80.80000000000001 96.8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
31 changes: 31 additions & 0 deletions
31
tests/Lingua.AccuracyReport.Tests/AbstractLanguageDetectionAccuracyReport.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
using Lingua.Api; | ||
|
||
namespace Lingua.AccuracyReport.Tests; | ||
|
||
/// <summary>
/// Base class for language detection accuracy reports. It binds a
/// <see cref="LanguageDetectionStatistics"/> instance to one language and one
/// implementation, and forwards the inputs of derived tests to that instance.
/// </summary>
public abstract class AbstractLanguageDetectionAccuracyReport
{
    private readonly LanguageDetectionStatistics _statistics;

    /// <summary>Implemented by derived reports to check a single word.</summary>
    /// <param name="singleWord">The single word under test.</param>
    public abstract void SingleWordsAreIdentifiedCorrectly(string singleWord);

    /// <summary>Implemented by derived reports to check a word pair.</summary>
    /// <param name="wordPair">The word pair under test.</param>
    public abstract void WordPairsAreIdentifiedCorrectly(string wordPair);

    /// <summary>Implemented by derived reports to check an entire sentence.</summary>
    /// <param name="wordPair">
    /// NOTE(review): presumably the sentence under test; the name looks like a
    /// copy-paste from <see cref="WordPairsAreIdentifiedCorrectly"/>. It is kept
    /// unchanged so that the signature stays compatible - confirm before renaming.
    /// </param>
    public abstract void EntireSentencesAreIdentifiedCorrectly(string wordPair);

    /// <summary>
    /// Stores <paramref name="statistics"/> and assigns the report's language and
    /// implementation to it.
    /// </summary>
    /// <param name="language">Value assigned to the statistics' <c>Language</c>.</param>
    /// <param name="implementation">Value assigned to the statistics' <c>Implementation</c>.</param>
    /// <param name="statistics">The statistics instance all inputs are forwarded to.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="statistics"/> is <c>null</c>.
    /// </exception>
    protected AbstractLanguageDetectionAccuracyReport(
        Language language,
        Implementation implementation,
        LanguageDetectionStatistics statistics)
    {
        // Fail with a descriptive exception instead of a NullReferenceException
        // on the first property assignment below.
        System.ArgumentNullException.ThrowIfNull(statistics);

        _statistics = statistics;
        _statistics.Language = language;
        _statistics.Implementation = implementation;
    }

    /// <summary>Forwards <paramref name="singleWord"/> to the statistics instance.</summary>
    /// <param name="singleWord">The single word to pass on.</param>
    protected void ComputeSingleWordStatistics(string singleWord) =>
        _statistics.ComputeSingleWordStatistics(singleWord);

    /// <summary>Forwards <paramref name="wordPair"/> to the statistics instance.</summary>
    /// <param name="wordPair">The word pair to pass on.</param>
    protected void ComputeWordPairStatistics(string wordPair) =>
        _statistics.ComputeWordPairStatistics(wordPair);

    /// <summary>Forwards <paramref name="sentence"/> to the statistics instance.</summary>
    /// <param name="sentence">The sentence to pass on.</param>
    protected void ComputeSentenceStatistics(string sentence) =>
        _statistics.ComputeSentenceStatistics(sentence);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace Lingua.AccuracyReport.Tests; | ||
|
||
/// <summary>
/// Names the language detection implementation that an accuracy report is
/// produced for.
/// </summary>
public enum Implementation
{
    /// <summary>The Lingua implementation; the only member declared here.</summary>
    Lingua = 0,
}
Oops, something went wrong.