Last active
May 4, 2024 11:22
-
-
Save mattt/36357b7ffe9370d6fc573088aed0d1f5 to your computer and use it in GitHub Desktop.
NaturalLanguage Framework - NLTagger Language Support Matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import NaturalLanguage | |
// Languages to probe, ordered by raw value (the identifier printed in the
// `bcp47` column) so that rows are emitted in a stable, sorted order.
let languages: [NLLanguage] = [
    .amharic, .arabic, .armenian, .bengali, .bulgarian,
    .burmese, .catalan, .cherokee, .croatian, .czech,
    .danish, .dutch, .english, .finnish, .french,
    .georgian, .german, .greek, .gujarati, .hebrew,
    .hindi, .hungarian, .icelandic, .indonesian, .italian,
    .japanese, .kannada, .khmer, .korean, .lao,
    .malay, .malayalam, .marathi, .mongolian, .norwegian,
    .oriya, .persian, .polish, .portuguese, .punjabi,
    .romanian, .russian, .simplifiedChinese, .sinhalese, .slovak,
    .spanish, .swedish, .tamil, .telugu, .thai,
    .tibetan, .traditionalChinese, .turkish, .ukrainian, .urdu,
    .vietnamese
].sorted { $0.rawValue < $1.rawValue }

// Tag schemes to report on; each one becomes a column of the CSV output,
// in exactly this order.
let tagSchemes: [NLTagScheme] = [
    .language, .script, .tokenType, .lexicalClass, .nameType,
    .nameTypeOrLexicalClass, .lemma, .sentimentScore
]

// Header row: the language identifier column followed by one column per scheme.
let columns = ["bcp47"] + tagSchemes.map { $0.rawValue }
print(columns.joined(separator: ","))

// Token units queried for every language. This never changes between
// iterations, so it is built once instead of once per language.
let units: Set<NLTokenUnit> = [.document, .paragraph, .sentence, .word]

for language in languages {
    // A scheme counts as available for the language if it is reported for
    // at least one of the units, so take the union across all of them.
    let availableTagSchemes: Set<NLTagScheme> = Set(units.flatMap { unit in
        NLTagger.availableTagSchemes(for: unit, language: language)
    })

    // One CSV row per language: its raw value, then "true"/"false" per scheme
    // in the same order as the header.
    let row = [language.rawValue] + tagSchemes.map { availableTagSchemes.contains($0) ? "true" : "false" }
    print(row.joined(separator: ","))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bcp47 | Language | Script | TokenType | LexicalClass | NameType | NameTypeOrLexicalClass | Lemma | Sentiment | |
---|---|---|---|---|---|---|---|---|---|
am | true | true | true | false | false | false | false | false | |
ar | true | true | true | false | false | false | false | false | |
bg | true | true | true | false | false | false | false | false | |
bn | true | true | true | false | false | false | false | false | |
bo | true | true | true | false | false | false | false | false | |
ca | true | true | true | false | false | false | false | false | |
chr | true | true | true | false | false | false | false | false | |
cs | true | true | true | false | false | false | false | false | |
da | true | true | true | false | false | false | false | false | |
de | true | true | true | true | true | true | true | true | |
el | true | true | true | false | false | false | false | false | |
en | true | true | true | true | true | true | true | true | |
es | true | true | true | true | true | true | true | true | |
fa | true | true | true | false | false | false | false | false | |
fi | true | true | true | false | false | false | false | false | |
fr | true | true | true | true | true | true | true | true | |
gu | true | true | true | false | false | false | false | false | |
he | true | true | true | false | false | false | false | false | |
hi | true | true | true | false | false | false | false | false | |
hr | true | true | true | false | false | false | false | false | |
hu | true | true | true | false | false | false | false | false | |
hy | true | true | true | false | false | false | false | false | |
id | true | true | true | false | false | false | false | false | |
is | true | true | true | false | false | false | false | false | |
it | true | true | true | true | true | true | true | true | |
ja | true | true | true | false | false | false | false | false | |
ka | true | true | true | false | false | false | false | false | |
km | true | true | true | false | false | false | false | false | |
kn | true | true | true | false | false | false | false | false | |
ko | true | true | true | false | false | false | false | false | |
lo | true | true | true | false | false | false | false | false | |
ml | true | true | true | false | false | false | false | false | |
mn-Mong | true | true | true | false | false | false | false | false | |
mr | true | true | true | false | false | false | false | false | |
ms | true | true | true | false | false | false | false | false | |
my | true | true | true | false | false | false | false | false | |
nb | true | true | true | false | false | false | false | false | |
nl | true | true | true | false | false | false | false | false | |
or | true | true | true | false | false | false | false | false | |
pa-Guru | true | true | true | false | false | false | false | false | |
pl | true | true | true | false | false | false | false | false | |
pt | true | true | true | true | true | true | true | true | |
ro | true | true | true | false | false | false | false | false | |
ru | true | true | true | true | true | true | true | false | |
si | true | true | true | false | false | false | false | false | |
sk | true | true | true | false | false | false | false | false | |
sv | true | true | true | false | false | false | false | false | |
ta | true | true | true | false | false | false | false | false | |
te | true | true | true | false | false | false | false | false | |
th | true | true | true | false | false | false | false | false | |
tr | true | true | true | true | true | true | true | false | |
uk | true | true | true | false | false | false | false | false | |
ur | true | true | true | false | false | false | false | false | |
vi | true | true | true | false | false | false | false | false | |
zh-Hans | true | true | true | false | false | false | false | false | |
zh-Hant | true | true | true | false | false | false | false | false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
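bcp47 | Language | Script | TokenType | LexicalClass | NameType | NameTypeOrLexicalClass | Lemma | Sentiment | |
---|---|---|---|---|---|---|---|---|---|
am | true | true | true | false | false | false | false | false | |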
ar | true | true | true | false | false | false | false | false | |
bg | true | true | true | false | false | false | false | false | |
bn | true | true | true | false | false | false | false | false | |
bo | true | true | true | false | false | false | false | false | |
ca | true | true | true | false | false | false | false | false | |
chr | true | true | true | false | false | false | false | false | |
cs | true | true | true | false | false | false | false | false | |
da | true | true | true | false | false | false | false | false | |
de | true | true | true | true | true | true | true | true | |
el | true | true | true | false | false | false | false | false | |
en | true | true | true | true | true | true | true | true | |
es | true | true | true | true | true | true | true | true | |
fa | true | true | true | false | false | false | false | false | |
fi | true | true | true | false | false | false | false | false | |
fr | true | true | true | true | true | true | true | true | |
gu | true | true | true | false | false | false | false | false | |
he | true | true | true | false | false | false | false | false | |
hi | true | true | true | false | false | false | false | false | |
hr | true | true | true | false | false | false | false | false | |
hu | true | true | true | false | false | false | false | false | |
hy | true | true | true | false | false | false | false | false | |
id | true | true | true | false | false | false | false | false | |
is | true | true | true | false | false | false | false | false | |
it | true | true | true | true | true | true | true | true | |
ja | true | true | true | false | false | false | false | false | |
ka | true | true | true | false | false | false | false | false | |
km | true | true | true | false | false | false | false | false | |
kn | true | true | true | false | false | false | false | false | |
ko | true | true | true | false | false | false | false | false | |
lo | true | true | true | false | false | false | false | false | |
ml | true | true | true | false | false | false | false | false | |
mn-Mong | true | true | true | false | false | false | false | false | |
mr | true | true | true | false | false | false | false | false | |
ms | true | true | true | false | false | false | false | false | |
my | true | true | true | false | false | false | false | false | |
nb | true | true | true | false | false | false | false | false | |
nl | true | true | true | false | false | false | false | false | |
or | true | true | true | false | false | false | false | false | |
pa-Guru | true | true | true | false | false | false | false | false | |
pl | true | true | true | false | false | false | false | false | |
pt | true | true | true | true | true | true | true | true | |
ro | true | true | true | false | false | false | false | false | |
ru | true | true | true | true | true | true | true | false | |
si | true | true | true | false | false | false | false | false | |
sk | true | true | true | false | false | false | false | false | |
sv | true | true | true | false | false | false | false | false | |
ta | true | true | true | false | false | false | false | false | |
te | true | true | true | false | false | false | false | false | |
th | true | true | true | false | false | false | false | false | |
tr | true | true | true | true | true | true | true | false | |
uk | true | true | true | false | false | false | false | false | |
ur | true | true | true | false | false | false | false | false | |
vi | true | true | true | false | false | false | false | false | |
zh-Hans | true | true | true | false | false | false | false | false | |
zh-Hant | true | true | true | false | false | false | false | false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bcp47 | Language | Script | TokenType | LexicalClass | NameType | NameTypeOrLexicalClass | Lemma | Sentiment | |
---|---|---|---|---|---|---|---|---|---|
am | true | true | true | false | false | false | false | false | |
ar | true | true | true | false | false | false | false | false | |
bg | true | true | true | false | false | false | false | false | |
bn | true | true | true | false | false | false | false | false | |
bo | true | true | true | false | false | false | false | false | |
ca | true | true | true | false | false | false | false | false | |
chr | true | true | true | false | false | false | false | false | |
cs | true | true | true | false | false | false | false | false | |
da | true | true | true | false | false | false | false | false | |
de | true | true | true | false | false | false | false | false | |
el | true | true | true | false | false | false | false | false | |
en | true | true | true | true | true | true | false | false | |
es | true | true | true | false | false | false | false | false | |
fa | true | true | true | false | false | false | false | false | |
fi | true | true | true | false | false | false | false | false | |
fr | true | true | true | false | false | false | false | false | |
gu | true | true | true | false | false | false | false | false | |
he | true | true | true | false | false | false | false | false | |
hi | true | true | true | false | false | false | false | false | |
hr | true | true | true | false | false | false | false | false | |
hu | true | true | true | false | false | false | false | false | |
hy | true | true | true | false | false | false | false | false | |
id | true | true | true | false | false | false | false | false | |
is | true | true | true | false | false | false | false | false | |
it | true | true | true | false | false | false | false | false | |
ja | true | true | true | false | false | false | false | false | |
ka | true | true | true | false | false | false | false | false | |
km | true | true | true | false | false | false | false | false | |
kn | true | true | true | false | false | false | false | false | |
ko | true | true | true | false | false | false | false | false | |
lo | true | true | true | false | false | false | false | false | |
ml | true | true | true | false | false | false | false | false | |
mn-Mong | true | true | true | false | false | false | false | false | |
mr | true | true | true | false | false | false | false | false | |
ms | true | true | true | false | false | false | false | false | |
my | true | true | true | false | false | false | false | false | |
nb | true | true | true | false | false | false | false | false | |
nl | true | true | true | false | false | false | false | false | |
or | true | true | true | false | false | false | false | false | |
pa-Guru | true | true | true | false | false | false | false | false | |
pl | true | true | true | false | false | false | false | false | |
pt | true | true | true | false | false | false | false | false | |
ro | true | true | true | false | false | false | false | false | |
ru | true | true | true | false | false | false | false | false | |
si | true | true | true | false | false | false | false | false | |
sk | true | true | true | false | false | false | false | false | |
sv | true | true | true | false | false | false | false | false | |
ta | true | true | true | false | false | false | false | false | |
te | true | true | true | false | false | false | false | false | |
th | true | true | true | false | false | false | false | false | |
tr | true | true | true | false | false | false | false | false | |
uk | true | true | true | false | false | false | false | false | |
ur | true | true | true | false | false | false | false | false | |
vi | true | true | true | false | false | false | false | false | |
zh-Hans | true | true | true | false | false | false | false | false | |
zh-Hant | true | true | true | false | false | false | false | false |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment