-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathes.cfg
executable file
·129 lines (112 loc) · 4.13 KB
/
es.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
##
#### Default configuration file for the Spanish analyzer
##
## File paths in this file are prefixed with $FREELINGSHARE.
## NOTE(review): presumably expanded by the program reading this file to
## the directory where the language data is installed -- confirm.
##
#### General options
# Language handled by this configuration (es) and locale setting
Lang=es
Locale=default
### Tagset description file, used by different modules
TagsetFile=$FREELINGSHARE/es/tagset.dat
#### Trace options. Only effective if we have compiled with -DVERBOSE
#
## Possible values for TraceModule (may be OR'ed).
## The "#define" lines below start with '#', so they are comments in this
## file; they only list the bit mask associated with each module.
#define SPLIT_TRACE 0x00000001
#define TOKEN_TRACE 0x00000002
#define MACO_TRACE 0x00000004
#define OPTIONS_TRACE 0x00000008
#define NUMBERS_TRACE 0x00000010
#define DATES_TRACE 0x00000020
#define PUNCT_TRACE 0x00000040
#define DICT_TRACE 0x00000080
#define SUFF_TRACE 0x00000100
#define LOCUT_TRACE 0x00000200
#define NP_TRACE 0x00000400
#define PROB_TRACE 0x00000800
#define QUANT_TRACE 0x00001000
#define NEC_TRACE 0x00002000
#define AUTOMAT_TRACE 0x00004000
#define TAGGER_TRACE 0x00008000
#define HMM_TRACE 0x00010000
#define RELAX_TRACE 0x00020000
#define RELAX_TAGGER_TRACE 0x00040000
#define CONST_GRAMMAR_TRACE 0x00080000
#define SENSES_TRACE 0x00100000
#define CHART_TRACE 0x00200000
#define GRAMMAR_TRACE 0x00400000
#define DEP_TRACE 0x00800000
#define UTIL_TRACE 0x01000000
## TraceModule=0x0000 has none of the module bits above set.
## Example: 0x00000003 would combine SPLIT_TRACE and TOKEN_TRACE.
TraceLevel=3
TraceModule=0x0000
## Options to control the applied modules. The input may be partially
## processed, or a full analysis may not be wanted. The specific
## formats are a choice of the main program using the library, as is
## the responsibility of calling only the required modules.
## Valid input/output formats are: plain, token, splitted, morfo, tagged, parsed
## NOTE(review): the list above names "plain", but InputLevel below is set
## to "text" -- confirm the accepted level names against the FreeLing manual.
InputLevel=text
OutputLevel=tagged
# Whether to consider each newline as a sentence end
AlwaysFlush=no
#### Tokenizer options
# Data file for the tokenizer
TokenizerFile=$FREELINGSHARE/es/tokenizer.dat
#### Splitter options
# Data file for the splitter
SplitterFile=$FREELINGSHARE/es/splitter.dat
#### Morfo options
# Switches (yes/no) for the individual analysis/detection steps
AffixAnalysis=yes
CompoundAnalysis=yes
MultiwordsDetection=yes
NumbersDetection=yes
PunctuationDetection=yes
DatesDetection=yes
QuantitiesDetection=yes
DictionarySearch=yes
ProbabilityAssignment=yes
# Number format: comma as decimal separator, period as thousands separator
DecimalPoint=,
ThousandPoint=.
# Data files; all are Spanish-specific (es/) except the punctuation file,
# which is taken from common/
LocutionsFile=$FREELINGSHARE/es/locucions.dat
QuantitiesFile=$FREELINGSHARE/es/quantities.dat
AffixFile=$FREELINGSHARE/es/afixos.dat
CompoundFile=$FREELINGSHARE/es/compounds.dat
ProbabilityFile=$FREELINGSHARE/es/probabilitats.dat
DictionaryFile=$FREELINGSHARE/es/dicc.src
PunctuationFile=$FREELINGSHARE/common/punct.dat
# NOTE(review): presumably a cut-off used by the probability assignment
# step -- confirm exact meaning in the FreeLing manual.
ProbabilityThreshold=0.001
# NER (named entity recognition) options
NERecognition=yes
NPDataFile=$FREELINGSHARE/es/np.dat
## Comment out the line above and uncomment one of those below if you want
## a better NE recognizer (higher accuracy, lower speed)
#NPDataFile=$FREELINGSHARE/es/nerc/ner/ner-ab-poor1.dat
#NPDataFile=$FREELINGSHARE/es/nerc/ner/ner-ab-rich.dat
# "rich" model is trained with a rich gazetteer. It offers higher accuracy but
# requires adapting the gazetteer files to have high coverage on the target corpus.
# "poor1" model is trained with a poor gazetteer. Its accuracy is slightly lower,
# but it suffers only a small accuracy loss if the gazetteer has low coverage
# in the target corpus.
# If in doubt, use the "poor1" model.
## Phonetic encoding of words (currently disabled: Phonetics=no)
Phonetics=no
PhoneticsFile=$FREELINGSHARE/es/phonetics.dat
## NEC (named entity classification) options. See README in common/nec
## Currently disabled (NEClassification=no).
NEClassification=no
NECFile=$FREELINGSHARE/es/nerc/nec/nec-ab-poor1.dat
## Alternative "rich" model file, left commented out
#NECFile=$FREELINGSHARE/es/nerc/nec/nec-ab-rich.dat
## Sense annotation options (none,all,mfs,ukb); currently set to "none"
SenseAnnotation=none
SenseConfigFile=$FREELINGSHARE/es/senses.dat
UKBConfigFile=$FREELINGSHARE/es/ukb.dat
#### Tagger options
# Selected tagger. Settings are provided below for both an HMM tagger
# (TaggerHMMFile) and a relax tagger (TaggerRelax*).
# NOTE(review): presumably only the settings of the tagger selected here
# are used at run time -- confirm.
Tagger=hmm
TaggerHMMFile=$FREELINGSHARE/es/tagger.dat
TaggerRelaxFile=$FREELINGSHARE/es/constr_gram-B.dat
TaggerRelaxMaxIter=500
TaggerRelaxScaleFactor=670.0
TaggerRelaxEpsilon=0.001
TaggerRetokenize=yes
TaggerForceSelect=tagger
#### Parser options
GrammarFile=$FREELINGSHARE/es/chunker/grammar-chunk.dat
#### Dependency parser options
# Selected dependency parser; data files are given for both txala and treeler
DependencyParser=txala
DepTxalaFile=$FREELINGSHARE/es/dep_txala/dependences.dat
DepTreelerFile=$FREELINGSHARE/es/dep_treeler/dependences.dat
#### Coreference solver options
CorefFile=$FREELINGSHARE/es/coref/relaxcor/relaxcor.dat
#### Semantic graph extractor options
SemGraphExtractorFile=$FREELINGSHARE/es/semgraph/semgraph-SRL.dat