-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.yaml
83 lines (73 loc) · 2.17 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Logging verbosity for the application.
# NOTE(review): presumably a standard level name (DEBUG, INFO, ...) - confirm
# against the code that reads this key.
LOGGING_LEVEL: "DEBUG"
# Anki deck name
DECK_NAME: "FluentAI"
# Weights applied when selecting the best mnemonic match.
# Each value is a fraction of full weight: 1 = 100%.
# The four entries are nested under WEIGHTS; left flat, WEIGHTS would be null
# and IMAGEABILITY would collide with the top-level IMAGEABILITY section.
WEIGHTS:
  PHONETIC: 1
  ORTHOGRAPHIC: 0.66
  SEMANTIC: 1
  IMAGEABILITY: 1
# Number of words to consider for each similarity metric
# (the same limit is applied per metric).
WORD_LIMIT: 1000
# Grapheme-to-phoneme (G2P) model and its tokenizer.
# NOTE(review): the values look like Hugging Face Hub repo ids - confirm
# against the loader.
G2P:
  MODEL: "charsiu/g2p_multilingual_byT5_small_100"
  TOKENIZER: "google/byt5-small"
# Language model settings.
LLM:
  MODEL: "microsoft/Phi-3-mini-4k-instruct"
  TOKENIZER: "microsoft/Phi-3-mini-4k-instruct"
  # NOTE(review): presumably whether the model is released after use and
  # whether it is offloaded to reduce accelerator memory - confirm in the
  # code that loads the model.
  DELETE_AFTER_USE: false
  OFFLOAD: true
# Image generation settings.
IMAGE_GEN:
  # Model choices by size; the VRAM figures are the original author's notes.
  SMALL_MODEL: "stabilityai/stable-diffusion-2"
  MEDIUM_MODEL: "stabilityai/sdxl-turbo"  # 6GB vram
  LARGE_MODEL: "Efficient-Large-Model/Sana_600M_512px_diffusers"  # 9GB vram
  OUTPUT_DIR: "imagine/generated-img"
  DELETE_AFTER_USE: false
  OFFLOAD: true
  # NOTE(review): the lowercase keys look like keyword arguments forwarded to
  # the generation call - confirm against the consumer.
  PARAMS:
    num_inference_steps: 40
    height: 512
    width: 512
# Semantic similarity
SEMANTIC_SIM:
  MODEL: "dunzhang/stella_en_400M_v5"
  # Evaluation setup: the dataset and the list of models under EVAL.
  EVAL:
    DATASET: "StephanAkkerman/semantic-similarity"
    MODELS:
      - "fasttext"
      - "jinaai/jina-embeddings-v3"
      - "intfloat/multilingual-e5-small"
      - "paraphrase-multilingual-MiniLM-L12-v2"
      - "dunzhang/stella_en_400M_v5"  # requires einops xformers (and optionally triton)
      - "dunzhang/stella_en_1.5B_v5"  # very tiny performance increase compared to 400M
# Imageability settings, split into embeddings, predictor and predictions.
# NOTE(review): nesting depth was reconstructed; each EVAL/REPO/FILE is placed
# under the sub-section that precedes it so that no key is duplicated within a
# mapping - confirm against the code that reads this section.
IMAGEABILITY:
  EMBEDDINGS:
    # Model is also used to get the right embeddings dataset in the evaluation
    MODEL: "paraphrase-multilingual-MiniLM-L12-v2"
    REPO: "StephanAkkerman/imageability-embeddings"
    EVAL:
      DATASET: "StephanAkkerman/imageability"
      MODELS:
        - "fasttext"
        - "paraphrase-multilingual-MiniLM-L12-v2"
  PREDICTOR:
    REPO: "StephanAkkerman/imageability-predictor"
    FILE: "lightgbm-fasttext.joblib"
    EVAL:
      DATASET: "StephanAkkerman/imageability"
      MODELS:  # TODO
        - "OLS"
  PREDICTIONS:
    REPO: "StephanAkkerman/imageability-predictions"
    FILE: "predictions.csv"
# Phonetic similarity settings.
# IPA and EMBEDDINGS are separate sub-mappings because each has its own REPO.
PHONETIC_SIM:
  # Evaluation dataset identifier.
  EVAL: "StephanAkkerman/english-words-human-similarity"
  IPA:
    REPO: "StephanAkkerman/english-words-IPA"
    FILE: "en_US_filtered.csv"
  EMBEDDINGS:
    METHOD: "panphon"
    REPO: "StephanAkkerman/english-words-IPA-embeddings"
# Orthographic similarity settings.
ORTHOGRAPHIC_SIM:
  # Evaluation dataset identifier.
  EVAL: "StephanAkkerman/orthographic-similarity-ratings"