forked from microsoft/rag-experiment-accelerator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.sample.json
64 lines (64 loc) · 1.87 KB
/
config.sample.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{
"index_name_prefix": "idx",
"experiment_name": "",
"job_name": "",
"job_description": "",
"preprocess": false,
"chunking": {
"chunk_size": [1000],
"overlap_size": [200],
"generate_title": false,
"generate_summary": false,
"override_content_with_summary": false
},
"embedding_models": [
{
"type": "sentence-transformer",
"model_name": "all-mpnet-base-v2"
}
],
"ef_construction": [400],
"ef_search": [400],
"language": {
"analyzers": {
"analyzer_name": "en.microsoft",
"index_analyzer_name": "",
"search_analyzer_name": "",
"char_filters": [],
"tokenizers": [],
"token_filters": []
},
"query_language": "en-us"
},
"rerank": true,
"rerank_type": "crossencoder",
"llm_re_rank_threshold": 3,
"cross_encoder_at_k": 4,
"crossencoder_model": "cross-encoder/stsb-roberta-base",
"search_types": [
"search_for_manual_hybrid",
"search_for_match_Hybrid_multi",
"search_for_match_semantic"
],
"retrieve_num_of_documents": 5,
"metric_types": [
"fuzzy",
"bert_all_MiniLM_L6_v2",
"cosine",
"bert_distilbert_base_nli_stsb_mean_tokens",
"llm_answer_relevance",
"llm_context_precision"
],
"azure_oai_chat_deployment_name": "gpt-35-turbo",
"azure_oai_eval_deployment_name": "gpt-35-turbo",
"openai_temperature": 0,
"search_relevancy_threshold": 0.8,
"data_formats": "all",
"eval_data_jsonl_file_path": "./artifacts/eval_data.jsonl",
"chunking_strategy": "basic",
"chain_of_thoughts": true,
"hyde": "disabled",
"query_expansion": false,
"min_query_expansion_related_question_similarity_score": 90,
"azure_document_intelligence_model": "prebuilt-read"
}