Skip to content

Commit

Permalink
added grid search to pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
ayarotskyi committed Jul 1, 2024
1 parent 25071bd commit 40ee264
Showing 1 changed file with 35 additions and 7 deletions.
42 changes: 35 additions & 7 deletions baseline-retrieval-system/tira-retrieval-system.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,40 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"ir-lab-sose-2024/ir-acl-anthology-20240504-training documents: 0%| | 0/126958 [00:00<?, ?it/s]/tmp/ipykernel_1917/3762444819.py:15: DeprecationWarning: specifying meta and meta_lengths in IterDictIndexer.index() is deprecated, use constructor instead\n",
" index_ref = indexer.index(pt_dataset.get_corpus_iter(),\n",
"ir-lab-sose-2024/ir-acl-anthology-20240504-training documents: 71%|███████ | 90188/126958 [00:20<00:05, 6715.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10:52:09.990 [ForkJoinPool-1-worker-3] WARN org.terrier.structures.indexing.Indexer - Adding an empty document to the index (2020.mir_conference-2020.1) - further warnings are suppressed\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"ir-lab-sose-2024/ir-acl-anthology-20240504-training documents: 100%|██████████| 126958/126958 [00:25<00:00, 4903.30it/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10:52:22.976 [ForkJoinPool-1-worker-3] WARN org.terrier.structures.indexing.Indexer - Indexed 3 empty documents\n"
]
}
],
"source": [
"# The dataset: the union of the IR Anthology and the ACL Anthology\n",
"# This line creates an IRDSDataset object and registers it under the name provided as an argument.\n",
Expand Down Expand Up @@ -198,8 +229,8 @@
"df_sorted = df.sort_values('nDCG@10', ascending=False)\n",
"print(df_sorted)\n",
"best_configuration = df_sorted['run'].values[0]\n",
"best_b = 0.1\n",
"best_k_1 = 1.9"
"best_b = best_configuration.split(\"-\")[1].split(\"=\")[1]\n",
"best_k_1 = best_configuration.split(\"-\")[2].split(\"=\")[1]"
]
},
{
Expand All @@ -208,9 +239,6 @@
"metadata": {},
"outputs": [],
"source": [
"best_b = 0.1\n",
"best_k_1 = 1.9\n",
"\n",
"bm25 = pt.BatchRetrieve(index, wmodel=\"BM25\", controls= {\"bm25.b\" : best_b, \"bm25.k_1\": best_k_1})\n",
"sdm = pt.rewrite.SDM()"
]
Expand Down

0 comments on commit 40ee264

Please sign in to comment.