From 367bb6d2ad32f19bbefca1c1ebd27582c208ea19 Mon Sep 17 00:00:00 2001 From: Basel Mousi Date: Tue, 22 Aug 2023 15:04:23 +0300 Subject: [PATCH 1/3] modified machine translation assets --- .../MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py | 24 +++++++++++-------- .../MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py | 23 ++++++++++-------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py index 486bb51c..40b7bd28 100644 --- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py +++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py @@ -43,13 +43,13 @@ def config(): "madar.test.nil.0.eg", "madar.test.nil.0.sd", "madar.test.nil.1.eg", - "madar.test.nil.2.eg", - "summa-2M.test.mgr.0.ma", - "summa-AJ.test.msa.0.ms", - "summa-BBC.test.msa.0.ms", - "summa-LBC.test.lev.0.lb", - "summa-Oman.test.glf.0.om", - ] + "madar.test.nil.2.eg"] + # "summa-2M.test.mgr.0.ma", + # "summa-AJ.test.msa.0.ms", + # "summa-BBC.test.msa.0.ms", + # "summa-LBC.test.lev.0.lb", + # "summa-Oman.test.glf.0.om", + # ] configs = [] for testset in sets: configs.append( @@ -84,14 +84,18 @@ def prompt(input_sample): return [ { "role": "system", - "content": "You are an AI assistant that helps people find information.", + "content": "You are an expert translator specialized in translating texts from Arabic to English. You are concise as you only output the translation of the text without any illustrations or extra details", }, { "role": "user", - "content": f"Translate the following to English, output only the translation:\n {input_sample}", + "content": f"Translate the following text to English.\nText: {input_sample}\nTranslation: ", }, ] def post_process(response): - return response["choices"][0]["message"]["content"] + response = response["choices"][0]["message"]["content"] + response = response.replace('"', "") + response = response.strip() + return response + diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py index 245c9b46..a58ea9cd 100644 --- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py +++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py @@ -43,13 +43,13 @@ def config(): "madar.test.nil.0.eg", "madar.test.nil.0.sd", "madar.test.nil.1.eg", - "madar.test.nil.2.eg", - "summa-2M.test.mgr.0.ma", - "summa-AJ.test.msa.0.ms", - "summa-BBC.test.msa.0.ms", - "summa-LBC.test.lev.0.lb", - "summa-Oman.test.glf.0.om", - ] + "madar.test.nil.2.eg"] + #"summa-2M.test.mgr.0.ma", + #"summa-AJ.test.msa.0.ms", + #"summa-BBC.test.msa.0.ms", + #"summa-LBC.test.lev.0.lb", + #"summa-Oman.test.glf.0.om", + configs = [] for testset in sets: configs.append( @@ -82,15 +82,18 @@ def config(): def prompt(input_sample): return { - "system_message": "You are an AI assistant that helps people find information.", + "system_message": "You are an expert translator specialized in translating texts from Arabic to English. You are concise as you only output the translation of the text without any illustrations or extra details", "messages": [ { "sender": "user", - "text": f"Translate the following to English, output only the translation:\n {input_sample}", + "text": f"Translate the following text to English.\nText: {input_sample}\nTranslation: ", } ], } def post_process(response): - return response["choices"][0]["text"] + response = response["choices"][0]["text"] + response = response.replace('"', "") + response = response.strip() + return response From 4e123dc3466b58b58caac0269d45e366df10e221 Mon Sep 17 00:00:00 2001 From: Basel Mousi Date: Sun, 27 Aug 2023 15:16:28 +0300 Subject: [PATCH 2/3] removed private data files --- .../benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py | 6 ------ .../benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py | 7 +------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py index 40b7bd28..9cca70ae 100644 --- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py +++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py @@ -44,12 +44,6 @@ def config(): "madar.test.nil.0.sd", "madar.test.nil.1.eg", "madar.test.nil.2.eg"] - # "summa-2M.test.mgr.0.ma", - # "summa-AJ.test.msa.0.ms", - # "summa-BBC.test.msa.0.ms", - # "summa-LBC.test.lev.0.lb", - # "summa-Oman.test.glf.0.om", - # ] configs = [] for testset in sets: configs.append( diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py index a58ea9cd..b992f28b 100644 --- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py +++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py @@ -44,11 +44,6 @@ def config(): "madar.test.nil.0.sd", "madar.test.nil.1.eg", "madar.test.nil.2.eg"] - #"summa-2M.test.mgr.0.ma", - #"summa-AJ.test.msa.0.ms", - #"summa-BBC.test.msa.0.ms", - #"summa-LBC.test.lev.0.lb", - #"summa-Oman.test.glf.0.om", configs = [] for testset in sets: @@ -96,4 +91,4 @@ def post_process(response): response = response["choices"][0]["text"] response = response.replace('"', "") response = response.strip() - return response + return response \ No newline at end of file From c219f15e28e378aae9996f91a599b6bfb84494a2 Mon Sep 17 00:00:00 2001 From: Fahim Imaduddin Dalvi Date: Sun, 27 Aug 2023 16:42:23 +0300 Subject: [PATCH 3/3] Format code --- .../MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py | 4 ++-- .../benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py index 5724562b..1e2e343f 100644 --- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py +++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT4_ZeroShot.py @@ -43,7 +43,8 @@ def config(): "madar.test.nil.0.eg", "madar.test.nil.0.sd", "madar.test.nil.1.eg", - "madar.test.nil.2.eg"] + "madar.test.nil.2.eg", + ] configs = [] for testset in sets: configs.append( @@ -92,4 +93,3 @@ def post_process(response): response = response.replace('"', "") response = response.strip() return response - diff --git a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py index 89bcb8d2..2631e402 100644 --- a/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py +++ b/assets/benchmark_v1/MT/AraBench_Ara2Eng_ChatGPT_ZeroShot.py @@ -43,7 +43,8 @@ def config(): "madar.test.nil.0.eg", "madar.test.nil.0.sd", "madar.test.nil.1.eg", - "madar.test.nil.2.eg"] + "madar.test.nil.2.eg", + ] configs = [] for testset in sets: @@ -88,7 +89,7 @@ def prompt(input_sample): def post_process(response): - response = response["choices"][0]["text"] - response = response.replace('"', "") + response = response["choices"][0]["text"] + response = response.replace('"', "") response = response.strip() - return response \ No newline at end of file + return response