From badd06bfe0a7b0565da50e7960e1801394c83506 Mon Sep 17 00:00:00 2001 From: SumukhSKashyap Date: Tue, 14 May 2024 14:26:28 +0000 Subject: [PATCH] ntrial2 --- .../language_identification_stopwords.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/language-identification-stopwords/language_identification_stopwords.py b/language-identification-stopwords/language_identification_stopwords.py index f9fe535..30f6561 100644 --- a/language-identification-stopwords/language_identification_stopwords.py +++ b/language-identification-stopwords/language_identification_stopwords.py @@ -21,6 +21,10 @@ # Define language IDs lang_ids = ["af", "az", "bg", "cs", "da", "de", "el", "en", "es", "fi", "fr", "hr", "it", "ko", "nl", "no", "pl", "ru", "ur", "zh"] + # Check if output directory exists, create if not + output_directory = get_output_directory(str(Path(__file__).parent)) + Path(output_directory).mkdir(parents=True, exist_ok=True) + # Convert text data into character n-grams vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 3)) X = vectorizer.fit_transform(text_validation['text']) @@ -35,8 +39,11 @@ # Create DataFrame for predictions prediction_df = pd.DataFrame({'lang': prediction, 'id': text_validation['id']}) - # saving the prediction - output_directory = get_output_directory(str(Path(__file__).parent)) - prediction_df.to_json( - Path(output_directory) / "predictions.jsonl", orient="records", lines=True - ) + try: + # saving the prediction + prediction_df.to_json( + Path(output_directory) / "predictions.jsonl", orient="records", lines=True + ) + print("Predictions saved successfully.") + except Exception as e: + print(f"Error occurred while saving predictions: {e}")