diff --git a/examples/featurize.py b/examples/featurize.py index adfa781a..328c7e7e 100644 --- a/examples/featurize.py +++ b/examples/featurize.py @@ -41,7 +41,6 @@ tiny_juries_feature_builder = FeatureBuilder( input_df = tiny_juries_df, grouping_keys = ["batch_num", "round_num"], - vector_directory = "./vector_data/", output_file_base = "jury_TINY_output", # Naming output files using the output_file_base parameter (recommended) turns = False, custom_features = [ @@ -56,7 +55,6 @@ tiny_multi_task_feature_builder = FeatureBuilder( input_df = tiny_multi_task_df, conversation_id_col = "stageId", - vector_directory = "./vector_data/", # alternatively, you can name each output file separately. NOTE, however, that we don't directly use this path; # we modify the path to place outputs within the `output/chat`, `output/conv`, and `output/user` folders. output_file_path_chat_level = "./multi_task_TINY_output_chat_level_stageId_cumulative.csv", diff --git a/pyproject.toml b/pyproject.toml index e10f2581..9461e0a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "team_comm_tools" -version = "0.1.4.post1" +version = "0.1.4.post2" requires-python = ">= 3.10" dependencies = [ "chardet>=3.0.4", diff --git a/src/team_comm_tools/feature_builder.py b/src/team_comm_tools/feature_builder.py index cb3500d9..bd600064 100644 --- a/src/team_comm_tools/feature_builder.py +++ b/src/team_comm_tools/feature_builder.py @@ -99,14 +99,14 @@ class FeatureBuilder: def __init__( self, input_df: pd.DataFrame, - vector_directory: "./vector_data/", - output_file_base = "output", - output_file_path_chat_level = None, - output_file_path_user_level = None, - output_file_path_conv_level = None, + vector_directory: str = "./vector_data/", + output_file_base: str = "output", + output_file_path_chat_level: str = None, + output_file_path_user_level: str = None, + output_file_path_conv_level: str = None, custom_features: list = [], analyze_first_pct: list = [1.0], - turns: bool=False, + turns: bool = False, conversation_id_col: str = "conversation_num", speaker_id_col: str = "speaker_nickname", message_col: str = "message", diff --git a/tests/data/cleaned_data/test_chat_level.csv b/tests/data/cleaned_data/test_chat_level.csv index e4c82b35..b1ea3374 100644 --- a/tests/data/cleaned_data/test_chat_level.csv +++ b/tests/data/cleaned_data/test_chat_level.csv @@ -678,4 +678,16 @@ I respond to that too",num_block_quote_responses,2.0 0,0,maybe I guess possibly sort of a little,hedge_words_lexical_wordcount,5 0,0,a little possibly I think sort of probably,hedge_words_lexical_wordcount,5 0,0,probably sort of,hedge_words_lexical_wordcount,2 -0,0,I think,hedge_words_lexical_wordcount,1 \ No newline at end of file +0,0,I think,hedge_words_lexical_wordcount,1 +0,0,I love dogs,positive_bert,0.932 +0,0,I love dogs!!!,positive_bert,0.980 +0,0,I love dogs,negative_bert,0.012 +0,0,I love dogs!!!,negative_bert,0.004 +0,0,This is relatively neutral,neutral_bert,0.470 +0,0,This is relatively neutral,positive_bert,0.505 +0,0,This is relatively neutral...,neutral_bert,0.492 +0,0,What the heck is going on,negative_bert,0.541 +0,0,What the heck is going on??,negative_bert,0.730 +0,0,!!!,positive_bert,0.917 +0,0,.,neutral_bert,0.577 +0,0,???,neutral_bert,0.713 \ No newline at end of file diff --git a/tests/run_tests.py b/tests/run_tests.py index aadd767c..ec0aabc0 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -43,7 +43,8 @@ "Forward Flow", "Discursive Diversity" ], - turns = False + turns = False, + regenerate_vectors=True ) testing_chat.featurize()