diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index 768b4a2e..8993482e 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/examples.doctree b/docs/build/doctrees/examples.doctree index 68951273..06563bc8 100644 Binary files a/docs/build/doctrees/examples.doctree and b/docs/build/doctrees/examples.doctree differ diff --git a/docs/build/doctrees/feature_builder.doctree b/docs/build/doctrees/feature_builder.doctree index 66243b7b..d1a34a70 100644 Binary files a/docs/build/doctrees/feature_builder.doctree and b/docs/build/doctrees/feature_builder.doctree differ diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree index 9cddc1f9..d3f131a1 100644 Binary files a/docs/build/doctrees/index.doctree and b/docs/build/doctrees/index.doctree differ diff --git a/docs/build/doctrees/utils/check_embeddings.doctree b/docs/build/doctrees/utils/check_embeddings.doctree index 6441dca4..76a641b7 100644 Binary files a/docs/build/doctrees/utils/check_embeddings.doctree and b/docs/build/doctrees/utils/check_embeddings.doctree differ diff --git a/docs/build/html/_sources/examples.rst.txt b/docs/build/html/_sources/examples.rst.txt index 8d4e89e1..b65bc187 100644 --- a/docs/build/html/_sources/examples.rst.txt +++ b/docs/build/html/_sources/examples.rst.txt @@ -90,7 +90,7 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de output_file_path_conv_level = "./jury_output_conversation_level.csv", turns = True ) - jury_feature_builder.featurize(col="message") + jury_feature_builder.featurize() Basic Input Columns ^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt index e3a9e994..f17dbd8b 100644 --- a/docs/build/html/_sources/index.rst.txt +++ b/docs/build/html/_sources/index.rst.txt @@ -76,7 +76,7 @@ Once you import the tool, you will be able to declare a FeatureBuilder object, w ) # this line of code runs the FeatureBuilder on your data - my_feature_builder.featurize(col="message") + my_feature_builder.featurize() Use the Table of Contents below to learn more about our tool. We recommend that you begin in the "Introduction" section, then explore other sections of the documentation as they become relevant to you. We recommend reading :ref:`basics` for a high-level overview of the requirements and parameters, and then reading through the :ref:`examples` for a detailed walkthrough and discussion of considerations. diff --git a/docs/build/html/examples.html b/docs/build/html/examples.html index 87206ae3..82a0c604 100644 --- a/docs/build/html/examples.html +++ b/docs/build/html/examples.html @@ -160,7 +160,7 @@

Configuring the FeatureBuilderoutput_file_path_conv_level = "./jury_output_conversation_level.csv", turns = True ) -jury_feature_builder.featurize(col="message") +jury_feature_builder.featurize()
diff --git a/docs/build/html/feature_builder.html b/docs/build/html/feature_builder.html index d74e306f..42dec2fe 100644 --- a/docs/build/html/feature_builder.html +++ b/docs/build/html/feature_builder.html @@ -174,22 +174,19 @@
-featurize(col: str = 'message') None
+featurize() None

Main driver function for feature generation.

This function creates chat-level features, generates features for different truncation percentages of the data if specified, and produces user-level and conversation-level features. Finally, the features are saved into the designated output files.

-
Parameters:
-

col (str, optional) – Column to preprocess, defaults to “message”

+
Returns:
+

None

-
Returns:
+
Return type:

None

-
Return type:
-

None

-
diff --git a/docs/build/html/index.html b/docs/build/html/index.html index 5353d11e..a0e113d7 100644 --- a/docs/build/html/index.html +++ b/docs/build/html/index.html @@ -139,7 +139,7 @@

Using the Package) # this line of code runs the FeatureBuilder on your data -my_feature_builder.featurize(col="message") +my_feature_builder.featurize()

Use the Table of Contents below to learn more about our tool. We recommend that you begin in the “Introduction” section, then explore other sections of the documentation as they become relevant to you. We recommend reading The Basics for a high-level overview of the requirements and parameters, and then reading through the Worked Example for a detailed walkthrough and discussion of considerations.

diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index a845cc9c..316e418f 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"A Light-Touch, One-Function Package": [[0, "a-light-touch-one-function-package"]], "Additional FeatureBuilder Considerations": [[1, "additional-featurebuilder-considerations"]], "Advanced Configuration Columns": [[1, "advanced-configuration-columns"]], "Basic Input Columns": [[1, "basic-input-columns"]], "Certainty": [[30, null]], "Citation": [[29, "citation"], [30, "citation"], [31, "citation"], [32, "citation"], [33, "citation"], [34, "citation"], [35, "citation"], [36, "citation"], [37, "citation"], [38, "citation"], [40, "citation"], [41, "citation"], [42, "citation"], [43, "citation"], [44, "citation"], [45, "citation"], [46, "citation"], [47, "citation"], [48, "citation"], [49, "citation"], [50, "citation"], [51, "citation"], [52, "citation"], [53, "citation"], [54, "citation"], [55, "citation"], [56, "citation"], [57, "citation"], [58, "citation"], [59, "citation"], [60, "citation"]], "Configuring the FeatureBuilder": [[1, "configuring-the-featurebuilder"]], "Content Word Accommodation": [[31, null]], "Contents:": [[61, null]], "Conversation-Level Features": [[11, "conversation-level-features"], [39, "conversation-level-features"]], "Conversational Repair": [[32, null]], "Customizable Parameters": [[0, "customizable-parameters"]], "Dale-Chall Score": [[33, null]], "Demo / Sample Code": [[0, "demo-sample-code"], [1, "demo-sample-code"]], "Discursive Diversity": [[34, null]], "Example:": [[41, "example"]], "FEATURE NAME": [[29, null]], "Feature Documentation": [[62, "feature-documentation"]], "Features: Conceptual Documentation": [[39, null]], "Features: Technical Documentation": [[11, null]], "Forward Flow": [[35, null]], "Function Word Accommodation": [[36, null]], "Generating Features: Utterance-, Speaker-, and Conversation-Level": [[62, "generating-features-utterance-speaker-and-conversation-level"]], "Getting Started": [[1, "getting-started"], [61, "getting-started"], [62, "getting-started"]], "Gini Coefficient": [[37, null]], "Hedge": [[38, null]], "High*Level Intuition": [[54, "high-level-intuition"]], "High-Level Intuition": [[29, "high-level-intuition"], [30, "high-level-intuition"], [31, "high-level-intuition"], [32, "high-level-intuition"], [33, "high-level-intuition"], [34, "high-level-intuition"], [35, "high-level-intuition"], [36, "high-level-intuition"], [37, "high-level-intuition"], [38, "high-level-intuition"], [40, "high-level-intuition"], [41, "high-level-intuition"], [42, "high-level-intuition"], [43, "high-level-intuition"], [44, "high-level-intuition"], [45, "high-level-intuition"], [46, "high-level-intuition"], [47, "high-level-intuition"], [48, "high-level-intuition"], [49, "high-level-intuition"], [50, "high-level-intuition"], [51, "high-level-intuition"], [52, "high-level-intuition"], [53, "high-level-intuition"], [55, "high-level-intuition"], [56, "high-level-intuition"], [57, "high-level-intuition"], [58, "high-level-intuition"], [59, "high-level-intuition"], [60, "high-level-intuition"]], "Implementation": [[32, "implementation"], [42, "implementation"], [52, "implementation"], [54, "implementation"]], "Implementation Basics": [[29, "implementation-basics"], [30, "implementation-basics"], [31, "implementation-basics"], [33, "implementation-basics"], [34, "implementation-basics"], [35, "implementation-basics"], [36, "implementation-basics"], [37, "implementation-basics"], [38, "implementation-basics"], [40, "implementation-basics"], [41, "implementation-basics"], [43, "implementation-basics"], [44, "implementation-basics"], [45, "implementation-basics"], [46, "implementation-basics"], [47, "implementation-basics"], [48, "implementation-basics"], [49, "implementation-basics"], [50, "implementation-basics"], [51, "implementation-basics"], [53, "implementation-basics"], [55, "implementation-basics"], [56, "implementation-basics"], [57, "implementation-basics"], [58, "implementation-basics"], [59, "implementation-basics"], [60, "implementation-basics"]], "Implementation Notes/Caveats": [[29, "implementation-notes-caveats"], [30, "implementation-notes-caveats"], [31, "implementation-notes-caveats"], [33, "implementation-notes-caveats"], [34, "implementation-notes-caveats"], [35, "implementation-notes-caveats"], [36, "implementation-notes-caveats"], [38, "implementation-notes-caveats"], [40, "implementation-notes-caveats"], [41, "implementation-notes-caveats"], [43, "implementation-notes-caveats"], [44, "implementation-notes-caveats"], [45, "implementation-notes-caveats"], [46, "implementation-notes-caveats"], [47, "implementation-notes-caveats"], [48, "implementation-notes-caveats"], [49, "implementation-notes-caveats"], [50, "implementation-notes-caveats"], [51, "implementation-notes-caveats"], [53, "implementation-notes-caveats"], [55, "implementation-notes-caveats"], [56, "implementation-notes-caveats"], [57, "implementation-notes-caveats"], [58, "implementation-notes-caveats"], [59, "implementation-notes-caveats"]], "Import Recommendations: Virtual Environment and Pip": [[1, "import-recommendations-virtual-environment-and-pip"], [61, "import-recommendations-virtual-environment-and-pip"]], "Importing the Package": [[1, "importing-the-package"]], "Indices and Tables": [[61, "indices-and-tables"]], "Information Diversity": [[40, null]], "Information Exchange": [[41, null]], "Input File": [[34, "id2"]], "Interpretation:": [[41, "interpretation"]], "Interpreting the Feature": [[29, "interpreting-the-feature"], [30, "interpreting-the-feature"], [31, "interpreting-the-feature"], [32, "interpreting-the-feature"], [33, "interpreting-the-feature"], [34, "interpreting-the-feature"], [35, "interpreting-the-feature"], [36, "interpreting-the-feature"], [37, "interpreting-the-feature"], [38, "interpreting-the-feature"], [40, "interpreting-the-feature"], [41, "interpreting-the-feature"], [42, "interpreting-the-feature"], [43, "interpreting-the-feature"], [44, "interpreting-the-feature"], [45, "interpreting-the-feature"], [46, "interpreting-the-feature"], [47, "interpreting-the-feature"], [48, "interpreting-the-feature"], [49, "interpreting-the-feature"], [50, "interpreting-the-feature"], [51, "interpreting-the-feature"], [52, "interpreting-the-feature"], [53, "interpreting-the-feature"], [54, "interpreting-the-feature"], [55, "interpreting-the-feature"], [56, "interpreting-the-feature"], [57, "interpreting-the-feature"], [58, "interpreting-the-feature"], [59, "interpreting-the-feature"], [60, "interpreting-the-feature"]], "Introduction": [[62, null]], "Key Assumptions and Parameters": [[0, "key-assumptions-and-parameters"]], "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons": [[42, null]], "Message Length": [[43, null]], "Message Quantity": [[44, null]], "Mimicry (BERT)": [[45, null]], "Motivation": [[62, "motivation"]], "Moving Mimicry": [[46, null]], "Named Entity Recognition": [[47, null]], "Named Entity Training Examples": [[47, "id2"]], "Online Discussion Tags": [[48, null]], "Other Utilities": [[69, "other-utilities"]], "Ouput File": [[34, "id3"]], "Our Team": [[62, "our-team"]], "Output File": [[30, "id2"], [35, "id2"], [45, "id2"], [46, "id2"], [47, "id3"], [51, "id1"]], "Package Assumptions": [[0, "package-assumptions"]], "Politeness Strategies": [[50, null]], "Politeness/Receptiveness Markers": [[49, null]], "Positivity Z-Score": [[52, null]], "Proportion of First Person Pronouns": [[53, null]], "Question (Naive)": [[54, null]], "Related Features": [[29, "related-features"], [30, "related-features"], [31, "related-features"], [32, "related-features"], [33, "related-features"], [34, "related-features"], [35, "related-features"], [36, "related-features"], [37, "related-features"], [38, "related-features"], [40, "related-features"], [41, "related-features"], [42, "related-features"], [43, "related-features"], [44, "related-features"], [45, "related-features"], [46, "related-features"], [47, "related-features"], [48, "related-features"], [49, "related-features"], [50, "related-features"], [51, "related-features"], [52, "related-features"], [53, "related-features"], [54, "related-features"], [55, "related-features"], [56, "related-features"], [57, "related-features"], [58, "related-features"], [59, "related-features"], [60, "related-features"]], "Sentiment (RoBERTa)": [[51, null]], "Speaker Turn Counts": [[59, "id2"]], "Speaker- (User) Level Features": [[11, "speaker-user-level-features"]], "Team Burstiness": [[55, null]], "Textblob Polarity": [[56, null]], "Textblob Subjectivity": [[57, null]], "The Basics": [[0, null]], "The FeatureBuilder": [[62, "the-featurebuilder"]], "The Team Communication Toolkit": [[61, null]], "Time Difference": [[58, null]], "Troubleshooting": [[1, "troubleshooting"], [61, "troubleshooting"]], "Turn Taking Index": [[59, null]], "Using the Package": [[61, "using-the-package"]], "Utilities": [[69, null]], "Utterance- (Chat) Level Features": [[11, "utterance-chat-level-features"], [39, "utterance-chat-level-features"]], "Walkthrough: Running the FeatureBuilder on Your Data": [[1, "walkthrough-running-the-featurebuilder-on-your-data"]], "Word Type-Token Ratio": [[60, null]], "Worked Example": [[1, null]], "assign_chunk_nums module": [[63, null]], "basic_features module": [[3, null]], "burstiness module": [[4, null]], "calculate_chat_level_features module": [[64, null]], "calculate_conversation_level_features module": [[65, null]], "calculate_user_level_features module": [[66, null]], "certainty module": [[5, null]], "check_embeddings module": [[67, null]], "discursive_diversity module": [[6, null]], "feature_builder module": [[2, null]], "fflow module": [[7, null]], "get_all_DD_features module": [[8, null]], "get_user_network module": [[9, null]], "gini_coefficient module": [[68, null]], "hedge module": [[10, null]], "info_exchange_zscore module": [[12, null]], "information_diversity module": [[13, null]], "lexical_features_v2 module": [[14, null]], "named_entity_recognition_features module": [[15, null]], "other_lexical_features module": [[16, null]], "politeness_features module": [[17, null]], "politeness_v2 module": [[18, null]], "politeness_v2_helper module": [[19, null]], "preload_word_lists module": [[70, null]], "preprocess module": [[71, null]], "question_num module": [[20, null]], "readability module": [[21, null]], "reddit_tags module": [[22, null]], "summarize_features module": [[72, null]], "temporal_features module": [[23, null]], "textblob_sentiment_analysis module": [[24, null]], "turn_taking_features module": [[25, null]], "variance_in_DD module": [[26, null]], "within_person_discursive_range module": [[27, null]], "word_mimicry module": [[28, null]], "z-scores:": [[41, "z-scores"]], "zscore_chats_and_conversation module": [[73, null]], "\u201cDriver\u201d Classes: Utterance-, Conversation-, and Speaker-Level Features": [[69, "driver-classes-utterance-conversation-and-speaker-level-features"]]}, "docnames": ["basics", "examples", "feature_builder", "features/basic_features", "features/burstiness", "features/certainty", "features/discursive_diversity", "features/fflow", "features/get_all_DD_features", "features/get_user_network", "features/hedge", "features/index", "features/info_exchange_zscore", "features/information_diversity", "features/lexical_features_v2", "features/named_entity_recognition_features", "features/other_lexical_features", "features/politeness_features", "features/politeness_v2", "features/politeness_v2_helper", "features/question_num", "features/readability", "features/reddit_tags", "features/temporal_features", "features/textblob_sentiment_analysis", "features/turn_taking_features", "features/variance_in_DD", "features/within_person_discursive_range", "features/word_mimicry", "features_conceptual/TEMPLATE", "features_conceptual/certainty", "features_conceptual/content_word_accommodation", "features_conceptual/conversational_repair", "features_conceptual/dale_chall_score", "features_conceptual/discursive_diversity", "features_conceptual/forward_flow", "features_conceptual/function_word_accommodation", "features_conceptual/gini_coefficient", "features_conceptual/hedge", "features_conceptual/index", "features_conceptual/information_diversity", "features_conceptual/information_exchange", "features_conceptual/liwc", "features_conceptual/message_length", "features_conceptual/message_quantity", "features_conceptual/mimicry_bert", "features_conceptual/moving_mimicry", "features_conceptual/named_entity_recognition", "features_conceptual/online_discussions_tags", "features_conceptual/politeness_receptiveness_markers", "features_conceptual/politeness_strategies", "features_conceptual/positivity_bert", "features_conceptual/positivity_z_score", "features_conceptual/proportion_of_first_person_pronouns", "features_conceptual/questions", "features_conceptual/team_burstiness", "features_conceptual/textblob_polarity", "features_conceptual/textblob_subjectivity", "features_conceptual/time_difference", "features_conceptual/turn_taking_index", "features_conceptual/word_ttr", "index", "intro", "utils/assign_chunk_nums", "utils/calculate_chat_level_features", "utils/calculate_conversation_level_features", "utils/calculate_user_level_features", "utils/check_embeddings", "utils/gini_coefficient", "utils/index", "utils/preload_word_lists", "utils/preprocess", "utils/summarize_features", "utils/zscore_chats_and_conversation"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["basics.rst", "examples.rst", "feature_builder.rst", "features/basic_features.rst", "features/burstiness.rst", "features/certainty.rst", "features/discursive_diversity.rst", "features/fflow.rst", "features/get_all_DD_features.rst", "features/get_user_network.rst", "features/hedge.rst", "features/index.rst", "features/info_exchange_zscore.rst", "features/information_diversity.rst", "features/lexical_features_v2.rst", "features/named_entity_recognition_features.rst", "features/other_lexical_features.rst", "features/politeness_features.rst", "features/politeness_v2.rst", "features/politeness_v2_helper.rst", "features/question_num.rst", "features/readability.rst", "features/reddit_tags.rst", "features/temporal_features.rst", "features/textblob_sentiment_analysis.rst", "features/turn_taking_features.rst", "features/variance_in_DD.rst", "features/within_person_discursive_range.rst", "features/word_mimicry.rst", "features_conceptual/TEMPLATE.rst", "features_conceptual/certainty.rst", "features_conceptual/content_word_accommodation.rst", "features_conceptual/conversational_repair.rst", "features_conceptual/dale_chall_score.rst", "features_conceptual/discursive_diversity.rst", "features_conceptual/forward_flow.rst", "features_conceptual/function_word_accommodation.rst", "features_conceptual/gini_coefficient.rst", "features_conceptual/hedge.rst", "features_conceptual/index.rst", "features_conceptual/information_diversity.rst", "features_conceptual/information_exchange.rst", "features_conceptual/liwc.rst", "features_conceptual/message_length.rst", "features_conceptual/message_quantity.rst", "features_conceptual/mimicry_bert.rst", "features_conceptual/moving_mimicry.rst", "features_conceptual/named_entity_recognition.rst", "features_conceptual/online_discussions_tags.rst", "features_conceptual/politeness_receptiveness_markers.rst", "features_conceptual/politeness_strategies.rst", "features_conceptual/positivity_bert.rst", "features_conceptual/positivity_z_score.rst", "features_conceptual/proportion_of_first_person_pronouns.rst", "features_conceptual/questions.rst", "features_conceptual/team_burstiness.rst", "features_conceptual/textblob_polarity.rst", "features_conceptual/textblob_subjectivity.rst", "features_conceptual/time_difference.rst", "features_conceptual/turn_taking_index.rst", "features_conceptual/word_ttr.rst", "index.rst", "intro.rst", "utils/assign_chunk_nums.rst", "utils/calculate_chat_level_features.rst", "utils/calculate_conversation_level_features.rst", "utils/calculate_user_level_features.rst", "utils/check_embeddings.rst", "utils/gini_coefficient.rst", "utils/index.rst", "utils/preload_word_lists.rst", "utils/preprocess.rst", "utils/summarize_features.rst", "utils/zscore_chats_and_conversation.rst"], "indexentries": {"adverb_limiter() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.adverb_limiter", false]], "assert_key_columns_present() (in module utils.preprocess)": [[71, "utils.preprocess.assert_key_columns_present", false]], "assign_chunk_nums() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.assign_chunk_nums", false]], "bare_command() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.bare_command", false]], "built_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.built_spacy_ner", false]], "burstiness() (in module features.burstiness)": [[4, "features.burstiness.burstiness", false]], "calculate_chat_level_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_chat_level_features", false]], "calculate_conversation_level_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_conversation_level_features", false]], "calculate_hedge_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_hedge_features", false]], "calculate_id_score() (in module features.information_diversity)": [[13, "features.information_diversity.calculate_ID_score", false]], "calculate_info_diversity() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_info_diversity", false]], "calculate_named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.calculate_named_entities", false]], "calculate_num_question_naive() (in module features.question_num)": [[20, "features.question_num.calculate_num_question_naive", false]], "calculate_politeness_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_sentiment", false]], "calculate_politeness_v2() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_v2", false]], "calculate_team_burstiness() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_team_burstiness", false]], "calculate_textblob_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_textblob_sentiment", false]], "calculate_user_level_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.calculate_user_level_features", false]], "calculate_vector_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_vector_word_mimicry", false]], "calculate_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_word_mimicry", false]], "chat_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.chat_level_features", false]], "chatlevelfeaturescalculator (class in utils.calculate_chat_level_features)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator", false]], "check_embeddings() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.check_embeddings", false]], "classify_ntri() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.classify_NTRI", false]], "classify_text_dalechall() (in module features.readability)": [[21, "features.readability.classify_text_dalechall", false]], "clean_text() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.clean_text", false]], "coerce_to_date_or_number() (in module features.temporal_features)": [[23, "features.temporal_features.coerce_to_date_or_number", false]], "commit_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.commit_data", false]], "compress() (in module utils.preprocess)": [[71, "utils.preprocess.compress", false]], "compute_frequency() (in module features.word_mimicry)": [[28, "features.word_mimicry.compute_frequency", false]], "computetf() (in module features.word_mimicry)": [[28, "features.word_mimicry.computeTF", false]], "concat_bert_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.concat_bert_features", false]], "conjection_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.conjection_seperator", false]], "content_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.Content_mimicry_score", false]], "conv_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.conv_level_features", false]], "conv_to_float_arr() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.conv_to_float_arr", false]], "conversationlevelfeaturescalculator (class in utils.calculate_conversation_level_features)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator", false]], "count_all_caps() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_all_caps", false]], "count_bullet_points() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_bullet_points", false]], "count_characters() (in module features.basic_features)": [[3, "features.basic_features.count_characters", false]], "count_difficult_words() (in module features.readability)": [[21, "features.readability.count_difficult_words", false]], "count_ellipses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_ellipses", false]], "count_emojis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emojis", false]], "count_emphasis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emphasis", false]], "count_line_breaks() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_line_breaks", false]], "count_links() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_links", false]], "count_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_matches", false]], "count_messages() (in module features.basic_features)": [[3, "features.basic_features.count_messages", false]], "count_numbering() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_numbering", false]], "count_parentheses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_parentheses", false]], "count_quotes() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_quotes", false]], "count_responding_to_someone() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_responding_to_someone", false]], "count_spacy_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_spacy_matches", false]], "count_syllables() (in module features.readability)": [[21, "features.readability.count_syllables", false]], "count_turn_taking_index() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turn_taking_index", false]], "count_turns() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turns", false]], "count_user_references() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_user_references", false]], "count_words() (in module features.basic_features)": [[3, "features.basic_features.count_words", false]], "create_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks", false]], "create_chunks_messages() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks_messages", false]], "create_cumulative_rows() (in module utils.preprocess)": [[71, "utils.preprocess.create_cumulative_rows", false]], "dale_chall_helper() (in module features.readability)": [[21, "features.readability.dale_chall_helper", false]], "feat_counts() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.feat_counts", false]], "feature_builder": [[2, "module-feature_builder", false]], "featurebuilder (class in feature_builder)": [[2, "feature_builder.FeatureBuilder", false]], "features.basic_features": [[3, "module-features.basic_features", false]], "features.burstiness": [[4, "module-features.burstiness", false]], "features.certainty": [[5, "module-features.certainty", false]], "features.discursive_diversity": [[6, "module-features.discursive_diversity", false]], "features.fflow": [[7, "module-features.fflow", false]], "features.get_all_dd_features": [[8, "module-features.get_all_DD_features", false]], "features.get_user_network": [[9, "module-features.get_user_network", false]], "features.hedge": [[10, "module-features.hedge", false]], "features.info_exchange_zscore": [[12, "module-features.info_exchange_zscore", false]], "features.information_diversity": [[13, "module-features.information_diversity", false]], "features.lexical_features_v2": [[14, "module-features.lexical_features_v2", false]], "features.named_entity_recognition_features": [[15, "module-features.named_entity_recognition_features", false]], "features.other_lexical_features": [[16, "module-features.other_lexical_features", false]], "features.politeness_features": [[17, "module-features.politeness_features", false]], "features.politeness_v2": [[18, "module-features.politeness_v2", false]], "features.politeness_v2_helper": [[19, "module-features.politeness_v2_helper", false]], "features.question_num": [[20, "module-features.question_num", false]], "features.readability": [[21, "module-features.readability", false]], "features.reddit_tags": [[22, "module-features.reddit_tags", false]], "features.temporal_features": [[23, "module-features.temporal_features", false]], "features.textblob_sentiment_analysis": [[24, "module-features.textblob_sentiment_analysis", false]], "features.turn_taking_features": [[25, "module-features.turn_taking_features", false]], "features.variance_in_dd": [[26, "module-features.variance_in_DD", false]], "features.within_person_discursive_range": [[27, "module-features.within_person_discursive_range", false]], "features.word_mimicry": [[28, "module-features.word_mimicry", false]], "featurize() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.featurize", false]], "function_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.function_mimicry_score", false]], "generate_bert() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_bert", false]], "generate_certainty_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_certainty_pkl", false]], "generate_lexicon_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_lexicon_pkl", false]], "generate_vect() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_vect", false]], "get_average() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_average", false]], "get_centroids() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_centroids", false]], "get_certainty() (in module features.certainty)": [[5, "features.certainty.get_certainty", false]], "get_certainty_score() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_certainty_score", false]], "get_content_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_content_words_in_message", false]], "get_conversation_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_conversation_level_aggregates", false]], "get_cosine_similarity() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_cosine_similarity", false]], "get_dale_chall_easy_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_dale_chall_easy_words", false]], "get_dale_chall_score_and_classfication() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_dale_chall_score_and_classfication", false]], "get_dd() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_DD", false]], "get_dd_features() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.get_DD_features", false]], "get_dep_pairs() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs", false]], "get_dep_pairs_noneg() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs_noneg", false]], "get_discursive_diversity_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_discursive_diversity_features", false]], "get_first_pct_of_chat() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.get_first_pct_of_chat", false]], "get_first_person_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_first_person_words", false]], "get_forward_flow() (in module features.fflow)": [[7, "features.fflow.get_forward_flow", false]], "get_forward_flow() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_forward_flow", false]], "get_function_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_function_words", false]], "get_function_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_function_words_in_message", false]], "get_gini() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.get_gini", false]], "get_gini_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_gini_features", false]], "get_info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.get_info_diversity", false]], "get_info_exchange_wordcount() (in module features.info_exchange_zscore)": [[12, "features.info_exchange_zscore.get_info_exchange_wordcount", false]], "get_liwc_rate() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.get_liwc_rate", false]], "get_max() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_max", false]], "get_mimicry_bert() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_mimicry_bert", false]], "get_min() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_min", false]], "get_moving_mimicry() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_moving_mimicry", false]], "get_named_entity() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_named_entity", false]], "get_nan_vector() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_nan_vector", false]], "get_polarity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_polarity_score", false]], "get_politeness_strategies() (in module features.politeness_features)": [[17, "features.politeness_features.get_politeness_strategies", false]], "get_politeness_v2() (in module features.politeness_v2)": [[18, "features.politeness_v2.get_politeness_v2", false]], "get_proportion_first_pronouns() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_proportion_first_pronouns", false]], "get_question_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_question_words", false]], "get_reddit_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_reddit_features", false]], "get_sentiment() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.get_sentiment", false]], "get_stdev() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_stdev", false]], "get_subjectivity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_subjectivity_score", false]], "get_sum() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_sum", false]], "get_team_burstiness() (in module features.burstiness)": [[4, "features.burstiness.get_team_burstiness", false]], "get_temporal_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_temporal_features", false]], "get_time_diff() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff", false]], "get_time_diff_startend() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff_startend", false]], "get_turn() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.get_turn", false]], "get_turn_id() (in module utils.preprocess)": [[71, "utils.preprocess.get_turn_id", false]], "get_turn_taking_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_turn_taking_features", false]], "get_unique_pairwise_combos() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_unique_pairwise_combos", false]], "get_user_average_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_average_dataframe", false]], "get_user_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_user_level_aggregates", false]], "get_user_level_averaged_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_averaged_features", false]], "get_user_level_summary_statistics_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summary_statistics_features", false]], "get_user_level_summed_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summed_features", false]], "get_user_network() (in module features.get_user_network)": [[9, "features.get_user_network.get_user_network", false]], "get_user_network() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_network", false]], "get_user_sum_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_sum_dataframe", false]], "get_variance_in_dd() (in module features.variance_in_dd)": [[26, "features.variance_in_DD.get_variance_in_DD", false]], "get_within_person_disc_range() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_within_person_disc_range", false]], "get_word_ttr() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_word_TTR", false]], "get_zscore_across_all_chats() (in module utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_chats", false]], "get_zscore_across_all_conversations() (in module utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_conversations", false]], "gini_coefficient() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.gini_coefficient", false]], "info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.info_diversity", false]], "info_exchange() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.info_exchange", false]], "is_hedged_sentence_1() (in module features.hedge)": [[10, "features.hedge.is_hedged_sentence_1", false]], "lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.lexical_features", false]], "liwc_features() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.liwc_features", false]], "load_saved_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_saved_data", false]], "load_to_dict() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_dict", false]], "load_to_lists() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_lists", false]], "merge_conv_data_with_original() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.merge_conv_data_with_original", false]], "mimic_words() (in module features.word_mimicry)": [[28, "features.word_mimicry.mimic_words", false]], "module": [[2, "module-feature_builder", false], [3, "module-features.basic_features", false], [4, "module-features.burstiness", false], [5, "module-features.certainty", false], [6, "module-features.discursive_diversity", false], [7, "module-features.fflow", false], [8, "module-features.get_all_DD_features", false], [9, "module-features.get_user_network", false], [10, "module-features.hedge", false], [12, "module-features.info_exchange_zscore", false], [13, "module-features.information_diversity", false], [14, "module-features.lexical_features_v2", false], [15, "module-features.named_entity_recognition_features", false], [16, "module-features.other_lexical_features", false], [17, "module-features.politeness_features", false], [18, "module-features.politeness_v2", false], [19, "module-features.politeness_v2_helper", false], [20, "module-features.question_num", false], [21, "module-features.readability", false], [22, "module-features.reddit_tags", false], [23, "module-features.temporal_features", false], [24, "module-features.textblob_sentiment_analysis", false], [25, "module-features.turn_taking_features", false], [26, "module-features.variance_in_DD", false], [27, "module-features.within_person_discursive_range", false], [28, "module-features.word_mimicry", false], [63, "module-utils.assign_chunk_nums", false], [64, "module-utils.calculate_chat_level_features", false], [65, "module-utils.calculate_conversation_level_features", false], [66, "module-utils.calculate_user_level_features", false], [67, "module-utils.check_embeddings", false], [68, "module-utils.gini_coefficient", false], [70, "module-utils.preload_word_lists", false], [71, "module-utils.preprocess", false], [72, "module-utils.summarize_features", false], [73, "module-utils.zscore_chats_and_conversation", false]], "named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.named_entities", false]], "num_named_entity() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.num_named_entity", false]], "other_lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.other_lexical_features", false]], "phrase_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.phrase_split", false]], "positivity_zscore() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.positivity_zscore", false]], "prep_simple() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_simple", false]], "prep_whole() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_whole", false]], "preprocess_chat_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.preprocess_chat_data", false]], "preprocess_conversation_columns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_conversation_columns", false]], "preprocess_naive_turns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_naive_turns", false]], "preprocess_text() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text", false]], "preprocess_text_lowercase_but_retain_punctuation() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text_lowercase_but_retain_punctuation", false]], "preprocessing() (in module features.information_diversity)": [[13, "features.information_diversity.preprocessing", false]], "punctuation_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.punctuation_seperator", false]], "question() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.Question", false]], "read_in_lexicons() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.read_in_lexicons", false]], "reduce_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.reduce_chunks", false]], "remove_active_user() (in module features.get_user_network)": [[9, "features.get_user_network.remove_active_user", false]], "save_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.save_features", false]], "sentence_pad() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_pad", false]], "sentence_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_split", false]], "sentenciser() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentenciser", false]], "set_self_conv_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.set_self_conv_data", false]], "text_based_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.text_based_features", false]], "token_count() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.token_count", false]], "train_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.train_spacy_ner", false]], "user_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.user_level_features", false]], "userlevelfeaturescalculator (class in utils.calculate_user_level_features)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator", false]], "utils.assign_chunk_nums": [[63, "module-utils.assign_chunk_nums", false]], "utils.calculate_chat_level_features": [[64, "module-utils.calculate_chat_level_features", false]], "utils.calculate_conversation_level_features": [[65, "module-utils.calculate_conversation_level_features", false]], "utils.calculate_user_level_features": [[66, "module-utils.calculate_user_level_features", false]], "utils.check_embeddings": [[67, "module-utils.check_embeddings", false]], "utils.gini_coefficient": [[68, "module-utils.gini_coefficient", false]], "utils.preload_word_lists": [[70, "module-utils.preload_word_lists", false]], "utils.preprocess": [[71, "module-utils.preprocess", false]], "utils.summarize_features": [[72, "module-utils.summarize_features", false]], "utils.zscore_chats_and_conversation": [[73, "module-utils.zscore_chats_and_conversation", false]], "word_start() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.word_start", false]]}, "objects": {"": [[2, 0, 0, "-", "feature_builder"]], "feature_builder": [[2, 1, 1, "", "FeatureBuilder"]], "feature_builder.FeatureBuilder": [[2, 2, 1, "", "chat_level_features"], [2, 2, 1, "", "conv_level_features"], [2, 2, 1, "", "featurize"], [2, 2, 1, "", "get_first_pct_of_chat"], [2, 2, 1, "", "merge_conv_data_with_original"], [2, 2, 1, "", "preprocess_chat_data"], [2, 2, 1, "", "save_features"], [2, 2, 1, "", "set_self_conv_data"], [2, 2, 1, "", "user_level_features"]], "features": [[3, 0, 0, "-", "basic_features"], [4, 0, 0, "-", "burstiness"], [5, 0, 0, "-", "certainty"], [6, 0, 0, "-", "discursive_diversity"], [7, 0, 0, "-", "fflow"], [8, 0, 0, "-", "get_all_DD_features"], [9, 0, 0, "-", "get_user_network"], [10, 0, 0, "-", "hedge"], [12, 0, 0, "-", "info_exchange_zscore"], [13, 0, 0, "-", "information_diversity"], [14, 0, 0, "-", "lexical_features_v2"], [15, 0, 0, "-", "named_entity_recognition_features"], [16, 0, 0, "-", "other_lexical_features"], [17, 0, 0, "-", "politeness_features"], [18, 0, 0, "-", "politeness_v2"], [19, 0, 0, "-", "politeness_v2_helper"], [20, 0, 0, "-", "question_num"], [21, 0, 0, "-", "readability"], [22, 0, 0, "-", "reddit_tags"], [23, 0, 0, "-", "temporal_features"], [24, 0, 0, "-", "textblob_sentiment_analysis"], [25, 0, 0, "-", "turn_taking_features"], [26, 0, 0, "-", "variance_in_DD"], [27, 0, 0, "-", "within_person_discursive_range"], [28, 0, 0, "-", "word_mimicry"]], "features.basic_features": [[3, 3, 1, "", "count_characters"], [3, 3, 1, "", "count_messages"], [3, 3, 1, "", "count_words"]], "features.burstiness": [[4, 3, 1, "", "burstiness"], [4, 3, 1, "", "get_team_burstiness"]], "features.certainty": [[5, 3, 1, "", "get_certainty"]], "features.discursive_diversity": [[6, 3, 1, "", "get_DD"], [6, 3, 1, "", "get_cosine_similarity"], [6, 3, 1, "", "get_unique_pairwise_combos"]], "features.fflow": [[7, 3, 1, "", "get_forward_flow"]], "features.get_all_DD_features": [[8, 3, 1, "", "conv_to_float_arr"], [8, 3, 1, "", "get_DD_features"]], "features.get_user_network": [[9, 3, 1, "", "get_user_network"], [9, 3, 1, "", "remove_active_user"]], "features.hedge": [[10, 3, 1, "", "is_hedged_sentence_1"]], "features.info_exchange_zscore": [[12, 3, 1, "", "get_info_exchange_wordcount"]], "features.information_diversity": [[13, 3, 1, "", "calculate_ID_score"], [13, 3, 1, "", "get_info_diversity"], [13, 3, 1, "", "info_diversity"], [13, 3, 1, "", "preprocessing"]], "features.lexical_features_v2": [[14, 3, 1, "", "get_liwc_rate"], [14, 3, 1, "", "liwc_features"]], "features.named_entity_recognition_features": [[15, 3, 1, "", "built_spacy_ner"], [15, 3, 1, "", "calculate_named_entities"], [15, 3, 1, "", "named_entities"], [15, 3, 1, "", "num_named_entity"], [15, 3, 1, "", "train_spacy_ner"]], "features.other_lexical_features": [[16, 3, 1, "", "classify_NTRI"], [16, 3, 1, "", "get_proportion_first_pronouns"], [16, 3, 1, "", "get_word_TTR"]], "features.politeness_features": [[17, 3, 1, "", "get_politeness_strategies"]], "features.politeness_v2": [[18, 3, 1, "", "get_politeness_v2"]], "features.politeness_v2_helper": [[19, 3, 1, "", "Question"], [19, 3, 1, "", "adverb_limiter"], [19, 3, 1, "", "bare_command"], [19, 3, 1, "", "clean_text"], [19, 3, 1, "", "commit_data"], [19, 3, 1, "", "conjection_seperator"], [19, 3, 1, "", "count_matches"], [19, 3, 1, "", "count_spacy_matches"], [19, 3, 1, "", "feat_counts"], [19, 3, 1, "", "get_dep_pairs"], [19, 3, 1, "", "get_dep_pairs_noneg"], [19, 3, 1, "", "load_saved_data"], [19, 3, 1, "", "load_to_dict"], [19, 3, 1, "", "load_to_lists"], [19, 3, 1, "", "phrase_split"], [19, 3, 1, "", "prep_simple"], [19, 3, 1, "", "prep_whole"], [19, 3, 1, "", "punctuation_seperator"], [19, 3, 1, "", "sentence_pad"], [19, 3, 1, "", "sentence_split"], [19, 3, 1, "", "sentenciser"], [19, 3, 1, "", "token_count"], [19, 3, 1, "", "word_start"]], "features.question_num": [[20, 3, 1, "", "calculate_num_question_naive"]], "features.readability": [[21, 3, 1, "", "classify_text_dalechall"], [21, 3, 1, "", "count_difficult_words"], [21, 3, 1, "", "count_syllables"], [21, 3, 1, "", "dale_chall_helper"]], "features.reddit_tags": [[22, 3, 1, "", "count_all_caps"], [22, 3, 1, "", "count_bullet_points"], [22, 3, 1, "", "count_ellipses"], [22, 3, 1, "", "count_emojis"], [22, 3, 1, "", "count_emphasis"], [22, 3, 1, "", "count_line_breaks"], [22, 3, 1, "", "count_links"], [22, 3, 1, "", "count_numbering"], [22, 3, 1, "", "count_parentheses"], [22, 3, 1, "", "count_quotes"], [22, 3, 1, "", "count_responding_to_someone"], [22, 3, 1, "", "count_user_references"]], "features.temporal_features": [[23, 3, 1, "", "coerce_to_date_or_number"], [23, 3, 1, "", "get_time_diff"], [23, 3, 1, "", "get_time_diff_startend"]], "features.textblob_sentiment_analysis": [[24, 3, 1, "", "get_polarity_score"], [24, 3, 1, "", "get_subjectivity_score"]], "features.turn_taking_features": [[25, 3, 1, "", "count_turn_taking_index"], [25, 3, 1, "", "count_turns"], [25, 3, 1, "", "get_turn"]], "features.variance_in_DD": [[26, 3, 1, "", "get_variance_in_DD"]], "features.within_person_discursive_range": [[27, 3, 1, "", "get_nan_vector"], [27, 3, 1, "", "get_within_person_disc_range"]], "features.word_mimicry": [[28, 3, 1, "", "Content_mimicry_score"], [28, 3, 1, "", "computeTF"], [28, 3, 1, "", "compute_frequency"], [28, 3, 1, "", "function_mimicry_score"], [28, 3, 1, "", "get_content_words_in_message"], [28, 3, 1, "", "get_function_words_in_message"], [28, 3, 1, "", "get_mimicry_bert"], [28, 3, 1, "", "get_moving_mimicry"], [28, 3, 1, "", "mimic_words"]], "utils": [[63, 0, 0, "-", "assign_chunk_nums"], [64, 0, 0, "-", "calculate_chat_level_features"], [65, 0, 0, "-", "calculate_conversation_level_features"], [66, 0, 0, "-", "calculate_user_level_features"], [67, 0, 0, "-", "check_embeddings"], [68, 0, 0, "-", "gini_coefficient"], [70, 0, 0, "-", "preload_word_lists"], [71, 0, 0, "-", "preprocess"], [72, 0, 0, "-", "summarize_features"], [73, 0, 0, "-", "zscore_chats_and_conversation"]], "utils.assign_chunk_nums": [[63, 3, 1, "", "assign_chunk_nums"], [63, 3, 1, "", "create_chunks"], [63, 3, 1, "", "create_chunks_messages"], [63, 3, 1, "", "reduce_chunks"]], "utils.calculate_chat_level_features": [[64, 1, 1, "", "ChatLevelFeaturesCalculator"]], "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator": [[64, 2, 1, "", "calculate_chat_level_features"], [64, 2, 1, "", "calculate_hedge_features"], [64, 2, 1, "", "calculate_politeness_sentiment"], [64, 2, 1, "", "calculate_politeness_v2"], [64, 2, 1, "", "calculate_textblob_sentiment"], [64, 2, 1, "", "calculate_vector_word_mimicry"], [64, 2, 1, "", "calculate_word_mimicry"], [64, 2, 1, "", "concat_bert_features"], [64, 2, 1, "", "get_certainty_score"], [64, 2, 1, "", "get_dale_chall_score_and_classfication"], [64, 2, 1, "", "get_forward_flow"], [64, 2, 1, "", "get_named_entity"], [64, 2, 1, "", "get_reddit_features"], [64, 2, 1, "", "get_temporal_features"], [64, 2, 1, "", "info_exchange"], [64, 2, 1, "", "lexical_features"], [64, 2, 1, "", "other_lexical_features"], [64, 2, 1, "", "positivity_zscore"], [64, 2, 1, "", "text_based_features"]], "utils.calculate_conversation_level_features": [[65, 1, 1, "", "ConversationLevelFeaturesCalculator"]], "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator": [[65, 2, 1, "", "calculate_conversation_level_features"], [65, 2, 1, "", "calculate_info_diversity"], [65, 2, 1, "", "calculate_team_burstiness"], [65, 2, 1, "", "get_conversation_level_aggregates"], [65, 2, 1, "", "get_discursive_diversity_features"], [65, 2, 1, "", "get_gini_features"], [65, 2, 1, "", "get_turn_taking_features"], [65, 2, 1, "", "get_user_level_aggregates"]], "utils.calculate_user_level_features": [[66, 1, 1, "", "UserLevelFeaturesCalculator"]], "utils.calculate_user_level_features.UserLevelFeaturesCalculator": [[66, 2, 1, "", "calculate_user_level_features"], [66, 2, 1, "", "get_centroids"], [66, 2, 1, "", "get_user_level_averaged_features"], [66, 2, 1, "", "get_user_level_summary_statistics_features"], [66, 2, 1, "", "get_user_level_summed_features"], [66, 2, 1, "", "get_user_network"]], "utils.check_embeddings": [[67, 3, 1, "", "check_embeddings"], [67, 3, 1, "", "generate_bert"], [67, 3, 1, "", "generate_certainty_pkl"], [67, 3, 1, "", "generate_lexicon_pkl"], [67, 3, 1, "", "generate_vect"], [67, 3, 1, "", "get_sentiment"], [67, 3, 1, "", "read_in_lexicons"]], "utils.gini_coefficient": [[68, 3, 1, "", "get_gini"], [68, 3, 1, "", "gini_coefficient"]], "utils.preload_word_lists": [[70, 3, 1, "", "get_dale_chall_easy_words"], [70, 3, 1, "", "get_first_person_words"], [70, 3, 1, "", "get_function_words"], [70, 3, 1, "", "get_question_words"]], "utils.preprocess": [[71, 3, 1, "", "assert_key_columns_present"], [71, 3, 1, "", "compress"], [71, 3, 1, "", "create_cumulative_rows"], [71, 3, 1, "", "get_turn_id"], [71, 3, 1, "", "preprocess_conversation_columns"], [71, 3, 1, "", "preprocess_naive_turns"], [71, 3, 1, "", "preprocess_text"], [71, 3, 1, "", "preprocess_text_lowercase_but_retain_punctuation"]], "utils.summarize_features": [[72, 3, 1, "", "get_average"], [72, 3, 1, "", "get_max"], [72, 3, 1, "", "get_min"], [72, 3, 1, "", "get_stdev"], [72, 3, 1, "", "get_sum"], [72, 3, 1, "", "get_user_average_dataframe"], [72, 3, 1, "", "get_user_sum_dataframe"]], "utils.zscore_chats_and_conversation": [[73, 3, 1, "", "get_zscore_across_all_chats"], [73, 3, 1, "", "get_zscore_across_all_conversations"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function"}, "terms": {"": [0, 1, 2, 4, 5, 9, 11, 13, 25, 28, 29, 31, 32, 34, 35, 36, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 64, 65, 66], "0": [1, 2, 5, 10, 13, 16, 21, 24, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 45, 46, 47, 50, 51, 53, 55, 59, 61], "000": 42, "00222437221134802": [5, 64], "01": 51, "02": 51, "04": 40, "0496": [21, 33], "05": [13, 40, 50, 51], "06": 51, "08": 50, "09": [45, 46, 50], "1": [1, 2, 3, 10, 13, 22, 24, 32, 34, 35, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 51, 53, 55, 56, 57, 59, 62], "10": [1, 5, 6, 21, 24, 33, 42, 59, 61, 64], "100": [1, 14, 21, 33, 37, 42, 47, 62], "1000": 42, "10th": 33, "1145": [21, 24], "1177": [5, 64], "11th": 33, "12": [35, 45, 46, 50], "1287": 6, "12th": 33, "13": 50, "14": 50, "15": [37, 50], "1579": [21, 33], "17": 50, "1948": 33, "195": 36, "1977": 62, "1lpngokujsx": 5, "1st": 50, "1st_person": 50, "1st_person_pl": 50, "1st_person_start": 50, "2": [1, 2, 34, 35, 41, 47, 59, 61, 62], "20": [37, 59], "2004": 42, "2007": [5, 42], "2009": 60, "2012": 55, "2013": [12, 16, 31, 32, 36, 37, 38, 41, 43, 50, 52, 54, 70], "2015": [53, 58, 60], "2016": 4, "2017": 13, "2018": [40, 44, 55], "2019": [35, 52], "2020": [18, 21, 24, 33, 49, 50, 56, 57], "2021": [1, 6, 43, 44], "2022": [13, 34], "2023": [5, 14, 30, 42, 59, 64], "2024": 40, "21": 59, "22": [41, 50], "2384068": 4, "24": [1, 61], "25": 47, "27": 50, "28": 50, "29": 50, "2nd": 50, "2nd_person": 50, "2nd_person_start": 50, "3": [1, 2, 21, 34, 41, 51, 59, 61, 71], "30": 50, "3000": 33, "32": [34, 50], "3432929": [21, 24], "35": 51, "36": 50, "38": 50, "39": 49, "39512260": 68, "3n": 59, "4": [5, 13, 21, 30, 33, 41, 56, 62], "42": 14, "4274": 6, "43": 50, "45": 50, "47": 50, "49": 50, "4pit4bqz6": 5, "4th": [21, 33], "5": [1, 5, 21, 30, 33, 37, 41, 59], "50": [1, 47], "52": 50, "53": 50, "57": 50, "58": 50, "5th": 33, "6": [1, 33, 43], "60": 51, "63": 50, "6365": 21, "68": 47, "6th": 33, "7": [30, 33, 48], "70": 50, "78": [35, 50], "7th": 33, "8": [1, 30, 33], "80": [21, 70], "82": 41, "85": 34, "86": 35, "87": 50, "89": [45, 46], "8th": 33, "9": [2, 5, 21, 30, 33, 40, 47, 50], "9123": 47, "92": 51, "93chall_readability_formula": [21, 70], "94": 15, "95": 47, "97": 51, "9855072464": 47, "9992": 47, "99954": 47, "9th": 33, "A": [1, 2, 4, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 28, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 47, 49, 50, 51, 52, 57, 59, 60, 61, 62, 66, 67, 68, 70, 71, 72, 73], "And": [1, 62], "As": [1, 31, 35, 36, 40, 45, 61], "But": [1, 50, 62], "By": [1, 42, 50], "For": [0, 1, 31, 34, 37, 41, 42, 43, 47, 49, 54, 56, 59, 62, 65], "If": [0, 1, 2, 5, 21, 29, 30, 35, 45, 47, 50, 55, 61, 62, 63, 64, 67, 71], "In": [1, 21, 30, 31, 34, 35, 36, 37, 39, 41, 42, 45, 46, 47, 50, 55, 59, 61, 62], "It": [1, 2, 31, 32, 33, 36, 37, 41, 44, 45, 46, 50, 64, 65, 66, 67, 71], "NO": 37, "NOT": [1, 61], "No": [19, 53], "Not": 41, "One": [1, 37, 61], "That": [29, 55], "The": [1, 2, 3, 4, 5, 7, 9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "Then": [1, 55, 61], "There": [1, 11, 32, 61, 66], "These": [1, 11, 17, 32, 34, 42, 48, 52, 62, 69], "To": [0, 1, 29, 31, 34, 37, 40, 55, 56, 57, 61, 62], "WITH": 21, "Will": 50, "_deviat": 55, "abil": [13, 29], "abl": [31, 36, 61], "abort": 1, "about": [1, 12, 29, 31, 36, 41, 47, 61, 62], "abov": [1, 21, 34, 61], "abstract_id": 4, "accept": [0, 1, 58, 61], "access": [0, 1, 15], "accommod": [28, 32, 39, 45, 46, 64, 65, 66], "accord": [21, 37, 59, 64, 70], "accordingli": 63, "account": [1, 29, 32, 42], "accus": 50, "achiev": [50, 62], "acknowledg": 49, "acm": [21, 24], "acommod": 36, "across": [1, 13, 28, 31, 34, 40, 41, 42, 50, 62, 64, 73], "action": 59, "activ": [1, 9, 44, 55, 71], "actual": [41, 56], "ad": [61, 62, 71], "adapt": 59, "add": [0, 1, 2, 21, 51, 61], "addit": [0, 2, 32, 34, 42, 63, 69], "addition": [30, 31, 32, 54], "address": 1, "adjac": 71, "adjust": [0, 21, 37, 63], "advanc": [31, 36], "advantag": 4, "adverb": [19, 31, 36], "adverb_limit": [19, 49], "affect": [1, 29, 35, 44], "affirm": 49, "after": [0, 1, 31, 34, 36, 43, 61, 62, 64], "again": [32, 34], "against": [28, 31, 36, 52], "agarw": 62, "aggreg": [0, 3, 11, 37, 44, 62, 65, 66, 72], "agre": 47, "agreement": 49, "ah": [31, 36], "ai": 62, "aim": [39, 62], "airtim": [37, 62], "al": [1, 5, 14, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "algorithm": [56, 57], "align": [35, 51], "all": [0, 1, 2, 6, 12, 13, 15, 19, 22, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 46, 48, 49, 51, 52, 55, 58, 61, 62, 64, 66, 71, 73], "allow": 1, "almaatouq": 59, "along": 1, "alongsid": 1, "alphabet": 49, "alphanumer": 71, "alreadi": [0, 1, 2, 4, 10, 12, 16, 67], "also": [0, 1, 2, 28, 30, 31, 32, 34, 36, 37, 38, 42, 47, 51, 54, 60, 61, 62, 64, 65, 67, 69, 71], "alsobai": 59, "altern": 59, "although": [1, 23, 31, 36], "alwai": [1, 55], "am": [31, 36, 42, 54, 62], "amaz": [48, 56], "ambient": 32, "american": 33, "ami": [47, 59, 62], "amic": 62, "among": [36, 37, 52, 55, 62], "amongst": [6, 35, 48], "an": [1, 2, 5, 8, 11, 12, 13, 21, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41, 42, 45, 47, 48, 50, 51, 52, 54, 59, 60, 61, 62, 63, 65, 66, 68], "analys": [1, 62], "analysi": [1, 11, 52, 62, 71], "analyt": 62, "analyz": [0, 1, 2, 13, 14, 16, 17, 19, 20, 21, 22, 24, 28, 43, 52, 62, 67, 71], "analyze_first_pct": [0, 1, 2], "angri": 47, "ani": [0, 1, 29, 31, 33, 38, 54, 62, 71], "annot": [17, 50], "anoth": [30, 34, 36, 48], "answer": 29, "anybodi": [31, 36], "anyth": [1, 2, 23, 31, 36, 56], "anywher": [31, 36], "apartment": 42, "api": 47, "api_refer": 24, "apolog": [17, 50], "apologi": 49, "appear": [0, 15, 37, 38, 42, 64], "append": [1, 17, 64, 65, 66, 67], "appli": [4, 13, 14, 18, 62, 64, 69], "applic": [29, 71], "appreci": 50, "approach": [32, 38, 42, 45, 46, 49, 53, 64], "appropri": 69, "ar": [0, 1, 2, 3, 5, 9, 10, 11, 15, 17, 19, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 51, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 71], "arcross": 34, "area": 62, "aren": [31, 36], "around": 2, "arous": 48, "arrai": [6, 8, 68], "articl": [37, 50], "ask": [20, 47, 54], "ask_ag": 49, "aspect": [50, 62], "assert_key_columns_pres": 71, "assign": [31, 36, 38, 45, 46, 52, 59, 63, 71], "assign_chunk_num": 69, "associ": [4, 15, 21, 29, 30, 31, 32, 36, 40, 45, 46, 47, 48, 61], "assum": [0, 1, 2, 10, 12, 16, 23, 41, 60, 71], "assumign": 1, "assumpt": [1, 41, 61], "asterisk": 22, "attribut": [1, 11, 34, 51, 52, 56, 62], "author": [5, 31, 36, 59], "automat": [1, 61, 69], "auxiliari": [31, 36], "avail": [62, 63, 64, 67], "averag": [11, 13, 28, 30, 33, 34, 35, 40, 41, 46, 52, 64, 65, 66, 72], "avil": 62, "avoid": 30, "awar": 29, "awesom": 62, "b": [4, 34, 35, 45, 46, 55, 62], "back": 62, "bag": [32, 38, 42, 45, 46, 49, 53, 56, 57], "bare_command": [19, 49], "base": [1, 2, 15, 18, 19, 31, 32, 34, 35, 36, 37, 40, 42, 51, 52, 53, 54, 55, 56, 57, 62, 63, 64, 65, 66, 71], "basic": [10, 11, 12, 16, 61, 62], "basic_featur": 11, "batch_num": 1, "bay": [56, 57], "bbevi": 18, "becaus": [1, 2, 12, 21, 31, 36, 40, 56, 61], "becom": [44, 61, 62], "been": [1, 2, 12, 16, 31, 36, 61], "befor": [0, 1, 2, 17, 31, 36, 45, 48], "beforehand": 64, "begin": [34, 54, 58, 61, 62, 63], "behavior": [62, 63], "being": [4, 13, 14, 16, 17, 20, 21, 24, 31, 32, 36, 43, 47, 51, 55, 56, 60], "belong": [1, 42], "below": [1, 11, 21, 33, 36, 45, 48, 51, 61, 62, 69], "ber": 54, "bert": [0, 1, 31, 35, 36, 39, 46, 61, 64, 67], "bert_path": 67, "bert_sentiment_data": 64, "best": [14, 29], "better": 61, "between": [4, 6, 13, 21, 23, 24, 28, 30, 31, 34, 35, 36, 37, 40, 45, 46, 55, 58, 59, 62, 64, 65], "betwen": 34, "beyond": 2, "big": 59, "binari": [10, 32, 38], "blame": 47, "blob": 24, "block": [22, 32, 48, 59], "blog": 15, "bold": [22, 64], "bool": [2, 63, 67, 71], "bootstrap": 62, "both": [1, 2, 42, 52, 54, 55, 59, 62], "bother": 50, "bottom": 59, "bought": 41, "bound": [29, 35, 36, 37, 42, 52, 55], "boundari": [34, 35], "break": [22, 48, 64], "brief": 44, "broader": 52, "broken": 59, "btw": 50, "bug": [1, 61], "build": [1, 7, 34, 45, 46, 62], "built": 11, "built_spacy_n": 15, "bullet": [22, 48, 64], "bunch": 59, "burst": 58, "bursti": [11, 39, 58, 65], "by_the_wai": 49, "c": [12, 34, 35, 45, 46, 62], "cach": [0, 1, 2, 51, 61], "calcul": [2, 5, 11, 12, 16, 18, 21, 28, 33, 41, 48, 49, 50, 56, 57, 58, 60, 62, 63, 64, 65, 66, 67, 68, 72, 73], "calculate_chat_level_featur": 69, "calculate_conversation_level_featur": 69, "calculate_hedge_featur": 64, "calculate_id_scor": 13, "calculate_info_divers": 65, "calculate_named_ent": 15, "calculate_num_question_na": 20, "calculate_politeness_senti": 64, "calculate_politeness_v2": 64, "calculate_team_bursti": 65, "calculate_textblob_senti": 64, "calculate_user_level_featur": 69, "calculate_vector_word_mimicri": 64, "calculate_word_mimicri": 64, "call": [1, 2, 8, 13, 61, 62, 64, 69], "can": [0, 1, 11, 23, 31, 32, 33, 34, 36, 37, 42, 43, 44, 47, 48, 49, 50, 52, 54, 60, 61, 62, 69], "can_you": 49, "cannot": [1, 31, 36, 45, 46, 49, 62], "cao": [21, 24, 33, 43, 44, 56, 57, 62], "cap": [22, 48, 64], "capit": 48, "captur": [29, 30, 32, 34, 35, 38, 41, 42, 55], "caract": 40, "carefulli": 60, "casa_token": 5, "case": [1, 13, 16, 29, 30, 31, 36, 37, 41, 45, 46, 51, 55, 56, 59, 61], "casual": 43, "categori": [21, 32, 45, 46, 49, 52], "caus": [31, 32, 36, 59], "caveat": 1, "center": 62, "central": 34, "centroid": [34, 66], "certain": [5, 19, 30, 42, 45, 46, 49], "certainli": 42, "certainti": [11, 38, 39, 42, 64, 67], "cfm": 4, "chall": [1, 21, 39, 64, 70], "chang": [1, 34, 50, 61, 71], "charact": [2, 3, 15, 19, 37, 49, 62, 64, 65, 66, 71], "characterist": 62, "chat": [0, 1, 2, 4, 5, 6, 7, 8, 12, 13, 14, 16, 23, 25, 28, 29, 32, 35, 36, 41, 44, 45, 46, 49, 59, 61, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "chat_data": [2, 6, 7, 8, 26, 27, 28, 63, 64, 65, 66, 67, 71], "chat_df": 14, "chat_level_data": 72, "chat_level_featur": 2, "chatlevelfeaturescalcul": [2, 17, 21, 64, 69], "chats_data": 73, "check": [19, 23, 44, 64, 67, 71], "check_embed": 69, "chen": 62, "choos": 60, "chose": 1, "chunk": [34, 59, 63], "chunk_num": 63, "circlelyt": 13, "citat": [21, 24], "cite": 50, "clarif": [16, 32, 64], "class": [1, 2, 31, 61, 62, 64, 65, 66], "classif": [21, 64], "classifi": [16, 21, 50, 56, 57], "classify_ntri": 16, "classify_text_dalechal": 21, "clean": [2, 17, 19, 67], "clean_text": 19, "clear": 1, "close": [31, 48, 62], "closer": [45, 46, 59], "clue": 62, "cmu": 12, "code": [6, 18, 29, 32, 51, 55, 61, 62, 68], "coeffici": [4, 39, 62, 65, 68], "coerce_to_date_or_numb": 23, "cognit": 62, "col": [1, 2, 61], "colab": [0, 1], "collabor": [59, 62], "collaps": 2, "collect": [2, 34, 49, 50, 52, 62], "colleg": 33, "column": [0, 2, 4, 6, 7, 8, 9, 12, 13, 14, 16, 18, 23, 25, 28, 51, 56, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "column_count_frequ": 28, "column_count_mim": 28, "column_mimc": 28, "column_nam": 71, "column_to_summar": 72, "com": [1, 2, 4, 5, 13, 15, 18, 64, 68, 71], "comb": 62, "combin": [0, 1, 6, 28, 61, 64, 71], "come": [12, 13, 21, 32, 33, 58, 61], "comm": [1, 61], "command": [1, 61], "comment": 48, "commit": 23, "commit_data": 19, "common": [32, 62, 64], "commonli": 37, "commun": [0, 1, 11, 44, 48, 55, 60, 62, 64], "companion": 1, "compar": [2, 31, 35, 42, 44, 45, 52, 64, 71, 73], "compat": [1, 61], "complement": [31, 36], "complet": [1, 2, 55], "complex": [35, 43, 50, 62], "compon": 50, "comprehens": [33, 48], "compress": 71, "comput": [0, 2, 4, 5, 6, 10, 11, 12, 13, 14, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 45, 46, 49, 52, 55, 62, 64, 65, 66, 69, 73], "compute_frequ": 28, "computetf": 28, "conain": 61, "concat_bert_featur": 64, "concaten": [19, 49, 64, 71], "concentr": 55, "concept": [29, 39, 42, 62], "conceptu": [61, 62], "concis": 43, "concret": 29, "conduct": 1, "confid": [2, 5, 15, 30, 47, 64], "conflict": 62, "confound": 44, "congruent": 34, "conjection_seper": 19, "conjunct": [19, 31, 36, 49], "conjunction_start": 49, "connect": 39, "conscious": 35, "consecut": 22, "consid": [1, 33, 37], "consider": [61, 62], "consist": [36, 40, 41], "constitut": 41, "constrain": [34, 35], "construct": [11, 55, 62], "constructor": 47, "consult": 5, "contain": [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 29, 30, 35, 38, 42, 47, 49, 55, 61, 62, 63, 64, 67, 71, 72, 73], "content": [0, 1, 12, 13, 28, 34, 36, 39, 41, 42, 45, 46, 62, 64, 67], "content_mimicry_scor": 28, "content_word_mimicri": 28, "context": [2, 32, 42, 48, 62, 71], "continu": [56, 57], "contract": 49, "contrast": 39, "contribut": [13, 34, 37, 62], "control": 1, "conv": 1, "conv_data": [2, 65], "conv_level_featur": 2, "conv_to_float_arr": 8, "convei": [6, 34, 52], "convers": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 23, 25, 28, 29, 31, 34, 35, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 58, 59, 61, 63, 64, 65, 66, 68, 71, 72, 73], "conversation_id": [2, 28, 61, 71], "conversation_id_col": [0, 1, 2, 4, 6, 7, 8, 9, 13, 23, 25, 26, 27, 61, 63, 64, 65, 66, 68, 72, 73], "conversation_num": [0, 1, 2, 6, 7, 66, 71, 73], "conversationlevelfeaturescalcul": [2, 65, 69], "convert": [8, 41, 49, 71], "convict": 5, "convokit": [17, 50, 62, 64], "coordin": 55, "copi": [0, 1], "copular": [31, 36], "core": [34, 69], "cornel": 17, "corpu": 50, "corrado": 37, "correl": [41, 55], "correspond": [30, 34, 35, 40, 49, 55, 66], "cosin": [6, 7, 13, 28, 31, 34, 35, 36, 40, 45, 46, 65], "could": [1, 31, 33, 36, 50, 54], "could_you": 49, "couldn": [31, 36], "count": [1, 3, 12, 14, 15, 16, 19, 21, 25, 28, 30, 31, 32, 36, 39, 41, 43, 44, 49, 52, 53, 54, 56, 58, 64, 65, 66], "count_all_cap": 22, "count_bullet_point": 22, "count_charact": 3, "count_difficult_word": 21, "count_ellips": 22, "count_emoji": 22, "count_emphasi": 22, "count_line_break": 22, "count_link": 22, "count_match": [19, 49], "count_messag": 3, "count_numb": 22, "count_parenthes": 22, "count_quot": 22, "count_responding_to_someon": 22, "count_spacy_match": 19, "count_syl": 21, "count_turn": 25, "count_turn_taking_index": 25, "count_user_refer": 22, "count_word": 3, "countabl": 65, "countd": 36, "counterfactu": 50, "cours": [16, 31, 34, 36, 63], "creat": [0, 1, 2, 13, 19, 31, 40, 42, 61, 62, 64, 65, 66, 71], "create_chunk": 63, "create_chunks_messag": 63, "create_cumulative_row": 71, "credit": 33, "crowd": 13, "csv": [0, 1, 2, 61, 62, 67], "cumul": [1, 2, 71], "cumulative_group": [0, 1, 2, 71], "current": [1, 11, 23, 31, 34, 35, 36, 40, 45, 46, 58, 61, 64, 71], "curt": 43, "custom": [0, 62], "custom_featur": [0, 1, 2, 61], "customiz": 62, "cut": 1, "cutoff": [2, 15, 47, 64], "d": [1, 31, 34, 36], "dale": [1, 21, 39, 64, 70], "dale_chall_help": 21, "danescu": 50, "dash": 22, "data": [0, 2, 6, 7, 8, 9, 13, 19, 20, 32, 37, 40, 41, 47, 51, 55, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "datafram": [0, 1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 37, 47, 49, 59, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "dataknowsal": 15, "dataset": [1, 2, 9, 12, 13, 28, 31, 41, 47, 52, 61, 64, 65, 66, 73], "date": [1, 61], "datetim": [23, 58], "dcosta": 62, "deal": [50, 59], "death": 1, "debat": 59, "decid": 62, "decis": [1, 13, 62], "declar": [1, 61, 62, 69], "deepli": 62, "default": [0, 1, 2, 5, 13, 16, 30, 34, 35, 42, 47, 62, 63, 66, 67, 71, 73], "defer": [17, 50], "defin": [0, 11, 21, 31, 34, 36, 40, 59, 62, 64, 65, 66, 70], "definit": [1, 3, 44], "degre": [6, 30, 36, 45, 46, 55], "delet": 29, "deliber": 1, "demo": 61, "democrat": 1, "demystifi": 62, "denomin": 59, "densiti": 60, "dep_": 49, "dep_pair": 19, "depend": [0, 1, 10, 19, 32, 49, 52, 61, 63], "deriv": [2, 11, 65, 66], "describ": [11, 62], "design": [0, 1, 2, 13, 34, 62], "desir": [2, 63, 72], "detail": [0, 1, 33, 41, 43, 61, 62], "detect": [1, 32, 37, 38, 47, 48, 49, 54], "determin": [13, 18, 31, 35, 36, 40, 45, 46, 71], "dev": 24, "develop": [5, 37, 40, 62], "deviat": [4, 5, 29, 40, 41, 55, 58, 65, 72, 73], "df": [4, 8, 9, 12, 13, 16, 18, 23, 28, 63, 71], "dict": [17, 19, 28, 67], "dictionari": [15, 17, 19, 28, 30, 42, 49, 67], "did": [1, 31, 36, 37, 47, 50, 54, 62], "didn": [31, 36], "differ": [1, 2, 4, 11, 12, 23, 29, 31, 34, 36, 37, 39, 40, 44, 45, 46, 47, 49, 55, 62, 63, 64, 65, 66, 71], "differenti": [49, 59], "difficult": [21, 33], "difficult_word": 21, "difficulti": 33, "dimens": [40, 62], "dimension": [34, 35], "dinner": 41, "direct": [34, 43, 45, 47, 50, 69], "direct_quest": [32, 50, 54], "direct_start": 50, "directli": [1, 62, 69], "directori": [0, 1, 2, 19, 61, 65, 67], "disagr": 49, "disagre": 51, "discours": [31, 36], "discret": [31, 36, 45, 46], "discurs": [0, 1, 6, 8, 39, 40, 61, 65, 66], "discursive_divers": 11, "discus": 8, "discuss": [0, 1, 31, 34, 39, 40, 42, 43, 61, 62, 71], "dispers": 68, "displai": [34, 42, 46], "dispos": 1, "distanc": [34, 35, 40], "distinct": [36, 59], "distinguish": 59, "div": 16, "diverg": [6, 34, 35], "divers": [0, 1, 6, 8, 13, 39, 61, 65], "divid": [16, 34, 59, 63], "dl": [21, 24], "do": [0, 1, 29, 31, 34, 36, 37, 43, 49, 50, 54, 62, 69], "doc": 19, "doc_top": 13, "document": [1, 17, 61, 69], "doe": [1, 2, 29, 40, 42, 43, 45, 47, 54, 61, 71], "doesn": [0, 1, 2, 29, 31, 36, 45, 61], "doi": [5, 6, 21, 24, 64], "domain": 50, "don": [31, 36, 49, 54, 62, 67], "done": [2, 50], "dot": 22, "doubl": 30, "down": [31, 36], "download": [1, 61], "download_resourc": [1, 61], "downstream": [17, 62], "dozen": 62, "drive": [62, 69], "driver": [2, 61, 64, 65, 66], "drop": [2, 64], "due": [34, 59], "duncan": 62, "duplic": [1, 2, 71], "durat": [58, 63], "dure": [2, 55, 59, 62], "dynam": [59, 61], "e": [0, 1, 2, 4, 15, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 47, 48, 49, 52, 54, 56, 59, 63, 65, 66, 71], "e2": [21, 70], "each": [0, 1, 2, 3, 4, 7, 8, 9, 11, 12, 15, 17, 19, 23, 25, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "earlier": [0, 1, 2], "easi": [1, 21, 62, 70], "easier": [21, 42], "easili": 33, "easy_word": 21, "eat": 34, "echo": 31, "econom": 37, "edg": [29, 59], "edu": [1, 12, 16, 17, 70], "effect": [1, 41], "effici": 1, "effort": 55, "either": [20, 23, 52, 55], "elaps": [23, 58], "element": [1, 6], "ellips": [22, 48, 64], "els": [1, 22, 47, 64], "embed": [8, 31, 34, 35, 36, 45, 46, 65, 66, 67, 69], "emili": [30, 35, 45, 46, 47, 59, 62], "emoji": [22, 48, 64], "emoticon": 48, "emphas": [22, 48, 64], "emphasi": 48, "empirica": [1, 2, 71], "emploi": 45, "empti": [2, 13], "en": [21, 24, 70], "en_core_web_sm": [1, 61], "enabl": 71, "enclos": 22, "encod": [1, 8], "encompass": 62, "encount": [1, 34, 35, 61], "encourag": 64, "end": [0, 1, 15, 20, 23, 34, 54, 62, 63], "engag": 43, "engin": 2, "english": [34, 42], "enjoi": 62, "ensur": [0, 1, 40, 49, 61, 63, 67, 71], "entir": [1, 12, 36, 40, 41, 52, 59, 62, 73], "entiti": [0, 1, 2, 15, 39, 64], "entityrecogn": 47, "entri": 28, "ep8dauru1ogvjurwdbof5h6ayfbslvughjyiv31d_as6ppbt": 5, "equal": [1, 21, 23, 34, 37, 40, 55, 59, 61, 62, 63], "equival": [1, 41, 55, 61], "eric": 62, "error": [1, 16, 61], "especi": [41, 62], "essenti": [51, 71], "estim": 31, "et": [1, 5, 14, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "etc": [10, 15, 16, 17, 42], "evalu": [5, 47, 50], "evan": 62, "even": [0, 1, 2, 34, 37, 42, 62, 63, 67], "evenli": [34, 55], "event": [1, 34, 55, 61], "ever": 62, "everi": [1, 4, 13, 31, 34, 35, 36, 62], "everybodi": [31, 36], "everyon": [31, 36, 47, 62], "everyth": [31, 36, 56], "everywher": [31, 36], "evolut": 35, "evolv": [35, 71], "exactli": [1, 2, 71], "examin": [40, 62, 63], "exampl": [0, 10, 11, 15, 21, 24, 29, 31, 32, 34, 37, 42, 43, 48, 50, 51, 54, 56, 59, 60, 61, 62], "example_data": 1, "exce": 15, "exchang": [12, 35, 39, 40, 45, 55, 64], "exclud": [0, 41, 42], "exclus": [41, 42], "excus": 32, "exhibit": 35, "exist": [0, 1, 2, 55, 61, 62, 63, 64, 67], "expand": 49, "expect": [1, 37, 47], "expected_valu": 47, "explain": 29, "explan": [29, 43], "explor": [61, 62], "express": [5, 14, 30, 31, 32, 36, 38, 42, 64], "extend": 1, "extens": [43, 44], "extent": [1, 4, 7, 12, 31, 34, 35, 37, 51, 55, 59], "extern": 48, "extra": 51, "extract": [1, 17, 19, 28, 40, 50, 64], "extrem": [55, 56, 57], "face": 51, "facilit": [62, 71], "fact": [4, 35, 50, 54, 59], "factual": [17, 24, 50], "fail": [1, 61], "fals": [1, 2, 31, 54, 61, 71], "famili": 42, "far": [34, 35, 46, 50, 62], "faster": 14, "feat_count": 19, "featuer": 2, "featur": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 61, 63, 64, 65, 66, 67], "feature_build": [0, 1, 61], "feature_method": [64, 65], "featurebuild": [0, 2, 47, 61, 69], "few": [48, 62], "fewer": [12, 60], "fflow": 11, "field": [13, 17], "file": [0, 1, 2, 12, 14, 19, 65, 67], "filenam": [0, 1, 19], "filenotfounderror": 67, "fill": 71, "filler": [37, 60], "filler_paus": 49, "filter": [19, 62], "final": [1, 2, 34, 42, 62], "find": [1, 19, 28, 50], "fingertip": 62, "finit": 55, "first": [0, 1, 2, 11, 12, 16, 19, 31, 34, 35, 36, 39, 40, 41, 42, 45, 46, 49, 52, 54, 59, 62, 64, 70, 71], "first_person": 12, "first_person_plur": 49, "first_person_raw": [12, 16], "first_person_singl": 49, "five": 37, "fix": 52, "flag": 71, "float": [2, 4, 5, 6, 8, 10, 13, 14, 16, 21, 24, 25, 28, 68], "floor": 59, "flow": [0, 1, 7, 31, 36, 39, 41, 45, 46, 61, 64], "focal": [31, 36], "focu": 41, "folder": [0, 1, 19], "follow": [1, 2, 14, 16, 17, 29, 31, 32, 33, 41, 42, 47, 49, 50, 53, 55, 59, 60, 61, 64, 65], "for_m": 49, "for_you": 49, "forc": [0, 1, 61], "form": 1, "formal_titl": 49, "format": [1, 8, 17, 22, 47, 48, 61, 62, 64], "former": [45, 46], "formula": [14, 33, 59, 64, 70], "fornt": 1, "forward": [0, 1, 7, 39, 41, 61, 64], "forward_flow": 35, "found": [1, 5, 28, 30, 33, 61, 69], "four": [1, 8], "fourth": 33, "frac": 55, "fraction": 59, "framework": [49, 50, 62], "frequenc": [28, 31, 44, 64], "frequency_dict": 28, "fridai": 34, "from": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 19, 21, 28, 29, 31, 32, 33, 34, 35, 36, 39, 41, 42, 49, 50, 51, 53, 55, 56, 57, 58, 61, 62, 64, 65, 66, 67, 71], "full": [1, 2, 37], "full_empirical_dataset": 1, "fulli": [32, 48], "functinon": 12, "function": [1, 2, 3, 4, 10, 11, 12, 13, 14, 16, 20, 21, 23, 28, 31, 39, 44, 45, 46, 50, 56, 57, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73], "function_mimic_word": 28, "function_mimicry_scor": 28, "function_word_mimicri": 28, "function_word_refer": 28, "fund": 62, "further": [1, 2, 61, 71], "futur": [23, 66], "g": [0, 1, 4, 15, 20, 29, 31, 32, 36, 37, 38, 41, 42, 47, 48, 52, 54, 59, 63, 65, 66, 71], "game": [1, 2, 59, 71], "gaug": [33, 52], "gener": [0, 1, 2, 9, 11, 12, 16, 21, 31, 34, 35, 36, 40, 42, 45, 46, 49, 51, 59, 61, 67, 69, 71, 72], "generaliz": 23, "generate_bert": 67, "generate_certainty_pkl": 67, "generate_lexicon_pkl": 67, "generate_vect": 67, "gensim": 40, "get": [0, 16, 20, 21, 28, 30, 31, 36, 49, 66], "get_all_dd_featur": 11, "get_averag": 72, "get_centroid": 66, "get_certainti": 5, "get_certainty_scor": 64, "get_content_words_in_messag": 28, "get_conversation_level_aggreg": 65, "get_cosine_similar": 6, "get_dale_chall_easy_word": [21, 70], "get_dale_chall_score_and_classf": 64, "get_dd": 6, "get_dd_featur": 8, "get_dep_pair": [19, 49], "get_dep_pairs_noneg": [19, 49], "get_discursive_diversity_featur": 65, "get_first_pct_of_chat": 2, "get_first_person_word": [12, 70], "get_forward_flow": [7, 64], "get_function_word": 70, "get_function_words_in_messag": 28, "get_gini": 68, "get_gini_featur": 65, "get_info_divers": 13, "get_info_exchange_wordcount": 12, "get_liwc_r": 14, "get_max": 72, "get_mimicry_bert": 28, "get_min": 72, "get_moving_mimicri": 28, "get_named_ent": 64, "get_nan_vector": 27, "get_polarity_scor": 24, "get_politeness_strategi": 17, "get_politeness_v2": 18, "get_proportion_first_pronoun": 16, "get_question_word": 70, "get_reddit_featur": 64, "get_senti": 67, "get_stdev": 72, "get_subjectivity_scor": 24, "get_sum": 72, "get_team_bursti": 4, "get_temporal_featur": [4, 64], "get_time_diff": 23, "get_time_diff_startend": 23, "get_turn": 25, "get_turn_id": 71, "get_turn_taking_featur": 65, "get_unique_pairwise_combo": 6, "get_user_average_datafram": 72, "get_user_level_aggreg": 65, "get_user_level_averaged_featur": 66, "get_user_level_summary_statistics_featur": 66, "get_user_level_summed_featur": 66, "get_user_network": [11, 66], "get_user_sum_datafram": 72, "get_variance_in_dd": 26, "get_within_person_disc_rang": 27, "get_word_ttr": 16, "get_zscore_across_all_chat": 73, "get_zscore_across_all_convers": 73, "gina": 62, "gini": [39, 62, 65, 68], "gini_coeffici": [11, 69], "github": [0, 1, 2, 18, 71], "give": [1, 29, 37, 61], "give_ag": 49, "given": [5, 6, 13, 14, 28, 30, 31, 33, 34, 35, 36, 40, 41, 55, 59, 66, 67, 71], "go": [1, 34, 35, 45, 46, 50, 62], "goal": 62, "good": [50, 56, 62], "goodby": 49, "googl": [0, 1], "got": [31, 36], "gotta": [31, 36], "grade": 33, "grader": 21, "grai": 35, "grammat": 36, "granularli": 35, "grate": 62, "gratitud": [17, 49, 50], "great": [47, 50, 51, 56, 59, 60, 62], "greater": 55, "greet": 50, "groceri": 41, "group": [0, 1, 2, 4, 13, 29, 33, 34, 41, 52, 59, 62, 68, 71, 72], "grouping_kei": [0, 1, 2, 71], "gt": 22, "guess": 10, "gun": 1, "gy": 15, "gym": 34, "ha": [1, 2, 32, 34, 35, 37, 42, 43, 46, 52, 54, 55, 56, 59, 62, 63, 71], "had": [1, 31, 36, 54, 61], "hadn": [31, 36], "handl": [19, 29, 71], "happen": [1, 2, 55, 62, 63], "happi": 42, "harder": 21, "hashedg": [17, 50], "hasn": [31, 36], "hasneg": 50, "hasposit": 50, "hate": 31, "have": [0, 1, 2, 10, 12, 16, 31, 34, 36, 37, 40, 41, 42, 45, 46, 50, 54, 59, 60, 61, 62, 71], "haven": [31, 36], "he": [1, 31, 36], "header": 18, "hear": 32, "heart": [61, 62], "heat": 1, "heavi": 62, "hedg": [11, 30, 39, 49, 50, 64], "hei": [1, 35, 45, 46, 50], "helena": [47, 62], "hello": [43, 49], "help": [0, 31, 34, 36, 43, 45, 46, 52, 58, 69], "helper": [23, 67], "her": [30, 31, 36], "here": [0, 1, 29, 34, 41, 42, 47, 61, 66], "herself": [31, 36], "hesit": [60, 64], "hi": [31, 35, 36, 43, 45, 46], "hierach": 71, "hierarch": 71, "high": [0, 1, 2, 61, 62, 71], "higher": [21, 31, 34, 36, 40, 41, 42, 44, 45, 46, 55, 60], "highest": 71, "highlight": 1, "him": [31, 36], "himself": [31, 36], "hmm": [31, 36], "hoc": 62, "hole": 62, "home": 42, "homework": 34, "homonym": 31, "hope": 35, "host": [45, 46], "hour": 48, "how": [1, 5, 29, 30, 31, 34, 35, 36, 39, 43, 45, 51, 52, 54, 56, 62], "howev": [0, 1, 3, 35, 40, 42, 44, 54, 56, 61, 62], "howitwork": 1, "html": [15, 17, 24], "http": [1, 2, 4, 5, 6, 12, 13, 14, 15, 16, 17, 18, 21, 24, 41, 45, 46, 47, 64, 68, 70, 71], "hu": [1, 42, 62], "hug": 51, "huggingfac": 1, "huh": [31, 32, 36], "human": [37, 50, 62], "hyperlink": 48, "hyphen": [1, 61], "hypothet": 42, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 23, 24, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 73], "iby1": 5, "id": [2, 4, 7, 23, 28, 62, 66, 68, 71, 72, 73], "idea": [12, 35, 40, 47, 51], "ident": [34, 35], "identif": 1, "identifi": [0, 1, 2, 4, 8, 9, 15, 23, 25, 30, 41, 47, 50, 52, 61, 63, 64, 71, 72], "identiif": [13, 71], "ignor": [1, 32], "illustr": [1, 41, 48, 62], "imagin": 1, "immedi": [31, 35, 64], "impact": [1, 60], "impersonal_pronoun": 49, "implement": 64, "impli": 37, "import": [31, 32, 36, 44, 45, 62, 69], "incent": 13, "includ": [1, 2, 10, 17, 22, 31, 32, 35, 36, 42, 45, 46, 51, 52, 56, 62, 66, 71], "inclus": [13, 71], "incongru": [8, 34], "incorpor": [1, 42, 45, 46], "increas": [1, 42, 62], "increment": 71, "independ": 1, "index": [1, 2, 4, 13, 25, 37, 39, 55, 61, 65], "indic": [1, 2, 16, 21, 22, 30, 32, 34, 35, 36, 40, 41, 43, 44, 48, 49, 50, 52, 55, 60, 63, 71], "indirect": 50, "indirect_btw": 50, "indirect_greet": 50, "indirectli": 69, "individu": [0, 1, 5, 11, 31, 34, 37, 45, 50, 59, 60, 62, 72], "inequ": 37, "infer": [1, 51, 67], "influenc": 1, "info": [13, 14, 18, 64], "info_divers": 13, "info_exchang": 64, "info_exchange_wordcount": [41, 64], "info_exchange_zscor": 11, "inform": [1, 6, 11, 12, 13, 24, 32, 34, 39, 48, 62, 64, 65], "informal_titl": 49, "information_divers": 11, "initi": [2, 62, 63, 64, 65, 66], "input": [0, 2, 4, 6, 12, 13, 14, 15, 16, 19, 20, 21, 22, 28, 31, 50, 55, 60, 62, 63, 64, 65, 66, 67, 71, 72], "input_column": [65, 66], "input_data": [25, 68, 72], "input_df": [1, 2, 61, 71], "inquiri": [30, 39, 52], "insid": 1, "insight": 1, "inspir": 15, "instal": [1, 61, 62], "instanc": [1, 22, 50, 59, 66], "instanti": 2, "insteac": 1, "instead": [1, 2, 62], "instruct": [1, 61], "int": [2, 3, 10, 13, 15, 16, 19, 20, 21, 22, 28, 63, 64], "intact": 71, "integ": [13, 40, 47], "intend": 59, "interact": [1, 11, 43, 44, 62, 69], "interconnect": 62, "interest": [1, 61, 62], "interfac": 62, "intermedi": [59, 64], "intern": 29, "interpret": 23, "interrupt": 59, "interv": [58, 65], "introduc": 62, "introduct": [11, 61], "invalid": 67, "invers": 64, "involv": [41, 62, 65], "io": [24, 47], "ipynb": [0, 1], "is_hedged_sentence_1": 10, "isn": [1, 31, 36], "issu": [1, 31, 36, 37, 42, 61], "ital": 64, "italic": 22, "item": [0, 71], "its": [0, 2, 15, 31, 35, 36, 40, 41, 47, 54, 55, 64, 69], "itself": [23, 31, 36, 44], "john": 1, "jonson": 62, "journal": [5, 64], "jurafski": 70, "juri": 1, "juries_df": 1, "jury_conversations_with_outcome_var": 1, "jury_feature_build": 1, "jury_output_chat_level": 1, "jury_output_conversation_level": 1, "jury_output_turn_level": 1, "jury_output_user_level": 1, "just": [0, 1, 2, 31, 36, 46, 50, 59, 61, 62], "katharina": 34, "keep": [1, 71], "kei": [1, 2, 4, 19, 28, 30, 54, 61, 71], "keyerror": 71, "keyword": [19, 49], "kind": [10, 62], "kitchen": 42, "knob": 0, "know": [1, 30], "knowledg": 29, "known": [1, 32, 61], "kumar": 62, "kw": 19, "lab": [1, 2, 62, 71], "label": [1, 15, 21, 51], "lack": [31, 38, 45, 46], "languag": [15, 34, 42, 50, 62], "larg": [31, 69], "larger": [0, 61], "last": [1, 31], "late": 32, "later": [0, 1, 2, 61], "latter": [31, 36], "lda": [13, 40], "learn": [61, 62], "least": [10, 32, 42, 63, 67], "led": 62, "legal": 49, "lemmat": [13, 40], "len": 28, "length": [14, 35, 39, 41, 42, 44], "less": [13, 32, 50, 52, 55, 62, 63], "let": [41, 49, 53], "let_me_know": 49, "letter": [49, 71], "level": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 14, 16, 23, 61, 64, 65, 66, 71, 72], "lexic": [10, 12, 14, 16, 31, 32, 36, 60, 62, 64], "lexical_featur": [14, 64], "lexical_features_v2": [10, 11], "lexicon": [5, 10, 14, 30, 39, 50, 52, 67, 69], "lexicons_dict": 67, "librari": [34, 51, 56, 57], "lift": 62, "light": 61, "like": [1, 22, 31, 34, 36, 41, 50, 61, 62], "limiat": 32, "limit": [11, 32, 37, 42, 54], "line": [0, 1, 19, 22, 48, 61, 62, 64], "linear": 64, "linguist": [18, 19, 30, 39, 50, 52], "link": [22, 29, 48, 50, 64], "list": [1, 2, 6, 7, 10, 11, 12, 13, 15, 19, 20, 21, 22, 28, 31, 33, 36, 37, 42, 48, 49, 50, 53, 54, 61, 64, 65, 66, 68, 70, 71], "literatur": 62, "littl": 38, "littlehors": 1, "liu": [42, 52], "live": [1, 54], "liwc": [14, 30, 39, 51, 52, 56, 62], "liwc_featur": [10, 14], "lix": 34, "ll": [1, 31, 36, 61], "load": [19, 69], "load_saved_data": 19, "load_to_dict": 19, "load_to_list": 19, "loc": 15, "local": [1, 51, 61], "locat": [1, 62], "long": [4, 42], "longer": [30, 41, 43, 48, 61, 62], "look": [2, 34, 61, 65, 66], "loos": 36, "lot": [31, 36], "loud": 60, "love": [31, 56], "low": [1, 2, 29, 55, 60, 71], "lower": [21, 31, 33, 36, 41, 42, 44, 55, 60], "lowercas": [2, 13, 40, 48, 49, 71], "lowest": 71, "lpearl": 16, "lst": 6, "m": [30, 31, 36], "made": [1, 23, 35, 59, 61, 62], "magnitud": 55, "mai": [1, 2, 11, 31, 32, 35, 36, 37, 41, 42, 43, 44, 54, 61, 62, 71], "main": [1, 2, 5, 62, 64, 65, 66], "make": [0, 1, 5, 34, 42, 55, 56, 62, 66, 69, 71], "man": 62, "mani": [1, 4, 11, 32, 37, 41, 60, 62, 66], "manner": [55, 62], "manual": [1, 61], "map": [13, 34], "mark": [19, 20, 22, 43, 54, 64, 71], "marker": [18, 32, 39, 42, 50, 51, 52, 54, 56], "marlow": 44, "matarazzo": 62, "match": [5, 16, 19, 30], "math": 34, "matter": 47, "max": 66, "max_num_chunk": 63, "maxim": [34, 35, 37], "maximum": [63, 65, 72], "mayb": [38, 47], "mcfarland": 70, "me": [31, 32, 36, 41, 50, 53], "mean": [0, 1, 4, 6, 11, 13, 21, 29, 31, 34, 36, 40, 41, 42, 47, 55, 56, 58, 61, 62, 65, 66, 73], "meaning": [41, 55], "meaningless": 41, "meant": 39, "measur": [0, 7, 12, 13, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 64, 68], "mechan": 32, "medium": 21, "meet": 48, "member": [13, 34, 37, 55], "merg": [2, 8, 65, 66], "merge_conv_data_with_origin": 2, "messag": [0, 1, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 39, 41, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 71, 73], "messaga": 61, "message_col": [0, 1, 2, 12, 13, 14, 61, 64, 65, 67, 71], "message_embed": [6, 7, 8], "message_lower_with_punc": 71, "metadata": [0, 1], "method": [5, 31, 41, 50, 62], "metric": [1, 8, 30, 34, 35, 46, 47, 48, 55, 66], "michael": 1, "mid": [1, 2, 71], "middl": [21, 34, 63], "might": [0, 1, 29, 43, 48, 53], "mikeyeoman": [14, 18, 64], "mileston": 34, "mimic": [28, 31, 36, 45], "mimic_word": 28, "mimick": [28, 31, 64], "mimicri": [0, 1, 28, 31, 35, 36, 39, 61, 64], "mimicry_bert": [45, 46], "mind": [1, 35, 50], "mine": [31, 36, 53, 59], "minim": [0, 41, 60], "minimum": [65, 72], "minu": [12, 41, 64], "minut": [55, 58], "mirror": 1, "miss": [1, 32, 61, 71], "mitig": [31, 36], "mizil": 50, "mm": [31, 36], "mnsc": 6, "modal": 50, "mode": 60, "model": [1, 13, 15, 31, 34, 35, 36, 40, 45, 46, 47, 51, 62, 67], "modif": 35, "modifi": [9, 19, 32, 64], "modul": [0, 1, 11, 34, 49, 61, 69], "monologu": 59, "more": [0, 1, 2, 11, 12, 22, 23, 24, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 45, 46, 50, 52, 55, 59, 61, 62, 71], "morn": 1, "most": [24, 31, 55, 62, 69], "motiv": 61, "move": [0, 1, 28, 31, 36, 39, 45, 59, 61], "movi": 31, "much": [1, 31, 34, 35, 36, 45, 62], "multi": [1, 2, 71], "multidimension": [45, 46], "multipl": [0, 1, 2, 19, 62, 71], "must": [1, 6, 62, 71], "my": [30, 31, 35, 36, 45, 46, 50, 53], "my_chat_featur": 1, "my_feature_build": 61, "my_fil": [0, 1], "my_output_chat_level": 61, "my_output_conversation_level": 61, "my_output_user_level": 61, "my_pandas_datafram": 61, "myself": [31, 36, 53], "n": [35, 45, 46, 47, 57, 59, 60], "n_chat": 59, "na": [5, 33, 43, 44, 48, 49, 50, 53, 58], "naiv": [2, 20, 32, 34, 38, 39, 53, 56, 57, 64], "name": [0, 1, 2, 4, 7, 8, 9, 12, 13, 14, 15, 17, 19, 23, 25, 28, 30, 32, 35, 39, 45, 46, 50, 51, 56, 61, 63, 64, 66, 67, 68, 71, 72, 73], "name_to_train": 47, "named_ent": [15, 47], "named_entity_recognition_featur": 11, "nan": 34, "nate": [35, 45, 46], "nathaniel": [35, 45, 46], "nativ": 50, "natur": [43, 55], "ndarrai": 68, "nearest": [13, 40], "nearli": 62, "necessari": [63, 67], "need": [0, 1, 2, 21, 62, 66, 67], "need_sent": 67, "need_senti": 67, "neg": [24, 29, 31, 34, 35, 36, 42, 50, 51, 52, 54, 56, 62, 67], "negat": [19, 49], "negative_bert": 51, "negative_emot": [49, 51, 52, 56], "negoti": 62, "neighborhood": 54, "neither": 30, "ner": 15, "ner_cutoff": [0, 1, 2, 47, 64], "ner_train": 64, "ner_training_df": [0, 1, 2, 47, 64], "nest": [0, 1, 2, 22, 71], "net": [45, 46], "network": 11, "neutral": [5, 24, 30, 51, 55, 67], "neutral_bert": 51, "never": 1, "new": [1, 4, 13, 34, 64, 65, 66, 72], "new_column_nam": 72, "next": [1, 32, 47, 58], "nice": [50, 54], "nicknam": 1, "niculescu": 50, "night": 31, "nikhil": [59, 62], "nltk": [1, 42, 61], "nobodi": [31, 36], "nois": 32, "non": [1, 2, 28, 31, 37, 48, 62, 71], "none": [2, 19, 23, 37, 55, 64, 65, 66, 67], "nor": 30, "normal": [19, 31], "notabl": 62, "note": [1, 2, 12, 16, 20, 42, 61, 71], "notebook": [0, 1], "noth": [31, 36, 56], "noun": 1, "novel": [45, 46], "now": 1, "nowher": [31, 36], "np": 68, "ntri": 32, "null": 34, "num": 48, "num_char": 65, "num_chunk": [27, 63], "num_hedge_word": 10, "num_messag": 65, "num_named_ent": [15, 47], "num_row": 63, "num_top": 13, "num_word": [12, 16, 65], "number": [0, 3, 11, 12, 13, 15, 16, 19, 20, 21, 22, 23, 25, 28, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 47, 48, 49, 54, 56, 58, 59, 60, 62, 63, 64, 66, 69, 71, 72], "numer": [0, 1, 13, 33, 68, 72, 73], "numpi": [1, 61, 68], "o": 35, "object": [1, 2, 19, 44, 50, 57, 58, 61, 62, 64, 65, 66], "obtain": [13, 17, 23, 24, 34], "occur": [0, 4, 31, 42, 71], "occurr": 19, "off": [1, 31, 36], "offer": 0, "offici": 61, "often": [36, 47, 48, 62], "oh": [31, 36, 48], "okai": [31, 36], "older": [1, 61], "on_column": [18, 23, 28, 68, 72, 73], "onc": [2, 11, 58, 61, 62], "one": [0, 1, 2, 4, 10, 12, 19, 23, 25, 29, 31, 32, 36, 37, 47, 51, 56, 59, 61, 62, 67, 68, 71, 73], "ones": [31, 36], "onli": [0, 1, 2, 5, 11, 23, 29, 31, 32, 34, 36, 37, 45, 53, 58, 59, 61, 62, 71], "onlin": [1, 32, 39, 64], "open": [0, 62, 66], "operation": [39, 50, 59], "opinion": [24, 31], "oppos": [2, 31, 34, 35, 55], "opposit": 34, "option": [1, 2, 37, 62, 63, 67, 71], "order": [0, 1, 35, 37, 42, 71], "org": [6, 15, 21, 24, 41, 70], "origin": [1, 2, 5, 12, 21, 31, 32, 35, 36, 37, 45, 46, 49, 59], "orthogon": 34, "other": [1, 9, 11, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 40, 45, 46, 48, 51, 52, 54, 56, 58, 59, 61, 62, 64, 66, 71], "other_lexical_featur": [11, 64], "otherwis": [2, 10, 21, 23, 32, 38, 63, 67], "our": [0, 1, 2, 11, 13, 29, 31, 32, 36, 37, 39, 53, 59, 61, 71], "ourselv": 53, "out": [1, 2, 16, 19, 31, 36, 55, 60, 62], "outcom": [1, 44, 62], "output": [0, 1, 2, 10, 17, 19, 40, 61, 62, 64, 67], "output_file_path_chat_level": [0, 1, 2, 61], "output_file_path_conv_level": [0, 1, 2, 61], "output_file_path_user_level": [0, 1, 2, 61], "output_path": 67, "outsid": [1, 2, 12], "over": [1, 16, 29, 31, 34, 35, 36, 37, 53, 55, 60, 62, 71], "overal": [30, 31, 34, 36, 45, 46], "overrid": [0, 1], "overview": [0, 61, 62], "overwritten": 1, "own": [0, 1, 9, 35, 62], "p": [14, 55], "pacakg": 24, "pace": [43, 62], "packag": [17, 18, 40, 62], "pad": 19, "page": [1, 11, 29, 39, 61, 62, 69], "pair": [6, 19, 34, 49, 71], "pairwis": [6, 34], "panda": [0, 1, 2, 12, 14, 16, 23, 47, 64, 65, 66, 71, 72, 73], "paper": [4, 5, 12, 14, 18, 29, 40, 50, 64], "paragraph": 22, "param": 71, "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 47, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "paramt": 1, "pardon": 32, "parenthes": [22, 48, 64], "parenthet": [22, 48], "pars": [16, 50, 60], "part": [1, 10, 13, 29, 36, 42, 52, 71], "particip": [1, 9, 37, 62], "particl": [31, 36], "particular": [11, 32, 34, 41, 45, 47, 51, 59, 62], "particularli": 42, "partner": 32, "pass": [1, 13, 21, 47, 71], "path": [0, 1, 2, 19, 67], "path_in": 19, "pattern": [4, 11, 19, 55, 62, 67], "paus": 4, "pd": [1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 19, 23, 25, 63, 64, 65, 66, 67, 68, 71], "pdf": [5, 12, 13, 14, 16, 18, 21, 24, 64, 70], "penalti": 1, "pennebak": [12, 37, 41, 42, 52], "pennyslvania": 62, "peopl": [1, 32, 59, 62], "per": [1, 6, 9, 14, 19, 42, 63, 66, 72], "percentag": [2, 21], "perfect": [37, 59], "perform": [0, 1, 2, 16, 50], "perhap": 1, "period": [4, 34, 55], "person": [1, 8, 12, 15, 16, 32, 34, 39, 41, 42, 50, 59, 62, 64, 70], "perspect": 1, "petrocelli": 5, "pgcr_yeoman": 14, "phrase": [19, 30, 38, 54], "phrase_split": 19, "pickl": [19, 67], "piec": [36, 42, 59, 63], "pl": 50, "place": [55, 61, 62], "plan": [34, 35, 45, 46], "player": 59, "pleas": [1, 38, 49, 50, 61, 62], "please_start": 50, "point": [22, 24, 34, 35, 45, 46, 48, 52, 64, 66], "poisson": 55, "polar": [24, 39, 51, 52, 64], "polit": [1, 17, 18, 30, 32, 38, 39, 42, 51, 52, 54, 56, 64], "politeness_featur": 11, "politeness_v2": 11, "politeness_v2_help": 11, "politenessstrategi": [17, 50], "portion": 0, "posit": [0, 11, 15, 24, 29, 31, 39, 42, 50, 51, 54, 56, 62, 64, 67], "positive_affect_lexical_per_100": [51, 52, 56], "positive_bert": 51, "positive_emot": [49, 51, 52, 56], "positivity_zscor": 64, "positivity_zscore_chat": 52, "positivity_zscore_convers": 52, "possess": 31, "possibl": [1, 34, 62, 66], "possibli": [38, 62], "practic": [14, 34, 35], "pre": [1, 4, 21, 37, 49, 64], "preced": [31, 35, 71], "precend": 35, "precis": 47, "precomput": 51, "predefin": 19, "predetermin": [31, 36], "predict": [2, 47, 51, 64], "prefer": [0, 1], "preload_word_list": 69, "prep_simpl": 19, "prep_whol": 19, "preposit": [31, 36], "preproces": 48, "preprocess": [0, 2, 13, 19, 40, 43, 49, 51, 69], "preprocess_chat_data": 2, "preprocess_conversation_column": 71, "preprocess_naive_turn": 71, "preprocess_text": 71, "preprocess_text_lowercase_but_retain_punctu": 71, "presenc": [2, 32, 67], "present": [1, 2, 30, 31, 38, 55, 62, 71], "prespecifi": 19, "prevent": 51, "previou": [1, 7, 28, 31, 36, 45, 46, 58, 64, 71], "primari": 34, "print": 2, "prior": [2, 64, 71], "priya": [47, 62], "probabl": [15, 47], "problem": 62, "procedur": 62, "proceed": 46, "process": [0, 1, 2, 4, 10, 21, 37, 55, 62, 64, 65, 67, 69, 71], "prodi": 15, "produc": [2, 34], "product": 15, "professor": 62, "progress": [1, 2], "project": [54, 62], "pronoun": [12, 16, 31, 36, 39, 41, 42, 64, 70], "proper": 1, "proport": [16, 39, 42, 64], "propos": 37, "provid": [0, 1, 2, 15, 29, 30, 33, 36, 39, 44, 47, 54, 62], "proxi": 42, "pseudonym": 1, "psycholog": 42, "pub": 70, "publish": [5, 30, 64], "pubsonlin": 6, "punctuat": [16, 19, 20, 21, 28, 43, 54, 60, 71], "punctuation_seper": 19, "puncut": 48, "pure": [24, 36], "purpos": 1, "put": [34, 50, 62, 66], "py": [0, 1, 14, 49], "pypi": [1, 61], "python": [1, 32, 41, 56, 57, 61, 62, 68], "qtd": 62, "qualiti": 41, "quantifi": [31, 36, 62], "quantiti": [37, 39, 41, 47], "quartil": 50, "question": [16, 19, 20, 29, 32, 39, 49, 50, 64, 66, 68, 70], "question_num": 11, "question_word": 20, "quick": [1, 43], "quickli": 0, "quit": 40, "quot": [22, 48, 64], "quotat": [22, 48], "rabbit": 62, "rain": 41, "rais": [67, 71], "random": 55, "rang": [5, 8, 24, 30, 33, 34, 35, 40, 51, 53, 55, 56, 57], "ranganath": [16, 31, 32, 36, 38, 43, 54, 70], "ranganath2013": 70, "ranganathetal2013_detectingflirt": 16, "rapid": [1, 4], "rare": [34, 35], "rate": [14, 42, 51], "rather": [31, 34, 35, 36, 37, 45, 46, 63], "ratio": [16, 39, 64], "raw": [12, 16, 21, 31, 33, 50, 64], "re": [1, 31, 36, 42, 50, 61], "read": [0, 1, 2, 16, 21, 29, 33, 61, 62, 64, 65, 66, 67], "read_csv": 1, "read_in_lexicon": 67, "readabl": [11, 33, 64, 70], "reader": 33, "readi": 1, "readili": 62, "readthedoc": 24, "real": [1, 55], "realit": 13, "realli": [31, 36, 50], "reason": [31, 36, 45, 46, 49], "reassur": 49, "recal": 47, "recept": [18, 32, 39, 42, 50, 51, 52, 54, 56, 62, 64], "recogn": [1, 43, 47], "recognit": [0, 1, 2, 39, 64], "recommend": [42, 62], "reddit": [48, 64], "reddit_tag": 11, "redditus": 48, "reduc": 63, "reduce_chunk": 63, "redund": [42, 62], "refer": [0, 1, 11, 22, 24, 28, 31, 42, 48, 52, 62, 64, 70], "reflect": [37, 43], "regardless": 1, "regener": [0, 2, 51, 67], "regenerate_vector": [0, 1, 2, 67], "regex": [14, 16, 49], "regist": 37, "regress": 1, "regular": [5, 14, 30, 32, 42, 55, 58], "reichel": [53, 58, 60], "reidl": [4, 13], "reinvent": 62, "rel": [41, 51, 52, 55, 60, 64], "relat": [1, 61, 62, 64], "relationship": 36, "relev": [1, 29, 42, 44, 49, 51, 56, 61, 64, 65], "reli": [31, 34, 35, 36, 69], "reliabl": [33, 42], "remain": [1, 30, 71], "rememb": 1, "remov": [2, 9, 13, 19, 28, 40, 43, 48, 49, 50, 71], "remove_active_us": 9, "renam": 1, "repair": [16, 39], "repeat": [60, 71], "repetit": 60, "replac": 19, "report": [1, 61], "repres": [2, 4, 6, 7, 11, 13, 23, 31, 34, 36, 42, 45, 46, 66, 67, 68, 71, 72, 73], "represent": [34, 38], "reproduc": [36, 62], "republican": 1, "request": [32, 50, 51], "requir": [0, 1, 20, 21, 31, 55, 61, 62, 64, 65, 66, 67], "research": [1, 2, 62], "reserv": 0, "resolv": 62, "resourc": [1, 39, 48, 61, 62], "respect": [1, 2, 12, 31, 36, 37, 69], "respons": [22, 48, 55, 58, 64], "restaur": [34, 56], "restrict": 71, "result": [40, 55, 65, 72], "retain": [2, 16, 20, 21, 60, 71], "retriev": 50, "retunr": 3, "return": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 32, 43, 49, 50, 51, 55, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "reveal": 62, "revert": 50, "review": 62, "rewrit": 50, "rich": 62, "riedl": [13, 40, 55], "right": [31, 36, 61, 62], "roberta": [1, 39, 42, 52, 56, 64, 67], "robust": 13, "rocklag": [5, 30, 64], "room": 59, "root": [13, 40], "rough": [12, 54], "roughli": 31, "round": [13, 40, 59, 71], "round_num": 1, "row": [0, 1, 2, 9, 13, 25, 37, 40, 59, 63, 68, 71, 72, 73], "rowbotham": 62, "rucker": 5, "rule": [1, 69], "run": [0, 10, 12, 16, 35, 46, 47, 48, 51, 61, 69], "runtim": [1, 35], "sagepub": [5, 64], "sai": [1, 32, 50, 59], "said": [1, 36, 62], "same": [0, 1, 2, 31, 34, 37, 45, 48, 52, 59, 60, 61, 62, 71], "sampl": [61, 62], "sarcast": 48, "save": [0, 1, 2, 19, 64, 67], "save_featur": 2, "sbert": [1, 28, 31, 34, 35, 36, 45, 46, 64, 65, 67], "scale": [42, 51], "school": [21, 62], "scienc": [29, 39, 62], "scientist": [61, 62], "score": [4, 5, 11, 12, 13, 15, 21, 24, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 45, 46, 47, 51, 53, 56, 57, 64, 65, 67, 73], "script": [1, 61], "sea": 1, "seamless": 62, "search": [19, 61], "second": [0, 1, 4, 34, 42, 58, 59], "second_person": 49, "secr": [18, 49, 64], "section": [1, 29, 61], "see": [0, 1, 2, 30, 34, 38, 41, 45, 46, 47, 55, 62, 71], "seek": [5, 62], "segment": [0, 19], "select": [2, 4, 23, 28, 36, 45, 66, 67, 68, 71, 72, 73], "self": 2, "semant": [31, 34, 35, 41], "send": [1, 37, 55], "sens": [5, 31, 54, 66], "sent": [1, 37, 64], "sentenc": [0, 1, 10, 15, 19, 20, 21, 33, 34, 35, 36, 42, 45, 46, 47, 48, 54, 56, 61, 67], "sentence_pad": 19, "sentence_split": 19, "sentence_to_train": 47, "sentencis": 19, "sentiment": [1, 24, 31, 39, 42, 52, 56, 62, 64, 67], "separ": [1, 2, 19, 34, 51], "sepcifi": 1, "septemb": 40, "sequenc": [1, 59], "sequenti": 1, "seri": [12, 16, 23, 28, 42, 71, 73], "serv": 12, "set": [1, 2, 13, 23, 34, 48, 59], "set_self_conv_data": 2, "sever": [1, 30, 41, 42, 48, 51, 56, 61], "shall": 54, "share": [31, 36, 37], "she": [30, 31, 36], "shift": 34, "shop": 62, "short": [55, 58], "shorter": [13, 40, 41, 42, 43], "should": [0, 1, 2, 4, 14, 23, 28, 29, 31, 36, 47, 48, 54, 61, 62, 65, 66, 67, 68, 69, 71, 72, 73], "shouldn": [31, 36], "show": 37, "showeth": 62, "shruti": [35, 45, 46, 47, 62], "side": 31, "signal": [45, 55], "signifi": 42, "signific": [1, 61], "silent": 37, "similar": [1, 6, 7, 13, 28, 29, 31, 34, 35, 36, 40, 45, 46, 49, 62, 65], "similarli": [1, 35], "simpl": [0, 1, 16, 19, 42, 61, 62], "simpli": [1, 5, 11, 28, 56, 62], "simplifi": 1, "simplist": 41, "sinc": [1, 32, 41, 71], "singh": 62, "singl": [0, 1, 2, 11, 12, 19, 23, 31, 34, 35, 36, 37, 41, 45, 46, 59, 62, 71, 72], "singular": [12, 41, 64], "site": 16, "situat": 37, "size": [1, 13, 63], "skip": 1, "slightli": [32, 62, 63], "slow": 1, "small": 40, "so": [1, 2, 10, 30, 31, 36, 37, 50, 61, 62, 66], "social": [29, 39, 61, 62], "socsci": 16, "softwar": 62, "sohi": 62, "sol3": 4, "solut": 59, "solv": 62, "some": [0, 1, 11, 17, 29, 32, 34, 35, 37, 41, 61, 63], "somebodi": [31, 36], "someon": [22, 29, 31, 36, 47, 48, 61, 64], "someplac": [31, 36], "someth": 47, "sometim": 1, "somewhat": 35, "soon": 62, "sorri": [16, 32, 50], "sort": 10, "sound": [47, 51], "sourc": [4, 5, 6, 12, 13, 16, 17, 21, 34, 35, 50, 64, 68], "space": [34, 40, 71], "spaci": [1, 19, 47, 49, 50, 61], "span": 63, "spars": 32, "speak": [1, 31, 36, 37, 59, 60, 62], "speaker": [0, 1, 2, 6, 8, 9, 25, 31, 34, 35, 37, 38, 42, 45, 46, 61, 66, 71, 72], "speaker_id": [2, 61, 72], "speaker_id_col": [0, 1, 2, 6, 8, 9, 25, 26, 27, 61, 65, 66, 71, 72], "speaker_nicknam": [0, 1, 2, 6, 9, 59, 66, 71], "special": [0, 1, 2, 48, 71], "specif": [1, 2, 12, 32, 41, 48, 55, 61, 62, 69, 71], "specifi": [1, 2, 19, 47, 49, 67, 68, 71, 72, 73], "speciifc": 63, "spend": [51, 62], "spike": 55, "split": [19, 21, 43, 63], "spoke": 59, "spoken": [11, 37], "spread": 55, "squar": [13, 40], "ssrn": 4, "stabl": 40, "stack": 14, "stackoverflow": 68, "stage": [1, 2, 34, 71], "stamp": 55, "standard": [1, 4, 37, 40, 41, 49, 55, 58, 60, 65, 72, 73], "stanford": 70, "start": [0, 15, 19, 20, 22, 23, 50], "statement": [38, 42, 47, 48, 62, 64], "statist": [65, 66, 68], "statologi": 41, "stem": 42, "step": [1, 4, 28, 41, 45, 46, 51], "still": [41, 45, 46], "stochast": 40, "stop": [40, 62], "stopword": [13, 19], "store": [1, 12, 16, 41, 49, 51, 65, 67], "stoword": 42, "str": [2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 63, 64, 65, 66, 67, 68, 71, 72, 73], "straightforward": 29, "strategi": [17, 30, 32, 38, 39, 42, 49, 54, 64], "stream": 35, "strictli": 1, "string": [0, 1, 2, 4, 8, 12, 13, 14, 19, 23, 24, 50, 66, 67, 68, 71, 72, 73], "strongli": [1, 41, 61], "structur": [0, 36, 49], "student": [21, 33], "studi": [1, 34, 62], "style": [1, 31, 36, 59], "sub": [0, 1, 71], "subfold": 1, "subject": [5, 24, 39, 49, 64], "subjunct": 50, "sublist": 28, "submiss": 55, "subpart": [1, 71], "subsequ": [1, 30, 51, 58], "subset": 62, "substanc": 36, "substant": 31, "substanti": 1, "substr": 30, "subtask": 1, "subtract": [41, 58], "succe": 62, "success": [0, 1, 4, 31, 36, 43, 55, 58, 61], "suggest": [1, 13, 34, 42, 44, 50], "suit": [62, 64], "sum": [28, 34, 64, 65, 66, 72], "summar": [0, 1, 65, 66, 69], "summari": [65, 66, 72], "summariz": [0, 65], "summarize_featur": 69, "suppl": 6, "support": [1, 15, 61], "suppos": 1, "sure": 30, "swear": 49, "syllabl": 21, "syntax": [1, 32, 61], "system": [2, 59, 64], "t": [0, 1, 2, 15, 29, 31, 36, 45, 49, 54, 61, 62, 67], "tabl": 62, "tag": 39, "take": [1, 4, 5, 9, 14, 25, 29, 31, 34, 37, 39, 42, 55, 61, 65, 71], "taken": [59, 71], "talk": [1, 37, 47, 59, 62], "target": 15, "task": [1, 2, 59, 71], "tausczik": [12, 37, 41, 52], "tausczikpennebaker2013": 12, "team": [0, 1, 4, 11, 12, 13, 34, 39, 40, 59, 65], "team_bursti": 4, "team_comm_tool": [1, 61], "teamcommtool": 1, "technic": [29, 39, 61, 62], "teghxgbqdhgaaaaa": 5, "tempor": [0, 2, 55, 58, 64, 71], "temporal_featur": 11, "tend": [1, 34, 60], "term": [1, 28, 59], "termin": [1, 2, 61], "terribl": 51, "test": [13, 33, 47], "text": [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 28, 32, 33, 36, 42, 48, 55, 62, 64, 67, 71], "text_based_featur": 64, "textblob": [24, 39, 51, 52, 64], "textblob_sentiment_analysi": 11, "than": [1, 2, 11, 13, 31, 34, 35, 36, 37, 40, 41, 45, 46, 54, 60, 62, 63], "thee": 62, "thei": [0, 1, 29, 31, 34, 36, 37, 39, 47, 58, 59, 61, 62, 67], "them": [1, 2, 19, 28, 29, 31, 36, 50, 51, 55, 59, 61, 62, 64, 65, 66, 67], "themselv": [31, 36, 60], "theoret": 35, "theori": [34, 50], "therefor": [0, 1, 11, 37, 45, 59, 62, 69], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 16, 18, 20, 21, 23, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 73], "thing": [48, 61], "think": [1, 38, 47], "thorough": [43, 62], "those": [1, 21, 31, 36, 61], "though": [34, 42], "thought": [1, 35, 45], "thread": [1, 61], "three": [0, 1, 22, 34, 37, 40, 51, 62, 69, 71], "threshold": [15, 47], "through": [1, 45, 46, 50, 61, 62], "throughout": [31, 35, 36, 40, 45, 46, 55, 63], "tht": 35, "thu": [1, 2, 34, 35, 36, 37, 46, 55, 71], "time": [0, 1, 4, 23, 34, 35, 39, 42, 48, 51, 55, 59, 61, 62, 63, 64, 65, 66, 71], "time_diff": 55, "timediff": 4, "timestamp": [0, 1, 2, 8, 23, 58, 61, 62, 63, 64, 71], "timestamp_col": [0, 1, 2, 8, 61, 63, 64, 65, 71], "timestamp_end": [1, 23, 61, 64], "timestamp_start": [1, 23, 61, 64], "todai": [34, 35, 41, 43, 45, 46, 47], "todo": 66, "togeth": [0, 62, 66], "token": [16, 19, 39, 49, 54, 64], "token_count": [19, 49], "too": [30, 31, 36, 62], "took": [1, 59], "tool": [1, 61, 62], "toolkit": [0, 1, 11, 42, 45, 46, 55, 62], "top": [1, 50, 59], "topic": [1, 13, 34, 40, 42, 43, 65], "tormala": 5, "total": [3, 12, 16, 25, 31, 34, 36, 37, 41, 44, 53, 59, 60, 62, 63, 64, 66, 72], "touch": [1, 61], "toward": [31, 36, 38, 42, 45, 46], "tradit": 49, "train": [1, 2, 15, 64], "train_spacy_n": 15, "transcript": 0, "transfom": [45, 46], "transform": [31, 34, 35, 36, 51], "transform_utter": 50, "treat": [1, 59, 61], "tri": [50, 64], "trivial": [3, 44, 62], "troubl": [1, 61], "true": [1, 2, 37, 61, 63, 67, 71], "truncat": 2, "truth_intensifi": 49, "ttr": 64, "tupl": [0, 1, 2, 15, 19, 64], "turn": [0, 1, 2, 25, 28, 31, 32, 37, 39, 61, 64, 65, 71], "turn_count": 59, "turn_df": 71, "turn_id": 71, "turn_taking_featur": 11, "twice": 63, "twitter": 51, "two": [0, 1, 2, 23, 31, 34, 36, 41, 45, 46, 52, 62, 63], "txt": 19, "type": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 37, 39, 52, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "typic": [1, 34, 40, 41, 42, 52, 60], "u": [1, 22, 31, 36, 48, 49, 58, 61], "uci": 16, "uh": [31, 36], "ulrich": 55, "um": [31, 36, 60], "umbrella": [8, 29, 34], "uncertain": [5, 30], "uncertainti": 30, "under": [0, 1, 10, 11, 12, 28, 40], "underli": 1, "underscor": [1, 61], "understand": [0, 33, 39, 43, 48, 58, 61, 62], "understood": 33, "uninterrupt": 59, "uniqu": [0, 1, 2, 6, 9, 13, 16, 23, 25, 41, 47, 52, 60, 61, 63, 71], "univers": 62, "unix": 58, "unless": [31, 36], "unpack": 62, "until": [31, 36, 45, 46], "unzip": [1, 61], "up": [1, 17, 21, 28, 31, 35, 36, 37, 45, 46, 51, 59, 61], "updat": [1, 9, 40, 54, 61], "upenn": 1, "upload": 13, "upon": 33, "upper": 42, "us": [0, 1, 2, 3, 5, 11, 12, 13, 14, 17, 19, 24, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 60, 62, 64, 65, 66, 67, 71], "usag": [21, 24], "use_time_if_poss": 63, "user": [0, 1, 2, 9, 15, 22, 37, 47, 48, 51, 61, 62, 63, 64, 65, 66, 69, 72], "user_data": [2, 65, 66], "user_df": 9, "user_level_featur": 2, "user_list": 9, "userlevelfeaturescalcul": [2, 66, 69], "usernam": [22, 48], "utf": 1, "util": [12, 21, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "utilti": 62, "utter": [0, 1, 2, 3, 4, 5, 13, 14, 15, 16, 17, 20, 21, 23, 24, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52, 54, 58, 60, 61, 67], "v": 13, "valenc": 51, "valid": [23, 55], "valu": [1, 2, 5, 6, 10, 12, 13, 18, 19, 23, 28, 30, 31, 34, 36, 37, 40, 41, 42, 45, 46, 47, 55, 59, 64, 68, 71, 72, 73], "vari": [13, 31, 34, 35, 42], "variabl": [1, 56, 57, 64, 65, 66], "varianc": [8, 34], "variance_in_dd": 11, "variat": [4, 32], "varieti": [42, 62], "variou": [19, 42, 64, 65, 66], "vast": 62, "ve": [0, 31, 36, 50, 61], "vec": 6, "vect_data": [7, 8, 28, 64, 65, 66], "vect_path": 67, "vector": [0, 1, 2, 6, 7, 8, 13, 28, 34, 35, 40, 55, 61, 64, 65, 67], "vector_data": [1, 61], "vector_directori": [0, 1, 2, 61, 65], "vein": 45, "verb": [19, 31, 36], "verbal": 32, "veri": [5, 30, 31, 34, 35, 36, 42, 49, 54], "verifi": 2, "verit": 62, "version": [1, 12, 14, 21, 40, 51, 61], "versu": [4, 29, 47, 55, 59], "via": [3, 44], "view": 50, "visit": 41, "voila": 62, "w": 31, "wa": [1, 2, 5, 12, 31, 32, 35, 36, 47, 51, 56, 59, 62, 71], "wai": [1, 2, 29, 30, 31, 32, 34, 49, 50, 54, 56, 57, 61, 62, 66], "waiai": 62, "wait": [4, 55], "walk": 1, "walkthrough": [0, 61, 62], "want": [1, 28, 34, 59, 61, 62, 67], "warn": 50, "watt": [1, 2, 62, 71], "we": [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 14, 15, 16, 18, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 57, 58, 59, 61, 62, 66, 67, 71], "web": 70, "week": 47, "weight": 66, "welcom": 61, "well": [29, 31, 36, 55, 62], "went": 41, "were": [1, 12, 31, 36], "western": 1, "wh": [19, 31, 36], "wh_question": [32, 49, 54], "wharton": 62, "what": [1, 2, 12, 16, 20, 29, 31, 32, 34, 35, 36, 39, 41, 45, 46, 47, 50, 54, 62, 63], "whatev": [1, 31, 36], "wheel": 62, "when": [1, 16, 20, 31, 33, 36, 47, 54, 55, 59, 60, 61, 62, 69, 71], "whenev": 71, "where": [0, 1, 2, 19, 20, 28, 31, 32, 36, 37, 40, 41, 42, 48, 50, 51, 54, 59, 61, 65, 68, 73], "wherea": [31, 34, 35, 36, 43], "wherev": [31, 36], "whether": [1, 2, 10, 16, 19, 32, 37, 38, 41, 43, 47, 57, 58, 62, 63, 64, 67, 71], "which": [1, 2, 3, 4, 5, 7, 9, 12, 13, 14, 15, 16, 18, 23, 25, 28, 31, 34, 35, 36, 37, 38, 40, 41, 42, 51, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 66, 68, 69, 71, 72, 73], "while": [31, 32, 34, 36, 37, 44, 45, 46, 55, 62, 71], "whitespac": 43, "who": [20, 31, 32, 36, 47, 51, 54, 59, 60, 62], "whole": [28, 59, 62, 71], "whom": [31, 36, 54], "whose": [31, 36, 54], "why": [20, 29, 31, 36, 54], "wide": 31, "wien": 62, "wiki": [21, 29, 70], "wikipedia": [21, 33, 37, 70], "williamson": 60, "wish": [1, 2, 18], "within": [0, 1, 2, 8, 11, 16, 30, 34, 35, 36, 41, 45, 46, 52, 55, 59, 60, 62, 63, 64, 68, 71, 73], "within_group": 2, "within_person_discursive_rang": 11, "within_task": [0, 1, 2, 71], "without": [1, 19, 31, 36, 42, 47, 54, 62, 69], "won": [0, 31, 36, 45], "wonder": 56, "woolei": 4, "woollei": [13, 40, 55], "wooten": 55, "word": [3, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 28, 30, 32, 33, 37, 38, 39, 40, 41, 43, 45, 46, 48, 49, 52, 53, 54, 56, 57, 62, 64, 65, 66, 69, 70], "word_mimicri": 11, "word_start": [19, 49], "wordnet": [1, 61], "words_in_lin": 19, "work": [0, 47, 50, 55, 61, 62], "world": 55, "worri": 62, "would": [1, 29, 31, 34, 35, 36, 37, 42, 50, 54, 62], "wouldn": [31, 36], "wow": 50, "wp": 13, "write": [2, 29, 60], "www": [12, 13, 14, 18, 41, 64], "x": [0, 1, 2, 4, 46, 68], "xinlan": 62, "yashveer": 62, "ye": 19, "yeah": [31, 36], "yeoman": [14, 18, 42, 49], "yesno_quest": [32, 49, 54], "yet": 48, "ylatau": 12, "you": [0, 1, 2, 11, 24, 29, 31, 36, 37, 43, 47, 50, 59, 61, 62, 69], "your": [0, 29, 31, 32, 36, 37, 50, 59, 61, 62], "yourself": [31, 36, 50], "yuluan": 62, "yup": [31, 36], "yuxuan": 62, "z": [12, 39, 49, 51, 64, 73], "zero": [13, 52], "zhang": 62, "zheng": 62, "zhong": 62, "zhou": 62, "zscore": 41, "zscore_chat": 41, "zscore_chats_and_convers": 69, "zscore_convers": 41, "\u00bc": 47, "\u03c4": 55}, "titles": ["The Basics", "Worked Example", "feature_builder module", "basic_features module", "burstiness module", "certainty module", "discursive_diversity module", "fflow module", "get_all_DD_features module", "get_user_network module", "hedge module", "Features: Technical Documentation", "info_exchange_zscore module", "information_diversity module", "lexical_features_v2 module", "named_entity_recognition_features module", "other_lexical_features module", "politeness_features module", "politeness_v2 module", "politeness_v2_helper module", "question_num module", "readability module", "reddit_tags module", "temporal_features module", "textblob_sentiment_analysis module", "turn_taking_features module", "variance_in_DD module", "within_person_discursive_range module", "word_mimicry module", "FEATURE NAME", "Certainty", "Content Word Accommodation", "Conversational Repair", "Dale-Chall Score", "Discursive Diversity", "Forward Flow", "Function Word Accommodation", "Gini Coefficient", "Hedge", "Features: Conceptual Documentation", "Information Diversity", "Information Exchange", "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons", "Message Length", "Message Quantity", "Mimicry (BERT)", "Moving Mimicry", "Named Entity Recognition", "Online Discussion Tags", "Politeness/Receptiveness Markers", "Politeness Strategies", "Sentiment (RoBERTa)", "Positivity Z-Score", "Proportion of First Person Pronouns", "Question (Naive)", "Team Burstiness", "Textblob Polarity", "Textblob Subjectivity", "Time Difference", "Turn Taking Index", "Word Type-Token Ratio", "The Team Communication Toolkit", "Introduction", "assign_chunk_nums module", "calculate_chat_level_features module", "calculate_conversation_level_features module", "calculate_user_level_features module", "check_embeddings module", "gini_coefficient module", "Utilities", "preload_word_lists module", "preprocess module", "summarize_features module", "zscore_chats_and_conversation module"], "titleterms": {"A": 0, "One": 0, "The": [0, 61, 62], "accommod": [31, 36], "addit": 1, "advanc": 1, "assign_chunk_num": 63, "assumpt": 0, "basic": [0, 1, 29, 30, 31, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59, 60], "basic_featur": 3, "bert": 45, "bursti": [4, 55], "calculate_chat_level_featur": 64, "calculate_conversation_level_featur": 65, "calculate_user_level_featur": 66, "caveat": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "certainti": [5, 30], "chall": 33, "chat": [11, 39], "check_embed": 67, "citat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "class": 69, "code": [0, 1], "coeffici": 37, "column": 1, "commun": 61, "conceptu": 39, "configur": 1, "consider": 1, "content": [31, 61], "convers": [11, 32, 39, 62, 69], "count": [42, 59], "customiz": 0, "dale": 33, "data": 1, "demo": [0, 1], "differ": 58, "discurs": 34, "discursive_divers": 6, "discuss": 48, "divers": [34, 40], "document": [11, 39, 62], "driver": 69, "entiti": 47, "environ": [1, 61], "exampl": [1, 41, 47], "exchang": 41, "featur": [11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "feature_build": 2, "featurebuild": [1, 62], "fflow": 7, "file": [30, 34, 35, 45, 46, 47, 51], "first": 53, "flow": 35, "forward": 35, "function": [0, 36], "gener": 62, "get": [1, 61, 62], "get_all_dd_featur": 8, "get_user_network": 9, "gini": 37, "gini_coeffici": 68, "hedg": [10, 38], "high": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "implement": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "import": [1, 61], "index": 59, "indic": 61, "info_exchange_zscor": 12, "inform": [40, 41], "information_divers": 13, "input": [1, 34], "inquiri": 42, "interpret": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "introduct": 62, "intuit": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "kei": 0, "length": 43, "level": [11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "lexical_features_v2": 14, "lexicon": 42, "light": 0, "linguist": 42, "liwc": 42, "marker": 49, "messag": [43, 44], "mimicri": [45, 46], "modul": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "motiv": 62, "move": 46, "naiv": 54, "name": [29, 47], "named_entity_recognition_featur": 15, "note": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "onlin": 48, "other": [42, 69], "other_lexical_featur": 16, "ouput": 34, "our": 62, "output": [30, 35, 45, 46, 47, 51], "packag": [0, 1, 61], "paramet": 0, "person": 53, "pip": [1, 61], "polar": 56, "polit": [49, 50], "politeness_featur": 17, "politeness_v2": 18, "politeness_v2_help": 19, "posit": 52, "preload_word_list": 70, "preprocess": 71, "pronoun": 53, "proport": 53, "quantiti": 44, "question": 54, "question_num": 20, "ratio": 60, "readabl": 21, "recept": 49, "recognit": 47, "recommend": [1, 61], "reddit_tag": 22, "relat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "repair": 32, "roberta": 51, "run": 1, "sampl": [0, 1], "score": [33, 41, 52], "sentiment": 51, "speaker": [11, 59, 62, 69], "start": [1, 61, 62], "strategi": 50, "subject": 57, "summarize_featur": 72, "tabl": 61, "tag": 48, "take": 59, "team": [55, 61, 62], "technic": 11, "temporal_featur": 23, "textblob": [56, 57], "textblob_sentiment_analysi": 24, "time": 58, "token": 60, "toolkit": 61, "touch": 0, "train": 47, "troubleshoot": [1, 61], "turn": 59, "turn_taking_featur": 25, "type": 60, "us": 61, "user": 11, "util": 69, "utter": [11, 39, 62, 69], "variance_in_dd": 26, "virtual": [1, 61], "walkthrough": 1, "within_person_discursive_rang": 27, "word": [31, 36, 42, 60], "word_mimicri": 28, "work": 1, "your": 1, "z": [41, 52], "zscore_chats_and_convers": 73}}) \ No newline at end of file +Search.setIndex({"alltitles": {"A Light-Touch, One-Function Package": [[0, "a-light-touch-one-function-package"]], "Additional FeatureBuilder Considerations": [[1, "additional-featurebuilder-considerations"]], "Advanced Configuration Columns": [[1, "advanced-configuration-columns"]], "Basic Input Columns": [[1, "basic-input-columns"]], "Certainty": [[30, null]], "Citation": [[29, "citation"], [30, "citation"], [31, "citation"], [32, "citation"], [33, "citation"], [34, "citation"], [35, "citation"], [36, "citation"], [37, "citation"], [38, "citation"], [40, "citation"], [41, "citation"], [42, "citation"], [43, "citation"], [44, "citation"], [45, "citation"], [46, "citation"], [47, "citation"], [48, "citation"], [49, "citation"], [50, "citation"], [51, "citation"], [52, "citation"], [53, "citation"], [54, "citation"], [55, "citation"], [56, "citation"], [57, "citation"], [58, "citation"], [59, "citation"], [60, "citation"]], "Configuring the FeatureBuilder": [[1, "configuring-the-featurebuilder"]], "Content Word Accommodation": [[31, null]], "Contents:": [[61, null]], "Conversation-Level Features": [[11, "conversation-level-features"], [39, "conversation-level-features"]], "Conversational Repair": [[32, null]], "Customizable Parameters": [[0, "customizable-parameters"]], "Dale-Chall Score": [[33, null]], "Demo / Sample Code": [[0, "demo-sample-code"], [1, "demo-sample-code"]], "Discursive Diversity": [[34, null]], "Example:": [[41, "example"]], "FEATURE NAME": [[29, null]], "Feature Documentation": [[62, "feature-documentation"]], "Features: Conceptual Documentation": [[39, null]], "Features: Technical Documentation": [[11, null]], "Forward Flow": [[35, null]], "Function Word Accommodation": [[36, null]], "Generating Features: Utterance-, Speaker-, and Conversation-Level": [[62, "generating-features-utterance-speaker-and-conversation-level"]], "Getting Started": [[1, "getting-started"], [61, "getting-started"], [62, "getting-started"]], "Gini Coefficient": [[37, null]], "Hedge": [[38, null]], "High*Level Intuition": [[54, "high-level-intuition"]], "High-Level Intuition": [[29, "high-level-intuition"], [30, "high-level-intuition"], [31, "high-level-intuition"], [32, "high-level-intuition"], [33, "high-level-intuition"], [34, "high-level-intuition"], [35, "high-level-intuition"], [36, "high-level-intuition"], [37, "high-level-intuition"], [38, "high-level-intuition"], [40, "high-level-intuition"], [41, "high-level-intuition"], [42, "high-level-intuition"], [43, "high-level-intuition"], [44, "high-level-intuition"], [45, "high-level-intuition"], [46, "high-level-intuition"], [47, "high-level-intuition"], [48, "high-level-intuition"], [49, "high-level-intuition"], [50, "high-level-intuition"], [51, "high-level-intuition"], [52, "high-level-intuition"], [53, "high-level-intuition"], [55, "high-level-intuition"], [56, "high-level-intuition"], [57, "high-level-intuition"], [58, "high-level-intuition"], [59, "high-level-intuition"], [60, "high-level-intuition"]], "Implementation": [[32, "implementation"], [42, "implementation"], [52, "implementation"], [54, "implementation"]], "Implementation Basics": [[29, "implementation-basics"], [30, "implementation-basics"], [31, "implementation-basics"], [33, "implementation-basics"], [34, "implementation-basics"], [35, "implementation-basics"], [36, "implementation-basics"], [37, "implementation-basics"], [38, "implementation-basics"], [40, "implementation-basics"], [41, "implementation-basics"], [43, "implementation-basics"], [44, "implementation-basics"], [45, "implementation-basics"], [46, "implementation-basics"], [47, "implementation-basics"], [48, "implementation-basics"], [49, "implementation-basics"], [50, "implementation-basics"], [51, "implementation-basics"], [53, "implementation-basics"], [55, "implementation-basics"], [56, "implementation-basics"], [57, "implementation-basics"], [58, "implementation-basics"], [59, "implementation-basics"], [60, "implementation-basics"]], "Implementation Notes/Caveats": [[29, "implementation-notes-caveats"], [30, "implementation-notes-caveats"], [31, "implementation-notes-caveats"], [33, "implementation-notes-caveats"], [34, "implementation-notes-caveats"], [35, "implementation-notes-caveats"], [36, "implementation-notes-caveats"], [38, "implementation-notes-caveats"], [40, "implementation-notes-caveats"], [41, "implementation-notes-caveats"], [43, "implementation-notes-caveats"], [44, "implementation-notes-caveats"], [45, "implementation-notes-caveats"], [46, "implementation-notes-caveats"], [47, "implementation-notes-caveats"], [48, "implementation-notes-caveats"], [49, "implementation-notes-caveats"], [50, "implementation-notes-caveats"], [51, "implementation-notes-caveats"], [53, "implementation-notes-caveats"], [55, "implementation-notes-caveats"], [56, "implementation-notes-caveats"], [57, "implementation-notes-caveats"], [58, "implementation-notes-caveats"], [59, "implementation-notes-caveats"]], "Import Recommendations: Virtual Environment and Pip": [[1, "import-recommendations-virtual-environment-and-pip"], [61, "import-recommendations-virtual-environment-and-pip"]], "Importing the Package": [[1, "importing-the-package"]], "Indices and Tables": [[61, "indices-and-tables"]], "Information Diversity": [[40, null]], "Information Exchange": [[41, null]], "Input File": [[34, "id2"]], "Interpretation:": [[41, "interpretation"]], "Interpreting the Feature": [[29, "interpreting-the-feature"], [30, "interpreting-the-feature"], [31, "interpreting-the-feature"], [32, "interpreting-the-feature"], [33, "interpreting-the-feature"], [34, "interpreting-the-feature"], [35, "interpreting-the-feature"], [36, "interpreting-the-feature"], [37, "interpreting-the-feature"], [38, "interpreting-the-feature"], [40, "interpreting-the-feature"], [41, "interpreting-the-feature"], [42, "interpreting-the-feature"], [43, "interpreting-the-feature"], [44, "interpreting-the-feature"], [45, "interpreting-the-feature"], [46, "interpreting-the-feature"], [47, "interpreting-the-feature"], [48, "interpreting-the-feature"], [49, "interpreting-the-feature"], [50, "interpreting-the-feature"], [51, "interpreting-the-feature"], [52, "interpreting-the-feature"], [53, "interpreting-the-feature"], [54, "interpreting-the-feature"], [55, "interpreting-the-feature"], [56, "interpreting-the-feature"], [57, "interpreting-the-feature"], [58, "interpreting-the-feature"], [59, "interpreting-the-feature"], [60, "interpreting-the-feature"]], "Introduction": [[62, null]], "Key Assumptions and Parameters": [[0, "key-assumptions-and-parameters"]], "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons": [[42, null]], "Message Length": [[43, null]], "Message Quantity": [[44, null]], "Mimicry (BERT)": [[45, null]], "Motivation": [[62, "motivation"]], "Moving Mimicry": [[46, null]], "Named Entity Recognition": [[47, null]], "Named Entity Training Examples": [[47, "id2"]], "Online Discussion Tags": [[48, null]], "Other Utilities": [[69, "other-utilities"]], "Ouput File": [[34, "id3"]], "Our Team": [[62, "our-team"]], "Output File": [[30, "id2"], [35, "id2"], [45, "id2"], [46, "id2"], [47, "id3"], [51, "id1"]], "Package Assumptions": [[0, "package-assumptions"]], "Politeness Strategies": [[50, null]], "Politeness/Receptiveness Markers": [[49, null]], "Positivity Z-Score": [[52, null]], "Proportion of First Person Pronouns": [[53, null]], "Question (Naive)": [[54, null]], "Related Features": [[29, "related-features"], [30, "related-features"], [31, "related-features"], [32, "related-features"], [33, "related-features"], [34, "related-features"], [35, "related-features"], [36, "related-features"], [37, "related-features"], [38, "related-features"], [40, "related-features"], [41, "related-features"], [42, "related-features"], [43, "related-features"], [44, "related-features"], [45, "related-features"], [46, "related-features"], [47, "related-features"], [48, "related-features"], [49, "related-features"], [50, "related-features"], [51, "related-features"], [52, "related-features"], [53, "related-features"], [54, "related-features"], [55, "related-features"], [56, "related-features"], [57, "related-features"], [58, "related-features"], [59, "related-features"], [60, "related-features"]], "Sentiment (RoBERTa)": [[51, null]], "Speaker Turn Counts": [[59, "id2"]], "Speaker- (User) Level Features": [[11, "speaker-user-level-features"]], "Team Burstiness": [[55, null]], "Textblob Polarity": [[56, null]], "Textblob Subjectivity": [[57, null]], "The Basics": [[0, null]], "The FeatureBuilder": [[62, "the-featurebuilder"]], "The Team Communication Toolkit": [[61, null]], "Time Difference": [[58, null]], "Troubleshooting": [[1, "troubleshooting"], [61, "troubleshooting"]], "Turn Taking Index": [[59, null]], "Using the Package": [[61, "using-the-package"]], "Utilities": [[69, null]], "Utterance- (Chat) Level Features": [[11, "utterance-chat-level-features"], [39, "utterance-chat-level-features"]], "Walkthrough: Running the FeatureBuilder on Your Data": [[1, "walkthrough-running-the-featurebuilder-on-your-data"]], "Word Type-Token Ratio": [[60, null]], "Worked Example": [[1, null]], "assign_chunk_nums module": [[63, null]], "basic_features module": [[3, null]], "burstiness module": [[4, null]], "calculate_chat_level_features module": [[64, null]], "calculate_conversation_level_features module": [[65, null]], "calculate_user_level_features module": [[66, null]], "certainty module": [[5, null]], "check_embeddings module": [[67, null]], "discursive_diversity module": [[6, null]], "feature_builder module": [[2, null]], "fflow module": [[7, null]], "get_all_DD_features module": [[8, null]], "get_user_network module": [[9, null]], "gini_coefficient module": [[68, null]], "hedge module": [[10, null]], "info_exchange_zscore module": [[12, null]], "information_diversity module": [[13, null]], "lexical_features_v2 module": [[14, null]], "named_entity_recognition_features module": [[15, null]], "other_lexical_features module": [[16, null]], "politeness_features module": [[17, null]], "politeness_v2 module": [[18, null]], "politeness_v2_helper module": [[19, null]], "preload_word_lists module": [[70, null]], "preprocess module": [[71, null]], "question_num module": [[20, null]], "readability module": [[21, null]], "reddit_tags module": [[22, null]], "summarize_features module": [[72, null]], "temporal_features module": [[23, null]], "textblob_sentiment_analysis module": [[24, null]], "turn_taking_features module": [[25, null]], "variance_in_DD module": [[26, null]], "within_person_discursive_range module": [[27, null]], "word_mimicry module": [[28, null]], "z-scores:": [[41, "z-scores"]], "zscore_chats_and_conversation module": [[73, null]], "\u201cDriver\u201d Classes: Utterance-, Conversation-, and Speaker-Level Features": [[69, "driver-classes-utterance-conversation-and-speaker-level-features"]]}, "docnames": ["basics", "examples", "feature_builder", "features/basic_features", "features/burstiness", "features/certainty", "features/discursive_diversity", "features/fflow", "features/get_all_DD_features", "features/get_user_network", "features/hedge", "features/index", "features/info_exchange_zscore", "features/information_diversity", "features/lexical_features_v2", "features/named_entity_recognition_features", "features/other_lexical_features", "features/politeness_features", "features/politeness_v2", "features/politeness_v2_helper", "features/question_num", "features/readability", "features/reddit_tags", "features/temporal_features", "features/textblob_sentiment_analysis", "features/turn_taking_features", "features/variance_in_DD", "features/within_person_discursive_range", "features/word_mimicry", "features_conceptual/TEMPLATE", "features_conceptual/certainty", "features_conceptual/content_word_accommodation", "features_conceptual/conversational_repair", "features_conceptual/dale_chall_score", "features_conceptual/discursive_diversity", "features_conceptual/forward_flow", "features_conceptual/function_word_accommodation", "features_conceptual/gini_coefficient", "features_conceptual/hedge", "features_conceptual/index", "features_conceptual/information_diversity", "features_conceptual/information_exchange", "features_conceptual/liwc", "features_conceptual/message_length", "features_conceptual/message_quantity", "features_conceptual/mimicry_bert", "features_conceptual/moving_mimicry", "features_conceptual/named_entity_recognition", "features_conceptual/online_discussions_tags", "features_conceptual/politeness_receptiveness_markers", "features_conceptual/politeness_strategies", "features_conceptual/positivity_bert", "features_conceptual/positivity_z_score", "features_conceptual/proportion_of_first_person_pronouns", "features_conceptual/questions", "features_conceptual/team_burstiness", "features_conceptual/textblob_polarity", "features_conceptual/textblob_subjectivity", "features_conceptual/time_difference", "features_conceptual/turn_taking_index", "features_conceptual/word_ttr", "index", "intro", "utils/assign_chunk_nums", "utils/calculate_chat_level_features", "utils/calculate_conversation_level_features", "utils/calculate_user_level_features", "utils/check_embeddings", "utils/gini_coefficient", "utils/index", "utils/preload_word_lists", "utils/preprocess", "utils/summarize_features", "utils/zscore_chats_and_conversation"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["basics.rst", "examples.rst", "feature_builder.rst", "features/basic_features.rst", "features/burstiness.rst", "features/certainty.rst", "features/discursive_diversity.rst", "features/fflow.rst", "features/get_all_DD_features.rst", "features/get_user_network.rst", "features/hedge.rst", "features/index.rst", "features/info_exchange_zscore.rst", "features/information_diversity.rst", "features/lexical_features_v2.rst", "features/named_entity_recognition_features.rst", "features/other_lexical_features.rst", "features/politeness_features.rst", "features/politeness_v2.rst", "features/politeness_v2_helper.rst", "features/question_num.rst", "features/readability.rst", "features/reddit_tags.rst", "features/temporal_features.rst", "features/textblob_sentiment_analysis.rst", "features/turn_taking_features.rst", "features/variance_in_DD.rst", "features/within_person_discursive_range.rst", "features/word_mimicry.rst", "features_conceptual/TEMPLATE.rst", "features_conceptual/certainty.rst", "features_conceptual/content_word_accommodation.rst", "features_conceptual/conversational_repair.rst", "features_conceptual/dale_chall_score.rst", "features_conceptual/discursive_diversity.rst", "features_conceptual/forward_flow.rst", "features_conceptual/function_word_accommodation.rst", "features_conceptual/gini_coefficient.rst", "features_conceptual/hedge.rst", "features_conceptual/index.rst", "features_conceptual/information_diversity.rst", "features_conceptual/information_exchange.rst", "features_conceptual/liwc.rst", "features_conceptual/message_length.rst", "features_conceptual/message_quantity.rst", "features_conceptual/mimicry_bert.rst", "features_conceptual/moving_mimicry.rst", "features_conceptual/named_entity_recognition.rst", "features_conceptual/online_discussions_tags.rst", "features_conceptual/politeness_receptiveness_markers.rst", "features_conceptual/politeness_strategies.rst", "features_conceptual/positivity_bert.rst", "features_conceptual/positivity_z_score.rst", "features_conceptual/proportion_of_first_person_pronouns.rst", "features_conceptual/questions.rst", "features_conceptual/team_burstiness.rst", "features_conceptual/textblob_polarity.rst", "features_conceptual/textblob_subjectivity.rst", "features_conceptual/time_difference.rst", "features_conceptual/turn_taking_index.rst", "features_conceptual/word_ttr.rst", "index.rst", "intro.rst", "utils/assign_chunk_nums.rst", "utils/calculate_chat_level_features.rst", "utils/calculate_conversation_level_features.rst", "utils/calculate_user_level_features.rst", "utils/check_embeddings.rst", "utils/gini_coefficient.rst", "utils/index.rst", "utils/preload_word_lists.rst", "utils/preprocess.rst", "utils/summarize_features.rst", "utils/zscore_chats_and_conversation.rst"], "indexentries": {"adverb_limiter() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.adverb_limiter", false]], "assert_key_columns_present() (in module utils.preprocess)": [[71, "utils.preprocess.assert_key_columns_present", false]], "assign_chunk_nums() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.assign_chunk_nums", false]], "bare_command() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.bare_command", false]], "built_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.built_spacy_ner", false]], "burstiness() (in module features.burstiness)": [[4, "features.burstiness.burstiness", false]], "calculate_chat_level_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_chat_level_features", false]], "calculate_conversation_level_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_conversation_level_features", false]], "calculate_hedge_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_hedge_features", false]], "calculate_id_score() (in module features.information_diversity)": [[13, "features.information_diversity.calculate_ID_score", false]], "calculate_info_diversity() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_info_diversity", false]], "calculate_named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.calculate_named_entities", false]], "calculate_num_question_naive() (in module features.question_num)": [[20, "features.question_num.calculate_num_question_naive", false]], "calculate_politeness_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_sentiment", false]], "calculate_politeness_v2() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_v2", false]], "calculate_team_burstiness() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_team_burstiness", false]], "calculate_textblob_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_textblob_sentiment", false]], "calculate_user_level_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.calculate_user_level_features", false]], "calculate_vector_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_vector_word_mimicry", false]], "calculate_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_word_mimicry", false]], "chat_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.chat_level_features", false]], "chatlevelfeaturescalculator (class in utils.calculate_chat_level_features)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator", false]], "check_embeddings() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.check_embeddings", false]], "classify_ntri() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.classify_NTRI", false]], "classify_text_dalechall() (in module features.readability)": [[21, "features.readability.classify_text_dalechall", false]], "clean_text() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.clean_text", false]], "coerce_to_date_or_number() (in module features.temporal_features)": [[23, "features.temporal_features.coerce_to_date_or_number", false]], "commit_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.commit_data", false]], "compress() (in module utils.preprocess)": [[71, "utils.preprocess.compress", false]], "compute_frequency() (in module features.word_mimicry)": [[28, "features.word_mimicry.compute_frequency", false]], "computetf() (in module features.word_mimicry)": [[28, "features.word_mimicry.computeTF", false]], "concat_bert_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.concat_bert_features", false]], "conjection_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.conjection_seperator", false]], "content_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.Content_mimicry_score", false]], "conv_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.conv_level_features", false]], "conv_to_float_arr() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.conv_to_float_arr", false]], "conversationlevelfeaturescalculator (class in utils.calculate_conversation_level_features)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator", false]], "count_all_caps() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_all_caps", false]], "count_bullet_points() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_bullet_points", false]], "count_characters() (in module features.basic_features)": [[3, "features.basic_features.count_characters", false]], "count_difficult_words() (in module features.readability)": [[21, "features.readability.count_difficult_words", false]], "count_ellipses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_ellipses", false]], "count_emojis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emojis", false]], "count_emphasis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emphasis", false]], "count_line_breaks() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_line_breaks", false]], "count_links() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_links", false]], "count_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_matches", false]], "count_messages() (in module features.basic_features)": [[3, "features.basic_features.count_messages", false]], "count_numbering() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_numbering", false]], "count_parentheses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_parentheses", false]], "count_quotes() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_quotes", false]], "count_responding_to_someone() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_responding_to_someone", false]], "count_spacy_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_spacy_matches", false]], "count_syllables() (in module features.readability)": [[21, "features.readability.count_syllables", false]], "count_turn_taking_index() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turn_taking_index", false]], "count_turns() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turns", false]], "count_user_references() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_user_references", false]], "count_words() (in module features.basic_features)": [[3, "features.basic_features.count_words", false]], "create_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks", false]], "create_chunks_messages() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks_messages", false]], "create_cumulative_rows() (in module utils.preprocess)": [[71, "utils.preprocess.create_cumulative_rows", false]], "dale_chall_helper() (in module features.readability)": [[21, "features.readability.dale_chall_helper", false]], "feat_counts() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.feat_counts", false]], "feature_builder": [[2, "module-feature_builder", false]], "featurebuilder (class in feature_builder)": [[2, "feature_builder.FeatureBuilder", false]], "features.basic_features": [[3, "module-features.basic_features", false]], "features.burstiness": [[4, "module-features.burstiness", false]], "features.certainty": [[5, "module-features.certainty", false]], "features.discursive_diversity": [[6, "module-features.discursive_diversity", false]], "features.fflow": [[7, "module-features.fflow", false]], "features.get_all_dd_features": [[8, "module-features.get_all_DD_features", false]], "features.get_user_network": [[9, "module-features.get_user_network", false]], "features.hedge": [[10, "module-features.hedge", false]], "features.info_exchange_zscore": [[12, "module-features.info_exchange_zscore", false]], "features.information_diversity": [[13, "module-features.information_diversity", false]], "features.lexical_features_v2": [[14, "module-features.lexical_features_v2", false]], "features.named_entity_recognition_features": [[15, "module-features.named_entity_recognition_features", false]], "features.other_lexical_features": [[16, "module-features.other_lexical_features", false]], "features.politeness_features": [[17, "module-features.politeness_features", false]], "features.politeness_v2": [[18, "module-features.politeness_v2", false]], "features.politeness_v2_helper": [[19, "module-features.politeness_v2_helper", false]], "features.question_num": [[20, "module-features.question_num", false]], "features.readability": [[21, "module-features.readability", false]], "features.reddit_tags": [[22, "module-features.reddit_tags", false]], "features.temporal_features": [[23, "module-features.temporal_features", false]], "features.textblob_sentiment_analysis": [[24, "module-features.textblob_sentiment_analysis", false]], "features.turn_taking_features": [[25, "module-features.turn_taking_features", false]], "features.variance_in_dd": [[26, "module-features.variance_in_DD", false]], "features.within_person_discursive_range": [[27, "module-features.within_person_discursive_range", false]], "features.word_mimicry": [[28, "module-features.word_mimicry", false]], "featurize() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.featurize", false]], "function_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.function_mimicry_score", false]], "generate_bert() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_bert", false]], "generate_certainty_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_certainty_pkl", false]], "generate_lexicon_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_lexicon_pkl", false]], "generate_vect() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_vect", false]], "get_average() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_average", false]], "get_centroids() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_centroids", false]], "get_certainty() (in module features.certainty)": [[5, "features.certainty.get_certainty", false]], "get_certainty_score() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_certainty_score", false]], "get_content_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_content_words_in_message", false]], "get_conversation_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_conversation_level_aggregates", false]], "get_cosine_similarity() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_cosine_similarity", false]], "get_dale_chall_easy_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_dale_chall_easy_words", false]], "get_dale_chall_score_and_classfication() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_dale_chall_score_and_classfication", false]], "get_dd() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_DD", false]], "get_dd_features() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.get_DD_features", false]], "get_dep_pairs() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs", false]], "get_dep_pairs_noneg() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs_noneg", false]], "get_discursive_diversity_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_discursive_diversity_features", false]], "get_first_pct_of_chat() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.get_first_pct_of_chat", false]], "get_first_person_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_first_person_words", false]], "get_forward_flow() (in module features.fflow)": [[7, "features.fflow.get_forward_flow", false]], "get_forward_flow() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_forward_flow", false]], "get_function_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_function_words", false]], "get_function_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_function_words_in_message", false]], "get_gini() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.get_gini", false]], "get_gini_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_gini_features", false]], "get_info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.get_info_diversity", false]], "get_info_exchange_wordcount() (in module features.info_exchange_zscore)": [[12, "features.info_exchange_zscore.get_info_exchange_wordcount", false]], "get_liwc_rate() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.get_liwc_rate", false]], "get_max() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_max", false]], "get_mimicry_bert() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_mimicry_bert", false]], "get_min() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_min", false]], "get_moving_mimicry() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_moving_mimicry", false]], "get_named_entity() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_named_entity", false]], "get_nan_vector() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_nan_vector", false]], "get_polarity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_polarity_score", false]], "get_politeness_strategies() (in module features.politeness_features)": [[17, "features.politeness_features.get_politeness_strategies", false]], "get_politeness_v2() (in module features.politeness_v2)": [[18, "features.politeness_v2.get_politeness_v2", false]], "get_proportion_first_pronouns() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_proportion_first_pronouns", false]], "get_question_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_question_words", false]], "get_reddit_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_reddit_features", false]], "get_sentiment() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.get_sentiment", false]], "get_stdev() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_stdev", false]], "get_subjectivity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_subjectivity_score", false]], "get_sum() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_sum", false]], "get_team_burstiness() (in module features.burstiness)": [[4, "features.burstiness.get_team_burstiness", false]], "get_temporal_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_temporal_features", false]], "get_time_diff() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff", false]], "get_time_diff_startend() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff_startend", false]], "get_turn() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.get_turn", false]], "get_turn_id() (in module utils.preprocess)": [[71, "utils.preprocess.get_turn_id", false]], "get_turn_taking_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_turn_taking_features", false]], "get_unique_pairwise_combos() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_unique_pairwise_combos", false]], "get_user_average_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_average_dataframe", false]], "get_user_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_user_level_aggregates", false]], "get_user_level_averaged_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_averaged_features", false]], "get_user_level_summary_statistics_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summary_statistics_features", false]], "get_user_level_summed_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summed_features", false]], "get_user_network() (in module features.get_user_network)": [[9, "features.get_user_network.get_user_network", false]], "get_user_network() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_network", false]], "get_user_sum_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_sum_dataframe", false]], "get_variance_in_dd() (in module features.variance_in_dd)": [[26, "features.variance_in_DD.get_variance_in_DD", false]], "get_within_person_disc_range() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_within_person_disc_range", false]], "get_word_ttr() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_word_TTR", false]], "get_zscore_across_all_chats() (in module utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_chats", false]], "get_zscore_across_all_conversations() (in module utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_conversations", false]], "gini_coefficient() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.gini_coefficient", false]], "info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.info_diversity", false]], "info_exchange() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.info_exchange", false]], "is_hedged_sentence_1() (in module features.hedge)": [[10, "features.hedge.is_hedged_sentence_1", false]], "lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.lexical_features", false]], "liwc_features() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.liwc_features", false]], "load_saved_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_saved_data", false]], "load_to_dict() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_dict", false]], "load_to_lists() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_lists", false]], "merge_conv_data_with_original() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.merge_conv_data_with_original", false]], "mimic_words() (in module features.word_mimicry)": [[28, "features.word_mimicry.mimic_words", false]], "module": [[2, "module-feature_builder", false], [3, "module-features.basic_features", false], [4, "module-features.burstiness", false], [5, "module-features.certainty", false], [6, "module-features.discursive_diversity", false], [7, "module-features.fflow", false], [8, "module-features.get_all_DD_features", false], [9, "module-features.get_user_network", false], [10, "module-features.hedge", false], [12, "module-features.info_exchange_zscore", false], [13, "module-features.information_diversity", false], [14, "module-features.lexical_features_v2", false], [15, "module-features.named_entity_recognition_features", false], [16, "module-features.other_lexical_features", false], [17, "module-features.politeness_features", false], [18, "module-features.politeness_v2", false], [19, "module-features.politeness_v2_helper", false], [20, "module-features.question_num", false], [21, "module-features.readability", false], [22, "module-features.reddit_tags", false], [23, "module-features.temporal_features", false], [24, "module-features.textblob_sentiment_analysis", false], [25, "module-features.turn_taking_features", false], [26, "module-features.variance_in_DD", false], [27, "module-features.within_person_discursive_range", false], [28, "module-features.word_mimicry", false], [63, "module-utils.assign_chunk_nums", false], [64, "module-utils.calculate_chat_level_features", false], [65, "module-utils.calculate_conversation_level_features", false], [66, "module-utils.calculate_user_level_features", false], [67, "module-utils.check_embeddings", false], [68, "module-utils.gini_coefficient", false], [70, "module-utils.preload_word_lists", false], [71, "module-utils.preprocess", false], [72, "module-utils.summarize_features", false], [73, "module-utils.zscore_chats_and_conversation", false]], "named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.named_entities", false]], "num_named_entity() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.num_named_entity", false]], "other_lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.other_lexical_features", false]], "phrase_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.phrase_split", false]], "positivity_zscore() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.positivity_zscore", false]], "prep_simple() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_simple", false]], "prep_whole() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_whole", false]], "preprocess_chat_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.preprocess_chat_data", false]], "preprocess_conversation_columns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_conversation_columns", false]], "preprocess_naive_turns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_naive_turns", false]], "preprocess_text() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text", false]], "preprocess_text_lowercase_but_retain_punctuation() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text_lowercase_but_retain_punctuation", false]], "preprocessing() (in module features.information_diversity)": [[13, "features.information_diversity.preprocessing", false]], "punctuation_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.punctuation_seperator", false]], "question() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.Question", false]], "read_in_lexicons() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.read_in_lexicons", false]], "reduce_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.reduce_chunks", false]], "remove_active_user() (in module features.get_user_network)": [[9, "features.get_user_network.remove_active_user", false]], "save_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.save_features", false]], "sentence_pad() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_pad", false]], "sentence_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_split", false]], "sentenciser() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentenciser", false]], "set_self_conv_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.set_self_conv_data", false]], "text_based_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.text_based_features", false]], "token_count() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.token_count", false]], "train_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.train_spacy_ner", false]], "user_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.user_level_features", false]], "userlevelfeaturescalculator (class in utils.calculate_user_level_features)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator", false]], "utils.assign_chunk_nums": [[63, "module-utils.assign_chunk_nums", false]], "utils.calculate_chat_level_features": [[64, "module-utils.calculate_chat_level_features", false]], "utils.calculate_conversation_level_features": [[65, "module-utils.calculate_conversation_level_features", false]], "utils.calculate_user_level_features": [[66, "module-utils.calculate_user_level_features", false]], "utils.check_embeddings": [[67, "module-utils.check_embeddings", false]], "utils.gini_coefficient": [[68, "module-utils.gini_coefficient", false]], "utils.preload_word_lists": [[70, "module-utils.preload_word_lists", false]], "utils.preprocess": [[71, "module-utils.preprocess", false]], "utils.summarize_features": [[72, "module-utils.summarize_features", false]], "utils.zscore_chats_and_conversation": [[73, "module-utils.zscore_chats_and_conversation", false]], "word_start() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.word_start", false]]}, "objects": {"": [[2, 0, 0, "-", "feature_builder"]], "feature_builder": [[2, 1, 1, "", "FeatureBuilder"]], "feature_builder.FeatureBuilder": [[2, 2, 1, "", "chat_level_features"], [2, 2, 1, "", "conv_level_features"], [2, 2, 1, "", "featurize"], [2, 2, 1, "", "get_first_pct_of_chat"], [2, 2, 1, "", "merge_conv_data_with_original"], [2, 2, 1, "", "preprocess_chat_data"], [2, 2, 1, "", "save_features"], [2, 2, 1, "", "set_self_conv_data"], [2, 2, 1, "", "user_level_features"]], "features": [[3, 0, 0, "-", "basic_features"], [4, 0, 0, "-", "burstiness"], [5, 0, 0, "-", "certainty"], [6, 0, 0, "-", "discursive_diversity"], [7, 0, 0, "-", "fflow"], [8, 0, 0, "-", "get_all_DD_features"], [9, 0, 0, "-", "get_user_network"], [10, 0, 0, "-", "hedge"], [12, 0, 0, "-", "info_exchange_zscore"], [13, 0, 0, "-", "information_diversity"], [14, 0, 0, "-", "lexical_features_v2"], [15, 0, 0, "-", "named_entity_recognition_features"], [16, 0, 0, "-", "other_lexical_features"], [17, 0, 0, "-", "politeness_features"], [18, 0, 0, "-", "politeness_v2"], [19, 0, 0, "-", "politeness_v2_helper"], [20, 0, 0, "-", "question_num"], [21, 0, 0, "-", "readability"], [22, 0, 0, "-", "reddit_tags"], [23, 0, 0, "-", "temporal_features"], [24, 0, 0, "-", "textblob_sentiment_analysis"], [25, 0, 0, "-", "turn_taking_features"], [26, 0, 0, "-", "variance_in_DD"], [27, 0, 0, "-", "within_person_discursive_range"], [28, 0, 0, "-", "word_mimicry"]], "features.basic_features": [[3, 3, 1, "", "count_characters"], [3, 3, 1, "", "count_messages"], [3, 3, 1, "", "count_words"]], "features.burstiness": [[4, 3, 1, "", "burstiness"], [4, 3, 1, "", "get_team_burstiness"]], "features.certainty": [[5, 3, 1, "", "get_certainty"]], "features.discursive_diversity": [[6, 3, 1, "", "get_DD"], [6, 3, 1, "", "get_cosine_similarity"], [6, 3, 1, "", "get_unique_pairwise_combos"]], "features.fflow": [[7, 3, 1, "", "get_forward_flow"]], "features.get_all_DD_features": [[8, 3, 1, "", "conv_to_float_arr"], [8, 3, 1, "", "get_DD_features"]], "features.get_user_network": [[9, 3, 1, "", "get_user_network"], [9, 3, 1, "", "remove_active_user"]], "features.hedge": [[10, 3, 1, "", "is_hedged_sentence_1"]], "features.info_exchange_zscore": [[12, 3, 1, "", "get_info_exchange_wordcount"]], "features.information_diversity": [[13, 3, 1, "", "calculate_ID_score"], [13, 3, 1, "", "get_info_diversity"], [13, 3, 1, "", "info_diversity"], [13, 3, 1, "", "preprocessing"]], "features.lexical_features_v2": [[14, 3, 1, "", "get_liwc_rate"], [14, 3, 1, "", "liwc_features"]], "features.named_entity_recognition_features": [[15, 3, 1, "", "built_spacy_ner"], [15, 3, 1, "", "calculate_named_entities"], [15, 3, 1, "", "named_entities"], [15, 3, 1, "", "num_named_entity"], [15, 3, 1, "", "train_spacy_ner"]], "features.other_lexical_features": [[16, 3, 1, "", "classify_NTRI"], [16, 3, 1, "", "get_proportion_first_pronouns"], [16, 3, 1, "", "get_word_TTR"]], "features.politeness_features": [[17, 3, 1, "", "get_politeness_strategies"]], "features.politeness_v2": [[18, 3, 1, "", "get_politeness_v2"]], "features.politeness_v2_helper": [[19, 3, 1, "", "Question"], [19, 3, 1, "", "adverb_limiter"], [19, 3, 1, "", "bare_command"], [19, 3, 1, "", "clean_text"], [19, 3, 1, "", "commit_data"], [19, 3, 1, "", "conjection_seperator"], [19, 3, 1, "", "count_matches"], [19, 3, 1, "", "count_spacy_matches"], [19, 3, 1, "", "feat_counts"], [19, 3, 1, "", "get_dep_pairs"], [19, 3, 1, "", "get_dep_pairs_noneg"], [19, 3, 1, "", "load_saved_data"], [19, 3, 1, "", "load_to_dict"], [19, 3, 1, "", "load_to_lists"], [19, 3, 1, "", "phrase_split"], [19, 3, 1, "", "prep_simple"], [19, 3, 1, "", "prep_whole"], [19, 3, 1, "", "punctuation_seperator"], [19, 3, 1, "", "sentence_pad"], [19, 3, 1, "", "sentence_split"], [19, 3, 1, "", "sentenciser"], [19, 3, 1, "", "token_count"], [19, 3, 1, "", "word_start"]], "features.question_num": [[20, 3, 1, "", "calculate_num_question_naive"]], "features.readability": [[21, 3, 1, "", "classify_text_dalechall"], [21, 3, 1, "", "count_difficult_words"], [21, 3, 1, "", "count_syllables"], [21, 3, 1, "", "dale_chall_helper"]], "features.reddit_tags": [[22, 3, 1, "", "count_all_caps"], [22, 3, 1, "", "count_bullet_points"], [22, 3, 1, "", "count_ellipses"], [22, 3, 1, "", "count_emojis"], [22, 3, 1, "", "count_emphasis"], [22, 3, 1, "", "count_line_breaks"], [22, 3, 1, "", "count_links"], [22, 3, 1, "", "count_numbering"], [22, 3, 1, "", "count_parentheses"], [22, 3, 1, "", "count_quotes"], [22, 3, 1, "", "count_responding_to_someone"], [22, 3, 1, "", "count_user_references"]], "features.temporal_features": [[23, 3, 1, "", "coerce_to_date_or_number"], [23, 3, 1, "", "get_time_diff"], [23, 3, 1, "", "get_time_diff_startend"]], "features.textblob_sentiment_analysis": [[24, 3, 1, "", "get_polarity_score"], [24, 3, 1, "", "get_subjectivity_score"]], "features.turn_taking_features": [[25, 3, 1, "", "count_turn_taking_index"], [25, 3, 1, "", "count_turns"], [25, 3, 1, "", "get_turn"]], "features.variance_in_DD": [[26, 3, 1, "", "get_variance_in_DD"]], "features.within_person_discursive_range": [[27, 3, 1, "", "get_nan_vector"], [27, 3, 1, "", "get_within_person_disc_range"]], "features.word_mimicry": [[28, 3, 1, "", "Content_mimicry_score"], [28, 3, 1, "", "computeTF"], [28, 3, 1, "", "compute_frequency"], [28, 3, 1, "", "function_mimicry_score"], [28, 3, 1, "", "get_content_words_in_message"], [28, 3, 1, "", "get_function_words_in_message"], [28, 3, 1, "", "get_mimicry_bert"], [28, 3, 1, "", "get_moving_mimicry"], [28, 3, 1, "", "mimic_words"]], "utils": [[63, 0, 0, "-", "assign_chunk_nums"], [64, 0, 0, "-", "calculate_chat_level_features"], [65, 0, 0, "-", "calculate_conversation_level_features"], [66, 0, 0, "-", "calculate_user_level_features"], [67, 0, 0, "-", "check_embeddings"], [68, 0, 0, "-", "gini_coefficient"], [70, 0, 0, "-", "preload_word_lists"], [71, 0, 0, "-", "preprocess"], [72, 0, 0, "-", "summarize_features"], [73, 0, 0, "-", "zscore_chats_and_conversation"]], "utils.assign_chunk_nums": [[63, 3, 1, "", "assign_chunk_nums"], [63, 3, 1, "", "create_chunks"], [63, 3, 1, "", "create_chunks_messages"], [63, 3, 1, "", "reduce_chunks"]], "utils.calculate_chat_level_features": [[64, 1, 1, "", "ChatLevelFeaturesCalculator"]], "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator": [[64, 2, 1, "", "calculate_chat_level_features"], [64, 2, 1, "", "calculate_hedge_features"], [64, 2, 1, "", "calculate_politeness_sentiment"], [64, 2, 1, "", "calculate_politeness_v2"], [64, 2, 1, "", "calculate_textblob_sentiment"], [64, 2, 1, "", "calculate_vector_word_mimicry"], [64, 2, 1, "", "calculate_word_mimicry"], [64, 2, 1, "", "concat_bert_features"], [64, 2, 1, "", "get_certainty_score"], [64, 2, 1, "", "get_dale_chall_score_and_classfication"], [64, 2, 1, "", "get_forward_flow"], [64, 2, 1, "", "get_named_entity"], [64, 2, 1, "", "get_reddit_features"], [64, 2, 1, "", "get_temporal_features"], [64, 2, 1, "", "info_exchange"], [64, 2, 1, "", "lexical_features"], [64, 2, 1, "", "other_lexical_features"], [64, 2, 1, "", "positivity_zscore"], [64, 2, 1, "", "text_based_features"]], "utils.calculate_conversation_level_features": [[65, 1, 1, "", "ConversationLevelFeaturesCalculator"]], "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator": [[65, 2, 1, "", "calculate_conversation_level_features"], [65, 2, 1, "", "calculate_info_diversity"], [65, 2, 1, "", "calculate_team_burstiness"], [65, 2, 1, "", "get_conversation_level_aggregates"], [65, 2, 1, "", "get_discursive_diversity_features"], [65, 2, 1, "", "get_gini_features"], [65, 2, 1, "", "get_turn_taking_features"], [65, 2, 1, "", "get_user_level_aggregates"]], "utils.calculate_user_level_features": [[66, 1, 1, "", "UserLevelFeaturesCalculator"]], "utils.calculate_user_level_features.UserLevelFeaturesCalculator": [[66, 2, 1, "", "calculate_user_level_features"], [66, 2, 1, "", "get_centroids"], [66, 2, 1, "", "get_user_level_averaged_features"], [66, 2, 1, "", "get_user_level_summary_statistics_features"], [66, 2, 1, "", "get_user_level_summed_features"], [66, 2, 1, "", "get_user_network"]], "utils.check_embeddings": [[67, 3, 1, "", "check_embeddings"], [67, 3, 1, "", "generate_bert"], [67, 3, 1, "", "generate_certainty_pkl"], [67, 3, 1, "", "generate_lexicon_pkl"], [67, 3, 1, "", "generate_vect"], [67, 3, 1, "", "get_sentiment"], [67, 3, 1, "", "read_in_lexicons"]], "utils.gini_coefficient": [[68, 3, 1, "", "get_gini"], [68, 3, 1, "", "gini_coefficient"]], "utils.preload_word_lists": [[70, 3, 1, "", "get_dale_chall_easy_words"], [70, 3, 1, "", "get_first_person_words"], [70, 3, 1, "", "get_function_words"], [70, 3, 1, "", "get_question_words"]], "utils.preprocess": [[71, 3, 1, "", "assert_key_columns_present"], [71, 3, 1, "", "compress"], [71, 3, 1, "", "create_cumulative_rows"], [71, 3, 1, "", "get_turn_id"], [71, 3, 1, "", "preprocess_conversation_columns"], [71, 3, 1, "", "preprocess_naive_turns"], [71, 3, 1, "", "preprocess_text"], [71, 3, 1, "", "preprocess_text_lowercase_but_retain_punctuation"]], "utils.summarize_features": [[72, 3, 1, "", "get_average"], [72, 3, 1, "", "get_max"], [72, 3, 1, "", "get_min"], [72, 3, 1, "", "get_stdev"], [72, 3, 1, "", "get_sum"], [72, 3, 1, "", "get_user_average_dataframe"], [72, 3, 1, "", "get_user_sum_dataframe"]], "utils.zscore_chats_and_conversation": [[73, 3, 1, "", "get_zscore_across_all_chats"], [73, 3, 1, "", "get_zscore_across_all_conversations"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function"}, "terms": {"": [0, 1, 2, 4, 5, 9, 11, 13, 25, 28, 29, 31, 32, 34, 35, 36, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 64, 65, 66], "0": [0, 1, 2, 5, 10, 13, 16, 21, 24, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 45, 46, 47, 50, 51, 53, 55, 59, 61], "000": 42, "00222437221134802": [5, 64], "01": 51, "02": 51, "04": 40, "0496": [21, 33], "05": [13, 40, 50, 51], "06": 51, "08": 50, "09": [45, 46, 50], "1": [0, 1, 2, 3, 10, 13, 22, 24, 32, 34, 35, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 51, 53, 55, 56, 57, 59, 62], "10": [1, 5, 6, 21, 24, 33, 42, 59, 61, 64], "100": [1, 14, 21, 33, 37, 42, 47, 62], "1000": 42, "10th": 33, "1145": [21, 24], "1177": [5, 64], "11th": 33, "12": [35, 45, 46, 50], "1287": 6, "12th": 33, "13": 50, "14": 50, "15": [37, 50], "1579": [21, 33], "17": 50, "1948": 33, "195": 36, "1977": 62, "1lpngokujsx": 5, "1st": 50, "1st_person": 50, "1st_person_pl": 50, "1st_person_start": 50, "2": [1, 2, 34, 35, 41, 47, 59, 61, 62], "20": [37, 59], "2004": 42, "2007": [5, 42], "2009": 60, "2012": 55, "2013": [12, 16, 31, 32, 36, 37, 38, 41, 43, 50, 52, 54, 70], "2015": [53, 58, 60], "2016": 4, "2017": 13, "2018": [40, 44, 55], "2019": [35, 52], "2020": [18, 21, 24, 33, 49, 50, 56, 57], "2021": [1, 6, 43, 44], "2022": [13, 34], "2023": [5, 14, 30, 42, 59, 64], "2024": 40, "21": 59, "22": [41, 50], "2384068": 4, "24": [1, 61], "25": 47, "27": 50, "28": 50, "29": 50, "2nd": 50, "2nd_person": 50, "2nd_person_start": 50, "3": [0, 1, 2, 21, 34, 41, 51, 59, 61, 71], "30": 50, "3000": 33, "32": [34, 50], "3432929": [21, 24], "35": 51, "36": 50, "38": 50, "39": 49, "39512260": 68, "3n": 59, "4": [0, 5, 13, 21, 30, 33, 41, 56, 62], "42": 14, "4274": 6, "43": 50, "45": 50, "47": 50, "49": 50, "4pit4bqz6": 5, "4th": [21, 33], "5": [1, 5, 21, 30, 33, 37, 41, 59], "50": [1, 47], "52": 50, "53": 50, "57": 50, "58": 50, "5th": 33, "6": [1, 33, 43], "60": 51, "63": 50, "6365": 21, "64": 67, "68": 47, "6th": 33, "7": [30, 33, 48], "70": 50, "78": [35, 50], "7th": 33, "8": [1, 30, 33], "80": [21, 70], "82": 41, "85": 34, "86": 35, "87": 50, "89": [45, 46], "8th": 33, "9": [2, 5, 21, 30, 33, 40, 47, 50], "9123": 47, "92": 51, "93chall_readability_formula": [21, 70], "94": 15, "95": 47, "97": 51, "9855072464": 47, "9992": 47, "99954": 47, "9th": 33, "A": [1, 2, 4, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 28, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 47, 49, 50, 51, 52, 57, 59, 60, 61, 62, 66, 67, 68, 70, 71, 72, 73], "And": [1, 62], "As": [1, 31, 35, 36, 40, 45, 61], "But": [1, 50, 62], "By": [1, 42, 50], "For": [0, 1, 31, 34, 37, 41, 42, 43, 47, 49, 54, 56, 59, 62, 65], "If": [0, 1, 2, 5, 21, 29, 30, 35, 45, 47, 50, 55, 61, 62, 63, 64, 67, 71], "In": [1, 21, 30, 31, 34, 35, 36, 37, 39, 41, 42, 45, 46, 47, 50, 55, 59, 61, 62], "It": [1, 2, 31, 32, 33, 36, 37, 41, 44, 45, 46, 50, 64, 65, 66, 67, 71], "NO": 37, "NOT": [1, 61], "No": [19, 53], "Not": 41, "One": [1, 37, 61], "That": [29, 55], "The": [1, 2, 3, 4, 5, 7, 9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "Then": [1, 55, 61], "There": [1, 11, 32, 61, 66], "These": [1, 11, 17, 32, 34, 42, 48, 52, 62, 69], "To": [0, 1, 29, 31, 34, 37, 40, 55, 56, 57, 61, 62], "WITH": 21, "Will": 50, "_deviat": 55, "_preprocessed_": 0, "abil": [13, 29], "abl": [31, 36, 61], "abort": 1, "about": [1, 12, 29, 31, 36, 41, 47, 61, 62], "abov": [1, 21, 34, 61], "abstract_id": 4, "accept": [0, 1, 58, 61], "access": [0, 1, 15], "accommod": [28, 32, 39, 45, 46, 64, 65, 66], "accord": [21, 37, 59, 64, 70], "accordingli": 63, "account": [1, 29, 32, 42], "accus": 50, "achiev": [50, 62], "acknowledg": 49, "acm": [21, 24], "acommod": 36, "across": [1, 13, 28, 31, 34, 40, 41, 42, 50, 62, 64, 73], "action": 59, "activ": [1, 9, 44, 55, 71], "actual": [41, 56], "ad": [61, 62, 71], "adapt": 59, "add": [0, 1, 2, 21, 51, 61], "addit": [0, 2, 32, 34, 42, 63, 69], "addition": [0, 30, 31, 32, 54], "address": 1, "adjac": 71, "adjust": [0, 21, 37, 63], "advanc": [31, 36], "advantag": 4, "adverb": [19, 31, 36], "adverb_limit": [19, 49], "affect": [0, 1, 29, 35, 44], "affirm": 49, "after": [0, 1, 31, 34, 36, 43, 61, 62, 64], "again": [32, 34], "against": [28, 31, 36, 52], "agarw": 62, "aggreg": [0, 3, 11, 37, 44, 62, 65, 66, 72], "agre": 47, "agreement": 49, "ah": [31, 36], "ai": 62, "aim": [39, 62], "airtim": [37, 62], "al": [1, 5, 14, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "algorithm": [56, 57], "align": [35, 51], "all": [0, 1, 2, 6, 12, 13, 15, 19, 22, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 46, 48, 49, 51, 52, 55, 58, 61, 62, 64, 66, 71, 73], "allow": 1, "almaatouq": 59, "along": 1, "alongsid": 1, "alphabet": 49, "alphanumer": 71, "alreadi": [0, 1, 2, 4, 10, 12, 16, 67], "also": [0, 1, 2, 28, 30, 31, 32, 34, 36, 37, 38, 42, 47, 51, 54, 60, 61, 62, 64, 65, 67, 69, 71], "alsobai": 59, "altern": 59, "although": [1, 23, 31, 36], "alwai": [1, 55], "am": [31, 36, 42, 54, 62], "amaz": [48, 56], "ambient": 32, "american": 33, "ami": [47, 59, 62], "amic": 62, "among": [36, 37, 52, 55, 62], "amongst": [6, 35, 48], "an": [1, 2, 5, 8, 11, 12, 13, 21, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41, 42, 45, 47, 48, 50, 51, 52, 54, 59, 60, 61, 62, 63, 65, 66, 68], "analys": [1, 62], "analysi": [1, 11, 52, 62, 67, 71], "analyt": 62, "analyz": [0, 1, 2, 13, 14, 16, 17, 19, 20, 21, 22, 24, 28, 43, 52, 62, 67, 71], "analyze_first_pct": [0, 1, 2], "angri": 47, "ani": [0, 1, 29, 31, 33, 38, 54, 62, 71], "annot": [17, 50], "anoth": [30, 34, 36, 48], "answer": 29, "anybodi": [31, 36], "anyth": [1, 2, 23, 31, 36, 56], "anywher": [31, 36], "apartment": 42, "api": 47, "api_refer": 24, "apolog": [17, 50], "apologi": 49, "appear": [0, 15, 37, 38, 42, 64], "append": [1, 17, 64, 65, 66, 67], "appli": [4, 13, 14, 18, 62, 64, 69], "applic": [29, 71], "appreci": 50, "approach": [32, 38, 42, 45, 46, 49, 53, 64], "appropri": 69, "ar": [0, 1, 2, 3, 5, 9, 10, 11, 15, 17, 19, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 51, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 71], "arcross": 34, "area": 62, "aren": [31, 36], "around": 2, "arous": 48, "arrai": [6, 8, 68], "articl": [37, 50], "ask": [20, 47, 54], "ask_ag": 49, "aspect": [50, 62], "assert_key_columns_pres": 71, "assign": [31, 36, 38, 45, 46, 52, 59, 63, 71], "assign_chunk_num": 69, "associ": [4, 15, 21, 29, 30, 31, 32, 36, 40, 45, 46, 47, 48, 61], "assum": [0, 1, 2, 10, 12, 16, 23, 41, 60, 71], "assumign": 1, "assumpt": [1, 41, 61], "asterisk": 22, "attribut": [1, 11, 34, 51, 52, 56, 62], "author": [5, 31, 36, 59], "automat": [1, 61, 69], "auxiliari": [31, 36], "avail": [62, 63, 64, 67], "averag": [11, 13, 28, 30, 33, 34, 35, 40, 41, 46, 52, 64, 65, 66, 72], "avil": 62, "avoid": 30, "awar": 29, "awesom": 62, "b": [4, 34, 35, 45, 46, 55, 62], "back": 62, "bag": [32, 38, 42, 45, 46, 49, 53, 56, 57], "bare_command": [19, 49], "base": [1, 2, 15, 18, 19, 31, 32, 34, 35, 36, 37, 40, 42, 51, 52, 53, 54, 55, 56, 57, 62, 63, 64, 65, 66, 71], "basic": [10, 11, 12, 16, 61, 62], "basic_featur": 11, "batch": 67, "batch_num": 1, "batch_siz": 67, "bay": [56, 57], "bbevi": 18, "becaus": [1, 2, 12, 21, 31, 36, 40, 56, 61], "becom": [44, 61, 62], "been": [1, 2, 12, 16, 31, 36, 61], "befor": [0, 1, 2, 17, 31, 36, 45, 48], "beforehand": 64, "begin": [34, 54, 58, 61, 62, 63], "behavior": [0, 62, 63], "being": [4, 13, 14, 16, 17, 20, 21, 24, 31, 32, 36, 43, 47, 51, 55, 56, 60], "belong": [1, 42], "below": [1, 11, 21, 33, 36, 45, 48, 51, 61, 62, 69], "ber": 54, "bert": [0, 1, 31, 35, 36, 39, 46, 61, 64, 67], "bert_path": 67, "bert_sentiment_data": 64, "best": [14, 29], "better": 61, "between": [4, 6, 13, 21, 23, 24, 28, 30, 31, 34, 35, 36, 37, 40, 45, 46, 55, 58, 59, 62, 64, 65], "betwen": 34, "beyond": 2, "big": 59, "binari": [10, 32, 38], "blame": 47, "blob": 24, "block": [22, 32, 48, 59], "blog": 15, "bold": [22, 64], "bool": [2, 63, 67, 71], "bootstrap": 62, "both": [1, 2, 42, 52, 54, 55, 59, 62], "bother": 50, "bottom": 59, "bought": 41, "bound": [29, 35, 36, 37, 42, 52, 55], "boundari": [34, 35], "break": [22, 48, 64], "brief": 44, "broader": 52, "broken": 59, "btw": 50, "bug": [1, 61], "build": [1, 7, 34, 45, 46, 62], "built": 11, "built_spacy_n": 15, "bullet": [22, 48, 64], "bunch": 59, "burst": 58, "bursti": [11, 39, 58, 65], "by_the_wai": 49, "c": [12, 34, 35, 45, 46, 62], "cach": [0, 1, 2, 51, 61], "calcul": [2, 5, 11, 12, 16, 18, 21, 28, 33, 41, 48, 49, 50, 56, 57, 58, 60, 62, 63, 64, 65, 66, 67, 68, 72, 73], "calculate_chat_level_featur": 69, "calculate_conversation_level_featur": 69, "calculate_hedge_featur": 64, "calculate_id_scor": 13, "calculate_info_divers": 65, "calculate_named_ent": 15, "calculate_num_question_na": 20, "calculate_politeness_senti": 64, "calculate_politeness_v2": 64, "calculate_team_bursti": 65, "calculate_textblob_senti": 64, "calculate_user_level_featur": 69, "calculate_vector_word_mimicri": 64, "calculate_word_mimicri": 64, "call": [1, 2, 8, 13, 61, 62, 64, 69], "can": [0, 1, 11, 23, 31, 32, 33, 34, 36, 37, 42, 43, 44, 47, 48, 49, 50, 52, 54, 60, 61, 62, 69], "can_you": 49, "cannot": [1, 31, 36, 45, 46, 49, 62], "cao": [21, 24, 33, 43, 44, 56, 57, 62], "cap": [22, 48, 64], "capit": [0, 48], "captur": [29, 30, 32, 34, 35, 38, 41, 42, 55], "caract": 40, "carefulli": 60, "casa_token": 5, "case": [1, 13, 16, 29, 30, 31, 36, 37, 41, 45, 46, 51, 55, 56, 59, 61], "casual": 43, "categori": [21, 32, 45, 46, 49, 52], "caus": [31, 32, 36, 59], "caveat": 1, "center": 62, "central": 34, "centroid": [34, 66], "certain": [5, 19, 30, 42, 45, 46, 49], "certainli": 42, "certainti": [11, 38, 39, 42, 64, 67], "cfm": 4, "chall": [1, 21, 39, 64, 70], "chang": [1, 34, 50, 61, 71], "charact": [2, 3, 15, 19, 37, 49, 62, 64, 65, 66, 71], "characterist": 62, "chat": [0, 1, 2, 4, 5, 6, 7, 8, 12, 13, 14, 16, 23, 25, 28, 29, 32, 35, 36, 41, 44, 45, 46, 49, 59, 61, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "chat_data": [2, 6, 7, 8, 26, 27, 28, 63, 64, 65, 66, 67, 71], "chat_df": 14, "chat_level_data": 72, "chat_level_featur": 2, "chatlevelfeaturescalcul": [2, 17, 21, 64, 69], "chats_data": 73, "check": [19, 23, 44, 64, 67, 71], "check_embed": 69, "chen": 62, "choos": 60, "chose": 1, "chunk": [34, 59, 63], "chunk_num": 63, "circlelyt": 13, "citat": [21, 24], "cite": 50, "clarif": [16, 32, 64], "class": [1, 2, 31, 61, 62, 64, 65, 66], "classif": [21, 64], "classifi": [16, 21, 50, 56, 57], "classify_ntri": 16, "classify_text_dalechal": 21, "clean": [2, 17, 19, 67], "clean_text": 19, "clear": 1, "close": [31, 48, 62], "closer": [45, 46, 59], "clue": 62, "cmu": 12, "code": [6, 18, 29, 32, 51, 55, 61, 62, 68], "coeffici": [4, 39, 62, 65, 68], "coerce_to_date_or_numb": 23, "cognit": 62, "col": 2, "colab": [0, 1], "collabor": [59, 62], "collaps": 2, "collect": [2, 34, 49, 50, 52, 62], "colleg": 33, "column": [0, 2, 4, 6, 7, 8, 9, 12, 13, 14, 16, 18, 23, 25, 28, 51, 56, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "column_count_frequ": 28, "column_count_mim": 28, "column_mimc": 28, "column_nam": 71, "column_to_summar": 72, "com": [1, 2, 4, 5, 13, 15, 18, 64, 68, 71], "comb": 62, "combin": [0, 1, 6, 28, 61, 64, 71], "come": [12, 13, 21, 32, 33, 58, 61], "comm": [1, 61], "command": [1, 61], "comment": 48, "commit": 23, "commit_data": 19, "common": [32, 62, 64], "commonli": 37, "commun": [0, 1, 11, 44, 48, 55, 60, 62, 64], "companion": 1, "compar": [2, 31, 35, 42, 44, 45, 52, 64, 71, 73], "compat": [1, 61], "complement": [31, 36], "complet": [1, 2, 55], "complex": [35, 43, 50, 62], "compon": 50, "comprehens": [33, 48], "compress": 71, "comput": [0, 2, 4, 5, 6, 10, 11, 12, 13, 14, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 45, 46, 49, 52, 55, 62, 64, 65, 66, 69, 73], "compute_frequ": 28, "compute_vectors_from_preprocess": 0, "computetf": 28, "conain": 61, "concat_bert_featur": 64, "concaten": [19, 49, 64, 71], "concentr": 55, "concept": [29, 39, 42, 62], "conceptu": [61, 62], "concis": 43, "concret": 29, "conduct": 1, "confid": [2, 5, 15, 30, 47, 64], "conflict": 62, "confound": 44, "congruent": 34, "conjection_seper": 19, "conjunct": [19, 31, 36, 49], "conjunction_start": 49, "connect": 39, "conscious": 35, "consecut": 22, "consequ": 0, "consid": [1, 33, 37], "consider": [61, 62], "consist": [36, 40, 41], "constitut": 41, "constrain": [34, 35], "construct": [11, 55, 62], "constructor": 47, "consult": 5, "contain": [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 29, 30, 35, 38, 42, 47, 49, 55, 61, 62, 63, 64, 67, 71, 72, 73], "content": [0, 1, 12, 13, 28, 34, 36, 39, 41, 42, 45, 46, 62, 64, 67], "content_mimicry_scor": 28, "content_word_mimicri": 28, "context": [2, 32, 42, 48, 62, 71], "continu": [56, 57], "contract": 49, "contrast": 39, "contribut": [13, 34, 37, 62], "control": 1, "conv": 1, "conv_data": [2, 65], "conv_level_featur": 2, "conv_to_float_arr": 8, "convei": [6, 34, 52], "convers": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 23, 25, 28, 29, 31, 34, 35, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 58, 59, 61, 63, 64, 65, 66, 68, 71, 72, 73], "conversation_id": [2, 28, 61, 71], "conversation_id_col": [0, 1, 2, 4, 6, 7, 8, 9, 13, 23, 25, 26, 27, 61, 63, 64, 65, 66, 68, 72, 73], "conversation_num": [0, 1, 2, 6, 7, 66, 71, 73], "conversationlevelfeaturescalcul": [2, 65, 69], "convert": [8, 41, 49, 71], "convict": 5, "convokit": [17, 50, 62, 64], "coordin": 55, "copi": [0, 1], "copular": [31, 36], "core": [34, 69], "cornel": 17, "corpu": 50, "corrado": 37, "correl": [41, 55], "correspond": [30, 34, 35, 40, 49, 55, 66], "cosin": [6, 7, 13, 28, 31, 34, 35, 36, 40, 45, 46, 65], "could": [1, 31, 33, 36, 50, 54], "could_you": 49, "couldn": [31, 36], "count": [1, 3, 12, 14, 15, 16, 19, 21, 25, 28, 30, 31, 32, 36, 39, 41, 43, 44, 49, 52, 53, 54, 56, 58, 64, 65, 66], "count_all_cap": 22, "count_bullet_point": 22, "count_charact": 3, "count_difficult_word": 21, "count_ellips": 22, "count_emoji": 22, "count_emphasi": 22, "count_line_break": 22, "count_link": 22, "count_match": [19, 49], "count_messag": 3, "count_numb": 22, "count_parenthes": 22, "count_quot": 22, "count_responding_to_someon": 22, "count_spacy_match": 19, "count_syl": 21, "count_turn": 25, "count_turn_taking_index": 25, "count_user_refer": 22, "count_word": 3, "countabl": 65, "countd": 36, "counterfactu": 50, "cours": [16, 31, 34, 36, 63], "creat": [0, 1, 2, 13, 19, 31, 40, 42, 61, 62, 64, 65, 66, 71], "create_chunk": 63, "create_chunks_messag": 63, "create_cumulative_row": 71, "credit": 33, "crowd": 13, "csv": [0, 1, 2, 61, 62, 67], "cumul": [1, 2, 71], "cumulative_group": [0, 1, 2, 71], "current": [1, 11, 23, 31, 34, 35, 36, 40, 45, 46, 58, 61, 64, 71], "curt": 43, "custom": [0, 62], "custom_featur": [0, 1, 2, 61], "customiz": 62, "cut": 1, "cutoff": [2, 15, 47, 64], "d": [1, 31, 34, 36], "dale": [1, 21, 39, 64, 70], "dale_chall_help": 21, "danescu": 50, "dash": 22, "data": [0, 2, 6, 7, 8, 9, 13, 19, 20, 32, 37, 40, 41, 47, 51, 55, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "datafram": [0, 1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 37, 47, 49, 59, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "dataknowsal": 15, "dataset": [1, 2, 9, 12, 13, 28, 31, 41, 47, 52, 61, 64, 65, 66, 73], "date": [1, 61], "datetim": [23, 58], "dcosta": 62, "deal": [50, 59], "death": 1, "debat": 59, "decid": 62, "decis": [1, 13, 62], "declar": [1, 61, 62, 69], "deepli": 62, "default": [0, 1, 2, 5, 13, 16, 30, 34, 35, 42, 47, 62, 63, 66, 67, 71, 73], "defer": [17, 50], "defin": [0, 11, 21, 31, 34, 36, 40, 59, 62, 64, 65, 66, 70], "definit": [1, 3, 44], "degre": [6, 30, 36, 45, 46, 55], "delet": 29, "deliber": 1, "demo": 61, "democrat": 1, "demystifi": 62, "denomin": 59, "densiti": 60, "dep_": 49, "dep_pair": 19, "depend": [0, 1, 10, 19, 32, 49, 52, 61, 63], "deriv": [2, 11, 65, 66], "describ": [11, 62], "design": [0, 1, 2, 13, 34, 62], "desir": [2, 63, 72], "detail": [0, 1, 33, 41, 43, 61, 62], "detect": [1, 32, 37, 38, 47, 48, 49, 54], "determin": [13, 18, 31, 35, 36, 40, 45, 46, 71], "dev": 24, "develop": [5, 37, 40, 62], "deviat": [4, 5, 29, 40, 41, 55, 58, 65, 72, 73], "df": [4, 8, 9, 12, 13, 16, 18, 23, 28, 63, 71], "dict": [17, 19, 28, 67], "dictionari": [15, 17, 19, 28, 30, 42, 49, 67], "did": [1, 31, 36, 37, 47, 50, 54, 62], "didn": [31, 36], "differ": [1, 2, 4, 11, 12, 23, 29, 31, 34, 36, 37, 39, 40, 44, 45, 46, 47, 49, 55, 62, 63, 64, 65, 66, 71], "differenti": [49, 59], "difficult": [21, 33], "difficult_word": 21, "difficulti": 33, "dimens": [40, 62], "dimension": [34, 35], "dinner": 41, "direct": [34, 43, 45, 47, 50, 69], "direct_quest": [32, 50, 54], "direct_start": 50, "directli": [1, 62, 69], "directori": [0, 1, 2, 19, 61, 65, 67], "disagr": 49, "disagre": 51, "discours": [31, 36], "discret": [31, 36, 45, 46], "discurs": [0, 1, 6, 8, 39, 40, 61, 65, 66], "discursive_divers": 11, "discus": 8, "discuss": [0, 1, 31, 34, 39, 40, 42, 43, 61, 62, 71], "dispers": 68, "displai": [34, 42, 46], "dispos": 1, "distanc": [34, 35, 40], "distinct": [36, 59], "distinguish": 59, "div": 16, "diverg": [6, 34, 35], "divers": [0, 1, 6, 8, 13, 39, 61, 65], "divid": [16, 34, 59, 63], "dl": [21, 24], "do": [0, 1, 29, 31, 34, 36, 37, 43, 49, 50, 54, 62, 69], "doc": 19, "doc_top": 13, "document": [1, 17, 61, 69], "doe": [1, 2, 29, 40, 42, 43, 45, 47, 54, 61, 71], "doesn": [0, 1, 2, 29, 31, 36, 45, 61], "doi": [5, 6, 21, 24, 64], "domain": 50, "don": [31, 36, 49, 54, 62, 67], "done": [2, 50], "dot": 22, "doubl": 30, "down": [31, 36], "download": [1, 61], "download_resourc": [1, 61], "downstream": [17, 62], "dozen": 62, "drive": [62, 69], "driver": [2, 61, 64, 65, 66], "drop": [0, 2, 64], "due": [34, 59], "duncan": 62, "duplic": [1, 2, 71], "durat": [58, 63], "dure": [2, 55, 59, 62], "dynam": [59, 61], "e": [0, 1, 2, 4, 15, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 47, 48, 49, 52, 54, 56, 59, 63, 65, 66, 71], "e2": [21, 70], "each": [0, 1, 2, 3, 4, 7, 8, 9, 11, 12, 15, 17, 19, 23, 25, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "earlier": [0, 1, 2], "easi": [1, 21, 62, 70], "easier": [21, 42], "easili": 33, "easy_word": 21, "eat": 34, "echo": 31, "econom": 37, "edg": [29, 59], "edu": [1, 12, 16, 17, 70], "effect": [1, 41], "effici": 1, "effort": 55, "either": [20, 23, 52, 55], "elaps": [23, 58], "element": [1, 6], "ellips": [22, 48, 64], "els": [1, 22, 47, 64], "embed": [8, 31, 34, 35, 36, 45, 46, 65, 66, 67, 69], "emili": [30, 35, 45, 46, 47, 59, 62], "emoji": [22, 48, 64], "emoticon": 48, "emphas": [22, 48, 64], "emphasi": 48, "empirica": [1, 2, 71], "emploi": 45, "empti": [0, 2, 13], "en": [21, 24, 70], "en_core_web_sm": [1, 61], "enabl": 71, "enclos": 22, "encod": [1, 8], "encompass": 62, "encount": [1, 34, 35, 61], "encourag": 64, "end": [0, 1, 15, 20, 23, 34, 54, 62, 63], "engag": 43, "engin": 2, "english": [34, 42], "enjoi": 62, "ensur": [0, 1, 40, 49, 61, 63, 67, 71], "entir": [1, 12, 36, 40, 41, 52, 59, 62, 73], "entiti": [0, 1, 2, 15, 39, 64], "entityrecogn": 47, "entri": 28, "ep8dauru1ogvjurwdbof5h6ayfbslvughjyiv31d_as6ppbt": 5, "equal": [1, 21, 23, 34, 37, 40, 55, 59, 61, 62, 63], "equival": [0, 1, 41, 55, 61], "eric": 62, "error": [1, 16, 61], "especi": [41, 62], "essenti": [51, 71], "estim": 31, "et": [1, 5, 14, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "etc": [10, 15, 16, 17, 42], "evalu": [5, 47, 50], "evan": 62, "even": [0, 1, 2, 34, 37, 42, 62, 63, 67], "evenli": [34, 55], "event": [1, 34, 55, 61], "ever": 62, "everi": [1, 4, 13, 31, 34, 35, 36, 62], "everybodi": [31, 36], "everyon": [31, 36, 47, 62], "everyth": [31, 36, 56], "everywher": [31, 36], "evolut": 35, "evolv": [35, 71], "exactli": [1, 2, 71], "examin": [40, 62, 63], "exampl": [0, 10, 11, 15, 21, 24, 29, 31, 32, 34, 37, 42, 43, 48, 50, 51, 54, 56, 59, 60, 61, 62], "example_data": 1, "exce": 15, "exchang": [12, 35, 39, 40, 45, 55, 64], "exclud": [0, 41, 42], "exclus": [41, 42], "excus": 32, "exhibit": 35, "exist": [0, 1, 2, 55, 61, 62, 63, 64, 67], "expand": 49, "expect": [1, 37, 47], "expected_valu": 47, "explain": 29, "explan": [29, 43], "explor": [61, 62], "express": [5, 14, 30, 31, 32, 36, 38, 42, 64], "extend": 1, "extens": [43, 44], "extent": [1, 4, 7, 12, 31, 34, 35, 37, 51, 55, 59], "extern": 48, "extra": 51, "extract": [1, 17, 19, 28, 40, 50, 64], "extrem": [55, 56, 57], "face": 51, "facilit": [62, 71], "fact": [4, 35, 50, 54, 59], "factual": [17, 24, 50], "fail": [1, 61], "fals": [0, 1, 2, 31, 54, 61, 71], "famili": 42, "far": [34, 35, 46, 50, 62], "faster": 14, "feat_count": 19, "featuer": 2, "featur": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 61, 63, 64, 65, 66, 67], "feature_build": [0, 1, 61], "feature_method": [64, 65], "featurebuild": [0, 2, 47, 61, 69], "few": [48, 62], "fewer": [12, 60], "fflow": 11, "field": [13, 17], "file": [0, 1, 2, 12, 14, 19, 65, 67], "filenam": [0, 1, 19], "filenotfounderror": 67, "fill": 71, "filler": [37, 60], "filler_paus": 49, "filter": [19, 62], "final": [1, 2, 34, 42, 62], "find": [1, 19, 28, 50], "fingertip": 62, "finit": 55, "first": [0, 1, 2, 11, 12, 16, 19, 31, 34, 35, 36, 39, 40, 41, 42, 45, 46, 49, 52, 54, 59, 62, 64, 70, 71], "first_person": 12, "first_person_plur": 49, "first_person_raw": [12, 16], "first_person_singl": 49, "five": 37, "fix": 52, "flag": 71, "float": [2, 4, 5, 6, 8, 10, 13, 14, 16, 21, 24, 25, 28, 68], "floor": 59, "flow": [0, 1, 7, 31, 36, 39, 41, 45, 46, 61, 64], "focal": [31, 36], "focu": 41, "folder": [0, 1, 19], "follow": [1, 2, 14, 16, 17, 29, 31, 32, 33, 41, 42, 47, 49, 50, 53, 55, 59, 60, 61, 64, 65], "for_m": 49, "for_you": 49, "forc": [0, 1, 61], "form": 1, "formal_titl": 49, "format": [1, 8, 17, 22, 47, 48, 61, 62, 64], "former": [45, 46], "formula": [14, 33, 59, 64, 70], "fornt": 1, "forward": [0, 1, 7, 39, 41, 61, 64], "forward_flow": 35, "found": [1, 5, 28, 30, 33, 61, 69], "four": [1, 8], "fourth": 33, "frac": 55, "fraction": 59, "framework": [49, 50, 62], "frequenc": [28, 31, 44, 64], "frequency_dict": 28, "fridai": 34, "from": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 19, 21, 28, 29, 31, 32, 33, 34, 35, 36, 39, 41, 42, 49, 50, 51, 53, 55, 56, 57, 58, 61, 62, 64, 65, 66, 67, 71], "full": [1, 2, 37], "full_empirical_dataset": 1, "fulli": [32, 48], "functinon": 12, "function": [1, 2, 3, 4, 10, 11, 12, 13, 14, 16, 20, 21, 23, 28, 31, 39, 44, 45, 46, 50, 56, 57, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73], "function_mimic_word": 28, "function_mimicry_scor": 28, "function_word_mimicri": 28, "function_word_refer": 28, "fund": 62, "further": [1, 2, 61, 71], "futur": [23, 66], "g": [0, 1, 4, 15, 20, 29, 31, 32, 36, 37, 38, 41, 42, 47, 48, 52, 54, 59, 63, 65, 66, 71], "game": [1, 2, 59, 71], "gaug": [33, 52], "gener": [0, 1, 2, 9, 11, 12, 16, 21, 31, 34, 35, 36, 40, 42, 45, 46, 49, 51, 59, 61, 67, 69, 71, 72], "generaliz": 23, "generate_bert": 67, "generate_certainty_pkl": 67, "generate_lexicon_pkl": 67, "generate_vect": 67, "gensim": 40, "get": [0, 16, 20, 21, 28, 30, 31, 36, 49, 66], "get_all_dd_featur": 11, "get_averag": 72, "get_centroid": 66, "get_certainti": 5, "get_certainty_scor": 64, "get_content_words_in_messag": 28, "get_conversation_level_aggreg": 65, "get_cosine_similar": 6, "get_dale_chall_easy_word": [21, 70], "get_dale_chall_score_and_classf": 64, "get_dd": 6, "get_dd_featur": 8, "get_dep_pair": [19, 49], "get_dep_pairs_noneg": [19, 49], "get_discursive_diversity_featur": 65, "get_first_pct_of_chat": 2, "get_first_person_word": [12, 70], "get_forward_flow": [7, 64], "get_function_word": 70, "get_function_words_in_messag": 28, "get_gini": 68, "get_gini_featur": 65, "get_info_divers": 13, "get_info_exchange_wordcount": 12, "get_liwc_r": 14, "get_max": 72, "get_mimicry_bert": 28, "get_min": 72, "get_moving_mimicri": 28, "get_named_ent": 64, "get_nan_vector": 27, "get_polarity_scor": 24, "get_politeness_strategi": 17, "get_politeness_v2": 18, "get_proportion_first_pronoun": 16, "get_question_word": 70, "get_reddit_featur": 64, "get_senti": 67, "get_stdev": 72, "get_subjectivity_scor": 24, "get_sum": 72, "get_team_bursti": 4, "get_temporal_featur": [4, 64], "get_time_diff": 23, "get_time_diff_startend": 23, "get_turn": 25, "get_turn_id": 71, "get_turn_taking_featur": 65, "get_unique_pairwise_combo": 6, "get_user_average_datafram": 72, "get_user_level_aggreg": 65, "get_user_level_averaged_featur": 66, "get_user_level_summary_statistics_featur": 66, "get_user_level_summed_featur": 66, "get_user_network": [11, 66], "get_user_sum_datafram": 72, "get_variance_in_dd": 26, "get_within_person_disc_rang": 27, "get_word_ttr": 16, "get_zscore_across_all_chat": 73, "get_zscore_across_all_convers": 73, "gina": 62, "gini": [39, 62, 65, 68], "gini_coeffici": [11, 69], "github": [0, 1, 2, 18, 71], "give": [1, 29, 37, 61], "give_ag": 49, "given": [5, 6, 13, 14, 28, 30, 31, 33, 34, 35, 36, 40, 41, 55, 59, 66, 67, 71], "go": [1, 34, 35, 45, 46, 50, 62], "goal": 62, "good": [50, 56, 62], "goodby": 49, "googl": [0, 1], "got": [31, 36], "gotta": [31, 36], "grade": 33, "grader": 21, "grai": 35, "grammat": 36, "granularli": 35, "grate": 62, "gratitud": [17, 49, 50], "great": [47, 50, 51, 56, 59, 60, 62], "greater": 55, "greet": 50, "groceri": 41, "group": [0, 1, 2, 4, 13, 29, 33, 34, 41, 52, 59, 62, 68, 71, 72], "grouping_kei": [0, 1, 2, 71], "gt": 22, "guess": 10, "gun": 1, "gy": 15, "gym": 34, "ha": [0, 1, 2, 32, 34, 35, 37, 42, 43, 46, 52, 54, 55, 56, 59, 62, 63, 71], "had": [1, 31, 36, 54, 61], "hadn": [31, 36], "handl": [19, 29, 71], "happen": [1, 2, 55, 62, 63], "happi": 42, "harder": 21, "hashedg": [17, 50], "hasn": [31, 36], "hasneg": 50, "hasposit": 50, "hate": 31, "have": [0, 1, 2, 10, 12, 16, 31, 34, 36, 37, 40, 41, 42, 45, 46, 50, 54, 59, 60, 61, 62, 71], "haven": [31, 36], "he": [1, 31, 36], "header": 18, "hear": 32, "heart": [61, 62], "heat": 1, "heavi": 62, "hedg": [11, 30, 39, 49, 50, 64], "hei": [1, 35, 45, 46, 50], "helena": [47, 62], "hello": [0, 43, 49], "help": [0, 31, 34, 36, 43, 45, 46, 52, 58, 69], "helper": [23, 67], "her": [30, 31, 36], "here": [0, 1, 29, 34, 41, 42, 47, 61, 66], "herself": [31, 36], "hesit": [60, 64], "hi": [31, 35, 36, 43, 45, 46], "hierach": 71, "hierarch": 71, "high": [0, 1, 2, 61, 62, 71], "higher": [21, 31, 34, 36, 40, 41, 42, 44, 45, 46, 55, 60], "highest": 71, "highlight": 1, "him": [31, 36], "himself": [31, 36], "hmm": [31, 36], "hoc": 62, "hole": 62, "home": 42, "homework": 34, "homonym": 31, "hope": 35, "host": [45, 46], "hour": 48, "how": [1, 5, 29, 30, 31, 34, 35, 36, 39, 43, 45, 51, 52, 54, 56, 62], "howev": [0, 1, 3, 35, 40, 42, 44, 54, 56, 61, 62], "howitwork": 1, "html": [15, 17, 24], "http": [1, 2, 4, 5, 6, 12, 13, 14, 15, 16, 17, 18, 21, 24, 41, 45, 46, 47, 64, 68, 70, 71], "hu": [1, 42, 62], "hug": 51, "huggingfac": 1, "huh": [31, 32, 36], "human": [37, 50, 62], "hyperlink": 48, "hyphen": [1, 61], "hypothet": 42, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 23, 24, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 73], "iby1": 5, "id": [2, 4, 7, 23, 28, 62, 66, 68, 71, 72, 73], "idea": [12, 35, 40, 47, 51], "ident": [34, 35], "identif": 1, "identifi": [0, 1, 2, 4, 8, 9, 15, 23, 25, 30, 41, 47, 50, 52, 61, 63, 64, 71, 72], "identiif": [13, 71], "ignor": [1, 32], "illustr": [1, 41, 48, 62], "imagin": 1, "immedi": [31, 35, 64], "impact": [1, 60], "impersonal_pronoun": 49, "implement": 64, "impli": 37, "import": [31, 32, 36, 44, 45, 62, 69], "incent": 13, "includ": [0, 1, 2, 10, 17, 22, 31, 32, 35, 36, 42, 45, 46, 51, 52, 56, 62, 66, 71], "inclus": [13, 71], "incongru": [8, 34], "incorpor": [1, 42, 45, 46], "increas": [1, 42, 62], "increment": 71, "independ": 1, "index": [1, 2, 4, 13, 25, 37, 39, 55, 61, 65], "indic": [1, 2, 16, 21, 22, 30, 32, 34, 35, 36, 40, 41, 43, 44, 48, 49, 50, 52, 55, 60, 63, 71], "indirect": 50, "indirect_btw": 50, "indirect_greet": 50, "indirectli": 69, "individu": [0, 1, 5, 11, 31, 34, 37, 45, 50, 59, 60, 62, 72], "inequ": 37, "infer": [1, 51, 67], "influenc": 1, "info": [13, 14, 18, 64], "info_divers": 13, "info_exchang": 64, "info_exchange_wordcount": [41, 64], "info_exchange_zscor": 11, "inform": [1, 6, 11, 12, 13, 24, 32, 34, 39, 48, 62, 64, 65], "informal_titl": 49, "information_divers": 11, "initi": [2, 62, 63, 64, 65, 66], "input": [0, 2, 4, 6, 12, 13, 14, 15, 16, 19, 20, 21, 22, 28, 31, 50, 55, 60, 62, 63, 64, 65, 66, 67, 71, 72], "input_column": [65, 66], "input_data": [25, 68, 72], "input_df": [1, 2, 61, 71], "inquiri": [30, 39, 52], "insid": 1, "insight": 1, "inspir": 15, "instal": [1, 61, 62], "instanc": [1, 22, 50, 59, 66], "instanti": 2, "insteac": 1, "instead": [1, 2, 62], "instruct": [1, 61], "int": [2, 3, 10, 13, 15, 16, 19, 20, 21, 22, 28, 63, 64, 67], "intact": 71, "integ": [13, 40, 47], "intend": 59, "interact": [1, 11, 43, 44, 62, 69], "interconnect": 62, "interest": [1, 61, 62], "interfac": 62, "intermedi": [59, 64], "intern": 29, "interpret": [0, 23], "interrupt": 59, "interv": [58, 65], "introduc": 62, "introduct": [11, 61], "invalid": 67, "invers": 64, "involv": [41, 62, 65], "io": [24, 47], "ipynb": [0, 1], "is_hedged_sentence_1": 10, "isn": [1, 31, 36], "issu": [1, 31, 36, 37, 42, 61], "ital": 64, "italic": 22, "item": [0, 71], "its": [0, 2, 15, 31, 35, 36, 40, 41, 47, 54, 55, 64, 69], "itself": [23, 31, 36, 44], "john": 1, "jonson": 62, "journal": [5, 64], "jurafski": 70, "juri": 1, "juries_df": 1, "jury_conversations_with_outcome_var": 1, "jury_feature_build": 1, "jury_output_chat_level": 1, "jury_output_conversation_level": 1, "jury_output_turn_level": 1, "jury_output_user_level": 1, "just": [0, 1, 2, 31, 36, 46, 50, 59, 61, 62], "katharina": 34, "keep": [1, 71], "kei": [1, 2, 4, 19, 28, 30, 54, 61, 71], "keyerror": 71, "keyword": [19, 49], "kind": [10, 62], "kitchen": 42, "knob": 0, "know": [1, 30], "knowledg": 29, "known": [1, 32, 61], "kumar": 62, "kw": 19, "lab": [1, 2, 62, 71], "label": [1, 15, 21, 51], "lack": [31, 38, 45, 46], "languag": [15, 34, 42, 50, 62], "larg": [31, 69], "larger": [0, 61], "last": [1, 31], "late": 32, "later": [0, 1, 2, 61], "latter": [31, 36], "lda": [13, 40], "learn": [61, 62], "least": [10, 32, 42, 63, 67], "led": 62, "legal": 49, "lemmat": [13, 40], "len": 28, "length": [14, 35, 39, 41, 42, 44], "less": [13, 32, 50, 52, 55, 62, 63], "let": [41, 49, 53], "let_me_know": 49, "letter": [49, 71], "level": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 14, 16, 23, 61, 64, 65, 66, 71, 72], "lexic": [10, 12, 14, 16, 31, 32, 36, 60, 62, 64], "lexical_featur": [14, 64], "lexical_features_v2": [10, 11], "lexicon": [5, 10, 14, 30, 39, 50, 52, 67, 69], "lexicons_dict": 67, "librari": [34, 51, 56, 57], "lift": 62, "light": 61, "like": [1, 22, 31, 34, 36, 41, 50, 61, 62], "limiat": 32, "limit": [11, 32, 37, 42, 54], "line": [0, 1, 19, 22, 48, 61, 62, 64], "linear": 64, "linguist": [18, 19, 30, 39, 50, 52], "link": [22, 29, 48, 50, 64], "list": [1, 2, 6, 7, 10, 11, 12, 13, 15, 19, 20, 21, 22, 28, 31, 33, 36, 37, 42, 48, 49, 50, 53, 54, 61, 64, 65, 66, 67, 68, 70, 71], "literatur": 62, "littl": 38, "littlehors": 1, "liu": [42, 52], "live": [1, 54], "liwc": [14, 30, 39, 51, 52, 56, 62], "liwc_featur": [10, 14], "lix": 34, "ll": [1, 31, 36, 61], "load": [19, 69], "load_saved_data": 19, "load_to_dict": 19, "load_to_list": 19, "loc": 15, "local": [1, 51, 61], "locat": [1, 62], "long": [4, 42], "longer": [30, 41, 43, 48, 61, 62], "look": [2, 34, 61, 65, 66], "loos": 36, "lot": [31, 36], "loud": 60, "love": [31, 56], "low": [1, 2, 29, 55, 60, 71], "lower": [21, 31, 33, 36, 41, 42, 44, 55, 60], "lowercas": [2, 13, 40, 48, 49, 71], "lowest": 71, "lpearl": 16, "lst": 6, "m": [30, 31, 36], "made": [1, 23, 35, 59, 61, 62], "magnitud": 55, "mai": [1, 2, 11, 31, 32, 35, 36, 37, 41, 42, 43, 44, 54, 61, 62, 71], "main": [1, 2, 5, 62, 64, 65, 66], "make": [0, 1, 5, 34, 42, 55, 56, 62, 66, 69, 71], "man": 62, "mani": [1, 4, 11, 32, 37, 41, 60, 62, 66], "manner": [55, 62], "manual": [1, 61], "map": [13, 34], "mark": [19, 20, 22, 43, 54, 64, 71], "marker": [18, 32, 39, 42, 50, 51, 52, 54, 56], "marlow": 44, "matarazzo": 62, "match": [5, 16, 19, 30], "math": 34, "matter": 47, "max": 66, "max_num_chunk": 63, "maxim": [34, 35, 37], "maximum": [63, 65, 72], "mayb": [38, 47], "mcfarland": 70, "me": [31, 32, 36, 41, 50, 53], "mean": [0, 1, 4, 6, 11, 13, 21, 29, 31, 34, 36, 40, 41, 42, 47, 55, 56, 58, 61, 62, 65, 66, 73], "meaning": [41, 55], "meaningless": 41, "meant": 39, "measur": [0, 7, 12, 13, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 64, 68], "mechan": 32, "medium": 21, "meet": 48, "member": [13, 34, 37, 55], "merg": [2, 8, 65, 66], "merge_conv_data_with_origin": 2, "messag": [0, 1, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 39, 41, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 71, 73], "messaga": 61, "message_col": [0, 1, 2, 12, 13, 14, 61, 64, 65, 67, 71], "message_embed": [6, 7, 8], "message_lower_with_punc": 71, "metadata": [0, 1], "method": [5, 31, 41, 50, 62], "metric": [0, 1, 8, 30, 34, 35, 46, 47, 48, 55, 66], "michael": 1, "mid": [1, 2, 71], "middl": [21, 34, 63], "might": [0, 1, 29, 43, 48, 53], "mikeyeoman": [14, 18, 64], "mileston": 34, "mimic": [28, 31, 36, 45], "mimic_word": 28, "mimick": [28, 31, 64], "mimicri": [0, 1, 28, 31, 35, 36, 39, 61, 64], "mimicry_bert": [45, 46], "mind": [1, 35, 50], "mine": [31, 36, 53, 59], "minim": [0, 41, 60], "minimum": [65, 72], "minu": [12, 41, 64], "minut": [55, 58], "mirror": 1, "miss": [1, 32, 61, 71], "mitig": [31, 36], "mizil": 50, "mm": [31, 36], "mnsc": 6, "modal": 50, "mode": 60, "model": [1, 13, 15, 31, 34, 35, 36, 40, 45, 46, 47, 51, 62, 67], "modif": 35, "modifi": [9, 19, 32, 64], "modul": [0, 1, 11, 34, 49, 61, 69], "monologu": 59, "more": [0, 1, 2, 11, 12, 22, 23, 24, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 45, 46, 50, 52, 55, 59, 61, 62, 71], "morn": 1, "most": [24, 31, 55, 62, 69], "motiv": 61, "move": [0, 1, 28, 31, 36, 39, 45, 59, 61], "movi": 31, "much": [1, 31, 34, 35, 36, 45, 62], "multi": [1, 2, 71], "multidimension": [45, 46], "multipl": [0, 1, 2, 19, 62, 71], "must": [1, 6, 62, 71], "my": [30, 31, 35, 36, 45, 46, 50, 53], "my_chat_featur": 1, "my_feature_build": 61, "my_fil": [0, 1], "my_output_chat_level": 61, "my_output_conversation_level": 61, "my_output_user_level": 61, "my_pandas_datafram": 61, "myself": [31, 36, 53], "n": [35, 45, 46, 47, 57, 59, 60], "n_chat": 59, "na": [5, 33, 43, 44, 48, 49, 50, 53, 58], "naiv": [2, 20, 32, 34, 38, 39, 53, 56, 57, 64], "name": [0, 1, 2, 4, 7, 8, 9, 12, 13, 14, 15, 17, 19, 23, 25, 28, 30, 32, 35, 39, 45, 46, 50, 51, 56, 61, 63, 64, 66, 67, 68, 71, 72, 73], "name_to_train": 47, "named_ent": [15, 47], "named_entity_recognition_featur": 11, "nan": [0, 34], "nate": [35, 45, 46], "nathaniel": [35, 45, 46], "nativ": 50, "natur": [43, 55], "ndarrai": 68, "nearest": [13, 40], "nearli": 62, "necessari": [63, 67], "need": [0, 1, 2, 21, 62, 66, 67], "need_sent": 67, "need_senti": 67, "neg": [24, 29, 31, 34, 35, 36, 42, 50, 51, 52, 54, 56, 62, 67], "negat": [19, 49], "negative_bert": 51, "negative_emot": [49, 51, 52, 56], "negoti": 62, "neighborhood": 54, "neither": 30, "ner": 15, "ner_cutoff": [0, 1, 2, 47, 64], "ner_train": 64, "ner_training_df": [0, 1, 2, 47, 64], "nest": [0, 1, 2, 22, 71], "net": [45, 46], "network": 11, "neutral": [5, 24, 30, 51, 55, 67], "neutral_bert": 51, "never": 1, "new": [1, 4, 13, 34, 64, 65, 66, 72], "new_column_nam": 72, "next": [1, 32, 47, 58], "nice": [50, 54], "nicknam": 1, "niculescu": 50, "night": 31, "nikhil": [59, 62], "nltk": [1, 42, 61], "nobodi": [31, 36], "nois": 32, "non": [1, 2, 28, 31, 37, 48, 62, 71], "none": [2, 19, 23, 37, 55, 64, 65, 66, 67], "nor": 30, "normal": [19, 31], "notabl": 62, "note": [0, 1, 2, 12, 16, 20, 42, 61, 71], "notebook": [0, 1], "noth": [31, 36, 56], "noun": 1, "novel": [45, 46], "now": [0, 1], "nowher": [31, 36], "np": 68, "ntri": 32, "null": 34, "num": 48, "num_char": 65, "num_chunk": [27, 63], "num_hedge_word": 10, "num_messag": 65, "num_named_ent": [15, 47], "num_row": 63, "num_top": 13, "num_word": [12, 16, 65], "number": [0, 3, 11, 12, 13, 15, 16, 19, 20, 21, 22, 23, 25, 28, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 47, 48, 49, 54, 56, 58, 59, 60, 62, 63, 64, 66, 69, 71, 72], "numer": [0, 1, 13, 33, 68, 72, 73], "numpi": [1, 61, 68], "o": 35, "object": [1, 2, 19, 44, 50, 57, 58, 61, 62, 64, 65, 66], "obtain": [13, 17, 23, 24, 34], "occur": [0, 4, 31, 42, 71], "occurr": 19, "off": [1, 31, 36], "offer": 0, "offici": 61, "often": [36, 47, 48, 62], "oh": [31, 36, 48], "okai": [31, 36], "older": [1, 61], "on_column": [18, 23, 28, 68, 72, 73], "onc": [2, 11, 58, 61, 62], "one": [0, 1, 2, 4, 10, 12, 19, 23, 25, 29, 31, 32, 36, 37, 47, 51, 56, 59, 61, 62, 67, 68, 71, 73], "ones": [31, 36], "onli": [0, 1, 2, 5, 11, 23, 29, 31, 32, 34, 36, 37, 45, 53, 58, 59, 61, 62, 71], "onlin": [1, 32, 39, 64], "onward": 0, "open": [0, 62, 66], "operation": [39, 50, 59], "opinion": [24, 31], "oppos": [2, 31, 34, 35, 55], "opposit": 34, "option": [1, 2, 37, 62, 63, 67, 71], "order": [0, 1, 35, 37, 42, 71], "org": [6, 15, 21, 24, 41, 70], "origin": [1, 2, 5, 12, 21, 31, 32, 35, 36, 37, 45, 46, 49, 59], "orthogon": 34, "other": [1, 9, 11, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 40, 45, 46, 48, 51, 52, 54, 56, 58, 59, 61, 62, 64, 66, 71], "other_lexical_featur": [11, 64], "otherwis": [2, 10, 21, 23, 32, 38, 63, 67], "our": [0, 1, 2, 11, 13, 29, 31, 32, 36, 37, 39, 53, 59, 61, 71], "ourselv": 53, "out": [1, 2, 16, 19, 31, 36, 55, 60, 62], "outcom": [1, 44, 62], "output": [0, 1, 2, 10, 17, 19, 40, 61, 62, 64, 67], "output_file_path_chat_level": [0, 1, 2, 61], "output_file_path_conv_level": [0, 1, 2, 61], "output_file_path_user_level": [0, 1, 2, 61], "output_path": 67, "outsid": [1, 2, 12], "over": [1, 16, 29, 31, 34, 35, 36, 37, 53, 55, 60, 62, 71], "overal": [30, 31, 34, 36, 45, 46], "overrid": [0, 1], "overview": [0, 61, 62], "overwritten": 1, "own": [0, 1, 9, 35, 62], "p": [14, 55], "pacakg": 24, "pace": [43, 62], "packag": [17, 18, 40, 62], "pad": 19, "page": [1, 11, 29, 39, 61, 62, 69], "pair": [6, 19, 34, 49, 71], "pairwis": [6, 34], "panda": [0, 1, 2, 12, 14, 16, 23, 47, 64, 65, 66, 71, 72, 73], "paper": [4, 5, 12, 14, 18, 29, 40, 50, 64], "paragraph": 22, "param": 71, "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 47, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "paramt": 1, "pardon": 32, "parenthes": [22, 48, 64], "parenthet": [22, 48], "pars": [16, 50, 60], "part": [1, 10, 13, 29, 36, 42, 52, 71], "particip": [1, 9, 37, 62], "particl": [31, 36], "particular": [11, 32, 34, 41, 45, 47, 51, 59, 62], "particularli": 42, "partner": 32, "pass": [1, 13, 21, 47, 71], "path": [0, 1, 2, 19, 67], "path_in": 19, "pattern": [4, 11, 19, 55, 62, 67], "paus": 4, "pd": [1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 19, 23, 25, 63, 64, 65, 66, 67, 68, 71], "pdf": [5, 12, 13, 14, 16, 18, 21, 24, 64, 70], "penalti": 1, "pennebak": [12, 37, 41, 42, 52], "pennyslvania": 62, "peopl": [1, 32, 59, 62], "per": [1, 6, 9, 14, 19, 42, 63, 66, 72], "percentag": [2, 21], "perfect": [37, 59], "perform": [0, 1, 2, 16, 50], "perhap": 1, "period": [4, 34, 55], "person": [1, 8, 12, 15, 16, 32, 34, 39, 41, 42, 50, 59, 62, 64, 70], "perspect": 1, "petrocelli": 5, "pgcr_yeoman": 14, "phrase": [19, 30, 38, 54], "phrase_split": 19, "pickl": [19, 67], "piec": [36, 42, 59, 63], "pl": 50, "place": [55, 61, 62], "plan": [34, 35, 45, 46], "player": 59, "pleas": [0, 1, 38, 49, 50, 61, 62], "please_start": 50, "point": [22, 24, 34, 35, 45, 46, 48, 52, 64, 66], "poisson": 55, "polar": [24, 39, 51, 52, 64], "polit": [1, 17, 18, 30, 32, 38, 39, 42, 51, 52, 54, 56, 64], "politeness_featur": 11, "politeness_v2": 11, "politeness_v2_help": 11, "politenessstrategi": [17, 50], "portion": 0, "posit": [0, 11, 15, 24, 29, 31, 39, 42, 50, 51, 54, 56, 62, 64, 67], "positive_affect_lexical_per_100": [51, 52, 56], "positive_bert": 51, "positive_emot": [49, 51, 52, 56], "positivity_zscor": 64, "positivity_zscore_chat": 52, "positivity_zscore_convers": 52, "possess": 31, "possibl": [1, 34, 62, 66], "possibli": [38, 62], "practic": [14, 34, 35], "pre": [1, 4, 21, 37, 49, 64], "preced": [31, 35, 71], "precend": 35, "precis": 47, "precomput": 51, "predefin": 19, "predetermin": [31, 36], "predict": [2, 47, 51, 64], "prefer": [0, 1], "preload_word_list": 69, "prep_simpl": 19, "prep_whol": 19, "preposit": [31, 36], "preproces": 48, "preprocess": [0, 2, 13, 19, 40, 43, 49, 51, 69], "preprocess_chat_data": 2, "preprocess_conversation_column": 71, "preprocess_naive_turn": 71, "preprocess_text": 71, "preprocess_text_lowercase_but_retain_punctu": 71, "presenc": [2, 32, 67], "present": [1, 2, 30, 31, 38, 55, 62, 71], "prespecifi": 19, "prevent": 51, "previou": [1, 7, 28, 31, 36, 45, 46, 58, 64, 71], "primari": 34, "print": 2, "prior": [2, 64, 71], "priya": [47, 62], "probabl": [15, 47], "problem": 62, "procedur": 62, "proceed": 46, "process": [0, 1, 2, 4, 10, 21, 37, 55, 62, 64, 65, 67, 69, 71], "prodi": 15, "produc": [2, 34], "product": 15, "professor": 62, "progress": [1, 2], "project": [54, 62], "pronoun": [12, 16, 31, 36, 39, 41, 42, 64, 70], "proper": 1, "proport": [16, 39, 42, 64], "propos": 37, "provid": [0, 1, 2, 15, 29, 30, 33, 36, 39, 44, 47, 54, 62], "proxi": 42, "pseudonym": 1, "psycholog": 42, "pub": 70, "publish": [5, 30, 64], "pubsonlin": 6, "punctuat": [0, 16, 19, 20, 21, 28, 43, 54, 60, 71], "punctuation_seper": 19, "puncut": 48, "pure": [24, 36], "purpos": 1, "put": [34, 50, 62, 66], "py": [0, 1, 14, 49], "pypi": [1, 61], "python": [1, 32, 41, 56, 57, 61, 62, 68], "qtd": 62, "qualiti": 41, "quantifi": [31, 36, 62], "quantiti": [37, 39, 41, 47], "quartil": 50, "question": [16, 19, 20, 29, 32, 39, 49, 50, 64, 66, 68, 70], "question_num": 11, "question_word": 20, "quick": [1, 43], "quickli": 0, "quit": 40, "quot": [22, 48, 64], "quotat": [22, 48], "rabbit": 62, "rain": 41, "rais": [67, 71], "random": 55, "rang": [5, 8, 24, 30, 33, 34, 35, 40, 51, 53, 55, 56, 57], "ranganath": [16, 31, 32, 36, 38, 43, 54, 70], "ranganath2013": 70, "ranganathetal2013_detectingflirt": 16, "rapid": [1, 4], "rare": [34, 35], "rate": [14, 42, 51], "rather": [31, 34, 35, 36, 37, 45, 46, 63], "ratio": [16, 39, 64], "raw": [0, 12, 16, 21, 31, 33, 50, 64], "re": [1, 31, 36, 42, 50, 61], "read": [0, 1, 2, 16, 21, 29, 33, 61, 62, 64, 65, 66, 67], "read_csv": 1, "read_in_lexicon": 67, "readabl": [11, 33, 64, 70], "reader": 33, "readi": 1, "readili": 62, "readthedoc": 24, "real": [1, 55], "realit": 13, "realli": [31, 36, 50], "reason": [31, 36, 45, 46, 49], "reassur": 49, "recal": 47, "recept": [18, 32, 39, 42, 50, 51, 52, 54, 56, 62, 64], "recogn": [1, 43, 47], "recognit": [0, 1, 2, 39, 64], "recommend": [42, 62], "reddit": [48, 64], "reddit_tag": 11, "redditus": 48, "reduc": 63, "reduce_chunk": 63, "redund": [42, 62], "refer": [0, 1, 11, 22, 24, 28, 31, 42, 48, 52, 62, 64, 70], "reflect": [37, 43], "regardless": 1, "regener": [0, 2, 51, 67], "regenerate_vector": [0, 1, 2, 67], "regex": [14, 16, 49], "regist": 37, "regress": 1, "regular": [5, 14, 30, 32, 42, 55, 58], "reichel": [53, 58, 60], "reidl": [4, 13], "reinvent": 62, "rel": [41, 51, 52, 55, 60, 64], "relat": [1, 61, 62, 64], "relationship": 36, "relev": [1, 29, 42, 44, 49, 51, 56, 61, 64, 65], "reli": [31, 34, 35, 36, 69], "reliabl": [33, 42], "remain": [1, 30, 71], "rememb": 1, "remov": [0, 2, 9, 13, 19, 28, 40, 43, 48, 49, 50, 71], "remove_active_us": 9, "renam": 1, "repair": [16, 39], "repeat": [60, 71], "repetit": 60, "replac": 19, "report": [1, 61], "repres": [2, 4, 6, 7, 11, 13, 23, 31, 34, 36, 42, 45, 46, 66, 67, 68, 71, 72, 73], "represent": [34, 38], "reproduc": [36, 62], "republican": 1, "request": [32, 50, 51], "requir": [0, 1, 20, 21, 31, 55, 61, 62, 64, 65, 66, 67], "research": [1, 2, 62], "reserv": 0, "resolv": 62, "resourc": [1, 39, 48, 61, 62], "respect": [1, 2, 12, 31, 36, 37, 69], "respons": [22, 48, 55, 58, 64], "restaur": [34, 56], "restor": 0, "restrict": 71, "result": [40, 55, 65, 72], "retain": [2, 16, 20, 21, 60, 71], "retriev": 50, "retunr": 3, "return": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 32, 43, 49, 50, 51, 55, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "reveal": 62, "revert": 50, "review": 62, "rewrit": 50, "rich": 62, "riedl": [13, 40, 55], "right": [31, 36, 61, 62], "roberta": [1, 39, 42, 52, 56, 64, 67], "robust": 13, "rocklag": [5, 30, 64], "room": 59, "root": [13, 40], "rough": [12, 54], "roughli": 31, "round": [13, 40, 59, 71], "round_num": 1, "row": [0, 1, 2, 9, 13, 25, 37, 40, 59, 63, 68, 71, 72, 73], "rowbotham": 62, "rucker": 5, "rule": [1, 69], "run": [0, 10, 12, 16, 35, 46, 47, 48, 51, 61, 69], "runtim": [1, 35], "sagepub": [5, 64], "sai": [1, 32, 50, 59], "said": [1, 36, 62], "same": [0, 1, 2, 31, 34, 37, 45, 48, 52, 59, 60, 61, 62, 71], "sampl": [61, 62], "sarcast": 48, "save": [0, 1, 2, 19, 64, 67], "save_featur": 2, "sbert": [1, 28, 31, 34, 35, 36, 45, 46, 64, 65, 67], "scale": [42, 51], "school": [21, 62], "scienc": [29, 39, 62], "scientist": [61, 62], "score": [4, 5, 11, 12, 13, 15, 21, 24, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 45, 46, 47, 51, 53, 56, 57, 64, 65, 67, 73], "script": [1, 61], "sea": 1, "seamless": 62, "search": [19, 61], "second": [0, 1, 4, 34, 42, 58, 59], "second_person": 49, "secr": [18, 49, 64], "section": [1, 29, 61], "see": [0, 1, 2, 30, 34, 38, 41, 45, 46, 47, 55, 62, 71], "seek": [5, 62], "segment": [0, 19], "select": [2, 4, 23, 28, 36, 45, 66, 67, 68, 71, 72, 73], "self": 2, "semant": [31, 34, 35, 41], "send": [1, 37, 55], "sens": [5, 31, 54, 66], "sent": [1, 37, 64], "sentenc": [0, 1, 10, 15, 19, 20, 21, 33, 34, 35, 36, 42, 45, 46, 47, 48, 54, 56, 61, 67], "sentence_pad": 19, "sentence_split": 19, "sentence_to_train": 47, "sentencis": 19, "sentiment": [0, 1, 24, 31, 39, 42, 52, 56, 62, 64, 67], "separ": [1, 2, 19, 34, 51], "sepcifi": 1, "septemb": 40, "sequenc": [1, 59], "sequenti": 1, "seri": [12, 16, 23, 28, 42, 71, 73], "serv": 12, "set": [0, 1, 2, 13, 23, 34, 48, 59], "set_self_conv_data": 2, "sever": [1, 30, 41, 42, 48, 51, 56, 61], "shall": 54, "share": [31, 36, 37], "she": [30, 31, 36], "shift": 34, "shop": 62, "short": [55, 58], "shorter": [13, 40, 41, 42, 43], "should": [0, 1, 2, 4, 14, 23, 28, 29, 31, 36, 47, 48, 54, 61, 62, 65, 66, 67, 68, 69, 71, 72, 73], "shouldn": [31, 36], "show": 37, "showeth": 62, "shruti": [35, 45, 46, 47, 62], "side": 31, "signal": [45, 55], "signifi": 42, "signific": [1, 61], "silent": 37, "similar": [1, 6, 7, 13, 28, 29, 31, 34, 35, 36, 40, 45, 46, 49, 62, 65], "similarli": [1, 35], "simpl": [0, 1, 16, 19, 42, 61, 62], "simpli": [1, 5, 11, 28, 56, 62], "simplifi": 1, "simplist": 41, "sinc": [1, 32, 41, 71], "singh": 62, "singl": [0, 1, 2, 11, 12, 19, 23, 31, 34, 35, 36, 37, 41, 45, 46, 59, 62, 71, 72], "singular": [12, 41, 64], "site": 16, "situat": 37, "size": [1, 13, 63, 67], "skip": 1, "slightli": [32, 62, 63], "slow": 1, "small": 40, "so": [1, 2, 10, 30, 31, 36, 37, 50, 61, 62, 66], "social": [29, 39, 61, 62], "socsci": 16, "softwar": 62, "sohi": 62, "sol3": 4, "solut": 59, "solv": 62, "some": [0, 1, 11, 17, 29, 32, 34, 35, 37, 41, 61, 63], "somebodi": [31, 36], "someon": [22, 29, 31, 36, 47, 48, 61, 64], "someplac": [31, 36], "someth": 47, "sometim": 1, "somewhat": 35, "soon": 62, "sorri": [16, 32, 50], "sort": 10, "sound": [47, 51], "sourc": [4, 5, 6, 12, 13, 16, 17, 21, 34, 35, 50, 64, 68], "space": [34, 40, 71], "spaci": [1, 19, 47, 49, 50, 61], "span": 63, "spars": 32, "speak": [1, 31, 36, 37, 59, 60, 62], "speaker": [0, 1, 2, 6, 8, 9, 25, 31, 34, 35, 37, 38, 42, 45, 46, 61, 66, 71, 72], "speaker_id": [2, 61, 72], "speaker_id_col": [0, 1, 2, 6, 8, 9, 25, 26, 27, 61, 65, 66, 71, 72], "speaker_nicknam": [0, 1, 2, 6, 9, 59, 66, 71], "special": [0, 1, 2, 48, 71], "specif": [1, 2, 12, 32, 41, 48, 55, 61, 62, 69, 71], "specifi": [1, 2, 19, 47, 49, 67, 68, 71, 72, 73], "speciifc": 63, "spend": [51, 62], "spike": 55, "split": [19, 21, 43, 63], "spoke": 59, "spoken": [11, 37], "spread": 55, "squar": [13, 40], "ssrn": 4, "stabl": 40, "stack": 14, "stackoverflow": 68, "stage": [1, 2, 34, 71], "stamp": 55, "standard": [1, 4, 37, 40, 41, 49, 55, 58, 60, 65, 72, 73], "stanford": 70, "start": [0, 15, 19, 20, 22, 23, 50], "statement": [38, 42, 47, 48, 62, 64], "statist": [65, 66, 68], "statologi": 41, "stem": 42, "step": [1, 4, 28, 41, 45, 46, 51], "still": [41, 45, 46], "stochast": 40, "stop": [40, 62], "stopword": [13, 19], "store": [1, 12, 16, 41, 49, 51, 65, 67], "stoword": 42, "str": [2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 63, 64, 65, 66, 67, 68, 71, 72, 73], "straightforward": 29, "strategi": [17, 30, 32, 38, 39, 42, 49, 54, 64], "stream": 35, "strictli": 1, "string": [0, 1, 2, 4, 8, 12, 13, 14, 19, 23, 24, 50, 66, 67, 68, 71, 72, 73], "strongli": [1, 41, 61], "structur": [0, 36, 49], "student": [21, 33], "studi": [1, 34, 62], "style": [1, 31, 36, 59], "sub": [0, 1, 71], "subfold": 1, "subject": [5, 24, 39, 49, 64], "subjunct": 50, "sublist": 28, "submiss": 55, "subpart": [1, 71], "subsequ": [1, 30, 51, 58], "subset": 62, "substanc": 36, "substant": 31, "substanti": 1, "substr": 30, "subtask": 1, "subtract": [41, 58], "succe": 62, "success": [0, 1, 4, 31, 36, 43, 55, 58, 61], "suggest": [1, 13, 34, 42, 44, 50], "suit": [62, 64], "sum": [28, 34, 64, 65, 66, 72], "summar": [0, 1, 65, 66, 69], "summari": [65, 66, 72], "summariz": [0, 65], "summarize_featur": 69, "suppl": 6, "support": [1, 15, 61], "suppos": 1, "sure": 30, "swear": 49, "syllabl": 21, "syntax": [1, 32, 61], "system": [2, 59, 64], "t": [0, 1, 2, 15, 29, 31, 36, 45, 49, 54, 61, 62, 67], "tabl": 62, "tag": 39, "take": [1, 4, 5, 9, 14, 25, 29, 31, 34, 37, 39, 42, 55, 61, 65, 71], "taken": [59, 71], "talk": [1, 37, 47, 59, 62], "target": 15, "task": [1, 2, 59, 71], "tausczik": [12, 37, 41, 52], "tausczikpennebaker2013": 12, "team": [0, 1, 4, 11, 12, 13, 34, 39, 40, 59, 65], "team_bursti": 4, "team_comm_tool": [1, 61], "teamcommtool": 1, "technic": [29, 39, 61, 62], "teghxgbqdhgaaaaa": 5, "tempor": [0, 2, 55, 58, 64, 71], "temporal_featur": 11, "tend": [1, 34, 60], "term": [1, 28, 59], "termin": [1, 2, 61], "terribl": 51, "test": [13, 33, 47], "text": [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 28, 32, 33, 36, 42, 48, 55, 62, 64, 67, 71], "text_based_featur": 64, "textblob": [24, 39, 51, 52, 64], "textblob_sentiment_analysi": 11, "than": [0, 1, 2, 11, 13, 31, 34, 35, 36, 37, 40, 41, 45, 46, 54, 60, 62, 63], "thee": 62, "thei": [0, 1, 29, 31, 34, 36, 37, 39, 47, 58, 59, 61, 62, 67], "them": [1, 2, 19, 28, 29, 31, 36, 50, 51, 55, 59, 61, 62, 64, 65, 66, 67], "themselv": [31, 36, 60], "theoret": 35, "theori": [34, 50], "therefor": [0, 1, 11, 37, 45, 59, 62, 69], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 16, 18, 20, 21, 23, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 73], "thing": [48, 61], "think": [1, 38, 47], "thorough": [43, 62], "those": [1, 21, 31, 36, 61], "though": [34, 42], "thought": [1, 35, 45], "thread": [1, 61], "three": [0, 1, 22, 34, 37, 40, 51, 62, 69, 71], "threshold": [15, 47], "through": [1, 45, 46, 50, 61, 62], "throughout": [31, 35, 36, 40, 45, 46, 55, 63], "tht": 35, "thu": [1, 2, 34, 35, 36, 37, 46, 55, 71], "time": [0, 1, 4, 23, 34, 35, 39, 42, 48, 51, 55, 59, 61, 62, 63, 64, 65, 66, 71], "time_diff": 55, "timediff": 4, "timestamp": [0, 1, 2, 8, 23, 58, 61, 62, 63, 64, 71], "timestamp_col": [0, 1, 2, 8, 61, 63, 64, 65, 71], "timestamp_end": [1, 23, 61, 64], "timestamp_start": [1, 23, 61, 64], "todai": [34, 35, 41, 43, 45, 46, 47], "todo": 66, "togeth": [0, 62, 66], "token": [16, 19, 39, 49, 54, 64], "token_count": [19, 49], "too": [30, 31, 36, 62], "took": [1, 59], "tool": [1, 61, 62], "toolkit": [0, 1, 11, 42, 45, 46, 55, 62], "top": [1, 50, 59], "topic": [1, 13, 34, 40, 42, 43, 65], "tormala": 5, "total": [3, 12, 16, 25, 31, 34, 36, 37, 41, 44, 53, 59, 60, 62, 63, 64, 66, 72], "touch": [1, 61], "toward": [31, 36, 38, 42, 45, 46], "tradit": 49, "train": [1, 2, 15, 64], "train_spacy_n": 15, "transcript": 0, "transfom": [45, 46], "transform": [31, 34, 35, 36, 51], "transform_utter": 50, "treat": [1, 59, 61], "tri": [50, 64], "trivial": [3, 44, 62], "troubl": [1, 61], "true": [0, 1, 2, 37, 61, 63, 67, 71], "truncat": 2, "truth_intensifi": 49, "ttr": 64, "tupl": [0, 1, 2, 15, 19, 64], "turn": [0, 1, 2, 25, 28, 31, 32, 37, 39, 61, 64, 65, 71], "turn_count": 59, "turn_df": 71, "turn_id": 71, "turn_taking_featur": 11, "twice": 63, "twitter": 51, "two": [0, 1, 2, 23, 31, 34, 36, 41, 45, 46, 52, 62, 63], "txt": 19, "type": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 37, 39, 52, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "typic": [1, 34, 40, 41, 42, 52, 60], "u": [1, 22, 31, 36, 48, 49, 58, 61], "uci": 16, "uh": [31, 36], "ulrich": 55, "um": [31, 36, 60], "umbrella": [8, 29, 34], "uncertain": [5, 30], "uncertainti": 30, "under": [0, 1, 10, 11, 12, 28, 40], "underli": 1, "underscor": [1, 61], "understand": [0, 33, 39, 43, 48, 58, 61, 62], "understood": 33, "uninterrupt": 59, "uniqu": [0, 1, 2, 6, 9, 13, 16, 23, 25, 41, 47, 52, 60, 61, 63, 71], "univers": 62, "unix": 58, "unless": [31, 36], "unpack": 62, "unpreprocess": 0, "until": [31, 36, 45, 46], "unzip": [1, 61], "up": [1, 17, 21, 28, 31, 35, 36, 37, 45, 46, 51, 59, 61], "updat": [1, 9, 40, 54, 61], "upenn": 1, "upload": 13, "upon": 33, "upper": 42, "us": [0, 1, 2, 3, 5, 11, 12, 13, 14, 17, 19, 24, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 60, 62, 64, 65, 66, 67, 71], "usag": [21, 24], "use_time_if_poss": 63, "user": [0, 1, 2, 9, 15, 22, 37, 47, 48, 51, 61, 62, 63, 64, 65, 66, 69, 72], "user_data": [2, 65, 66], "user_df": 9, "user_level_featur": 2, "user_list": 9, "userlevelfeaturescalcul": [2, 66, 69], "usernam": [22, 48], "utf": 1, "util": [12, 21, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "utilti": 62, "utter": [0, 1, 2, 3, 4, 5, 13, 14, 15, 16, 17, 20, 21, 23, 24, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52, 54, 58, 60, 61, 67], "v": [0, 13], "v0": 0, "valenc": 51, "valid": [23, 55], "valu": [1, 2, 5, 6, 10, 12, 13, 18, 19, 23, 28, 30, 31, 34, 36, 37, 40, 41, 42, 45, 46, 47, 55, 59, 64, 68, 71, 72, 73], "vari": [13, 31, 34, 35, 42], "variabl": [1, 56, 57, 64, 65, 66], "varianc": [8, 34], "variance_in_dd": 11, "variat": [4, 32], "varieti": [42, 62], "variou": [19, 42, 64, 65, 66], "vast": 62, "ve": [0, 31, 36, 50, 61], "vec": 6, "vect_data": [7, 8, 28, 64, 65, 66], "vect_path": 67, "vector": [0, 1, 2, 6, 7, 8, 13, 28, 34, 35, 40, 55, 61, 64, 65, 67], "vector_data": [1, 61], "vector_directori": [0, 1, 2, 61, 65], "vein": 45, "verb": [19, 31, 36], "verbal": 32, "veri": [5, 30, 31, 34, 35, 36, 42, 49, 54], "verifi": 2, "verit": 62, "version": [1, 12, 14, 21, 40, 51, 61], "versu": [4, 29, 47, 55, 59], "via": [3, 44], "view": 50, "visit": 41, "voila": 62, "w": 31, "wa": [0, 1, 2, 5, 12, 31, 32, 35, 36, 47, 51, 56, 59, 62, 71], "wai": [1, 2, 29, 30, 31, 32, 34, 49, 50, 54, 56, 57, 61, 62, 66], "waiai": 62, "wait": [4, 55], "walk": 1, "walkthrough": [0, 61, 62], "want": [1, 28, 34, 59, 61, 62, 67], "warn": 50, "watt": [1, 2, 62, 71], "we": [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 14, 15, 16, 18, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 57, 58, 59, 61, 62, 66, 67, 71], "web": 70, "week": 47, "weight": 66, "welcom": 61, "well": [29, 31, 36, 55, 62], "went": 41, "were": [1, 12, 31, 36], "western": 1, "wh": [19, 31, 36], "wh_question": [32, 49, 54], "wharton": 62, "what": [1, 2, 12, 16, 20, 29, 31, 32, 34, 35, 36, 39, 41, 45, 46, 47, 50, 54, 62, 63], "whatev": [1, 31, 36], "wheel": 62, "when": [1, 16, 20, 31, 33, 36, 47, 54, 55, 59, 60, 61, 62, 69, 71], "whenev": 71, "where": [0, 1, 2, 19, 20, 28, 31, 32, 36, 37, 40, 41, 42, 48, 50, 51, 54, 59, 61, 65, 68, 73], "wherea": [31, 34, 35, 36, 43], "wherev": [31, 36], "whether": [1, 2, 10, 16, 19, 32, 37, 38, 41, 43, 47, 57, 58, 62, 63, 64, 67, 71], "which": [0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 14, 15, 16, 18, 23, 25, 28, 31, 34, 35, 36, 37, 38, 40, 41, 42, 51, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 66, 68, 69, 71, 72, 73], "while": [31, 32, 34, 36, 37, 44, 45, 46, 55, 62, 71], "whitespac": 43, "who": [20, 31, 32, 36, 47, 51, 54, 59, 60, 62], "whole": [28, 59, 62, 71], "whom": [31, 36, 54], "whose": [31, 36, 54], "why": [20, 29, 31, 36, 54], "wide": 31, "wien": 62, "wiki": [21, 29, 70], "wikipedia": [21, 33, 37, 70], "williamson": 60, "wish": [1, 2, 18], "within": [0, 1, 2, 8, 11, 16, 30, 34, 35, 36, 41, 45, 46, 52, 55, 59, 60, 62, 63, 64, 68, 71, 73], "within_group": 2, "within_person_discursive_rang": 11, "within_task": [0, 1, 2, 71], "without": [1, 19, 31, 36, 42, 47, 54, 62, 69], "won": [0, 31, 36, 45], "wonder": 56, "woolei": 4, "woollei": [13, 40, 55], "wooten": 55, "word": [3, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 28, 30, 32, 33, 37, 38, 39, 40, 41, 43, 45, 46, 48, 49, 52, 53, 54, 56, 57, 62, 64, 65, 66, 69, 70], "word_mimicri": 11, "word_start": [19, 49], "wordnet": [1, 61], "words_in_lin": 19, "work": [0, 47, 50, 55, 61, 62], "world": 55, "worri": 62, "would": [1, 29, 31, 34, 35, 36, 37, 42, 50, 54, 62], "wouldn": [31, 36], "wow": 50, "wp": 13, "write": [2, 29, 60], "www": [12, 13, 14, 18, 41, 64], "x": [0, 1, 2, 4, 46, 68], "xinlan": 62, "yashveer": 62, "ye": 19, "yeah": [31, 36], "yeoman": [14, 18, 42, 49], "yesno_quest": [32, 49, 54], "yet": 48, "ylatau": 12, "you": [0, 1, 2, 11, 24, 29, 31, 36, 37, 43, 47, 50, 59, 61, 62, 69], "your": [0, 29, 31, 32, 36, 37, 50, 59, 61, 62], "yourself": [31, 36, 50], "yuluan": 62, "yup": [31, 36], "yuxuan": 62, "z": [12, 39, 49, 51, 64, 73], "zero": [13, 52], "zhang": 62, "zheng": 62, "zhong": 62, "zhou": 62, "zscore": 41, "zscore_chat": 41, "zscore_chats_and_convers": 69, "zscore_convers": 41, "\u00bc": 47, "\u03c4": 55}, "titles": ["The Basics", "Worked Example", "feature_builder module", "basic_features module", "burstiness module", "certainty module", "discursive_diversity module", "fflow module", "get_all_DD_features module", "get_user_network module", "hedge module", "Features: Technical Documentation", "info_exchange_zscore module", "information_diversity module", "lexical_features_v2 module", "named_entity_recognition_features module", "other_lexical_features module", "politeness_features module", "politeness_v2 module", "politeness_v2_helper module", "question_num module", "readability module", "reddit_tags module", "temporal_features module", "textblob_sentiment_analysis module", "turn_taking_features module", "variance_in_DD module", "within_person_discursive_range module", "word_mimicry module", "FEATURE NAME", "Certainty", "Content Word Accommodation", "Conversational Repair", "Dale-Chall Score", "Discursive Diversity", "Forward Flow", "Function Word Accommodation", "Gini Coefficient", "Hedge", "Features: Conceptual Documentation", "Information Diversity", "Information Exchange", "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons", "Message Length", "Message Quantity", "Mimicry (BERT)", "Moving Mimicry", "Named Entity Recognition", "Online Discussion Tags", "Politeness/Receptiveness Markers", "Politeness Strategies", "Sentiment (RoBERTa)", "Positivity Z-Score", "Proportion of First Person Pronouns", "Question (Naive)", "Team Burstiness", "Textblob Polarity", "Textblob Subjectivity", "Time Difference", "Turn Taking Index", "Word Type-Token Ratio", "The Team Communication Toolkit", "Introduction", "assign_chunk_nums module", "calculate_chat_level_features module", "calculate_conversation_level_features module", "calculate_user_level_features module", "check_embeddings module", "gini_coefficient module", "Utilities", "preload_word_lists module", "preprocess module", "summarize_features module", "zscore_chats_and_conversation module"], "titleterms": {"A": 0, "One": 0, "The": [0, 61, 62], "accommod": [31, 36], "addit": 1, "advanc": 1, "assign_chunk_num": 63, "assumpt": 0, "basic": [0, 1, 29, 30, 31, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59, 60], "basic_featur": 3, "bert": 45, "bursti": [4, 55], "calculate_chat_level_featur": 64, "calculate_conversation_level_featur": 65, "calculate_user_level_featur": 66, "caveat": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "certainti": [5, 30], "chall": 33, "chat": [11, 39], "check_embed": 67, "citat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "class": 69, "code": [0, 1], "coeffici": 37, "column": 1, "commun": 61, "conceptu": 39, "configur": 1, "consider": 1, "content": [31, 61], "convers": [11, 32, 39, 62, 69], "count": [42, 59], "customiz": 0, "dale": 33, "data": 1, "demo": [0, 1], "differ": 58, "discurs": 34, "discursive_divers": 6, "discuss": 48, "divers": [34, 40], "document": [11, 39, 62], "driver": 69, "entiti": 47, "environ": [1, 61], "exampl": [1, 41, 47], "exchang": 41, "featur": [11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "feature_build": 2, "featurebuild": [1, 62], "fflow": 7, "file": [30, 34, 35, 45, 46, 47, 51], "first": 53, "flow": 35, "forward": 35, "function": [0, 36], "gener": 62, "get": [1, 61, 62], "get_all_dd_featur": 8, "get_user_network": 9, "gini": 37, "gini_coeffici": 68, "hedg": [10, 38], "high": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "implement": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "import": [1, 61], "index": 59, "indic": 61, "info_exchange_zscor": 12, "inform": [40, 41], "information_divers": 13, "input": [1, 34], "inquiri": 42, "interpret": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "introduct": 62, "intuit": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "kei": 0, "length": 43, "level": [11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "lexical_features_v2": 14, "lexicon": 42, "light": 0, "linguist": 42, "liwc": 42, "marker": 49, "messag": [43, 44], "mimicri": [45, 46], "modul": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "motiv": 62, "move": 46, "naiv": 54, "name": [29, 47], "named_entity_recognition_featur": 15, "note": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "onlin": 48, "other": [42, 69], "other_lexical_featur": 16, "ouput": 34, "our": 62, "output": [30, 35, 45, 46, 47, 51], "packag": [0, 1, 61], "paramet": 0, "person": 53, "pip": [1, 61], "polar": 56, "polit": [49, 50], "politeness_featur": 17, "politeness_v2": 18, "politeness_v2_help": 19, "posit": 52, "preload_word_list": 70, "preprocess": 71, "pronoun": 53, "proport": 53, "quantiti": 44, "question": 54, "question_num": 20, "ratio": 60, "readabl": 21, "recept": 49, "recognit": 47, "recommend": [1, 61], "reddit_tag": 22, "relat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "repair": 32, "roberta": 51, "run": 1, "sampl": [0, 1], "score": [33, 41, 52], "sentiment": 51, "speaker": [11, 59, 62, 69], "start": [1, 61, 62], "strategi": 50, "subject": 57, "summarize_featur": 72, "tabl": 61, "tag": 48, "take": 59, "team": [55, 61, 62], "technic": 11, "temporal_featur": 23, "textblob": [56, 57], "textblob_sentiment_analysi": 24, "time": 58, "token": 60, "toolkit": 61, "touch": 0, "train": 47, "troubleshoot": [1, 61], "turn": 59, "turn_taking_featur": 25, "type": 60, "us": 61, "user": 11, "util": 69, "utter": [11, 39, 62, 69], "variance_in_dd": 26, "virtual": [1, 61], "walkthrough": 1, "within_person_discursive_rang": 27, "word": [31, 36, 42, 60], "word_mimicri": 28, "work": 1, "your": 1, "z": [41, 52], "zscore_chats_and_convers": 73}}) \ No newline at end of file diff --git a/docs/build/html/utils/check_embeddings.html b/docs/build/html/utils/check_embeddings.html index 8f98f7b0..ba19ea73 100644 --- a/docs/build/html/utils/check_embeddings.html +++ b/docs/build/html/utils/check_embeddings.html @@ -132,7 +132,7 @@
-utils.check_embeddings.generate_bert(chat_data, output_path, message_col)
+utils.check_embeddings.generate_bert(chat_data, output_path, message_col, batch_size=64)

Generates RoBERTa sentiment scores for the given chat data and saves them to a CSV file.

Parameters:
@@ -140,6 +140,7 @@
  • chat_data (pd.DataFrame) – Contains message data to be analyzed for sentiments.

  • output_path (str) – Path to save the CSV file containing sentiment scores.

  • message_col (str, optional) – A string representing the column name that should be selected as the message. Defaults to “message”.

  • +
  • batch_size (int) – The size of each batch for processing sentiment analysis. Defaults to 64.

  • Raises:
    @@ -224,17 +225,17 @@
    -utils.check_embeddings.get_sentiment(text)
    -

    Analyzes the sentiment of the given text using a BERT model and returns the scores for positive, negative, and neutral sentiments.

    +utils.check_embeddings.get_sentiment(texts) +

    Analyzes the sentiment of the given list of texts using a BERT model and returns a DataFrame with scores for positive, negative, and neutral sentiments.

    Parameters:
    -

    text (str or None) – The input text to analyze.

    +

    texts (list of str) – The list of input texts to analyze.

    Returns:
    -

    A dictionary with sentiment scores.

    +

    A DataFrame with sentiment scores.

    Return type:
    -

    dict

    +

    pd.DataFrame

    diff --git a/docs/source/basics.rst b/docs/source/basics.rst index 6f5d1c9e..9409d48f 100644 --- a/docs/source/basics.rst +++ b/docs/source/basics.rst @@ -49,6 +49,10 @@ Package Assumptions 8. **Vector Data Cache**: Your data's vector data will be cached in **vector_directory**. This directory will be created if it doesn’t exist, but its contents should be reserved for cached vector files. + * Note: v0.1.3 and earlier compute vectors using _preprocessed_ text by default, which drops capitalization and punctuation. However, this can affect the interpretation of sentiment vectors; for example, "Hello!" has more positive sentiment than "hello." Consequently, from v0.1.4 onwards, we compute vectors using the raw input text, including punctuation and capitalization. To restore this behavior, please set **compute_vectors_from_preprocessed** to True. + + * Additionally, we assume that empty messages are equivalent to "NaN vector," defined `here `_. + 9. **Output Files**: We generate three outputs: **output_file_path_chat_level** (Utterance- or Chat-Level Features), **output_file_path_user_level** (Speaker- or User-Level Features), and **output_file_path_conv_level** (Conversation-Level Features). * This should be a *path*, not just a filename. For example, "./my_file.csv", not just "my_file.csv." @@ -79,4 +83,6 @@ Here are some parameters that can be customized. For more details, refer to the 4. **ner_training_df** and **ner_cutoff**: Measure the number of named entities in each utterance (see :ref:`named_entity_recognition`). -5. **regenerate_vectors**: Force-regenerate vector data even if it already exists. \ No newline at end of file +5. **regenerate_vectors**: Force-regenerate vector data even if it already exists. + +6. **compute_vectors_from_preprocessed**: Computes vectors using preprocessed text (that is, with capitalization and punctuation removed). This was the default behavior for v.0.1.3 and earlier, but we now default to computing metrics on the unpreprocessed text (which INCLUDES capitalization and punctuation), and this parameter now defaults to False. \ No newline at end of file diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 8d4e89e1..b65bc187 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -90,7 +90,7 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de output_file_path_conv_level = "./jury_output_conversation_level.csv", turns = True ) - jury_feature_builder.featurize(col="message") + jury_feature_builder.featurize() Basic Input Columns ^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/index.rst b/docs/source/index.rst index e3a9e994..f17dbd8b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -76,7 +76,7 @@ Once you import the tool, you will be able to declare a FeatureBuilder object, w ) # this line of code runs the FeatureBuilder on your data - my_feature_builder.featurize(col="message") + my_feature_builder.featurize() Use the Table of Contents below to learn more about our tool. We recommend that you begin in the "Introduction" section, then explore other sections of the documentation as they become relevant to you. We recommend reading :ref:`basics` for a high-level overview of the requirements and parameters, and then reading through the :ref:`examples` for a detailed walkthrough and discussion of considerations. diff --git a/examples/featurize.py b/examples/featurize.py index cb751e6f..f4288fe8 100644 --- a/examples/featurize.py +++ b/examples/featurize.py @@ -45,9 +45,14 @@ output_file_path_chat_level = "./jury_TINY_output_chat_level.csv", output_file_path_user_level = "./jury_TINY_output_user_level.csv", output_file_path_conv_level = "./jury_TINY_output_conversation_level.csv", - turns = False + turns = False, + custom_features = [ + "(BERT) Mimicry", + "Moving Mimicry", + "Forward Flow", + "Discursive Diversity"] ) - tiny_juries_feature_builder.featurize(col="message") + tiny_juries_feature_builder.featurize() # Tiny multi-task tiny_multi_task_feature_builder = FeatureBuilder( @@ -59,21 +64,26 @@ output_file_path_conv_level = "./multi_task_TINY_output_conversation_level_stageId_cumulative.csv", turns = False ) - tiny_multi_task_feature_builder.featurize(col="message") + tiny_multi_task_feature_builder.featurize() # FULL DATASETS BELOW ------------------------------------- # # Juries # jury_feature_builder = FeatureBuilder( # input_df = juries_df, - # grouping_keys = ["batch_num", "round_num"], + # grouping_keys = ["batch_num", "round_num"], # vector_directory = "./vector_data/", # output_file_path_chat_level = "./jury_output_chat_level.csv", # output_file_path_user_level = "./jury_output_user_level.csv", # output_file_path_conv_level = "./jury_output_conversation_level.csv", - # turns = True + # turns = True, + # custom_features = [ + # "(BERT) Mimicry", + # "Moving Mimicry", + # "Forward Flow", + # "Discursive Diversity"] # ) - # jury_feature_builder.featurize(col="message") + # jury_feature_builder.featurize() # # CSOP (Abdullah) # csop_feature_builder = FeatureBuilder( @@ -84,7 +94,7 @@ # output_file_path_conv_level = "./csop_output_conversation_level.csv", # turns = True # ) - # csop_feature_builder.featurize(col="message") + # csop_feature_builder.featurize() # # CSOP II (Nak Won Rim) @@ -96,4 +106,4 @@ # output_file_path_conv_level = "./csopII_output_conversation_level.csv", # turns = True # ) - # csopII_feature_builder.featurize(col="message") + # csopII_feature_builder.featurize() diff --git a/pyproject.toml b/pyproject.toml index d13448e3..229cd3c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "team_comm_tools" -version = "0.1.3" +version = "0.1.4" requires-python = ">= 3.10" dependencies = [ "chardet>=3.0.4", @@ -36,6 +36,7 @@ dependencies = [ "torchaudio==2.4.1", "torchvision==0.19.1", "transformers==4.44.0", + "tqdm>=4.66.5", "tzdata>=2023.3", "tzlocal==5.2" ] diff --git a/requirements.txt b/requirements.txt index ce9b755c..3c570b21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,5 +25,6 @@ torch==2.4.1 torchaudio==2.4.1 torchvision==0.19.1 transformers==4.44.0 +tqdm>=4.66.5 tzdata>=2023.3 tzlocal==5.2 diff --git a/src/team_comm_tools/feature_builder.py b/src/team_comm_tools/feature_builder.py index 54fced50..84a08631 100644 --- a/src/team_comm_tools/feature_builder.py +++ b/src/team_comm_tools/feature_builder.py @@ -85,6 +85,9 @@ class FeatureBuilder: :param regenerate_vectors: If true, will regenerate vector data even if it already exists. Defaults to False. :type regenerate_vectors: bool, optional + :param compute_vectors_from_preprocessed: If true, computes vectors using preprocessed text (that is, with capitalization and punctuation removed). This was the default behavior for v.0.1.3 and earlier, but we now default to computing metrics on the unpreprocessed text (which INCLUDES capitalization and punctuation). Defaults to False. + :type compute_vectors_from_preprocessed: bool, optional + :return: The FeatureBuilder doesn't return anything; instead, it writes the generated features to files in the specified paths. It will also print out its progress, so you should see "All Done!" in the terminal, which will indicate that the features have been generated. :rtype: None @@ -108,7 +111,8 @@ def __init__( within_task = False, ner_training_df: pd.DataFrame = None, ner_cutoff: int = 0.9, - regenerate_vectors: bool = False + regenerate_vectors: bool = False, + compute_vectors_from_preprocessed: bool = False ) -> None: # Defining input and output paths. @@ -116,6 +120,7 @@ def __init__( self.orig_data = input_df.copy() self.ner_training = ner_training_df self.vector_directory = vector_directory + print("Initializing Featurization...") self.output_file_path_conv_level = output_file_path_conv_level self.output_file_path_user_level = output_file_path_user_level @@ -218,6 +223,11 @@ def __init__( self.ner_cutoff = ner_cutoff self.regenerate_vectors = regenerate_vectors + if(compute_vectors_from_preprocessed == True): + self.vector_colname = self.message_col # because the message col will eventually get preprocessed + else: + self.vector_colname = self.message_col + "_original" # because this contains the original message + # check grouping rules if self.conversation_id_col not in self.chat_data.columns and len(self.grouping_keys)==0: if(self.conversation_id_col == "conversation_num"): @@ -240,7 +250,7 @@ def __init__( # set new identifier column for cumulative grouping. if self.cumulative_grouping and len(grouping_keys) == 3: - print("NOTE: User has requested cumulative grouping. Auto-generating the key `conversation_num` as the conversation identifier for cumulative convrersations.") + print("NOTE: User has requested cumulative grouping. Auto-generating the key `conversation_num` as the conversation identifier for cumulative conversations.") self.conversation_id_col = "conversation_num" # Input columns are the columns that come in the raw chat data @@ -338,7 +348,7 @@ def __init__( if(not need_sentiment and feature_dict[feature]["bert_sentiment_data"]): need_sentiment = True - check_embeddings(self.chat_data, self.vect_path, self.bert_path, need_sentence, need_sentiment, self.regenerate_vectors, self.message_col) + check_embeddings(self.chat_data, self.vect_path, self.bert_path, need_sentence, need_sentiment, self.regenerate_vectors, message_col = self.vector_colname) if(need_sentence): self.vect_data = pd.read_csv(self.vect_path, encoding='mac_roman') @@ -401,7 +411,7 @@ def merge_conv_data_with_original(self) -> None: if {'index'}.issubset(self.conv_data.columns): self.conv_data = self.conv_data.drop(columns=['index']) - def featurize(self, col: str="message") -> None: + def featurize(self) -> None: """ Main driver function for feature generation. @@ -410,9 +420,6 @@ def featurize(self, col: str="message") -> None: conversation-level features. Finally, the features are saved into the designated output files. - :param col: Column to preprocess, defaults to "message" - :type col: str, optional - :return: None :rtype: None """ @@ -494,7 +501,7 @@ def preprocess_chat_data(self) -> None: # create new column that retains punctuation self.chat_data["message_lower_with_punc"] = self.chat_data[self.message_col].astype(str).apply(preprocess_text_lowercase_but_retain_punctuation) - # Preprocessing the text in `col` and then overwriting the column `col`. + # Preprocessing the text in `message_col` and then overwriting the column `message_col`. # TODO: We should probably use classes to abstract preprocessing module as well? self.chat_data[self.message_col] = self.chat_data[self.message_col].astype(str).apply(preprocess_text) diff --git a/src/team_comm_tools/utils/calculate_chat_level_features.py b/src/team_comm_tools/utils/calculate_chat_level_features.py index 52c73465..f1f5e06a 100644 --- a/src/team_comm_tools/utils/calculate_chat_level_features.py +++ b/src/team_comm_tools/utils/calculate_chat_level_features.py @@ -20,6 +20,9 @@ from .preload_word_lists import * from .zscore_chats_and_conversation import get_zscore_across_all_chats, get_zscore_across_all_conversations +# Loading bar +from tqdm import tqdm + class ChatLevelFeaturesCalculator: """ Initialize variables and objects used by the ChatLevelFeaturesCalculator class. @@ -74,7 +77,7 @@ def calculate_chat_level_features(self, feature_methods: list) -> pd.DataFrame: :rtype: pd.DataFrame """ - for method in feature_methods: + for method in tqdm(feature_methods): method(self) # Return the input dataset with the chat level features appended (as columns) diff --git a/src/team_comm_tools/utils/check_embeddings.py b/src/team_comm_tools/utils/check_embeddings.py index 602f42ee..2c56b3b6 100644 --- a/src/team_comm_tools/utils/check_embeddings.py +++ b/src/team_comm_tools/utils/check_embeddings.py @@ -4,6 +4,7 @@ import os import pickle +from tqdm import tqdm from pathlib import Path import torch @@ -179,14 +180,17 @@ def generate_vect(chat_data, output_path, message_col): print(f"Generating SBERT sentence vectors...") - embedding_arr = [row.tolist() for row in model_vect.encode(chat_data[message_col])] + # Ensure empty strings are encoded as NaN + empty_to_nan = [text if text and text.strip() else np.nan for text in chat_data[message_col].tolist()] + embeddings = model_vect.encode(empty_to_nan) + embedding_arr = [row.tolist() for row in tqdm(embeddings, total=len(chat_data[message_col]))] embedding_df = pd.DataFrame({'message': chat_data[message_col], 'message_embedding': embedding_arr}) # Create directories along the path if they don't exist Path(output_path).parent.mkdir(parents=True, exist_ok=True) embedding_df.to_csv(output_path, index=False) -def generate_bert(chat_data, output_path, message_col): +def generate_bert(chat_data, output_path, message_col, batch_size=64): """ Generates RoBERTa sentiment scores for the given chat data and saves them to a CSV file. @@ -196,42 +200,60 @@ def generate_bert(chat_data, output_path, message_col): :type output_path: str :param message_col: A string representing the column name that should be selected as the message. Defaults to "message". :type message_col: str, optional + :param batch_size: The size of each batch for processing sentiment analysis. Defaults to 64. + :type batch_size: int :raises FileNotFoundError: If the output path is invalid. :return: None :rtype: None """ print(f"Generating RoBERTa sentiments...") - messages = chat_data[message_col] - sentiments = messages.apply(get_sentiment) + messages = chat_data[message_col].tolist() + batch_sentiments_df = pd.DataFrame() - sent_arr = [list(dict.values()) for dict in sentiments] + for i in tqdm(range(0, len(messages), batch_size)): + batch = messages[i:i + batch_size] + batch_df = get_sentiment(batch) + batch_sentiments_df = pd.concat([batch_sentiments_df, batch_df], ignore_index=True) - sent_df = pd.DataFrame(sent_arr, columns =['positive_bert', 'negative_bert', 'neutral_bert']) - # Create directories along the path if they don't exist Path(output_path).parent.mkdir(parents=True, exist_ok=True) - sent_df.to_csv(output_path, index=False) + batch_sentiments_df.to_csv(output_path, index=False) -def get_sentiment(text): +def get_sentiment(texts): """ - Analyzes the sentiment of the given text using a BERT model and returns the scores for positive, negative, and neutral sentiments. + Analyzes the sentiment of the given list of texts using a BERT model and returns a DataFrame with scores for positive, negative, and neutral sentiments. - :param text: The input text to analyze. - :type text: str or None - :return: A dictionary with sentiment scores. - :rtype: dict + :param texts: The list of input texts to analyze. + :type texts: list of str + :return: A DataFrame with sentiment scores. + :rtype: pd.DataFrame """ - if (pd.isnull(text)): - return({'positive': np.nan, 'negative': np.nan, 'neutral': np.nan}) - - text = ' '.join(text.split()[:512]) # handle cases when the text is too long: just take the first 512 chars (hacky, but BERT context window cannot be changed) - encoded = tokenizer(text, return_tensors='pt') + # Handle and tokenize non-null and non-empty texts + texts_series = pd.Series(texts) + non_null_non_empty_texts = texts_series[texts_series.apply(lambda x: pd.notnull(x) and x.strip() != '')].tolist() + + if not non_null_non_empty_texts: + # Return a DataFrame with NaN if there are no valid texts to process + return pd.DataFrame(np.nan, index=texts_series.index, columns=['positive_bert', 'negative_bert', 'neutral_bert']) + + encoded = tokenizer(non_null_non_empty_texts, padding=True, truncation=True, max_length=512, return_tensors='pt') output = model_bert(**encoded) - scores = output[0][0].detach().numpy() - scores = softmax(scores) + scores = output[0].detach().numpy() + scores = softmax(scores, axis=1) + + sent_dict = { + 'positive_bert': scores[:, 2], + 'negative_bert': scores[:, 0], + 'neutral_bert': scores[:, 1] + } + + non_null_sent_df = pd.DataFrame(sent_dict) + + # Initialize the DataFrame such that null texts and empty texts get np.nan + sent_df = pd.DataFrame(np.nan, index=texts_series.index, columns=['positive_bert', 'negative_bert', 'neutral_bert']) + sent_df.loc[texts_series.apply(lambda x: pd.notnull(x) and x.strip() != ''), ['positive_bert', 'negative_bert', 'neutral_bert']] = non_null_sent_df.values - # sample output format - return({'positive': scores[2], 'negative': scores[0], 'neutral': scores[1]}) \ No newline at end of file + return sent_df \ No newline at end of file diff --git a/tests/data/cleaned_data/test_vector_edge_cases.csv b/tests/data/cleaned_data/test_vector_edge_cases.csv new file mode 100644 index 00000000..0659f844 --- /dev/null +++ b/tests/data/cleaned_data/test_vector_edge_cases.csv @@ -0,0 +1,6 @@ +conversation_num,speaker_nickname,message +1,A,"Once upon a time in the bustling city of Byteville, there was a young programmer named Alex who had a passion for coding. Alex loved the feeling of creating something out of nothing, of transforming ideas into functional software that could help people in their daily lives. Byteville was a city that thrived on technology, and programmers like Alex were considered artisans of the modern age. Alex had always been intrigued by the possibilities of coding, but there was one lesson that stood out more than any other during their journey: the paramount importance of testing code. In the early days of Alex's career, they were eager to dive straight into writing code. The thrill of seeing their ideas come to life was intoxicating, and Alex quickly built a reputation for developing features at a swift pace. However, this rapid development came at a cost. Despite the initial excitement, Alex found that their code often contained bugs, leading to frequent crashes and frustrated users. It was a humbling experience, and Alex realized that there was more to being a great programmer than just writing code. One day, Alex was working on a project for a new client. The client had requested a complex application that promised to revolutionize the way people managed their daily tasks. Excitement coursed through Alex's veins, and they threw themselves into the project with gusto. Hours turned into days, and days into weeks, as Alex painstakingly coded every feature that the client had envisioned. Finally, the day came when Alex was ready to present the finished product. The client gathered their team to witness the unveiling, and Alex began the demonstration with confidence. But soon, things started to go awry. Buttons that were supposed to trigger specific actions did nothing. Data that was meant to be saved was lost. The application crashed multiple times, leaving the client and their team frustrated and unimpressed. Alex felt a pang of embarrassment and disappointment. After the failed presentation, Alex decided to seek advice from an experienced programmer named Maya, who was known for her meticulous and bug-free code. Maya had been in the industry for many years and had a wealth of knowledge about best practices in software development. Alex visited Maya's office, and after explaining the situation, Maya nodded knowingly. 'Alex,' she said gently, 'coding is both an art and a science. While your enthusiasm and creativity are wonderful, you must also embrace the discipline of testing. Testing your code is essential to ensure that it functions as intended and to deliver a reliable product to your users.' Maya spent the next several hours teaching Alex about different types of testing. They covered unit testing, which involves testing individual components of the code to ensure they work correctly. They delved into integration testing, where multiple components are tested together to ensure they function seamlessly as a whole. Maya also explained the importance of system testing, where the entire application is tested in an environment that simulates real-world usage. She emphasized the value of automated testing frameworks, which could run tests repeatedly, catch regressions, and provide quick feedback during the development process. Alex began to see testing in a new light. It wasn't just an afterthought or a tedious chore; it was an integral part of the development cycle that could make the difference between a functional, reliable application and a buggy, frustrating one. Inspired by Maya's wisdom, Alex returned to their project with renewed determination. They wrote unit tests for every component, ensuring that each piece of the codebase was robust and free from errors. They created integration tests to check how well different parts of the application worked together. Finally, they conducted system tests to simulate how users would interact with the application in the real world. The process was time-consuming, but Alex quickly discovered that it was worth every minute. The tests caught several issues that would have otherwise slipped through the cracks, and Alex was able to fix these problems before they reached the client. The application became more stable, reliable, and user-friendly. When the time came for the second presentation, Alex stood before the client with newfound confidence. The application ran smoothly, every feature working as intended. The client and their team were impressed, and Alex couldn't help but feel a sense of pride and accomplishment. This was the result of not just hard work, but also a commitment to quality through thorough testing. Word of Alex's successful project spread throughout Byteville, and soon, other programmers and clients began to take notice. Alex became an advocate for the importance of testing code, sharing the lessons they had learned with anyone willing to listen. Testing became a mantra not just for Alex, but for a new generation of programmers who understood that excellence in coding wasn't just about rapid development but also about ensuring reliability and functionality. From that day forward, Alex continued to create innovative applications, each one meticulously tested to ensure it met the highest standards of quality. Byteville thrived on these advancements, and the city's residents knew that whatever challenges lay ahead, they could rely on the software created by dedicated programmers like Alex. And so, the story of Alex and the importance of testing code became a legend in Byteville, a timeless reminder that behind every great line of code is the diligent effort to make sure it works flawlessly." +1,B,This is a much shorter message. +2,B,:-) +2,A, +2,A, \ No newline at end of file diff --git a/tests/run_package_grouping_tests.py b/tests/run_package_grouping_tests.py index 9fefd43a..6df66cc1 100644 --- a/tests/run_package_grouping_tests.py +++ b/tests/run_package_grouping_tests.py @@ -34,7 +34,7 @@ output_file_path_conv_level = "./tiny_multi_task_PT1_level_conv", turns = False, ) - testing_package_task_1.featurize(col="message") + testing_package_task_1.featurize() """ Testing Package Task 1 Advanced Features @@ -64,9 +64,9 @@ output_file_path_chat_level = "./output/chat/tiny_multi_task_case2_level_chat.csv", output_file_path_user_level = "./output/user/tiny_multi_task_case2_level_user.csv", output_file_path_conv_level = "./output/conv/tiny_multi_task_case2_level_conv.csv", - turns = False, + turns = False ) - testing_case_2.featurize(col="message") + testing_case_2.featurize() print("TESTING CASE 3A .....") testing_case_3_a = FeatureBuilder( @@ -82,9 +82,9 @@ output_file_path_chat_level = "./output/chat/tiny_multi_task_case3a_level_chat.csv", output_file_path_user_level = "./output/user/tiny_multi_task_case3a_level_user.csv", output_file_path_conv_level = "./output/conv/tiny_multi_task_case3a_level_conv.csv", - turns = False, + turns = False ) - testing_case_3_a.featurize(col="message") + testing_case_3_a.featurize() print("TESTING CASE 3B .....") testing_case_3_b = FeatureBuilder( @@ -100,9 +100,9 @@ output_file_path_chat_level = "./output/chat/tiny_multi_task_case3b_level_chat.csv", output_file_path_user_level = "./output/user/tiny_multi_task_case3b_level_user.csv", output_file_path_conv_level = "./output/conv/tiny_multi_task_case3b_level_conv.csv", - turns = False, + turns = False ) - testing_case_3_b.featurize(col="message") + testing_case_3_b.featurize() print("TESTING CASE 3C .....") testing_case_3_c = FeatureBuilder( @@ -118,9 +118,9 @@ output_file_path_chat_level = "./output/chat/tiny_multi_task_case3c_level_chat.csv", output_file_path_user_level = "./output/user/tiny_multi_task_case3c_level_user.csv", output_file_path_conv_level = "./output/conv/tiny_multi_task_case3c_level_conv.csv", - turns = False, + turns = False ) - testing_case_3_c.featurize(col="message") + testing_case_3_c.featurize() print("TESTING IMPROPER CASE .....") testing_case_improper = FeatureBuilder( @@ -136,9 +136,9 @@ output_file_path_chat_level = "./output/chat/tiny_multi_task_improper_level_chat.csv", output_file_path_user_level = "./output/user/tiny_multi_task_improper_level_user.csv", output_file_path_conv_level = "./output/conv/tiny_multi_task_improper_level_conv.csv", - turns = False, + turns = False ) - testing_case_improper.featurize(col="message") + testing_case_improper.featurize() """ Test robustness of the FeatureBuilder to taking in an input that contains existing feature names. @@ -150,6 +150,7 @@ testing_chat_existing = FeatureBuilder( input_df = chat_df_existing_output, vector_directory = "./vector_data/", + message_col = "message_original", output_file_path_chat_level = "./output/chat/test_chat_level_existing_chat.csv", output_file_path_user_level = "./output/user/test_chat_level_existing_user.csv", output_file_path_conv_level = "./output/conv/test_chat_level_existing_conv.csv", @@ -158,7 +159,34 @@ "Moving Mimicry", "Forward Flow", "Discursive Diversity" + ], + turns = False + ) + testing_chat_existing.featurize() + + """ + Test robustness of the vector pipeline to weird inputs: + - Super long input + - Input containing only symbols (e.g,. ":-)") + - Empty input + - Input with many spaces + """ + vector_testing_input = pd.read_csv("data/cleaned_data/test_vector_edge_cases.csv", encoding='latin-1') + + test_vectors = FeatureBuilder( + input_df = vector_testing_input, + vector_directory = "./vector_data/", + output_file_path_chat_level = "./output/chat/test_vectors_chat.csv", + output_file_path_user_level = "./output/user/test_vectors_user.csv", + output_file_path_conv_level = "./output/conv/test_vectors_conv.csv", + custom_features = [ + "(BERT) Mimicry", + "Moving Mimicry", + "Forward Flow", + "Discursive Diversity" ], turns = False, + regenerate_vectors = True ) - testing_chat_existing.featurize(col="message") + test_vectors.featurize() + diff --git a/tests/run_tests.py b/tests/run_tests.py index 8fb938a9..aadd767c 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -43,9 +43,9 @@ "Forward Flow", "Discursive Diversity" ], - turns = False, + turns = False ) - testing_chat.featurize(col="message") + testing_chat.featurize() testing_conv = FeatureBuilder( input_df = conv_df, @@ -59,9 +59,9 @@ "Forward Flow", "Discursive Diversity" ], - turns = False, + turns = False ) - testing_conv.featurize(col="message") + testing_conv.featurize() test_ner_feature_builder = FeatureBuilder( input_df = test_ner_df, @@ -76,9 +76,9 @@ "Forward Flow", "Discursive Diversity" ], - turns = False, + turns = False ) - test_ner_feature_builder.featurize(col="message") + test_ner_feature_builder.featurize() # testing perturbed chat level features testing_chat_complex = FeatureBuilder( @@ -93,9 +93,9 @@ "Forward Flow", "Discursive Diversity" ], - turns = False, + turns = False ) - testing_chat_complex.featurize(col="message") + testing_chat_complex.featurize() # testing conv features testing_conv_complex = FeatureBuilder( @@ -110,9 +110,9 @@ "Forward Flow", "Discursive Diversity" ], - turns = False, + turns = False ) - testing_conv_complex.featurize(col="message") + testing_conv_complex.featurize() testing_conv_complex_ts = FeatureBuilder( input_df = conv_complex_timestamps_df, @@ -126,9 +126,9 @@ "Forward Flow", "Discursive Diversity" ], - turns = False, + turns = False ) - testing_conv_complex_ts.featurize(col="message") + testing_conv_complex_ts.featurize() # testing forward flow testing_forward_flow = FeatureBuilder( @@ -143,7 +143,7 @@ "Forward Flow", "Discursive Diversity" ], - turns = False, + turns = False ) - testing_forward_flow.featurize(col="message") \ No newline at end of file + testing_forward_flow.featurize() \ No newline at end of file diff --git a/tests/test_feature_metrics.py b/tests/test_feature_metrics.py index 42ad513a..4c1844c4 100644 --- a/tests/test_feature_metrics.py +++ b/tests/test_feature_metrics.py @@ -175,8 +175,8 @@ def test_chat_complex(batch): inv_result = batch[1][1][feature] dir_result = batch[2][1][feature] - inv_distance = og_result - inv_result - dir_distance = og_result - dir_result + inv_distance = abs(og_result - inv_result) + dir_distance = abs(og_result - dir_result) # calculate ratio between inv and dir ratio = dir_distance / inv_distance diff --git a/tests/test_package.py b/tests/test_package.py index 0106b230..ac145b66 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -4,6 +4,8 @@ from numpy import nan import logging import itertools +import os +from sklearn.metrics.pairwise import cosine_similarity # Import Test Outputs input_data = pd.read_csv("data/cleaned_data/multi_task_TINY_cols_renamed.csv", encoding='utf-8') @@ -13,6 +15,9 @@ case3b_chatdf = pd.read_csv("./output/chat/tiny_multi_task_case3b_level_chat.csv") case3c_chatdf = pd.read_csv("./output/chat/tiny_multi_task_case3c_level_chat.csv") impropercase_chatdf = pd.read_csv("./output/chat/tiny_multi_task_improper_level_chat.csv") +sentiment_output = pd.read_csv('./vector_data/sentiment/chats/test_vectors_chat.csv') +sbert_output = pd.read_csv('./vector_data/sentence/chats/test_vectors_chat.csv') + # Import the Feature Dictionary from team_comm_tools.feature_dict import feature_dict @@ -167,6 +172,8 @@ def test_improper_case(): file.write(f"Number of unique conversation identifiers in improper case: {improper_ids}\n") file.write(f"Number of unique conversation identifiers in Case 2: {case_2_ids}\n") + raise + def test_robustness_to_existing_column_names(): try: chat_df_orig = pd.read_csv("./output/chat/test_chat_level_chat.csv") # original output @@ -187,4 +194,43 @@ def test_robustness_to_existing_column_names(): file.write("------TEST FAILED------\n") file.write(f"Robustness check for passing in chat dataframe with feature columns failed.\n") - raise \ No newline at end of file + raise + +def get_nan_vector_str(): + current_dir = os.path.dirname(__file__) + nan_vector_file_path = os.path.join(current_dir, '../src/team_comm_tools/features/assets/nan_vector.txt') + nan_vector_file_path = os.path.abspath(nan_vector_file_path) + + f = open(nan_vector_file_path, "r") + return f.read() + +def str_to_vec(str_vec): + vector_list = [float(e) for e in str_vec[1:-1].split(',')] + return np.array(vector_list).reshape(-1, 1) + +def test_empty_vectors_equal(): + try: + # assert that the last two rows are equal; they're both empty + assert(sbert_output.iloc[-1]["message_embedding"]==sbert_output.iloc[-2]["message_embedding"]) + assert(sentiment_output.iloc[-1].equals(sentiment_output.iloc[-2])) + + # assert that the 'positive bert' of the last sentiment is np.nan + assert(np.isnan(float(sentiment_output.iloc[-1]["positive_bert"]))) + + # compare empty vector to nan vector + message_embedding_str = sbert_output.iloc[-1]["message_embedding"] + message_embedding_vec = str_to_vec(message_embedding_str) + nan_vector_str = get_nan_vector_str() + nan_vector = str_to_vec(nan_vector_str) + + # Compute cosine similarity and assert it's essentially 1 + similarity = cosine_similarity([message_embedding_vec.flatten()], [nan_vector.flatten()])[0][0] + assert(round(similarity, 0) == 1.0) + + except AssertionError: + with open('test.log', 'a') as file: + file.write("\n") + file.write("------TEST FAILED------\n") + file.write(f"Empty message vectors / sentence scores are not equal.\n") + + raise