Skip to content

Commit

Permalink
update feature_dict with latest writeups and update examples with new…
Browse files Browse the repository at this point in the history
… setup
  • Loading branch information
xehu committed Aug 8, 2024
1 parent 7ec7c7a commit a63b273
Showing 1 changed file with 20 additions and 20 deletions.
40 changes: 20 additions & 20 deletions src/team_comm_tools/feature_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"semantic_grouping": "Emotion",
"description": "The extent to which a statement is positive, negative, or neutral, as assigned by Cardiffnlp/twitter-roberta-base-sentiment-latest. The total scores (Positive, Negative, Neutral) sum to 1.",
"references": "(Hugging Face, 2023)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/BERT-Sentiment-Analysis-Feature",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/positivity_bert.html",
"function": ChatLevelFeaturesCalculator.concat_bert_features,
"dependencies": [],
"preprocess": [],
Expand All @@ -44,7 +44,7 @@
"semantic_grouping": "Quantity",
"description": "The length of a message in words and characters.",
"references": "(Ranganath et al., 2013; Cao et al., 2021)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.18-Number-Words,-Messages-Per-Person",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/message_length.html",
"function": ChatLevelFeaturesCalculator.text_based_features,
"dependencies": [],
"preprocess": [],
Expand All @@ -58,7 +58,7 @@
"semantic_grouping": "Quantity",
"description": "The total number of messages sent.",
"references": "(Cao et al., 2021; Marlow et al., 2018, as objective communication frequency)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.18-Number-Words,-Messages-Per-Person",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/message_quantity.html",
"function": ChatLevelFeaturesCalculator.text_based_features,
"dependencies": [],
"preprocess": [],
Expand All @@ -75,7 +75,7 @@
"semantic_grouping": "Content",
"description": "A crude measure of task-focused communication: the total number of words spoken, with the number of first-person pronouns (which suggest self-focus) removed. This value is then z-scored to describe the extent to which a message had more/less task-focused communication relative to other messages. We implement two flavors of the z-score: the first scores the messages with respect to other messages in the same conversation; the second scores the messages with respect to all messages in the data.",
"references": "(Tausczik & Pennebaker, 2013)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/E.24-Information-Exchange",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/information_exchange.html#",
"function": ChatLevelFeaturesCalculator.info_exchange,
"dependencies": [ChatLevelFeaturesCalculator.text_based_features],
"preprocess": [],
Expand Down Expand Up @@ -187,7 +187,7 @@
"semantic_grouping": "Content",
"description": "The ratio of word types (the total number of unique words in an utterance) to tokens (the total number of words in an utterance).",
"references": "(Reichel et al., 2015)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.13-Word-Type-Token-Ratio,-Proportion-of-First-Person-Pronouns",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/word_ttr.html",
"function": ChatLevelFeaturesCalculator.other_lexical_features,
"dependencies": [ChatLevelFeaturesCalculator.text_based_features, ChatLevelFeaturesCalculator.lexical_features],
"preprocess": [preprocess_text_lowercase_but_retain_punctuation], # "message_lower_with_punc"
Expand All @@ -201,7 +201,7 @@
"semantic_grouping": "Content",
"description": "The proportion of words in an utterance that are first-person pronouns (e.g., “I,” “me,” “we,” “us”).",
"references": "(Reichel et al., 2015)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.13-Word-Type-Token-Ratio,-Proportion-of-First-Person-Pronouns",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/proportion_of_first_person_pronouns.html",
"function": ChatLevelFeaturesCalculator.other_lexical_features,
"dependencies": [ChatLevelFeaturesCalculator.text_based_features, ChatLevelFeaturesCalculator.lexical_features],
"preprocess": [preprocess_text_lowercase_but_retain_punctuation], # "message_lower_with_punc"
Expand All @@ -215,7 +215,7 @@
"semantic_grouping": "Variance",
"description": "The total number of function words used in a given turn that were also used in the previous turn. Function words are defined as a list of 190 words from the source paper.",
"references": "(Ranganath et al., 2013)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.9-Mimicry:-Function-word,-Content-word,-BERT,-Moving",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/function_word_accommodation.html",
"function": ChatLevelFeaturesCalculator.calculate_word_mimicry,
"dependencies": [],
"preprocess": [],
Expand All @@ -229,7 +229,7 @@
"semantic_grouping": "Variance",
"description": "The total number of non-function words used in a given turn that were also used in the previous turn, normalized by the inverse document frequency of each content word.",
"references": "(Ranganath et al., 2013)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.9-Mimicry:-Function-word,-Content-word,-BERT,-Moving",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/content_word_accommodation.html",
"function": ChatLevelFeaturesCalculator.calculate_word_mimicry,
"dependencies": [],
"preprocess": [],
Expand All @@ -243,7 +243,7 @@
"semantic_grouping": "Variance",
"description": "The cosine similarity of the SBERT vectors between the current utterance and the utterance in the previous turn.",
"references": "Inspired by accommodation (Matarazzo & Wiens, 1977), language style matching (Tausczik & Pennebaker, 2013) and synchrony (Niederhoffer & Pennebaker, 2002), and implemented in a manner similar to forward flow (Gray et al., 2019)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.9-Mimicry:-Function-word,-Content-word,-BERT,-Moving",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/mimicry_bert.html",
"function": ChatLevelFeaturesCalculator.calculate_vector_word_mimicry,
"dependencies": [],
"preprocess": [],
Expand All @@ -257,7 +257,7 @@
"semantic_grouping": "Variance",
"description": "The running average of all BERT Mimicry scores computed so far in a conversation. Captures the extent to which all participants in a conversation mimic each other up until a given point.",
"references": "Inspired by accommodation (Matarazzo & Wiens, 1977), language style matching (Tausczik & Pennebaker, 2013) and synchrony (Niederhoffer & Pennebaker, 2002), and implemented in a manner similar to forward flow (Gray et al., 2019)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.9-Mimicry:-Function-word,-Content-word,-BERT,-Moving",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/moving_mimicry.html",
"function": ChatLevelFeaturesCalculator.calculate_vector_word_mimicry,
"dependencies": [],
"preprocess": [],
Expand All @@ -271,7 +271,7 @@
"semantic_grouping": "Engagement",
"description": "Captures whether a speaker appears to “hedge” their statement and express lack of certainty; e.g., a score of 1 is assigned if hedge phrases (“I think,” “a little,” “maybe,” “possibly”) are present, and a score of 0 is assigned otherwise.",
"references": "(Ranganath et al., 2013; Danescu-Niculescu-Mizil et al., 2013; Islam et al., 2020)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.9-Hedge",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/hedge.html",
"function": ChatLevelFeaturesCalculator.calculate_hedge_features,
"dependencies": [ChatLevelFeaturesCalculator.text_based_features, ChatLevelFeaturesCalculator.lexical_features],
"preprocess": [],
Expand All @@ -285,7 +285,7 @@
"semantic_grouping": "Content",
"description": "The extent to which a statement is “subjective” (containing personal information) or “objective” (containing factual information), as measured by TextBlob. Ranges from 0 (objective) to 1 (subjective).",
"references": "(Cao et al., 2021)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.18-TextBlob-Sentiment-Analysis-Features",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/textblob_subjectivity.html",
"function": ChatLevelFeaturesCalculator.calculate_textblob_sentiment,
"dependencies": [],
"preprocess": [],
Expand All @@ -299,7 +299,7 @@
"semantic_grouping": "Emotion",
"description": "The extent to which a statement is positive or negative; ranges from -1 (negative) to 1 (positive); neutrality is assigned a score of 0.",
"references": "(Cao et al., 2021)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.18-TextBlob-Sentiment-Analysis-Features",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/textblob_polarity.html",
"function": ChatLevelFeaturesCalculator.calculate_textblob_sentiment,
"dependencies": [],
"preprocess": [],
Expand All @@ -313,7 +313,7 @@
"semantic_grouping": "Emotion",
"description": "The relative extent to which an utterance is more (or less) positive, compared to other messages. Here, we use the BERT-assigned positivity score, and calculate two flavors of the z-score: the first scores the messages with respect to other messages in the same conversation; the second scores the messages with respect to all messages in the data.",
"references": "(Tausczik & Pennebaker, 2013)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/E.24-Positivity-(and-Positivity-z%E2%80%90score)",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/positivity_z_score.html",
"function": ChatLevelFeaturesCalculator.positivity_zscore,
"dependencies": [ChatLevelFeaturesCalculator.concat_bert_features],
"preprocess": [],
Expand All @@ -327,7 +327,7 @@
"semantic_grouping": "Content",
"description": "The reading level of the utterance, as calculated by the Dale-Chall Score.",
"references": "(Cao et al., 2021)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.18-Readability",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/dale_chall_score.html",
"function": ChatLevelFeaturesCalculator.get_dale_chall_score_and_classfication,
"dependencies": [],
"preprocess": [],
Expand All @@ -341,7 +341,7 @@
"semantic_grouping": "Pace",
"description": "The response time between successive utterances.",
"references": "(Reichel et al., 2015)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.13-Temporal-Features",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/time_difference.html",
"function": ChatLevelFeaturesCalculator.get_temporal_features,
"dependencies": [],
"preprocess": [],
Expand Down Expand Up @@ -377,7 +377,7 @@
"semantic_grouping": "Engagement",
"description": "A collection of conversational markers that indicates the use of politeness.",
"references": "(Danescu-Niculescu-Mizil et al., 2013)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/C.23-Politeness-(ConvoKit)",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/politeness_strategies.html",
"function": ChatLevelFeaturesCalculator.calculate_politeness_sentiment,
"dependencies": [],
"preprocess": [preprocess_text_lowercase_but_retain_punctuation], # "message_lower_with_punc"
Expand Down Expand Up @@ -431,7 +431,7 @@
"semantic_grouping": "Engagement",
"description": "A collection of conversational markers that indicates the use of politeness / receptiveness.",
"references": "(Yeomans et al., 2020)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/Politeness-V2",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/politeness_receptiveness_markers.html",
"function": ChatLevelFeaturesCalculator.calculate_politeness_v2,
"dependencies": [],
"preprocess": [preprocess_text_lowercase_but_retain_punctuation], # "message_lower_with_punc"
Expand Down Expand Up @@ -485,7 +485,7 @@
"semantic_grouping": "Content",
"description": "Calculates a number of metrics specific to communications in an online setting: 1. Num all caps: Number of words that are in all caps 2. Num links: Number of links to external resources 3. Num Reddit Users: Number of usernames referred to, in u/RedditUser format. 4. Num Emphasis: The number of times someone used **emphasis** in their message 5. Num Bullet Points: The number of bullet points used in a message. 6. Num Line Breaks: The number of line breaks in a message. 7. Num Quotes: The number of “quotes” in a message. 8. Num Block Quotes Responses: The number of times someone uses a block quote (“>”), indicating a longer quotation 9. Num Ellipses: The number of times someone uses ellipses (…) in their message 10. Num Parentheses: The number of sets of fully closed parenthetical statements in a message 11. Num Emoji: The number of emoticons in a message, e.g., “:)”",
"references": "New",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/Reddit-Tags",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/online_discussions_tags.html",
"function": ChatLevelFeaturesCalculator.get_reddit_features,
"dependencies": [],
"preprocess": [preprocess_text_lowercase_but_retain_punctuation], # "message_lower_with_punc"
Expand All @@ -500,7 +500,7 @@
"semantic_grouping": "Equality",
"description": "Calculates a metric describing the extent to which individuals take turns speaking in a conversation. Adapted from Almaatouq et al. (2023), in which we treat each separate chat as equivalent to an in-game “solution”: “A group’s turn-taking index for a given round is measured by dividing the number of turns taken … by the total number of [chats] on a particular task instance.”",
"references": "(Almaatouq et al., 2023)",
"wiki_link": "https://github.com/Watts-Lab/team-process-map/wiki/Turn%E2%80%90Taking",
"wiki_link": "https://conversational-featurizer.readthedocs.io/en/latest/features_conceptual/turn_taking_index.html",
"function": ConversationLevelFeaturesCalculator.get_turn_taking_features,
"dependencies": [],
"preprocess": [],
Expand Down

0 comments on commit a63b273

Please sign in to comment.