# **Installing the packages**

In [1]:
! pip install pandas numpy plotly scikit-learn matplotlib torch lightning spacy torchtext==0.6
!python -m spacy download en_core_web_sm

Collecting lightning
  Downloading lightning-2.2.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
Collecting torchtext==0.6
  Downloading torchtext-0.6.0-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)

# **Importing the libraries**

In [2]:
# Mount the drive if not mounted
from google.colab import drive
drive.mount("/content/drive/")

import time
import re
import string
from collections import Counter

import numpy as np
import pandas as pd
from tqdm import tqdm
import spacy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, balanced_accuracy_score, classification_report

import torch
from torch import nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from torchtext import data
from torchtext.data import Field
from torchtext.data import Dataset, Example
from torchtext.data import BucketIterator
from torchtext.vocab import FastText
from torchtext.vocab import CharNGram

#import wandb
# wandb.login(relogin=True)

import warnings
warnings.filterwarnings('ignore')

Mounted at /content/drive/


In [3]:
# Load spaCy's English model
spacy_en = spacy.load('en_core_web_sm')

In [4]:
SEED = 42

torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# **Loading and Pre-processing data**

In [5]:
df = pd.read_csv("/content/drive/MyDrive/AA-Tutorial/data/Agora.csv", encoding='ISO-8859-1')
# Renaming all the features of the dataframe
df = df.rename(str.strip, axis='columns')
# Merging the Item and Item Description using a [SEP] token
separator = ' [SEP] '
df['TEXT'] = df.apply(lambda row: f"{row['Item']}{separator}{row['Item Description']}", axis=1)
# dropping Unncessary columns
df.drop(columns=["Item", "Item Description", "Category", "Price", "Origin", "Destination", "Rating", "Remarks"], inplace=True)
# Assuming that vendors Amsterdam100 and amsterdam100 are the same vendors
df.Vendor = df.Vendor.apply(lambda x: x.lower())

Due to the extensive time required to train on over 100K+ samples, we have decided to limit our analysis to a subset of 5K samples. To get these samples, we look into vendors that have 5+ advertisements and then allocate all the vendors that have less than 5 ads into a new class, "others".

In [6]:
df = df.iloc[:5000]
# Calculate advertisement frequency for each vendor
ad_freq = df['Vendor'].value_counts()
# Filter vendors with ad frequency less than 5
vendors_to_replace = ad_freq[ad_freq < 5].index
# Update DataFrame: Replace vendor names with 'others' where ad frequency is less than 5
df['Vendor'] = df['Vendor'].apply(lambda x: 'others' if x in vendors_to_replace else x)
# Getting all unique vendor handles from the 'Vendor' column.
unique_vendors = df['Vendor'].unique()

In [7]:
# Creating a dictionary to map each unique vendor name to a unique integer.
# The `enumerate` function is used to generate integer indices starting from 0 for each unique label found in `df['Vendor']`.
# This effectively creates a label-to-index mapping.
vendor2idx = {l: i for i, l in enumerate(df['Vendor'].unique())}

# Applying the mapping to convert all categorical labels in 'label' column to integers.
# The `apply` method goes through each label in `df['label']`, and the lambda function uses the mapping `ltoi`
# to find the corresponding integer. The result is a column of integer labels.
df['Vendor'] = df['Vendor'].apply(lambda y: vendor2idx[y])

# **Splitting data**

In [8]:
train_df, temp_df = train_test_split(df, test_size=0.25, random_state=1111)
# Calculate the proportion of test size in the temporary dataset
test_size_in_temp = 0.20 / (0.20 + 0.05)
# Now split the temporary set into test and validation sets
test_df, val_df = train_test_split(temp_df, test_size=test_size_in_temp, random_state=1111)

print(f"Training set size: {len(train_df)}")
print(f"Test set size: {len(test_df)}")
print(f"Validation set size: {len(val_df)}")

Training set size: 3750
Test set size: 250
Validation set size: 1000


# **Pytorch Dataset**

The AgoraDataset class inherits from Dataset, intended to facilitate the creation of a dataset from a pandas DataFrame for use in training models with PyTorch, particularly in contexts where data is tabular and includes text fields that need processing similar to what's found in torchtext. The implementation seems to aim for compatibility with torchtext's data handling by utilizing the Example and Field abstractions, though it's not a direct usage pattern seen in PyTorch's Dataset.

In [9]:
class AgoraDataset(data.Dataset):
    # Constructor for initializing the dataset object.
    def __init__(self, df, fields, is_test=False, **kwargs):
        examples = []
        # Iterating over each row in the DataFrame to construct dataset examples.
        for i, row in df.iterrows():
            # Assigning 'Vendor' as the label for training/validation data, and None for test data.
            label = row.Vendor if not is_test else None
            # Extracting the text data from the row.
            text = row.TEXT
            # Creating an Example object for each row and appending it to the examples list.
            examples.append(data.Example.fromlist([text, label], fields))

        # Calling the constructor of the parent class (Dataset) with the examples and fields.
        super().__init__(examples, fields, **kwargs)

    # A static method to define the sort key used for sorting examples, based on their text length.
    @staticmethod
    def sort_key(ex):
        return len(ex.text)

    # A class method to create dataset splits for training, validation, and testing.
    @classmethod
    def splits(cls, fields, train_df, val_df=None, test_df=None, **kwargs):
        train_data, val_data, test_data = (None, None, None)

        # Creating dataset objects for training, validation, and testing dataframes if they are provided.
        if train_df is not None:
            train_data = cls(train_df.copy(), fields, **kwargs)
        if val_df is not None:
            val_data = cls(val_df.copy(), fields, **kwargs)
        if test_df is not None:
            test_data = cls(test_df.copy(), fields, False, **kwargs)

        # Returning the dataset objects as a tuple.
        return tuple(d for d in (train_data, val_data, test_data) if d is not None)

# **Preparing text and label fields**

This approach allows for the definition, preprocessing, and numericalization (token-to-index mapping) of text data, along with the integration of pre-trained word embeddings. For this project, we are using [Fasttext](https://fasttext.cc/) simple 300 dimension word embeddings.

--- 

### FastText Embedding
- **Word Representation:** FastText is an embedding technique that represents words in a high-dimensional space, capturing the semantic meaning of words by considering both the word itself and its subword components (n-grams).
- **Handling OOV Words:** One of the strengths of FastText is its ability to generate representations for out-of-vocabulary (OOV) words by using the embeddings of subword n-grams, making it robust in handling rare or unseen words in the training data.
- **Pre-trained Models:** FastText comes with pre-trained models on large corpora, allowing for the transfer of semantic knowledge to specific tasks without the need for extensive training data.

In [10]:
# Define the tokenization function that uses spaCy for English
def tokenize_en(text):
    # Tokenize the input text and return a list of tokens
    return [tok.text for tok in spacy_en.tokenizer(text)]

# Define fields for the text and label data, specifying how they should be processed
TEXT = data.Field(tokenize = tokenize_en, batch_first=True, include_lengths = True) # Process text data: tokenize and prepare batch
LABEL = data.LabelField(batch_first=True) # Process label data

# Define the structure of the dataset including fields
fields = [('text',TEXT), ('label',LABEL)]

# Split the dataset into training, validation, and test sets
train_ds, val_ds, test_ds = AgoraDataset.splits(fields, train_df=train_df, val_df=val_df, test_df=test_df)

# Build the vocabulary for the text field using the training dataset
TEXT.build_vocab(train_ds,
                 max_size = 100000, # Maximum size of the vocabulary
                 vectors = 'fasttext.simple.300d', # Use FastText pre-trained word embeddings
                 unk_init = torch.Tensor.zero_) # Initialize unknown tokens to zero

# Build the vocabulary for the label field using the training dataset
LABEL.build_vocab(train_ds)

# Output the size of the vocabulary for the text and label fields
print("Size of TEXT vocabulary:",len(TEXT.vocab)) # Number of unique tokens in text
print("Size of LABEL vocabulary:",len(LABEL.vocab)) # Number of unique tokens in label

# Output the 10 most common words in the vocabulary
print(TEXT.vocab.freqs.most_common(10)) # Commonly used words

.vector_cache/wiki.simple.vec: 293MB [00:01, 179MB/s]                           
100%|██████████| 111051/111051 [00:16<00:00, 6805.18it/s]


Size of TEXT vocabulary: 14801
Size of LABEL vocabulary: 153
[('*', 11139), (' ', 7792), ('.', 4329), ('[', 3892), (']', 3888), ('-', 3883), ('SEP', 3750), ('...', 3004), (':', 2600), ('the', 2398)]


# **Defining the model**

The BiGRUModel class utilizes a [Bidirectional Gated Recurrent Unit (BiGRU)](https://en.wikipedia.org/wiki/Gated_recurrent_unit) for processing sequences.  This class effectively integrates pre-trained Fasttext embeddings as a fixed (non-trainable) input representation, processes sequences with a BiGRU to capture temporal dependencies, and applies a linear transformation to produce outputs for classification. The architecture is designed for tasks where leveraging pre-trained word embeddings can significantly enhance model performance by providing a rich, pre-learned representation of the input vocabulary.

---

### Bidirectional GRU
- **Gated Recurrent Units (GRUs):** GRUs are a type of recurrent neural network (RNN) architecture that efficiently captures dependencies in sequential data, such as text, by processing input data both forwards and backwards.
- **Bidirectional Processing:** A Bidirectional GRU processes data in both directions (forward and backward), allowing it to capture context from both past and future within the sequence. This is especially beneficial for understanding the context and meaning in text data.
- **Handling Sequential Data:** Well-suited for tasks that involve sequential data, such as language modeling, text classification, and sentiment analysis, where the order of words is crucial for understanding the overall meaning.

### Bidirectional GRU with FastText Embedding Classifier
- **Combining Strengths:** By combining Bidirectional GRU with FastText embeddings, this classifier leverages both the contextual awareness of bidirectional processing and the rich semantic representations of FastText. This results in improved performance on text classification tasks.
- **Suitable for Complex Tasks:** Especially effective for complex natural language processing tasks that require an understanding of nuanced context and semantics, such as sentiment analysis, question answering, and topic classification.
- **Flexibility and Adaptability:** The approach is adaptable to various languages and domains, benefiting from FastText's robust handling of word representations and GRU's efficient processing of sequential data.

Overall, a Bidirectional GRU with FastText embedding classifier represents a powerful combination for tackling a wide range of text classification challenges, offering both deep contextual understanding and rich semantic representation of words.





In [11]:
class GRU_net(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout, pad_idx):
        # Initialize the parent class (nn.Module)
        super().__init__()

        # Embedding layer to transform indices into dense vectors of a fixed size
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        # Replace LSTM with GRU
        self.rnn = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          dropout=dropout,
                          batch_first=True)

        # Linear layer to map from hidden state space to hidden space
        self.fc1 = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, hidden_dim)

        # Linear layer to map from hidden space to output dimension
        self.fc2 = nn.Linear(hidden_dim, output_dim)

        # Dropout for regularization
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        # text: tensor of [batch size, sentence length]

        # Pass text through embedding layer
        embedded = self.embedding(text)
        # embedded: tensor of [batch size, sentence length, embedding dimension]

        # Pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.cpu(), batch_first=True)

        # Pass packed sequence through GRU
        packed_output, hidden = self.rnn(packed_embedded)

        # Unpack sequence (if needed, not done in this code as GRU's output is not directly used after unpacking)

        # Concatenate the final forward and backward hidden state
        if self.rnn.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        else:
            hidden = self.dropout(hidden[-1,:,:])

        # Pass the output through the first fully connected layer
        output = self.fc1(hidden)
        # Apply dropout
        output = self.dropout(output)
        # Pass the output through the second fully connected layer
        output = self.fc2(output)

        # Return the final output
        return output

# **Loading Model**

In [37]:
# Set the batch size for training and evaluation
BATCH_SIZE = 32

# Determine the computing device based on the availability of CUDA
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create iterators for the training and validation datasets
train_iterator, valid_iterator = BucketIterator.splits(
    (train_ds, val_ds), # Datasets for training and validation
    batch_size = BATCH_SIZE, # Batch size for both datasets
    sort_within_batch = True, # Sort examples within each batch by their lengths
    device = device) # Specify the computing device

# Create an iterator for the test dataset
_, test_iterator = BucketIterator.splits(
    (train_ds, test_ds), # Reuse train_ds to keep the structure, but focus is on test_ds
    batch_size = BATCH_SIZE, # Batch size for the dataset
    sort_within_batch = True, # Sort examples within each batch by their lengths
    device = device) # Specify the computing device

# Set hyperparameters for the model training
num_epochs = 10 # Number of epochs to train for
learning_rate = 0.001 # Learning rate for the optimizer

# Define the model architecture parameters
INPUT_DIM = len(TEXT.vocab) # Vocabulary size
EMBEDDING_DIM = 300 # Size of each embedding vector
HIDDEN_DIM = 256 # Size of hidden layers
OUTPUT_DIM = 153 # Size of the output layer; Change this accordingly as you increase the size of the dataset
N_LAYERS = 2 # Number of recurrent layers
BIDIRECTIONAL = True # Use a bidirectional model
DROPOUT = 0.2 # Dropout rate for regularization
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] # Index of the padding token for embedding layer

# Instantiate the model with the specified architecture parameters
model = GRU_net(INPUT_DIM,
                EMBEDDING_DIM,
                HIDDEN_DIM,
                OUTPUT_DIM,
                N_LAYERS,
                BIDIRECTIONAL,
                DROPOUT,
                PAD_IDX)

# Print the model architecture for review
print(model)

# Function to count the number of trainable parameters in the model
def count_parameters(model):
    # Sum the number of elements in all parameters that require gradient computation
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Print the total number of trainable parameters in the model
print(f'The model has {count_parameters(model):,} trainable parameters')


GRU_net(
  (embedding): Embedding(14801, 300, padding_idx=1)
  (rnn): GRU(300, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (fc1): Linear(in_features=512, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=153, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)
The model has 6,650,757 trainable parameters


In [38]:
# Load pre-trained embeddings from the TEXT field's vocabulary into a variable
pretrained_embeddings = TEXT.vocab.vectors

# Copy the pre-trained embeddings into the model's embedding layer weights
model.embedding.weight.data.copy_(pretrained_embeddings)

# Initialize the embedding vector for the padding index (PAD_IDX) to all zeros
# This is done to ensure that the padding token does not contribute to the model's predictions
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

# **Helper functions**

In [39]:
def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    all_predictions = []
    all_labels = []

    model.train()

    for batch in iterator:
        text, text_lengths = batch.text

        optimizer.zero_grad()

        predictions = model(text, text_lengths)
        # For multi-class classification, predictions are not squeezed
        # predictions shape is [batch size, n_classes]

        loss = criterion(predictions, batch.label)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Convert predictions to actual class numbers
        _, predicted_labels = torch.max(predictions, 1)

        # Collect all labels and predictions for metric calculation
        all_labels.extend(batch.label.cpu().numpy())
        all_predictions.extend(predicted_labels.cpu().numpy())

    # Calculate metrics for multi-class classification
    balanced_acc = balanced_accuracy_score(all_labels, all_predictions)
    weighted_f1 = f1_score(all_labels, all_predictions, average='weighted')
    micro_f1 = f1_score(all_labels, all_predictions, average='micro')
    macro_f1 = f1_score(all_labels, all_predictions, average='macro')

    return {
        "loss": epoch_loss / len(iterator),
        "balanced_accuracy": balanced_acc,
        "weighted_f1": weighted_f1,
        "micro_f1": micro_f1,
        "macro_f1": macro_f1
    }

In [40]:
def evaluate(model, iterator, criterion):
    # Initialize variables to accumulate loss and store predictions and labels
    epoch_loss = 0
    all_predictions = []
    all_labels = []

    # Set model to evaluation mode (turns off dropout and batch normalization)
    model.eval()

    # Disable gradient calculations to speed up the process
    with torch.no_grad():
        for batch in iterator:
            # Extract text and its lengths from the current batch
            text, text_lengths = batch.text

            # Generate predictions using the model
            predictions = model(text, text_lengths).squeeze(1)

            # Compute loss for the current batch
            loss = criterion(predictions, batch.label)

            # Accumulate the loss over all batches
            epoch_loss += loss.item()


            # Convert predictions to actual class numbers
            _, predicted_labels = torch.max(predictions, 1)

            # Collect all labels and predictions for metric calculation
            all_labels.extend(batch.label.cpu().numpy())
            all_predictions.extend(predicted_labels.cpu().numpy())

    # Calculate metrics using accumulated predictions and true labels
    balanced_acc = balanced_accuracy_score(all_labels, all_predictions)
    weighted_f1 = f1_score(all_labels, all_predictions, average='weighted')
    micro_f1 = f1_score(all_labels, all_predictions, average='micro')
    macro_f1 = f1_score(all_labels, all_predictions, average='macro')

    # Return loss and calculated metrics
    return {
        "loss": epoch_loss / len(iterator),
        "balanced_accuracy": balanced_acc,
        "weighted_f1": weighted_f1,
        "micro_f1": micro_f1,
        "macro_f1": macro_f1
    }

# **Training Model**

In [41]:
# Record start time
t = time.time()

# Initialize the best validation loss to infinity
best_valid_loss = float('inf')

# Move the model to the appropriate device (GPU or CPU)
model.to(device)

# Define the loss function
criterion = nn.CrossEntropyLoss()
batch
# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training and evaluation loop
for epoch in range(num_epochs):
    # Train the model and retrieve metrics
    train_metrics = train(model, train_iterator, optimizer, criterion)

    # Evaluate the model on the validation set and retrieve metrics
    valid_metrics = evaluate(model, valid_iterator, criterion)

    # Print training and validation metrics
    print(f'Epoch: {epoch+1}')
    print(f'\tTrain Loss: {train_metrics["loss"]:.4f} | Train Balanced Acc: {train_metrics["balanced_accuracy"]:.4f}')
    print(f'\tValid Loss: {valid_metrics["loss"]:.4f} | Valid Balanced Acc: {valid_metrics["balanced_accuracy"]:.4f}')
    print(f'\tTrain Weighted F1: {train_metrics["weighted_f1"]:.4f} | Valid Weighted F1: {valid_metrics["weighted_f1"]:.4f}')
    print(f'\tTrain Micro F1: {train_metrics["micro_f1"]:.4f} | Valid Micro F1: {valid_metrics["micro_f1"]:.4f}')
    print(f'\tTrain Macro F1: {train_metrics["macro_f1"]:.4f} | Valid Macro F1: {valid_metrics["macro_f1"]:.4f}')

    # Update lists to track loss and accuracy (if necessary for later analysis)
    # loss.append(train_metrics["loss"])
    # acc.append(train_metrics["balanced_accuracy"])
    # val_acc.append(valid_metrics["balanced_accuracy"])

    # Check if the current model is the best one based on validation loss
    # if valid_metrics["loss"] < best_valid_loss:
        # best_valid_loss = valid_metrics["loss"]
        # Save the current best model
        # torch.save(model.state_dict(), 'best_model.pt')

# Calculate and print the total time taken for training and evaluation
print(f'Time taken: {time.time()-t:.3f} seconds')

Epoch: 1
	Train Loss: 3.8779 | Train Balanced Acc: 0.0317
	Valid Loss: 2.8381 | Valid Balanced Acc: 0.1062
	Train Weighted F1: 0.1368 | Valid Weighted F1: 0.2793
	Train Micro F1: 0.1971 | Valid Micro F1: 0.3850
	Train Macro F1: 0.0292 | Valid Macro F1: 0.0753
Epoch: 2
	Train Loss: 2.2533 | Train Balanced Acc: 0.1625
	Valid Loss: 1.9536 | Valid Balanced Acc: 0.2607
	Train Weighted F1: 0.4234 | Valid Weighted F1: 0.4969
	Train Micro F1: 0.4912 | Valid Micro F1: 0.5600
	Train Macro F1: 0.1497 | Valid Macro F1: 0.2192
Epoch: 3
	Train Loss: 1.2362 | Train Balanced Acc: 0.3692
	Valid Loss: 1.7194 | Valid Balanced Acc: 0.4139
	Train Weighted F1: 0.6604 | Valid Weighted F1: 0.5899
	Train Micro F1: 0.6949 | Valid Micro F1: 0.6150
	Train Macro F1: 0.3609 | Valid Macro F1: 0.3760
Epoch: 4
	Train Loss: 0.5824 | Train Balanced Acc: 0.6204
	Valid Loss: 1.8392 | Valid Balanced Acc: 0.5031
	Train Weighted F1: 0.8353 | Valid Weighted F1: 0.6169
	Train Micro F1: 0.8461 | Valid Micro F1: 0.6320
	Train Ma

# **Testing on Test Dataset**

In [42]:
evaluate(model, test_iterator, criterion)

{'loss': 1.9752449542284012,
 'balanced_accuracy': 0.667471480425201,
 'weighted_f1': 0.7113831447711159,
 'micro_f1': 0.708,
 'macro_f1': 0.5553573957836322}

# **Loading the Results Dataframe**

In [43]:
results_df = pd.read_csv("/content/drive/MyDrive/AA-Tutorial/data/results.csv")

In [45]:
results_df["Bi-GRU"] = [0.6674714, 0.7113831, 0.708, 0.5553573]

In [46]:
results_df

Unnamed: 0,Metrics,MultinomialNB,SVC,RandomForest,LogisticRegression,MLP,All-miniLM,DistilBERT,DistilRoBERTa,Bi-GRU
0,Accuracy,0.331297,0.682322,0.412122,0.775756,0.676682,0.647554,0.778432,0.782053,0.667471
1,Weighted-F1,0.524405,0.708481,0.34429,0.662284,0.752288,0.682059,0.794814,0.801566,0.711383
2,Micro-F1,0.596,0.7,0.344,0.668,0.756,0.703,0.808,0.811,0.708
3,Macro-F1,0.337436,0.671945,0.283832,0.590027,0.646615,0.552629,0.70647,0.707506,0.555357
