new splits

uhh-lt · Sep 20, 2024 · f53b782 · f53b782
1 parent e5f64c8
commit f53b782
Show file tree

Hide file tree

Showing 22 changed files with 13,159 additions and 12,039 deletions.
diff --git a/CompUGE-Research/Baselines/Object-and-Aspect-Identification/testing_results/analysis.ipynb b/CompUGE-Research/Baselines/Object-and-Aspect-Identification/testing_results/analysis.ipynb
diff --git a/...earch/Baselines/Object-and-Aspect-Identification/testing_results_predsless/analysis.ipynb b/...earch/Baselines/Object-and-Aspect-Identification/testing_results_predsless/analysis.ipynb
@@ -9,8 +9,49 @@
    },
    "outputs": [],
    "source": [
-    ""
+    "import pandas as pd\n",
+    "\n",
+    "metrics = [\n",
+    "    pd.read_csv(f'{folder}/metrics.csv') for folder in ['dslim-bert-base-NER-uncased', 'FacebookAI-roberta-base', 'google-bert-bert-base-uncased', 'microsoft-deberta-v3-base']\n",
+    "    ]\n",
+    "# header of each metrics file: training on,tested on,model,accuracy,precision,recall,f1\n",
+    "# make a new metrics file, with training on, tested on, average accuracy, average precision, average recall, average f1\n",
+    "# sort by f1 score\n",
+    "# remove model column\n",
+    "metrics = pd.concat(metrics)\n",
+    "metrics = metrics.drop(columns=['model'])\n",
+    "metrics = metrics.groupby(['training on', 'tested on']).mean().reset_index()\n",
+    "metrics = metrics.sort_values(by='f1', ascending=False)\n",
+    "\n",
+    "# save the metrics file\n",
+    "metrics.to_csv('metrics.csv', index=False)"
    ]
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "metrics = pd.read_csv('microsoft-deberta-v3-base/metrics.csv')\n",
+    "\n",
+    "# keep only the f1 columns plus the training on / tested on columns\n",
+    "metrics = metrics[metrics.columns[metrics.columns.str.contains('f1|training on|tested on')]]\n",
+    "\n",
+    "# remove all rows whose tested on contains \"oai_all\"\n",
+    "metrics = metrics[~metrics['tested on'].str.contains('oai_all')]\n",
+    "\n",
+    "# remove rows where training on and tested on are the same\n",
+    "#metrics = metrics[metrics['training on'] != metrics['tested on']]\n",
+    "\n",
+    "# keep only rows where tested on contains chekalina\n",
+    "metrics = metrics[metrics['tested on'].str.contains('chekalina')]\n",
+    "\n",
+    "metrics"
+   ],
+   "id": "a3ecfcbbce10c02c"
   }
  ],
  "metadata": {

diff --git a/CompUGE-Research/Baselines/Stance-Classification/testing_results_new/analysis.ipynb b/CompUGE-Research/Baselines/Stance-Classification/testing_results_new/analysis.ipynb
@@ -6,8 +6,8 @@
    "metadata": {
     "collapsed": true,
     "ExecuteTime": {
-     "end_time": "2024-09-09T21:16:07.715629Z",
-     "start_time": "2024-09-09T21:16:07.705234Z"
+     "end_time": "2024-09-09T23:35:24.833513Z",
+     "start_time": "2024-09-09T23:35:24.814503Z"
     }
    },
    "source": [
@@ -21,31 +21,198 @@
     "# sort by f1 score\n",
     "# remove model column\n",
     "metrics = pd.concat(metrics)\n",
-    "metrics = metrics.drop(columns=['model'])\n",
-    "metrics = metrics.groupby(['training on', 'tested on']).mean().reset_index()\n",
-    "metrics = metrics.sort_values(by='f1', ascending=False)\n",
     "\n",
-    "# remove rows where tested on is sc_all\n",
-    "metrics = metrics[metrics['tested on'] != 'sc_all']\n",
-    "\n"
+    "# keep only rows where tested on is sc_all\n",
+    "metrics = metrics[metrics['tested on'] == 'sc_all']\n",
+    "\n",
+    "# remove tested on column\n",
+    "metrics = metrics.drop(columns='tested on')\n",
+    "\n",
+    "# round to 2 decimal places\n",
+    "metrics = metrics.round(2)\n",
+    "\n",
+    "metrics"
    ],
-   "outputs": [],
-   "execution_count": 7
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "       training on        model  accuracy  precision  recall    f1\n",
+       "2    sc_webis_2022   FacebookAI      0.09       0.01    0.09  0.02\n",
+       "5   sc_compsent_19   FacebookAI      0.88       0.87    0.88  0.88\n",
+       "8           sc_all   FacebookAI      0.88       0.88    0.88  0.88\n",
+       "2    sc_webis_2022  google-bert      0.44       0.50    0.44  0.46\n",
+       "5   sc_compsent_19  google-bert      0.86       0.85    0.86  0.86\n",
+       "8           sc_all  google-bert      0.87       0.87    0.87  0.86\n",
+       "11   sc_webis_2022  google-bert      0.70       0.58    0.70  0.59\n",
+       "14  sc_compsent_19  google-bert      0.86       0.85    0.86  0.86\n",
+       "17          sc_all  google-bert      0.87       0.87    0.87  0.86\n",
+       "2    sc_webis_2022    microsoft      0.70       0.51    0.70  0.59\n",
+       "5   sc_compsent_19    microsoft      0.89       0.89    0.89  0.89\n",
+       "8           sc_all    microsoft      0.89       0.89    0.89  0.89"
+      ],
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>training on</th>\n",
+       "      <th>model</th>\n",
+       "      <th>accuracy</th>\n",
+       "      <th>precision</th>\n",
+       "      <th>recall</th>\n",
+       "      <th>f1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>sc_webis_2022</td>\n",
+       "      <td>FacebookAI</td>\n",
+       "      <td>0.09</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>0.09</td>\n",
+       "      <td>0.02</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>sc_compsent_19</td>\n",
+       "      <td>FacebookAI</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.88</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>sc_all</td>\n",
+       "      <td>FacebookAI</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.88</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>sc_webis_2022</td>\n",
+       "      <td>google-bert</td>\n",
+       "      <td>0.44</td>\n",
+       "      <td>0.50</td>\n",
+       "      <td>0.44</td>\n",
+       "      <td>0.46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>sc_compsent_19</td>\n",
+       "      <td>google-bert</td>\n",
+       "      <td>0.86</td>\n",
+       "      <td>0.85</td>\n",
+       "      <td>0.86</td>\n",
+       "      <td>0.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>sc_all</td>\n",
+       "      <td>google-bert</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>0.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>sc_webis_2022</td>\n",
+       "      <td>google-bert</td>\n",
+       "      <td>0.70</td>\n",
+       "      <td>0.58</td>\n",
+       "      <td>0.70</td>\n",
+       "      <td>0.59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>sc_compsent_19</td>\n",
+       "      <td>google-bert</td>\n",
+       "      <td>0.86</td>\n",
+       "      <td>0.85</td>\n",
+       "      <td>0.86</td>\n",
+       "      <td>0.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>sc_all</td>\n",
+       "      <td>google-bert</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>0.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>sc_webis_2022</td>\n",
+       "      <td>microsoft</td>\n",
+       "      <td>0.70</td>\n",
+       "      <td>0.51</td>\n",
+       "      <td>0.70</td>\n",
+       "      <td>0.59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>sc_compsent_19</td>\n",
+       "      <td>microsoft</td>\n",
+       "      <td>0.89</td>\n",
+       "      <td>0.89</td>\n",
+       "      <td>0.89</td>\n",
+       "      <td>0.89</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>sc_all</td>\n",
+       "      <td>microsoft</td>\n",
+       "      <td>0.89</td>\n",
+       "      <td>0.89</td>\n",
+       "      <td>0.89</td>\n",
+       "      <td>0.89</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 20
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-09-09T21:16:08.395155Z",
-     "start_time": "2024-09-09T21:16:08.390014Z"
+     "end_time": "2024-09-09T23:36:13.622553Z",
+     "start_time": "2024-09-09T23:36:13.607983Z"
     }
    },
    "cell_type": "code",
    "source": [
-    "# convert table to rows training on, columns tested on, values f1\n",
-    "metrics = metrics.pivot(index='training on', columns='tested on', values='f1')\n",
+    "# group by training on and model, take the mean of each metric\n",
+    "metrics = metrics.groupby(['training on', 'model']).mean().reset_index()\n",
     "\n",
-    "# round to 2 decimal places\n",
-    "metrics = metrics.round(2)\n",
+    "# convert table to rows training on, columns model, values f1\n",
+    "metrics = metrics.pivot(index='training on', columns='model', values='f1')\n",
     "\n",
     "metrics"
    ],
@@ -54,11 +221,11 @@
     {
      "data": {
       "text/plain": [
-       "tested on       sc_compsent_19  sc_webis_2022\n",
-       "training on                                  \n",
-       "sc_all                    0.89           0.53\n",
-       "sc_compsent_19            0.89           0.42\n",
-       "sc_webis_2022             0.42           0.36"
+       "model           FacebookAI  google-bert  microsoft\n",
+       "training on                                       \n",
+       "sc_all                0.88        0.860       0.89\n",
+       "sc_compsent_19        0.88        0.860       0.89\n",
+       "sc_webis_2022         0.02        0.525       0.59"
       ],
       "text/html": [
        "<div>\n",
@@ -78,43 +245,48 @@
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
-       "      <th>tested on</th>\n",
-       "      <th>sc_compsent_19</th>\n",
-       "      <th>sc_webis_2022</th>\n",
+       "      <th>model</th>\n",
+       "      <th>FacebookAI</th>\n",
+       "      <th>google-bert</th>\n",
+       "      <th>microsoft</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>training on</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
+       "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>sc_all</th>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.860</td>\n",
        "      <td>0.89</td>\n",
-       "      <td>0.53</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>sc_compsent_19</th>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.860</td>\n",
        "      <td>0.89</td>\n",
-       "      <td>0.42</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>sc_webis_2022</th>\n",
-       "      <td>0.42</td>\n",
-       "      <td>0.36</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.525</td>\n",
+       "      <td>0.59</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 8
+   "execution_count": 22
   },
   {
    "metadata": {},

diff --git a/...ch/Raw Datasets/Beloucif/Labeling Task/SeqDataSetAllData_TestDataExcluded_Flair_train.tsv b/...ch/Raw Datasets/Beloucif/Labeling Task/SeqDataSetAllData_TestDataExcluded_Flair_train.tsv
@@ -48632,6 +48632,10 @@ plastic	OBJ-2
 bottle	OBJ-2
 ?	O
 
+
+
+
+
 Why	O
 is	O
 the	O

diff --git a/CompUGE-Research/Raw Datasets/Chekalina-OAI/test.tsv b/CompUGE-Research/Raw Datasets/Chekalina-OAI/test.tsv
@@ -294,6 +294,15 @@ than	O
 ios	B-Object
 .	O
 
+
+
+
+
+
+
+
+
+
 oh	O
 and	O
 by	O

diff --git a/...UGE-Research/Raw Datasets/Webis-CompQuestions-22-2/comparative-questions-parsing/full.tsv b/...UGE-Research/Raw Datasets/Webis-CompQuestions-22-2/comparative-questions-parsing/full.tsv
@@ -200,6 +200,7 @@ sentence_id	words	labels
 16	the	ASP
 16	spanish	ASP
 16	language	ASP
+
 17	what	O
 17	foods	OBJ
 17	burn	ASP
@@ -209,6 +210,13 @@ sentence_id	words	labels
 17	blast	ASP
 17	that	ASP
 17	fat	ASP
+
+
+
+
+
+
+
 18	what	O
 18	are	O
 18	the	O
-Original file line number
+Diff line change
@@ Expand Up / @@ -48632,6 +48632,10 @@ plastic OBJ-2 @@
     bottle	OBJ-2
     ?	O
     Why	O
     is	O
     the	O
@@ Expand Down @@