Gemaakt met Colaboratory

NielsRogge · NielsRogge · commit 272f550392a1 · 2024-03-30T16:01:37.000+01:00
diff --git a/BEiT/Understanding_BeitForMaskedImageModeling.ipynb b/BEiT/Understanding_BeitForMaskedImageModeling.ipynb
@@ -5,8 +5,7 @@
     "colab": {
       "name": "Understanding BeitForMaskedImageModeling.ipynb",
       "provenance": [],
-      "collapsed_sections": [],
-      "authorship_tag": "ABX9TyML+527/GMXCF12tSDUgQFX",
+      "authorship_tag": "ABX9TyMNKksEST+khtV9qo1CbZT9",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -737,7 +736,7 @@
       "source": [
         "!pip install -q transformers"
       ],
-      "execution_count": 8,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -765,7 +764,7 @@
       "source": [
         "!git clone https://github.com/microsoft/unilm.git"
       ],
-      "execution_count": 2,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -790,7 +789,7 @@
       "source": [
         "!pip install -q einops"
       ],
-      "execution_count": 3,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -805,7 +804,7 @@
       "source": [
         "!pip install -q DALL-E"
       ],
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -836,7 +835,7 @@
       "source": [
         "%cd unilm/beit"
       ],
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -877,7 +876,7 @@
         "\n",
         "image"
       ],
-      "execution_count": 6,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -917,15 +916,15 @@
         "outputId": "02c13ee1-11ba-4157-f15d-2d23162b66e7"
       },
       "source": [
-        "from transformers import BeitFeatureExtractor\n",
+        "from transformers import BeitImageProcessor\n",
         "\n",
-        "feature_extractor = BeitFeatureExtractor()\n",
+        "image_processor = BeitImageProcessor()\n",
         "\n",
         "# create input 1 (pixel_values)\n",
-        "pixel_values = feature_extractor(image, return_tensors=\"pt\").pixel_values\n",
+        "pixel_values = image_processor(image, return_tensors=\"pt\").pixel_values\n",
         "pixel_values.shape"
       ],
-      "execution_count": 9,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -961,7 +960,7 @@
         "pixel_values_dall_e = visual_token_transform(image).unsqueeze(0)\n",
         "pixel_values_dall_e.shape"
       ],
-      "execution_count": 10,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -985,7 +984,7 @@
         "!mkdir -p dall_e_tokenizer\n",
         "!wget -o dall_e_tokenizer/encoder.pkl https://cdn.openai.com/dall-e/encoder.pkl"
       ],
-      "execution_count": 11,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1027,7 +1026,7 @@
         "\n",
         "model = BeitForMaskedImageModeling.from_pretrained(\"microsoft/beit-base-patch16-224-pt22k\")"
       ],
-      "execution_count": 12,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -1079,7 +1078,7 @@
         "            min_num_patches=min_mask_patches_per_block,\n",
         "        )"
       ],
-      "execution_count": 13,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1094,7 +1093,7 @@
         "bool_masked_pos = mask_generator()\n",
         "bool_masked_pos = torch.from_numpy(bool_masked_pos).unsqueeze(0)"
       ],
-      "execution_count": 34,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1109,7 +1108,7 @@
       "source": [
         "bool_masked_pos.shape"
       ],
-      "execution_count": 35,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1129,14 +1128,14 @@
         "id": "fFad_m_s41Ru"
       },
       "source": [
-        "from dall_e import map_pixels, load_model \n",
+        "from dall_e import map_pixels, load_model\n",
         "import torch\n",
         "\n",
         "# step 2: get input_ids from OpenAI's DALL-E\n",
         "device = torch.device('cpu')\n",
         "encoder = load_model(\"https://cdn.openai.com/dall-e/encoder.pkl\", device)"
       ],
-      "execution_count": 16,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1151,7 +1150,7 @@
       "source": [
         "pixel_values_dall_e.shape"
       ],
-      "execution_count": 17,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1177,7 +1176,7 @@
         "    bool_masked_pos = bool_masked_pos.flatten(1).to(torch.bool)\n",
         "    labels = input_ids[bool_masked_pos]"
       ],
-      "execution_count": 37,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1192,7 +1191,7 @@
       "source": [
         "input_ids.shape"
       ],
-      "execution_count": 38,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1218,7 +1217,7 @@
       "source": [
         "labels.shape"
       ],
-      "execution_count": 39,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1249,7 +1248,7 @@
       "source": [
         "outputs = model(pixel_values, bool_masked_pos)"
       ],
-      "execution_count": 40,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1264,7 +1263,7 @@
       "source": [
         "labels"
       ],
-      "execution_count": 41,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1292,7 +1291,7 @@
       "source": [
         "predictions = outputs.logits[bool_masked_pos].argmax(-1)"
       ],
-      "execution_count": 42,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1307,7 +1306,7 @@
       "source": [
         "predictions"
       ],
-      "execution_count": 43,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1356,13 +1355,13 @@
         "# prepare for model (simply resize + normalize)\n",
         "mean = (0.5, 0.5, 0.5)\n",
         "std = (0.5, 0.5, 0.5)\n",
-        "transform = transforms.Compose([transforms.Resize((224, 224)), \n",
-        "                                transforms.ToTensor(), \n",
+        "transform = transforms.Compose([transforms.Resize((224, 224)),\n",
+        "                                transforms.ToTensor(),\n",
         "                                transforms.Normalize(mean, std)])\n",
         "pixel_values = transform(image).unsqueeze(0)\n",
         "pixel_values.shape"
       ],
-      "execution_count": 25,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1388,7 +1387,7 @@
       "source": [
         "pixel_values[0,:3,:3,:3]"
       ],
-      "execution_count": 26,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1426,7 +1425,7 @@
         "# forward pass\n",
         "outputs = model(pixel_values, bool_masked_pos)"
       ],
-      "execution_count": 27,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1441,7 +1440,7 @@
       "source": [
         "outputs.logits.shape"
       ],
-      "execution_count": 28,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1467,7 +1466,7 @@
       "source": [
         "outputs.logits[bool_masked_pos][:3,:3]"
       ],
-      "execution_count": 29,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1488,9 +1487,7 @@
       "metadata": {
         "id": "D-8mymKCOQZK"
       },
-      "source": [
-        ""
-      ],
+      "source": [],
       "execution_count": null,
       "outputs": []
     }