{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Intel MKL WARNING: Support of Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) enabled only processors has been deprecated. Intel oneAPI Math Kernel Library 2025.0 will require Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.\n",
      "Intel MKL WARNING: Support of Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) enabled only processors has been deprecated. Intel oneAPI Math Kernel Library 2025.0 will require Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/andrew/miniconda3/envs/python310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "import json\n",
    "import numpy as np\n",
    "\n",
    "from pathlib import Path\n",
    "from datasets import load_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Simple evaluator that:\n",
    "1. Loads the benchmark data\n",
    "2. Parses the prediction string\n",
    "3. Evaluates success based on the `delta` parameter\n",
    "\"\"\"\n",
    "\n",
    "class QSpatialEvaluator:\n",
    "    delta = 2\n",
    "\n",
    "    def __init__(self, benchmark_split):\n",
    "        assert benchmark_split in [\"QSpatial_plus\", \"QSpatial_scannet\"]\n",
    "        self.dataset = load_dataset(\"andrewliao11/Q-Spatial-Bench\", split=benchmark_split)\n",
    "\n",
    "    def evaluate(self, data_ind, vlm_response):\n",
    "\n",
    "        #### Parse ground truth\n",
    "        value = self.dataset[\"answer_value\"][data_ind]\n",
    "        unit = self.dataset[\"answer_unit\"][data_ind]\n",
    "        ground_truth_value_in_cms = value * self._get_multiplier(unit)\n",
    "\n",
    "        #### Parse prediction\n",
    "        # Value: take the last \\scalar{...} box and average any numbers inside it\n",
    "        pattern = r'scalar{([^}]*)}'\n",
    "        str_inside_scalar_boxes = re.findall(pattern, vlm_response)[-1]\n",
    "        scalar_list = re.findall(r'\\d+\\.?\\d*', str_inside_scalar_boxes)\n",
    "        parsed_scalar = np.array(scalar_list).astype(float).mean()\n",
    "\n",
    "        # Unit: take the last \\distance_unit{...} box\n",
    "        pattern = r'distance_unit{([^}]*)}'\n",
    "        str_inside_unit_boxes = re.findall(pattern, vlm_response)\n",
    "        parsed_unit = str_inside_unit_boxes[-1]\n",
    "\n",
    "        # Success if prediction and ground truth agree within a factor of `delta`\n",
    "        pred_value_in_cms = parsed_scalar * self._get_multiplier(parsed_unit)\n",
    "        success = max(pred_value_in_cms / ground_truth_value_in_cms, ground_truth_value_in_cms / pred_value_in_cms) < self.delta\n",
    "\n",
    "        return dict(\n",
    "            ground_truth_value_in_cms = ground_truth_value_in_cms,\n",
    "            pred_value_in_cms = pred_value_in_cms,\n",
    "            success = success\n",
    "        )\n",
    "\n",
    "    def _get_multiplier(self, unit):\n",
    "\n",
    "        unit = unit.lower()\n",
    "        if unit in [\"meters\", \"meter\", \"m\", \"metre\", \"metres\"]:\n",
    "            multiplier = 100\n",
    "        elif unit in [\"centimeters\", \"centimeter\", \"cm\"]:\n",
    "            multiplier = 1\n",
    "        elif unit in [\"feet\", \"foot\", \"ft\"]:\n",
    "            multiplier = 30.48\n",
    "        elif unit in [\"inch\", \"inches\", \"in\"]:\n",
    "            multiplier = 2.54\n",
    "        elif unit in [\"mm\"]:\n",
    "            multiplier = 0.1\n",
    "        else:\n",
    "            # Fall back to centimeters rather than raising on an unknown unit\n",
    "            #raise ValueError(f\"Unknown unit: {unit}\")\n",
    "            print(f\"Unknown unit: {unit}\")\n",
    "            multiplier = 1\n",
    "\n",
    "        return multiplier"
   ]
  },
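  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative sketch (not part of the benchmark): how `evaluate` parses a response.\n",
    "# The toy response below is hand-made; the regexes and the factor-of-`delta`\n",
    "# criterion mirror QSpatialEvaluator.evaluate above.\n",
    "toy_response = \"The gap is roughly \\\\scalar{50} \\\\distance_unit{cm}.\"\n",
    "\n",
    "parsed_scalar = float(re.findall(r'\\d+\\.?\\d*', re.findall(r'scalar{([^}]*)}', toy_response)[-1])[0])\n",
    "parsed_unit = re.findall(r'distance_unit{([^}]*)}', toy_response)[-1]\n",
    "print(parsed_scalar, parsed_unit)\n",
    "\n",
    "# With a hypothetical ground truth of 60 cm: max(50/60, 60/50) = 1.2 < 2, so success\n",
    "gt_cms, pred_cms = 60.0, parsed_scalar  # \"cm\" multiplier is 1\n",
    "print(max(pred_cms / gt_cms, gt_cms / pred_cms) < QSpatialEvaluator.delta)"
   ]
  },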
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "evaluator = QSpatialEvaluator(benchmark_split=\"QSpatial_plus\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "To determine the minimum distance between the two speckled pattern stool chairs in the image, let's follow these steps:\\n\\n1. **Identify the Stools**: Locate the two speckled pattern stools in the image. They are positioned in front of the couches.\\n\\n2. **Reference Points**: Choose reference points on each stool to measure the distance. The closest points on the stools would be the edges facing each other.\\n\\n3. **Estimate the Distance**: Visually estimate the distance between these two closest points. Given the perspective and the relative size of the stools, we can approximate the distance.\\n\\nConsidering the size of the stools and the space between them, the minimum distance between the two speckled pattern stool chairs is approximately:\\n\\n\\\\scalar{1} \\\\distance_unit{meter}\n",
      "\n",
      "Evaluation: {'ground_truth_value_in_cms': 96.0, 'pred_value_in_cms': 100.0, 'success': True}\n"
     ]
    }
   ],
   "source": [
    "# Example VLM response from GPT-4o\n",
    "vlm_response = \"To determine the minimum distance between the two speckled pattern stool chairs in the image, let's follow these steps:\\\\n\\\\n1. **Identify the Stools**: Locate the two speckled pattern stools in the image. They are positioned in front of the couches.\\\\n\\\\n2. **Reference Points**: Choose reference points on each stool to measure the distance. The closest points on the stools would be the edges facing each other.\\\\n\\\\n3. **Estimate the Distance**: Visually estimate the distance between these two closest points. Given the perspective and the relative size of the stools, we can approximate the distance.\\\\n\\\\nConsidering the size of the stools and the space between them, the minimum distance between the two speckled pattern stool chairs is approximately:\\\\n\\\\n\\\\\\\\scalar{1} \\\\\\\\distance_unit{meter}\\n\"\n",
    "\n",
    "print(vlm_response)\n",
    "print(\"Evaluation:\", evaluator.evaluate(data_ind=41, vlm_response=vlm_response))"
   ]
  },
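  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hedged sketch: computing overall delta-accuracy for a split. `vlm_responses` is a\n",
    "# hypothetical mapping from data index to a collected VLM response string; here it\n",
    "# reuses the single GPT-4o example above just to show the shape.\n",
    "vlm_responses = {41: vlm_response}\n",
    "results = [evaluator.evaluate(data_ind=i, vlm_response=r) for i, r in vlm_responses.items()]\n",
    "print(\"delta-accuracy:\", np.mean([r[\"success\"] for r in results]))"
   ]
  },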
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "multi_rounds_vlm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}