|
173 | 173 | "outputs": [], |
174 | 174 | "source": [ |
175 | 175 | "# Load reference storage configuration from environment\n", |
176 | | - "REFERENCE_DOC_PATH = os.getenv(\"REFERENCE_DOC_PATH\")\n", |
| 176 | + "reference_doc_path = os.getenv(\"REFERENCE_DOC_PATH\")\n", |
177 | 177 | "\n", |
178 | | - "REFERENCE_DOC_SAS_URL = os.getenv(\"REFERENCE_DOC_SAS_URL\")\n", |
179 | | - "if not REFERENCE_DOC_SAS_URL:\n", |
| 178 | + "reference_doc_sas_url = os.getenv(\"REFERENCE_DOC_SAS_URL\")\n", |
| 179 | + "if not reference_doc_sas_url:\n", |
180 | 180 | " REFERENCE_DOC_STORAGE_ACCOUNT_NAME = os.getenv(\"REFERENCE_DOC_STORAGE_ACCOUNT_NAME\")\n", |
181 | 181 | " REFERENCE_DOC_CONTAINER_NAME = os.getenv(\"REFERENCE_DOC_CONTAINER_NAME\")\n", |
182 | 182 | " if REFERENCE_DOC_STORAGE_ACCOUNT_NAME and REFERENCE_DOC_CONTAINER_NAME:\n", |
183 | | - " REFERENCE_DOC_SAS_URL = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n", |
184 | | - " REFERENCE_DOC_STORAGE_ACCOUNT_NAME,\n", |
185 | | - " REFERENCE_DOC_CONTAINER_NAME,\n", |
| 183 | + " from azure.storage.blob import ContainerSasPermissions\n", |
| 184 | + " # We will need \"Write\" for uploading, modifying, or appending blobs\n", |
| 185 | + " reference_doc_sas_url = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n", |
| 186 | + " account_name=REFERENCE_DOC_STORAGE_ACCOUNT_NAME,\n", |
| 187 | + " container_name=REFERENCE_DOC_CONTAINER_NAME,\n", |
| 188 | + " permissions=ContainerSasPermissions(read=True, write=True, list=True),\n", |
| 189 | + " expiry_hours=1,\n", |
186 | 190 | " )" |
187 | 191 | ] |
188 | 192 | }, |
|
203 | 207 | "# Please name the OCR result files with the same name as the original document files including their extensions, and add the suffix \".result.json\"\n", |
204 | 208 | "# For example, if the original document is \"invoice.pdf\", the OCR result file should be named \"invoice.pdf.result.json\"\n", |
205 | 209 | "# NOTE: Please comment out the following line if you don't have any reference documents.\n", |
206 | | - "await client.generate_knowledge_base_on_blob(reference_docs, REFERENCE_DOC_SAS_URL, REFERENCE_DOC_PATH, skip_analyze=False)" |
| 210 | + "await client.generate_knowledge_base_on_blob(reference_docs, reference_doc_sas_url, reference_doc_path, skip_analyze=False)" |
207 | 211 | ] |
208 | 212 | }, |
209 | 213 | { |
|
213 | 217 | "## Create analyzer with defined schema for Pro mode\n", |
214 | 218 | "Before creating the analyzer, you should fill in the constant CUSTOM_ANALYZER_ID with a name relevant to your task. Here, we generate a unique suffix so this cell can be run multiple times to create different analyzers.\n", |
215 | 219 | "\n", |
216 | | - "We use **REFERENCE_DOC_SAS_URL** and **REFERENCE_DOC_PATH** that's set up in the [.env](./.env) file and used in the previous step." |
| 220 | + "We use **reference_doc_sas_url** and **reference_doc_path** that are set up in the [.env](./.env) file and used in the previous step." |
217 | 221 | ] |
218 | 222 | }, |
219 | 223 | { |
|
228 | 232 | "response = client.begin_create_analyzer(\n", |
229 | 233 | " CUSTOM_ANALYZER_ID,\n", |
230 | 234 | " analyzer_template_path=analyzer_template,\n", |
231 | | - " pro_mode_reference_docs_storage_container_sas_url=REFERENCE_DOC_SAS_URL,\n", |
232 | | - " pro_mode_reference_docs_storage_container_path_prefix=REFERENCE_DOC_PATH,\n", |
| 235 | + " pro_mode_reference_docs_storage_container_sas_url=reference_doc_sas_url,\n", |
| 236 | + " pro_mode_reference_docs_storage_container_path_prefix=reference_doc_path,\n", |
233 | 237 | ")\n", |
234 | 238 | "result = client.poll_result(response)\n", |
235 | 239 | "if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n", |
|
342 | 346 | "reference_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/reference_docs\"\n", |
343 | 347 | "\n", |
344 | 348 | "# Load reference storage configuration from environment\n", |
345 | | - "REFERENCE_DOC_PATH_2 = os.getenv(\"REFERENCE_DOC_PATH\").rstrip(\"/\") + \"_2/\" # NOTE: Use a different path for the second sample\n", |
| 349 | + "reference_doc_path_2 = os.getenv(\"REFERENCE_DOC_PATH\").rstrip(\"/\") + \"_2/\" # NOTE: Use a different path for the second sample\n", |
346 | 350 | "CUSTOM_ANALYZER_ID_2 = \"pro-mode-sample-\" + str(uuid.uuid4())" |
347 | 351 | ] |
348 | 352 | }, |
|
362 | 366 | "source": [ |
363 | 367 | "logging.info(\"Start generating knowledge base for the second sample...\")\n", |
364 | 368 | "# Reuse the same blob container\n", |
365 | | - "await client.generate_knowledge_base_on_blob(reference_docs_2, REFERENCE_DOC_SAS_URL, REFERENCE_DOC_PATH_2, skip_analyze=True)" |
| 369 | + "await client.generate_knowledge_base_on_blob(reference_docs_2, reference_doc_sas_url, reference_doc_path_2, skip_analyze=True)" |
366 | 370 | ] |
367 | 371 | }, |
368 | 372 | { |
|
382 | 386 | "response = client.begin_create_analyzer(\n", |
383 | 387 | " CUSTOM_ANALYZER_ID_2,\n", |
384 | 388 | " analyzer_template_path=analyzer_template_2,\n", |
385 | | - " pro_mode_reference_docs_storage_container_sas_url=REFERENCE_DOC_SAS_URL,\n", |
386 | | - " pro_mode_reference_docs_storage_container_path_prefix=REFERENCE_DOC_PATH_2,\n", |
| 389 | + " pro_mode_reference_docs_storage_container_sas_url=reference_doc_sas_url,\n", |
| 390 | + " pro_mode_reference_docs_storage_container_path_prefix=reference_doc_path_2,\n", |
387 | 391 | ")\n", |
388 | 392 | "result = client.poll_result(response)\n", |
389 | 393 | "if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n", |
|
0 commit comments