remove a commit with output

chienyuanchang · chienyuanchang · commit 23a8eb60a146 · 2025-07-31T00:33:38.000Z
diff --git a/docs/set_env_for_training_data_and_reference_doc.md b/docs/set_env_for_training_data_and_reference_doc.md
@@ -10,6 +10,7 @@ Folders [document_training](../data/document_training/) and [field_extraction_pr
     - Option A - Generate a SAS URL manually on Azure Storage Explorer
         - Right-click on blob container and select the `Get Shared Access Signature...` in the menu.
         - Check the required permissions: `Read`, `Write` and `List`
+        - `Write` is required for uploading, modifying, or appending blobs.
         - Click the `Create` button.  
         <img src="./get-access-signature.png" height="600" />  <img src="./choose-signature-options.png" height="600" />  
         - *Copy the SAS URL:* After creating the SAS, click `Copy` to get the URL with token. This will be used as the value for **TRAINING_DATA_SAS_URL** or **REFERENCE_DOC_SAS_URL** when running the sample code.  
diff --git a/notebooks/analyzer_training.ipynb b/notebooks/analyzer_training.ipynb
@@ -132,22 +132,26 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "TRAINING_DATA_SAS_URL = os.getenv(\"TRAINING_DATA_SAS_URL\")\n",
-    "if not TRAINING_DATA_SAS_URL:\n",
+    "training_data_sas_url = os.getenv(\"TRAINING_DATA_SAS_URL\")\n",
+    "if not training_data_sas_url:\n",
     "    TRAINING_DATA_STORAGE_ACCOUNT_NAME = os.getenv(\"TRAINING_DATA_STORAGE_ACCOUNT_NAME\")\n",
     "    TRAINING_DATA_CONTAINER_NAME = os.getenv(\"TRAINING_DATA_CONTAINER_NAME\")\n",
-    "    if not TRAINING_DATA_STORAGE_ACCOUNT_NAME and not TRAINING_DATA_SAS_URL:\n",
+    "    if not TRAINING_DATA_STORAGE_ACCOUNT_NAME and not training_data_sas_url:\n",
     "        raise ValueError(\n",
     "            \"Please set either TRAINING_DATA_SAS_URL or both TRAINING_DATA_STORAGE_ACCCOUNT_NAME and TRAINING_DATA_CONTAINER_NAME environment variables.\"\n",
     "        )\n",
-    "    TRAINING_DATA_SAS_URL = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n",
-    "        TRAINING_DATA_STORAGE_ACCOUNT_NAME,\n",
-    "        TRAINING_DATA_CONTAINER_NAME,\n",
+    "    from azure.storage.blob import ContainerSasPermissions\n",
+    "    # We will need \"Write\" for uploading, modifying, or appending blobs\n",
+    "    training_data_sas_url = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n",
+    "        account_name=TRAINING_DATA_STORAGE_ACCOUNT_NAME,\n",
+    "        container_name=TRAINING_DATA_CONTAINER_NAME,\n",
+    "        permissions=ContainerSasPermissions(read=True, write=True, list=True),\n",
+    "        expiry_hours=1,\n",
     "    )\n",
     "\n",
-    "TRAINING_DATA_PATH = os.getenv(\"TRAINING_DATA_PATH\")\n",
+    "training_data_path = os.getenv(\"TRAINING_DATA_PATH\")\n",
     "\n",
-    "await client.generate_training_data_on_blob(training_docs_folder, TRAINING_DATA_SAS_URL, TRAINING_DATA_PATH)"
+    "await client.generate_training_data_on_blob(training_docs_folder, training_data_sas_url, training_data_path)"
    ]
   },
   {
@@ -157,7 +161,7 @@
     "## Create analyzer with defined schema\n",
     "Before creating the analyzer, you should fill in the constant ANALYZER_ID with a relevant name to your task. Here, we generate a unique suffix so this cell can be run multiple times to create different analyzers.\n",
     "\n",
-    "We use **TRAINING_DATA_SAS_URL** and **TRAINING_DATA_PATH** that's set up in the [.env](./.env) file and used in the previous step."
+    "We use **training_data_sas_url** and **training_data_path**, which were populated in the previous step from the settings in the [.env](./.env) file."
    ]
   },
   {
@@ -172,8 +176,8 @@
     "response = client.begin_create_analyzer(\n",
     "    CUSTOM_ANALYZER_ID,\n",
     "    analyzer_template_path=analyzer_template,\n",
-    "    training_storage_container_sas_url=TRAINING_DATA_SAS_URL,\n",
-    "    training_storage_container_path_prefix=TRAINING_DATA_PATH,\n",
+    "    training_storage_container_sas_url=training_data_sas_url,\n",
+    "    training_storage_container_path_prefix=training_data_path,\n",
     ")\n",
     "result = client.poll_result(response)\n",
     "if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n",
diff --git a/notebooks/field_extraction_pro_mode.ipynb b/notebooks/field_extraction_pro_mode.ipynb
@@ -173,16 +173,20 @@
    "outputs": [],
    "source": [
     "# Load reference storage configuration from environment\n",
-    "REFERENCE_DOC_PATH = os.getenv(\"REFERENCE_DOC_PATH\")\n",
+    "reference_doc_path = os.getenv(\"REFERENCE_DOC_PATH\")\n",
     "\n",
-    "REFERENCE_DOC_SAS_URL = os.getenv(\"REFERENCE_DOC_SAS_URL\")\n",
-    "if not REFERENCE_DOC_SAS_URL:\n",
+    "reference_doc_sas_url = os.getenv(\"REFERENCE_DOC_SAS_URL\")\n",
+    "if not reference_doc_sas_url:\n",
     "    REFERENCE_DOC_STORAGE_ACCOUNT_NAME = os.getenv(\"REFERENCE_DOC_STORAGE_ACCOUNT_NAME\")\n",
     "    REFERENCE_DOC_CONTAINER_NAME = os.getenv(\"REFERENCE_DOC_CONTAINER_NAME\")\n",
     "    if REFERENCE_DOC_STORAGE_ACCOUNT_NAME and REFERENCE_DOC_CONTAINER_NAME:\n",
-    "        REFERENCE_DOC_SAS_URL = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n",
-    "            REFERENCE_DOC_STORAGE_ACCOUNT_NAME,\n",
-    "            REFERENCE_DOC_CONTAINER_NAME,\n",
+    "        from azure.storage.blob import ContainerSasPermissions\n",
+    "        # We will need \"Write\" for uploading, modifying, or appending blobs\n",
+    "        reference_doc_sas_url = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n",
+    "            account_name=REFERENCE_DOC_STORAGE_ACCOUNT_NAME,\n",
+    "            container_name=REFERENCE_DOC_CONTAINER_NAME,\n",
+    "            permissions=ContainerSasPermissions(read=True, write=True, list=True),\n",
+    "            expiry_hours=1,\n",
     "        )"
    ]
   },
@@ -203,7 +207,7 @@
     "# Please name the OCR result files with the same name as the original document files including its extension, and add the suffix \".result.json\"\n",
     "# For example, if the original document is \"invoice.pdf\", the OCR result file should be named \"invoice.pdf.result.json\"\n",
     "# NOTE: Please comment out the follwing line if you don't have any reference documents.\n",
-    "await client.generate_knowledge_base_on_blob(reference_docs, REFERENCE_DOC_SAS_URL, REFERENCE_DOC_PATH, skip_analyze=False)"
+    "await client.generate_knowledge_base_on_blob(reference_docs, reference_doc_sas_url, reference_doc_path, skip_analyze=False)"
    ]
   },
   {
@@ -213,7 +217,7 @@
     "## Create analyzer with defined schema for Pro mode\n",
     "Before creating the analyzer, you should fill in the constant ANALYZER_ID with a relevant name to your task. Here, we generate a unique suffix so this cell can be run multiple times to create different analyzers.\n",
     "\n",
-    "We use **REFERENCE_DOC_SAS_URL** and **REFERENCE_DOC_PATH** that's set up in the [.env](./.env) file and used in the previous step."
+    "We use **reference_doc_sas_url** and **reference_doc_path**, which were populated in the previous step from the settings in the [.env](./.env) file."
    ]
   },
   {
@@ -228,8 +232,8 @@
     "response = client.begin_create_analyzer(\n",
     "    CUSTOM_ANALYZER_ID,\n",
     "    analyzer_template_path=analyzer_template,\n",
-    "    pro_mode_reference_docs_storage_container_sas_url=REFERENCE_DOC_SAS_URL,\n",
-    "    pro_mode_reference_docs_storage_container_path_prefix=REFERENCE_DOC_PATH,\n",
+    "    pro_mode_reference_docs_storage_container_sas_url=reference_doc_sas_url,\n",
+    "    pro_mode_reference_docs_storage_container_path_prefix=reference_doc_path,\n",
     ")\n",
     "result = client.poll_result(response)\n",
     "if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n",
@@ -342,7 +346,7 @@
     "reference_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/reference_docs\"\n",
     "\n",
     "# Load reference storage configuration from environment\n",
-    "REFERENCE_DOC_PATH_2 = os.getenv(\"REFERENCE_DOC_PATH\").rstrip(\"/\") + \"_2/\"  # NOTE: Use a different path for the second sample\n",
+    "reference_doc_path_2 = os.getenv(\"REFERENCE_DOC_PATH\").rstrip(\"/\") + \"_2/\"  # NOTE: Use a different path for the second sample\n",
     "CUSTOM_ANALYZER_ID_2 = \"pro-mode-sample-\" + str(uuid.uuid4())"
    ]
   },
@@ -362,7 +366,7 @@
    "source": [
     "logging.info(\"Start generating knowledge base for the second sample...\")\n",
     "# Reuse the same blob container\n",
-    "await client.generate_knowledge_base_on_blob(reference_docs_2, REFERENCE_DOC_SAS_URL, REFERENCE_DOC_PATH_2, skip_analyze=True)"
+    "await client.generate_knowledge_base_on_blob(reference_docs_2, reference_doc_sas_url, reference_doc_path_2, skip_analyze=True)"
    ]
   },
   {
@@ -382,8 +386,8 @@
     "response = client.begin_create_analyzer(\n",
     "    CUSTOM_ANALYZER_ID_2,\n",
     "    analyzer_template_path=analyzer_template_2,\n",
-    "    pro_mode_reference_docs_storage_container_sas_url=REFERENCE_DOC_SAS_URL,\n",
-    "    pro_mode_reference_docs_storage_container_path_prefix=REFERENCE_DOC_PATH_2,\n",
+    "    pro_mode_reference_docs_storage_container_sas_url=reference_doc_sas_url,\n",
+    "    pro_mode_reference_docs_storage_container_path_prefix=reference_doc_path_2,\n",
     ")\n",
     "result = client.poll_result(response)\n",
     "if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n",
diff --git a/python/content_understanding_client.py b/python/content_understanding_client.py
@@ -186,26 +186,45 @@ def is_supported_doc_type_by_file_path(file_path: Path, is_document: bool=False)
     def generate_temp_container_sas_url(
         account_name: str,
         container_name: str,
+        permissions: Optional[ContainerSasPermissions] = None,
+        expiry_hours: Optional[int] = None,
     ) -> str:
+        """
+        Generate a temporary SAS URL for an Azure Blob container using Azure AD authentication.
+
+        Args:
+            account_name (str): The Azure Storage account name.
+            container_name (str): The name of the container.
+            permissions (ContainerSasPermissions, optional): Permissions to assign to the SAS token.
+                Defaults to read, write, and list permissions.
+            expiry_hours (int, optional): Number of hours until the SAS token expires.
+                Defaults to `AzureContentUnderstandingClient.SAS_EXPIRY_HOURS`.
+
+        Returns:
+            str: The SAS URL for the container.
+        """
+        if permissions is None:
+            permissions = ContainerSasPermissions(read=True, write=True, list=True)
+        expiry_duration = timedelta(hours=expiry_hours or AzureContentUnderstandingClient.SAS_EXPIRY_HOURS)
+
         account_url = f"https://{account_name}.blob.core.windows.net"
         blob_service_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential())
 
         # Get user delegation key
         start_time = datetime.now(timezone.utc)
-        expiry_time = start_time + timedelta(hours=AzureContentUnderstandingClient.SAS_EXPIRY_HOURS)
+        expiry_time = start_time + expiry_duration
         delegation_key = blob_service_client.get_user_delegation_key(start_time, expiry_time)
 
         sas_token = generate_container_sas(
             account_name=account_name,
             container_name=container_name,
             user_delegation_key=delegation_key,
-            permission=ContainerSasPermissions(read=True, list=True, write=True),
+            permission=permissions,
             expiry=expiry_time,
             start=start_time,
         )
-        container_sas_url = f"{account_url}/{container_name}?{sas_token}"
 
-        return container_sas_url
+        return f"{account_url}/{container_name}?{sas_token}"
 
     def get_all_analyzers(self) -> Dict[str, Any]:
         """