Update fastapi & pydantic

Pydantic v2 has a number of breaking changes. Also see #428
kbase · Aug 24, 2023 · commit 901d6e5
1 parent 6cb7b1e
Show file tree

Hide file tree

Showing 15 changed files with 505 additions and 290 deletions.
diff --git a/Pipfile b/Pipfile
@@ -4,7 +4,7 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-fastapi = "==0.99.1"
+fastapi = "==0.101.1"
 uvicorn = {version = "==0.23.2", extras = ["standard"]}
 jsonlines = "==3.1.0"
 cacheout = "==0.14.1"

diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/src/common/product_models/heatmap_common_models.py b/src/common/product_models/heatmap_common_models.py
@@ -111,9 +111,6 @@ class Cell(BaseModel):
         description="The value of the heatmap at this cell."
     )
 
-    class Config:
-        smart_union = True
-
 
 class HeatMapRow(BaseModel):
     """
@@ -133,7 +130,7 @@ class HeatMapRow(BaseModel):
         description="The cells in the row of the heatmap in render order."
     )
     meta: dict[str, str] | None = Field(
-        examples=[{"growth_media": "Spizizen minimal media + 0.5mM biotin"}],
+        example={"growth_media": "Spizizen minimal media + 0.5mM biotin"},
         description="Arbitrary metadata about the data in the row"
     )
 
@@ -178,9 +175,6 @@ class CellDetailEntry(BaseModel):
         description="The value of the cell entry."
     )
 
-    class Config:
-        smart_union = True
-
 
 class CellDetail(BaseModel):
     """

diff --git a/src/common/storage/init_storage.py b/src/common/storage/init_storage.py
@@ -21,7 +21,7 @@
 
 def _get_config() -> CollectionsServiceConfig:
     parser = argparse.ArgumentParser(
-        description="Set up ArangoDB collection sharding for the KBase collectins service."
+        description="Set up ArangoDB collection sharding for the KBase collections service."
     )
     parser.add_argument(
         '-c', '--config', required=True, type=str,
@@ -33,7 +33,7 @@ def _get_config() -> CollectionsServiceConfig:
         "-s", "--skip-database-creation", action="store_true",
         help="Don't create the database. This is necessary if the credentials in the config "
             + "file don't have permissions for the _system database; however the target database "
-            + "must alreaady exist."
+            + "must already exist."
     )
     args = parser.parse_args()
     with open(args.config, 'rb') as cfgfile:

diff --git a/src/service/data_products/common_models.py b/src/service/data_products/common_models.py
@@ -3,7 +3,7 @@
 """
 
 from fastapi import APIRouter, Query
-from pydantic import BaseModel, validator, Field
+from pydantic import field_validator, ConfigDict, BaseModel, Field
 from src.common.product_models.common_models import SubsetProcessStates
 from src.common.storage import collection_and_field_names as names
 from src.service import models
@@ -58,14 +58,13 @@ class DataProductSpec(BaseModel):
     in the `tags` argument.
     """
 
-    @validator("router")
+    @field_validator("router")
+    @classmethod
     def _check_router_tags(cls, v):  # @NoSelf
         if not v.tags:
             raise ValueError("router must have at least one tag")
         return v
-
-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
 class DataProductMissingIDs(SubsetProcessStates):
@@ -82,10 +81,10 @@ class DataProductMissingIDs(SubsetProcessStates):
     )
 
 
-QUERY_VALIDATOR_LOAD_VERSION_OVERRIDE = Annotated[str | None, Query(
+QUERY_VALIDATOR_LOAD_VERSION_OVERRIDE = Annotated[str, Query(
     min_length=models.LENGTH_MIN_LOAD_VERSION,
     max_length=models.LENGTH_MAX_LOAD_VERSION,
-    regex=models.REGEX_LOAD_VERSION,
+    pattern=models.REGEX_LOAD_VERSION,
     example=models.FIELD_LOAD_VERSION_EXAMPLE,
     description=models.FIELD_LOAD_VERSION_DESCRIPTION + ". This will override the collection's "
         + "load version. Service administrator privileges are required."
@@ -114,7 +113,7 @@ class DataProductMissingIDs(SubsetProcessStates):
 )]
 
 
-QUERY_VALIDATOR_MATCH_ID = Annotated[str | None, Query(
+QUERY_VALIDATOR_MATCH_ID = Annotated[str, Query(
     description="A match ID to set the view to the match rather than "
         + "the entire collection. Authentication is required. If a match ID is "
         # matches are against a specific load version, so...
@@ -136,7 +135,7 @@ class DataProductMissingIDs(SubsetProcessStates):
 )]
 
 
-QUERY_VALIDATOR_SELECTION_ID = Annotated[str | None, Query(
+QUERY_VALIDATOR_SELECTION_ID = Annotated[str, Query(
     description="A selection ID to set the view to the selection rather than the entire "
         + "collection. If a selection ID is set, any load version override is ignored. "
         + "If a selection filter and a match filter are provided, they are ANDed together. "
@@ -161,7 +160,7 @@ class DataProductMissingIDs(SubsetProcessStates):
 )]
 
 
-QUERY_VALIDATOR_SORT_ON = Annotated[str | None, Query(
+QUERY_VALIDATOR_SORT_ON = Annotated[str, Query(
     example=names.FLD_KBASE_ID,
     description="The field to sort on."
 )]

diff --git a/src/service/data_products/heatmap.py b/src/service/data_products/heatmap.py
@@ -225,7 +225,7 @@ async def get_heatmap(
         self,
         r: Request,
         collection_id: Annotated[str, PATH_VALIDATOR_COLLECTION_ID],
-        start_after: str | None = Query(
+        start_after: str = Query(
             default=None,
             example="GB_GCA_000006155.2",
             description=f"The `{names.FLD_KBASE_ID}` to start after when listing data. This "
@@ -290,8 +290,8 @@ async def get_missing_ids(
         self,
         r: Request,
         collection_id: Annotated[str, PATH_VALIDATOR_COLLECTION_ID],
-        match_id: Annotated[str | None, Query(description="A match ID.")] = None,
-        selection_id: Annotated[str | None, Query(description="A selection ID.")] = None,
+        match_id: Annotated[str, Query(description="A match ID.")] = None,
+        selection_id: Annotated[str, Query(description="A selection ID.")] = None,
         user: kb_auth.KBaseUser = Depends(_OPT_AUTH),
     ) -> DataProductMissingIDs:
         return await get_missing_ids(

diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py
@@ -298,15 +298,15 @@ async def get_sample_locations(
             + "time and a lot of memory. In the future it may be disallowed for collections with "
             + "large numbers of samples."
     )] = False,
-    match_id: Annotated[str | None, Query(
+    match_id: Annotated[str, Query(
         description="A match ID to set the view to the match rather than "
             + "the entire collection. Authentication is required. If a match ID is "
             # matches are against a specific load version, so...
             + "set, any load version override is ignored. "
             + "If a selection filter and a match filter are provided, they are ANDed together. "
     )] = None,
     # TODO FEATURE support a choice of AND or OR for matches & selections
-    selection_id: Annotated[str | None, Query(
+    selection_id: Annotated[str, Query(
         description="A selection ID to set the view to the selection rather than the entire "
             + "collection. If a selection ID is set, any load version override is ignored. "
             + "If a selection filter and a match filter are provided, they are ANDed together. "
@@ -492,8 +492,8 @@ async def get_samples_by_id(
 async def get_missing_ids(
     r: Request,
     collection_id: Annotated[str, PATH_VALIDATOR_COLLECTION_ID],
-    match_id: Annotated[str | None, Query(description="A match ID.")] = None,
-    selection_id: Annotated[str | None, Query(description="A selection ID.")] = None,
+    match_id: Annotated[str, Query(description="A match ID.")] = None,
+    selection_id: Annotated[str, Query(description="A selection ID.")] = None,
     user: kb_auth.KBaseUser = Depends(_OPT_AUTH),
 ) -> common_models.DataProductMissingIDs:
     return await _get_missing_ids(

diff --git a/src/service/data_products/taxa_count.py b/src/service/data_products/taxa_count.py
@@ -141,7 +141,7 @@ class TaxaCounts(SubsetProcessStates):
     """
     The taxa counts data set.
     """
-    data: list[TaxaCount] | None
+    data: list[TaxaCount] | None = None
 
 
 _FLD_COL_ID = "colid"
@@ -193,13 +193,13 @@ async def get_taxa_counts(
         example="phylum",
         description="The taxonomic rank at which to return results"
     ),
-    match_id: str | None = Query(
+    match_id: str = Query(
         default = None,
         description="A match ID to include the match count in the taxa count data. "
             + "Authentication is required. "
             # matches are against a specific load version, so...
             + "Note that if a match ID is set, any load version override is ignored."),
-    selection_id: str | None = Query(
+    selection_id: str = Query(
         default = None,
         description="A selection ID to include the selection count in the taxa count data. "
             + "Note that if a selection ID is set, any load version override is ignored."),

diff --git a/src/service/matchers/lineage_matcher.py b/src/service/matchers/lineage_matcher.py
@@ -4,7 +4,7 @@
 
 import logging
 
-from pydantic import BaseModel, Field, Extra
+from pydantic import ConfigDict, BaseModel, Field
 from typing import Any
 
 from src.common.constants import GTDB_UNCLASSIFIED_PREFIX
@@ -32,10 +32,9 @@ class GTDBLineageMatcherCollectionParameters(BaseModel):
         description="The GTDB version of the collection in which the matcher is installed. " +
             "Input data to the matcher must match this version of GTDB or the match will " +
             "abort.",
-        regex=r"^\d{2,4}\.\d{1,2}$"  # giving a little room for expansion
+        pattern=r"^\d{2,4}\.\d{1,2}$"  # giving a little room for expansion
     )
-    class Config:
-        extra = Extra.forbid
+    model_config = ConfigDict(extra="forbid")
 
 
 class GTDBLineageMatcherUserParameters(BaseModel):
@@ -44,8 +43,7 @@ class GTDBLineageMatcherUserParameters(BaseModel):
         example=GTDBRank.SPECIES,
         description="A rank in the the GTDB lineage."
     )
-    class Config:
-        extra = Extra.forbid
+    model_config = ConfigDict(extra="forbid")
 
 
 async def _process_match(
@@ -153,6 +151,6 @@ def generate_match_process(
         "KBaseSets.AssemblySet",
     ],
     required_data_products=[genome_attributes.ID],
-    user_parameters=GTDBLineageMatcherUserParameters.schema(),
-    collection_parameters=GTDBLineageMatcherCollectionParameters.schema()
+    user_parameters=GTDBLineageMatcherUserParameters.model_json_schema(),
+    collection_parameters=GTDBLineageMatcherCollectionParameters.model_json_schema()
 )
diff --git a/src/service/matchers/minhash_homology_matcher.py b/src/service/matchers/minhash_homology_matcher.py
@@ -17,7 +17,7 @@
 import asyncio
 import logging
 
-from pydantic import BaseModel, Field, Extra, HttpUrl
+from pydantic import ConfigDict, BaseModel, Field, HttpUrl
 
 from src.service import data_product_specs
 from src.service import models
@@ -48,8 +48,7 @@ class MinHashHomologyMatcherCollectionParameters(BaseModel):
         description="The name of the sketch database in the Assembly Homology Service to match "
             + "against. This parameter is sent to the sketch service."
     )
-    class Config:
-        extra = Extra.forbid
+    model_config = ConfigDict(extra="forbid")
 
 
 class MinHashHomologyMatcherUserParameters(BaseModel):
@@ -63,8 +62,7 @@ class MinHashHomologyMatcherUserParameters(BaseModel):
     )
     # TODO HOMOLOGY_MATCHER may want to support more parameters if we switch to a different
     #                       implementation like sourmash
-    class Config:
-        extra = Extra.forbid
+    model_config = ConfigDict(extra="forbid")
 
 
 async def _get_sketch_service_client(collection_parameters: dict[str, Any]):
@@ -225,6 +223,6 @@ def generate_match_process(
         "KBaseSets.AssemblySet",
     ],
     required_data_products=[genome_attributes.ID],
-    user_parameters=MinHashHomologyMatcherUserParameters.schema(),
-    collection_parameters=MinHashHomologyMatcherCollectionParameters.schema()
+    user_parameters=MinHashHomologyMatcherUserParameters.model_json_schema(),
+    collection_parameters=MinHashHomologyMatcherCollectionParameters.model_json_schema()
 )