datahub-project · hsheth2 · Jul 16, 2024 · Jul 10, 2024 · Jul 15, 2024 · Jul 15, 2024
diff --git a/metadata-ingestion/docs/dev_guides/classification.md b/metadata-ingestion/docs/dev_guides/classification.md
@@ -10,7 +10,7 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
 | ------------------------- | -------- | --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
 | enabled                   |          | boolean                                 | Whether classification should be used to auto-detect glossary terms                                                                                                                                                                                                                                                                      | False                                                      |
 | sample_size               |          | int                                     | Number of sample values used for classification.                                                                                                                                                                                                                                                                                         | 100                                                        |
-| max_workers               |          | int                                     | Number of worker threads to use for classification. Set to 1 to disable.                                                                                                                                                                                                                                                                 | Number of cpu cores or 4                                   |
+| max_workers               |          | int                                     | Number of worker processes to use for classification. Set to 1 to disable.                                                                                                                                                                                                                                                               | Number of cpu cores or 4                                   |
 | info_type_to_term         |          | Dict[str,string]                        | Optional mapping to provide glossary term identifier for info type.                                                                                                                                                                                                                                                                      | By default, info type is used as glossary term identifier. |
 | classifiers               |          | Array of object                         | Classifiers to use to auto-detect glossary terms. If more than one classifier, infotype predictions from the classifier defined later in sequence take precedence.                                                                                                                                                                       | [{'type': 'datahub', 'config': None}]                      |
 | table_pattern             |          | AllowDenyPattern (see below for fields) | Regex patterns to filter tables for classification. This is used in combination with other patterns in parent config. Specify regex to match the entire table name in `database.schema.table` format. e.g. to match all tables starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*' | {'allow': ['.*'], 'deny': [], 'ignoreCase': True}          |

diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/classifier.py
@@ -39,7 +39,7 @@ class ClassificationConfig(ConfigModel):
 
     max_workers: int = Field(
         default=(os.cpu_count() or 4),
-        description="Number of worker threads to use for classification. Set to 1 to disable.",
+        description="Number of worker processes to use for classification. Set to 1 to disable.",
     )
 
     table_pattern: AllowDenyPattern = Field(