diff --git a/sdk/search/azure-search-documents/azure/search/documents/__init__.py b/sdk/search/azure-search-documents/azure/search/documents/__init__.py index 75893973ec59..02bd8a2266d4 100644 --- a/sdk/search/azure-search-documents/azure/search/documents/__init__.py +++ b/sdk/search/azure-search-documents/azure/search/documents/__init__.py @@ -73,6 +73,8 @@ ImageAnalysisSkill, Index, Indexer, + IndexingSchedule, + IndexingParameters, InputFieldMappingEntry, KeepTokenFilter, KeyPhraseExtractionSkill, @@ -165,6 +167,8 @@ "ImageAnalysisSkill", "Index", "Indexer", + "IndexingSchedule", + "IndexingParameters", "IndexAction", "IndexDocumentsBatch", "IndexingResult", diff --git a/sdk/search/azure-search-documents/azure/search/documents/_service/aio/_indexers_client.py b/sdk/search/azure-search-documents/azure/search/documents/_service/aio/_indexers_client.py index c17509beaf96..90b74f62dbd8 100644 --- a/sdk/search/azure-search-documents/azure/search/documents/_service/aio/_indexers_client.py +++ b/sdk/search/azure-search-documents/azure/search/documents/_service/aio/_indexers_client.py @@ -39,21 +39,21 @@ def __init__(self, endpoint, credential, **kwargs): endpoint=endpoint, sdk_moniker=SDK_MONIKER, **kwargs ) # type: _SearchServiceClient - def __enter__(self): + async def __aenter__(self): # type: () -> SearchIndexersClient - self._client.__enter__() # pylint:disable=no-member + await self._client.__aenter__() # pylint:disable=no-member return self - def __exit__(self, *args): + async def __aexit__(self, *args): # type: (*Any) -> None - return self._client.__exit__(*args) # pylint:disable=no-member + return await self._client.__aexit__(*args) # pylint:disable=no-member - def close(self): + async def close(self): # type: () -> None - """Close the :class:`~azure.search.documents.SearchIndexersClient` session. + """Close the :class:`~azure.search.documents.aio.SearchIndexersClient` session. 
""" - return self._client.close() + return await self._client.close() @distributed_trace_async async def create_indexer(self, indexer, **kwargs): diff --git a/sdk/search/azure-search-documents/samples/README.md b/sdk/search/azure-search-documents/samples/README.md index b0ea398ba97a..51737e595968 100644 --- a/sdk/search/azure-search-documents/samples/README.md +++ b/sdk/search/azure-search-documents/samples/README.md @@ -38,6 +38,10 @@ Then for common search index operations: * Analyze text: [sample_analyze_text.py](sample_analyze_text.py) ([async version](async_samples/sample_analyze_text_async.py)) +* CRUD operations for indexers: [sample_indexers_operations.py](sample_indexers_operations.py) ([async version](async_samples/sample_indexers_operations_async.py)) + +* General workflow of indexer, datasource and index: [sample_indexer_datasource_skillset.py](sample_indexer_datasource_skillset.py) ([async version](async_samples/sample_indexer_datasource_skillset.py)) + ## Prerequisites * Python 2.7, or 3.5 or later is required to use this package (3.5 or later if using asyncio) * You must have an [Azure subscription](https://azure.microsoft.com/free/) diff --git a/sdk/search/azure-search-documents/samples/async_samples/sample_indexers_operations_async.py b/sdk/search/azure-search-documents/samples/async_samples/sample_indexers_operations_async.py new file mode 100644 index 000000000000..5890a35278bb --- /dev/null +++ b/sdk/search/azure-search-documents/samples/async_samples/sample_indexers_operations_async.py @@ -0,0 +1,125 @@ +# coding: utf-8 + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+# -------------------------------------------------------------------------- + +""" +FILE: sample_indexers_operations_async.py +DESCRIPTION: + This sample demonstrates how to get, create, update, or delete an Indexer. +USAGE: + python sample_indexers_operations_async.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_SEARCH_SERVICE_ENDPOINT - the endpoint of your Azure Cognitive Search service + 2) AZURE_SEARCH_API_KEY - your search API key +""" + +import asyncio +import os + +service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") +key = os.getenv("AZURE_SEARCH_API_KEY") +connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING") + +from azure.core.credentials import AzureKeyCredential +from azure.search.documents import ( + DataSource, DataContainer, DataSourceCredentials, Index, Indexer, SimpleField, edm +) +from azure.search.documents.aio import SearchServiceClient + +service_client = SearchServiceClient(service_endpoint, AzureKeyCredential(key)) +indexers_client = service_client.get_indexers_client() + +async def create_indexer(): + # create an index + index_name = "hotels" + fields = [ + SimpleField(name="hotelId", type=edm.String, key=True), + SimpleField(name="baseRate", type=edm.Double) + ] + index = Index(name=index_name, fields=fields) + ind_client = service_client.get_indexes_client() + async with ind_client: + await ind_client.create_index(index) + + # [START create_indexer_async] + # create a datasource + ds_client = service_client.get_datasources_client() + credentials = DataSourceCredentials(connection_string=connection_string) + container = DataContainer(name='searchcontainer') + ds = DataSource(name="async-indexer-datasource", type="azureblob", credentials=credentials, container=container) + async with ds_client: + data_source = await ds_client.create_datasource(ds) + + # create an indexer + indexer = Indexer(name="async-sample-indexer", data_source_name="async-indexer-datasource", 
target_index_name="hotels") + async with indexers_client: + result = await indexers_client.create_indexer(indexer) + print("Create new Indexer - async-sample-indexer") + # [END create_indexer_async] + +async def list_indexers(): + # [START list_indexer_async] + async with indexers_client: + result = await indexers_client.get_indexers() + names = [x.name for x in result] + print("Found {} Indexers in the service: {}".format(len(result), ", ".join(names))) + # [END list_indexer_async] + +async def get_indexer(): + # [START get_indexer_async] + async with indexers_client: + result = await indexers_client.get_indexer("async-sample-indexer") + print("Retrived Indexer 'async-sample-indexer'") + return result + # [END get_indexer_async] + +async def get_indexer_status(): + # [START get_indexer_status_async] + async with indexers_client: + result = await indexers_client.get_indexer_status("async-sample-indexer") + print("Retrived Indexer status for 'async-sample-indexer'") + return result + # [END get_indexer_status_async] + +async def run_indexer(): + # [START run_indexer_async] + async with indexers_client: + result = await indexers_client.run_indexer("async-sample-indexer") + print("Ran the Indexer 'async-sample-indexer'") + return result + # [END run_indexer_async] + +async def reset_indexer(): + # [START reset_indexer_async] + async with indexers_client: + result = await indexers_client.reset_indexer("async-sample-indexer") + print("Reset the Indexer 'async-sample-indexer'") + return result + # [END reset_indexer_async] + +async def delete_indexer(): + # [START delete_indexer_async] + async with indexers_client: + await indexers_client.delete_indexer("async-sample-indexer") # await the coroutine so the delete request is actually sent + print("Indexer 'async-sample-indexer' successfully deleted") + # [END delete_indexer_async] + +async def main(): + await create_indexer() + await list_indexers() + await get_indexer() + await get_indexer_status() + await run_indexer() + await reset_indexer() + await delete_indexer() + # 
await service_client.close() + +if __name__ == '__main__': + loop = asyncio.get_event_loop() + loop.run_until_complete(main()) + loop.close() diff --git a/sdk/search/azure-search-documents/samples/files/hotel_small.json b/sdk/search/azure-search-documents/samples/files/hotel_small.json new file mode 100644 index 000000000000..e30a1f96199b --- /dev/null +++ b/sdk/search/azure-search-documents/samples/files/hotel_small.json @@ -0,0 +1,252 @@ +[ + { + "@search.action": "upload", + "hotelId": "1", + "hotelName": "Fancy Stay", + "description": "Best hotel in town if you like luxury hotels. They have an amazing infinity pool, a spa, and a really helpful concierge. The location is perfect -- right downtown, close to all the tourist attractions. We highly recommend this hotel.", + "descriptionFr": "Meilleur hôtel en ville si vous aimez les hôtels de luxe. Ils ont une magnifique piscine à débordement, un spa et un concierge très utile. L'emplacement est parfait – en plein centre, à proximité de toutes les attractions touristiques. Nous recommandons fortement cet hôtel.", + "category": "Luxury", + "tags": [ + "pool", + "view", + "wifi", + "concierge" + ], + "parkingIncluded": false, + "smokingAllowed": false, + "lastRenovationDate": "2010-06-27T00:00:00+00:00", + "rating": 5, + "location": { + "type": "Point", + "coordinates": [ + -122.131577, + 47.678581 + ] + } + }, + { + "@search.action": "upload", + "hotelId": "2", + "hotelName": "Roach Motel", + "description": "Cheapest hotel in town. Infact, a motel.", + "descriptionFr": "Hôtel le moins cher en ville. 
Infact, un motel.", + "category": "Budget", + "tags": [ + "motel", + "budget" + ], + "parkingIncluded": true, + "smokingAllowed": true, + "lastRenovationDate": "1982-04-28T00:00:00+00:00", + "rating": 1, + "location": { + "type": "Point", + "coordinates": [ + -122.131577, + 49.678581 + ] + } + }, + { + "@search.action": "upload", + "hotelId": "3", + "hotelName": "EconoStay", + "description": "Very popular hotel in town", + "descriptionFr": "Hôtel le plus populaire en ville", + "category": "Budget", + "tags": [ + "wifi", + "budget" + ], + "parkingIncluded": true, + "smokingAllowed": false, + "lastRenovationDate": "1995-07-01T00:00:00+00:00", + "rating": 4, + "location": { + "type": "Point", + "coordinates": [ + -122.131577, + 46.678581 + ] + } + }, + { + "@search.action": "upload", + "hotelId": "4", + "hotelName": "Express Rooms", + "description": "Pretty good hotel", + "descriptionFr": "Assez bon hôtel", + "category": "Budget", + "tags": [ + "wifi", + "budget" + ], + "parkingIncluded": true, + "smokingAllowed": false, + "lastRenovationDate": "1995-07-01T00:00:00+00:00", + "rating": 4, + "location": { + "type": "Point", + "coordinates": [ + -122.131577, + 48.678581 + ] + } + }, + { + "@search.action": "upload", + "hotelId": "5", + "hotelName": "Comfy Place", + "description": "Another good hotel", + "descriptionFr": "Un autre bon hôtel", + "category": "Budget", + "tags": [ + "wifi", + "budget" + ], + "parkingIncluded": true, + "smokingAllowed": false, + "lastRenovationDate": "2012-08-12T00:00:00+00:00", + "rating": 4, + "location": { + "type": "Point", + "coordinates": [ + -122.131577, + 48.678581 + ] + } + }, + { + "@search.action": "upload", + "hotelId": "6", + "description": "Surprisingly expensive. Model suites have an ocean-view." + }, + { + "@search.action": "upload", + "hotelId": "7", + "hotelName": "Modern Stay", + "description": "Modern architecture, very polite staff and very clean. 
Also very affordable.", + "descriptionFr": "Architecture moderne, personnel poli et très propre. Aussi très abordable." + }, + { + "@search.action": "upload", + "hotelId": "8", + "description": "Has some road noise and is next to the very police station. Bathrooms had morel coverings.", + "descriptionFr": "Il y a du bruit de la route et se trouve à côté de la station de police. Les salles de bain avaient des revêtements de morilles." + }, + { + "@search.action": "upload", + "hotelId": "9", + "hotelName": "Secret Point Motel", + "description": "The hotel is ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Time's Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.", + "descriptionFr": "L'hôtel est idéalement situé sur la principale artère commerciale de la ville en plein cœur de New York. A quelques minutes se trouve la place du temps et le centre historique de la ville, ainsi que d'autres lieux d'intérêt qui font de New York l'une des villes les plus attractives et cosmopolites de l'Amérique.", + "category": "Boutique", + "tags": [ + "pool", + "air conditioning", + "concierge" + ], + "parkingIncluded": false, + "smokingAllowed": true, + "lastRenovationDate": "1970-01-18T00:00:00-05:00", + "rating": 4, + "location": { + "type": "Point", + "coordinates": [ + -73.975403, + 40.760586 + ] + }, + "address": { + "streetAddress": "677 5th Ave", + "city": "New York", + "stateProvince": "NY", + "country": "USA", + "postalCode": "10022" + }, + "rooms": [ + { + "description": "Budget Room, 1 Queen Bed (Cityside)", + "descriptionFr": "Chambre Économique, 1 grand lit (côté ville)", + "type": "Budget Room", + "baseRate": 9.69, + "bedOptions": "1 Queen Bed", + "sleepsCount": 2, + "smokingAllowed": true, + "tags": [ + "vcr/dvd" + ] + }, + { + "description": "Budget Room, 1 King Bed (Mountain View)", + "descriptionFr": 
"Chambre Économique, 1 très grand lit (Mountain View)", + "type": "Budget Room", + "baseRate": 8.09, + "bedOptions": "1 King Bed", + "sleepsCount": 2, + "smokingAllowed": true, + "tags": [ + "vcr/dvd", + "jacuzzi tub" + ] + } + ] + }, + { + "@search.action": "upload", + "hotelId": "10", + "hotelName": "Countryside Hotel", + "description": "Save up to 50% off traditional hotels. Free WiFi, great location near downtown, full kitchen, washer & dryer, 24/7 support, bowling alley, fitness center and more.", + "descriptionFr": "Économisez jusqu'à 50% sur les hôtels traditionnels. WiFi gratuit, très bien situé près du centre-ville, cuisine complète, laveuse & sécheuse, support 24/7, bowling, centre de fitness et plus encore.", + "category": "Budget", + "tags": [ + "24-hour front desk service", + "coffee in lobby", + "restaurant" + ], + "parkingIncluded": false, + "smokingAllowed": true, + "lastRenovationDate": "1999-09-06T00:00:00+00:00", + "rating": 3, + "location": { + "type": "Point", + "coordinates": [ + -78.940483, + 35.90416 + ] + }, + "address": { + "streetAddress": "6910 Fayetteville Rd", + "city": "Durham", + "stateProvince": "NC", + "country": "USA", + "postalCode": "27713" + }, + "rooms": [ + { + "description": "Suite, 1 King Bed (Amenities)", + "descriptionFr": "Suite, 1 très grand lit (Services)", + "type": "Suite", + "baseRate": 2.44, + "bedOptions": "1 King Bed", + "sleepsCount": 2, + "smokingAllowed": true, + "tags": [ + "coffee maker" + ] + }, + { + "description": "Budget Room, 1 Queen Bed (Amenities)", + "descriptionFr": "Chambre Économique, 1 grand lit (Services)", + "type": "Budget Room", + "baseRate": 7.69, + "bedOptions": "1 Queen Bed", + "sleepsCount": 2, + "smokingAllowed": false, + "tags": [ + "coffee maker" + ] + } + ] + } +] diff --git a/sdk/search/azure-search-documents/samples/sample_indexer_datasource_skillset.py b/sdk/search/azure-search-documents/samples/sample_indexer_datasource_skillset.py new file mode 100644 index 
000000000000..2aecc5558a33 --- /dev/null +++ b/sdk/search/azure-search-documents/samples/sample_indexer_datasource_skillset.py @@ -0,0 +1,138 @@ +""" +FILE: sample_indexer_datasource_skillset.py +DESCRIPTION: + This sample demonstrates how to use an indexer, datasource and skillset together. + + An indexer is used to efficiently write data to an index using a datasource. + So we first identify a supported data source - we use azure storage blobs + in this example. Then we create an index which is compatible with the datasource. + Further, we create an azure cognitive search datasource which we require to finally + create an indexer. + + Additionally, we will also use skillsets to provide some AI enhancements in our indexers. + + Once we create the indexer, we run the indexer and perform some basic operations like getting + the indexer status. + + The datasource used in this sample is stored as metadata for empty blobs in "searchcontainer". + The json file hotel_small.json, found in the samples/files folder, has the metadata of + each blob. +USAGE: + python sample_indexer_datasource_skillset.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_SEARCH_SERVICE_ENDPOINT - the endpoint of your Azure Cognitive Search service + 2) AZURE_SEARCH_API_KEY - your search API key + 3) AZURE_STORAGE_CONNECTION_STRING - The connection string for the storage blob account that is + being used to create the datasource. 
+""" + +import os +import datetime + +service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") +key = os.getenv("AZURE_SEARCH_API_KEY") +connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING") + +from azure.core.credentials import AzureKeyCredential +from azure.search.documents import ( + DataSource, DataContainer, DataSourceCredentials, Index, Indexer, SimpleField, edm, + EntityRecognitionSkill, InputFieldMappingEntry, OutputFieldMappingEntry, Skillset, + CorsOptions, IndexingSchedule, SearchableField, IndexingParameters +) +from azure.search.documents import SearchServiceClient + + +service_client = SearchServiceClient(service_endpoint, AzureKeyCredential(key)) + +def _create_index(): + name = "hotel-index" + + # Here we create an index with listed fields. + fields = [ + SimpleField(name="hotelId", type=edm.String, filterable=True, sortable=True, key=True), + SearchableField(name="hotelName", type=edm.String), + SimpleField(name="description", type=edm.String), + SimpleField(name="descriptionFr", type=edm.String), + SimpleField(name="category", type=edm.String), + SimpleField(name="parkingIncluded", type=edm.Boolean, filterable=True), + SimpleField(name="smokingAllowed", type=edm.Boolean, filterable=True), + SimpleField(name="lastRenovationDate", type=edm.String), + SimpleField(name="rating", type=edm.Int64, sortable=True), + SimpleField(name="location", type=edm.GeographyPoint), + ] + cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60) + + # pass in the name, fields and cors options and create the index + index = Index( + name=name, + fields=fields, + cors_options=cors_options) + index_client = service_client.get_indexes_client() + result = index_client.create_index(index) + return result + +def _create_datasource(): + # Here we create a datasource. 
As mentioned in the description we have stored it in + # "searchcontainer" + ds_client = service_client.get_datasources_client() + credentials = DataSourceCredentials(connection_string=connection_string) + container = DataContainer(name='searchcontainer') + ds = DataSource(name="hotel-datasource", type="azureblob", credentials=credentials, container=container) + data_source = ds_client.create_datasource(ds) + return data_source + +def _create_skillset(): + client = service_client.get_skillsets_client() + inp = InputFieldMappingEntry(name="text", source="/document/lastRenovationDate") + output = OutputFieldMappingEntry(name="dateTimes", target_name="RenovatedDate") + s = EntityRecognitionSkill(name="merge-skill", inputs=[inp], outputs=[output]) + + result = client.create_skillset(name='hotel-data-skill', skills=[s], description="example skillset") + return result + +def sample_indexer_workflow(): + # Now that we have a datasource and an index, we can create an indexer. + + skillset_name = _create_skillset().name + print("Skillset is created") + + ds_name = _create_datasource().name + print("Data source is created") + + ind_name = _create_index().name + print("Index is created") + + # we pass the data source, skillsets and targeted index to build an indexer + parameters = IndexingParameters(configuration={"parsingMode": "jsonArray"}) + indexer = Indexer( + name="hotel-data-indexer", + data_source_name=ds_name, + target_index_name=ind_name, + skillset_name=skillset_name, + parameters=parameters + ) + + indexer_client = service_client.get_indexers_client() + indexer_client.create_indexer(indexer) # create the indexer + + # to get an indexer + result = indexer_client.get_indexer("hotel-data-indexer") + print(result) + + # To run an indexer, we can use run_indexer() + indexer_client.run_indexer(result.name) + + # Using create or update to schedule an indexer + + schedule = IndexingSchedule(interval=datetime.timedelta(hours=24)) + result.schedule = schedule + 
updated_indexer = indexer_client.create_or_update_indexer(result) + + print(updated_indexer) + + # get the status of an indexer + indexer_client.get_indexer_status(updated_indexer.name) + +if __name__=="__main__": + sample_indexer_workflow() diff --git a/sdk/search/azure-search-documents/samples/sample_indexers_operations.py b/sdk/search/azure-search-documents/samples/sample_indexers_operations.py new file mode 100644 index 000000000000..0214cbc364e6 --- /dev/null +++ b/sdk/search/azure-search-documents/samples/sample_indexers_operations.py @@ -0,0 +1,109 @@ +# coding: utf-8 + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +""" +FILE: sample_indexers_operations.py +DESCRIPTION: + This sample demonstrates how to get, create, update, or delete an Indexer. 
+USAGE: + python sample_indexers_operations.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_SEARCH_SERVICE_ENDPOINT - the endpoint of your Azure Cognitive Search service + 2) AZURE_SEARCH_API_KEY - your search API key +""" + +import os + +service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") +key = os.getenv("AZURE_SEARCH_API_KEY") +connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING") + +from azure.core.credentials import AzureKeyCredential +from azure.search.documents import ( + DataSource, DataContainer, DataSourceCredentials, Index, Indexer, SimpleField, edm +) +from azure.search.documents import SearchServiceClient + +service_client = SearchServiceClient(service_endpoint, AzureKeyCredential(key)) +indexers_client = service_client.get_indexers_client() + +def create_indexer(): + # create an index + index_name = "indexer-hotels" + fields = [ + SimpleField(name="hotelId", type=edm.String, key=True), + SimpleField(name="baseRate", type=edm.Double) + ] + index = Index(name=index_name, fields=fields) + ind_client = service_client.get_indexes_client() + ind_client.create_index(index) + + # [START create_indexer] + # create a datasource + ds_client = service_client.get_datasources_client() + credentials = DataSourceCredentials(connection_string=connection_string) + container = DataContainer(name='searchcontainer') + ds = DataSource(name="indexer-datasource", type="azureblob", credentials=credentials, container=container) + data_source = ds_client.create_datasource(ds) + + # create an indexer that targets the "indexer-hotels" index created above + indexer = Indexer(name="sample-indexer", data_source_name="indexer-datasource", target_index_name="indexer-hotels") + result = indexers_client.create_indexer(indexer) + print("Create new Indexer - sample-indexer") + # [END create_indexer] + +def list_indexers(): + # [START list_indexer] + result = indexers_client.get_indexers() + names = [x.name for x in result] + print("Found {} Indexers in the service: 
{}".format(len(result), ", ".join(names))) + # [END list_indexer] + +def get_indexer(): + # [START get_indexer] + result = indexers_client.get_indexer("sample-indexer") + print("Retrived Indexer 'sample-indexer'") + return result + # [END get_indexer] + +def get_indexer_status(): + # [START get_indexer_status] + result = indexers_client.get_indexer_status("sample-indexer") + print("Retrived Indexer status for 'sample-indexer'") + return result + # [END get_indexer_status] + +def run_indexer(): + # [START run_indexer] + result = indexers_client.run_indexer("sample-indexer") + print("Ran the Indexer 'sample-indexer'") + return result + # [END run_indexer] + +def reset_indexer(): + # [START reset_indexer] + result = indexers_client.reset_indexer("sample-indexer") + print("Reset the Indexer 'sample-indexer'") + return result + # [END reset_indexer] + +def delete_indexer(): + # [START delete_indexer] + indexers_client.delete_indexer("sample-indexer") + print("Indexer 'sample-indexer' successfully deleted") + # [END delete_indexer] + +if __name__ == '__main__': + create_indexer() + list_indexers() + get_indexer() + get_indexer_status() + run_indexer() + reset_indexer() + delete_indexer()