diff --git a/.github/workflows/dashboard-deploy.yml b/.github/workflows/dashboard-deploy.yml index 08d102c..6a91d21 100644 --- a/.github/workflows/dashboard-deploy.yml +++ b/.github/workflows/dashboard-deploy.yml @@ -8,16 +8,16 @@ on: - .github/workflows/dashboard-deploy.yml env: - RESOURCE_GROUP: "ssatt-dev-rg" - COSMOS_ACCOUNT_NAME: "ebcbin5oofjcs" - COSMOS_DB_NAME: "database01" - COSMOS_GRAPH_COLLECTION: "graph01" - SEARCH_RESOURCE_GROUP: "demo-rg-01" - SEARCH_SERVICE_NAME: "ssattiraju-search-01" - AZURE_CONTAINER_REGISTRY: "ebcbin5oofjcs.azurecr.io" - SEARCH_INDEX: "cosmosdb-index" - STREAMLIT_SERVER_PORT: 8888 - STREAMLIT_SERVER_HEADLESS: "true" + RESOURCE_GROUP: "cosmosdb_ontologypoc_manish" + COSMOS_ACCOUNT_NAME: "ontologypoc" + COSMOS_DB_NAME: "ontology_nn" + COSMOS_GRAPH_COLLECTION: "graphnn7" + #SEARCH_RESOURCE_GROUP: "demo-rg-01" + #SEARCH_SERVICE_NAME: "ssattiraju-search-01" + AZURE_CONTAINER_REGISTRY: "ontologypoc.azurecr.io" + #SEARCH_INDEX: "cosmosdb-index" + #STREAMLIT_SERVER_PORT: 8888 + #STREAMLIT_SERVER_HEADLESS: "true" jobs: dashboard-deploy: diff --git a/.github/workflows/infra-deploy.yml b/.github/workflows/infra-deploy.yml index 38985a7..9523849 100644 --- a/.github/workflows/infra-deploy.yml +++ b/.github/workflows/infra-deploy.yml @@ -8,8 +8,8 @@ on: - .github/workflows/infra-deploy.yml env: - AZURE_SUBSCRIPTION_ID: "7c1d967f-37f1-4047-bef7-05af9aa80fe2" - AZURE_LOCATION: "southeastasia" + AZURE_SUBSCRIPTION_ID: "c58a65e8-e2c6-4779-948c-29fdccfd9eba" + AZURE_LOCATION: "eastus2" jobs: validation: diff --git a/load_data/Load_Airportsgraphdatasample.ipynb b/load_data/Load_Airportsgraphdatasample.ipynb index e6350ad..582c531 100644 --- a/load_data/Load_Airportsgraphdatasample.ipynb +++ b/load_data/Load_Airportsgraphdatasample.ipynb @@ -1 +1 @@ 
-{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cc14dfef-7e6f-4167-99c4-2d52f66cdfdc","showTitle":false,"title":""}},"outputs":[],"source":["import os\n","import uuid\n","from array import array\n","from pyspark.sql import DataFrame\n","import pyspark.sql.functions as f\n","from pyspark.sql.types import StringType,BooleanType,StructType,StructField,IntegerType, DecimalType\n","from pyspark.sql.functions import lit\n","from decimal import Decimal\n","\n","f_uuid = f.udf(lambda: str(uuid.uuid4()), StringType())\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"f95f7542-927d-4be8-a85d-c1d5413f5ca1","showTitle":false,"title":""}},"outputs":[],"source":["cosmosEndpoint = \"https://xxxxxx.documents.azure.com:443/\"\n","cosmosMasterKey = \"*******\"\n","cosmosDatabaseName = \"*******\"\n","cosmosContainerName = \"*******\"\n","\n","cfg = {\n"," \"spark.cosmos.accountEndpoint\" : cosmosEndpoint,\n"," \"spark.cosmos.accountKey\" : cosmosMasterKey,\n"," \"spark.cosmos.database\" : cosmosDatabaseName,\n"," \"spark.cosmos.container\" : cosmosContainerName,\n","}\n","# Configure Catalog Api to be used\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog\", \"com.azure.cosmos.spark.CosmosCatalog\")\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountEndpoint\", cosmosEndpoint)\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountKey\", cosmosMasterKey)\n","spark.conf.set(\"spark.cosmos.throughputControl.enabled\",True)\n","spark.conf.set(\"spark.cosmos.throughputControl.targetThroughput\",20000)\n","\n","def write_to_cosmos_graph(df: DataFrame):\n"," \n"," df.write\\\n"," .format(\"cosmos.oltp\")\\\n"," .options(**cfg)\\\n"," .mode(\"Append\")\\\n"," 
.save()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"2ff7cc9a-f1fd-46f8-bb1d-df35b0a0bbd0","showTitle":false,"title":""}},"outputs":[],"source":["def create_vertex_df(\n"," df: DataFrame,\n"," vertex_properties_col_name: list, partition_col: str,\n"," vertex_label: str,id: str\n","):\n"," columns = [id, partition_col,\"label\"]\n"," columns.extend(['nvl2({x}, array(named_struct(\"id\", uuid(), \"_value\", {x})), NULL) AS {x}'.format(x=x) for x in vertex_properties_col_name])\n"," if \"label\" in df.columns:\n"," df=df.withColumn(\"label\",df[vertex_label])\n"," else:\n"," df=df.withColumn(\"label\",f.lit(vertex_label))\n"," \n"," return df.selectExpr(*columns).withColumnRenamed(id,\"id\")\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bbcae6a8-9f97-4af2-b282-642670db3fdf","showTitle":false,"title":""}},"outputs":[],"source":["def create_edge_df(srcdf: DataFrame, destdf: DataFrame, label: str, partition_col: str, \n"," vertexidcol: str, sinkcol: str, sinklabel: str, vertexlabel: str, sinkpartitioncol: str,srcjoincol: str,destjoincol: str,isedgetable: bool):\n"," if(isedgetable):\n"," #we have edge table\n"," if(sinklabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",srcdf[sinklabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",f.lit(sinklabel))\n"," if(vertexlabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",srcdf[vertexlabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",f.lit(vertexlabel))\n"," srcdf=srcdf.selectExpr(\"_sinkLabel\",\"_vertexLabel\",srcjoincol,partition_col)\n"," destdf=destdf.selectExpr(label,destjoincol,vertexidcol,sinkcol,sinkpartitioncol)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," if(\"label\" in df.columns):\n"," df=df.withColumn(\"label\",df[label])\n"," else:\n"," 
df=df.withColumn(\"label\",f.lit(label))\n"," df=df.withColumn(\"_sink\",df[sinkcol]).withColumn(\"_sinkPartition\",df[sinkpartitioncol]).withColumn(\"_vertexId\",df[vertexidcol])\\\n"," .withColumn(\"id\",f_uuid()).withColumn(\"_isEdge\",f.lit(True))\n"," else:\n"," destdf=destdf.withColumn(\"_sink\",destdf[sinkcol]).withColumn(\"_sinkPartition\",destdf[sinkpartitioncol]).select(destjoincol,\"_sink\",\"_sinkPartition\")\n"," srcdf=srcdf.withColumn(\"_vertexId\",srcdf[vertexidcol]).select(srcjoincol,\"_vertexId\",partition_col)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," df=df.withColumn(\"label\",f.lit(label)).withColumn(\"id\",f_uuid()).withColumn(\"_sinkLabel\",f.lit(sinklabel))\\\n"," .withColumn(\"_vertexLabel\",f.lit(vertexlabel)).withColumn(\"_isEdge\",f.lit(True))\n"," \n"," columns=[\"label\",\"_sink\",\"_sinkLabel\",\"_vertexId\",\"_vertexLabel\",\"_isEdge\",\"_sinkPartition\",partition_col,\"id\"]\n"," return df.selectExpr(*columns)\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"23bc9460-9911-4b8e-ba06-7b2ace9234e5","showTitle":false,"title":""}},"outputs":[],"source":["#vertex_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-nodes.csv\"))\n","\n","airroutes=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"code:string\",\"code\")\\\n"," .withColumnRenamed(\"desc:string\",\"desc\").withColumnRenamed(\"country:string\",\"country\").withColumnRenamed(\"city:string\",\"city\")\\\n"," .selectExpr(\"cast(srno as string) srno\",\"cast(id as string) 
id\",\"label\",\"code\",\"desc\",\"country\",\"city\")\n","\n","airroutes.show()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"70199341-695b-4361-8dbd-113e531061b0","showTitle":false,"title":""}},"outputs":[],"source":["#edges_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-edges.csv\"))\n","\n","airroutesedges=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"~from\",\"from\")\\\n"," .withColumnRenamed(\"~to\",\"to\").withColumnRenamed(\"dist:int\",\"dist\")\\\n"," .selectExpr(\"id\",\"cast(from as string) from\",\"cast(to as string) to\",\"label\",\"dist\",\"srno\")\n","\n","airroutesedges.show()\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cf8cd1dc-04b0-49fd-8bf0-9b27b0a3bf94","showTitle":false,"title":""}},"outputs":[],"source":["#Vertex\n","vertex_airroutes = create_vertex_df(\n"," df=airroutes,\n"," vertex_properties_col_name=[\"code\",\"desc\",\"country\",\"code\"],\n"," vertex_label = 
\"label\",id=\"id\",partition_col=\"srno\"\n",")\n","\n","vertex_airroutes.display()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"83670e9e-a63b-4988-b547-a312ab133a2e","showTitle":false,"title":""}},"outputs":[],"source":["edges_airroutes=create_edge_df(airroutes,airroutesedges,\"label\",\"srno\",\"from\",\"to\",\"label\",\"label\",\"to\",\"srno\",\"from\",True)\n","\n","edges_airroutes.schema\n","\n","#edges_airroutes.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"0db5fcf3-822f-4313-8782-9ee3eceddf66","showTitle":false,"title":""}},"outputs":[],"source":["#Write Vertex\n","write_to_cosmos_graph(vertex_airroutes)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"28a54f54-d08e-459e-ae60-6efdf74bcc23","showTitle":false,"title":""}},"outputs":[],"source":["#Write Edges\n","write_to_cosmos_graph(edges_airroutes)"]}],"metadata":{"application/vnd.databricks.v1+notebook":{"dashboards":[],"language":"python","notebookMetadata":{"pythonIndentUnit":2},"notebookName":"Airportsgraphdatasample","notebookOrigID":2336516133702252,"widgets":{}},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} +{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cc14dfef-7e6f-4167-99c4-2d52f66cdfdc","showTitle":false,"title":""}},"outputs":[],"source":["import os\n","import uuid\n","from array import array\n","from pyspark.sql import DataFrame\n","import pyspark.sql.functions as f\n","from pyspark.sql.types import StringType,BooleanType,StructType,StructField,IntegerType, DecimalType\n","from pyspark.sql.functions import lit\n","from decimal import Decimal\n","\n","f_uuid = f.udf(lambda: str(uuid.uuid4()), 
StringType())\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"f95f7542-927d-4be8-a85d-c1d5413f5ca1","showTitle":false,"title":""}},"outputs":[],"source":["cosmosEndpoint = \"https://ontologypoc.documents.azure.com:443/\"\n","cosmosMasterKey = \"<cosmos-master-key>\"\n","cosmosDatabaseName = \"ontology_nn\"\n","cosmosContainerName = \"graphnn7\"\n","\n","cfg = {\n"," \"spark.cosmos.accountEndpoint\" : cosmosEndpoint,\n"," \"spark.cosmos.accountKey\" : cosmosMasterKey,\n"," \"spark.cosmos.database\" : cosmosDatabaseName,\n"," \"spark.cosmos.container\" : cosmosContainerName,\n","}\n","# Configure Catalog Api to be used\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog\", \"com.azure.cosmos.spark.CosmosCatalog\")\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountEndpoint\", cosmosEndpoint)\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountKey\", cosmosMasterKey)\n","spark.conf.set(\"spark.cosmos.throughputControl.enabled\",True)\n","spark.conf.set(\"spark.cosmos.throughputControl.targetThroughput\",20000)\n","\n","def write_to_cosmos_graph(df: DataFrame):\n"," \n"," df.write\\\n"," .format(\"cosmos.oltp\")\\\n"," .options(**cfg)\\\n"," .mode(\"Append\")\\\n"," .save()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"2ff7cc9a-f1fd-46f8-bb1d-df35b0a0bbd0","showTitle":false,"title":""}},"outputs":[],"source":["def create_vertex_df(\n"," df: DataFrame,\n"," vertex_properties_col_name: list, partition_col: str,\n"," vertex_label: str,id: str\n","):\n"," columns = [id, partition_col,\"label\"]\n"," columns.extend(['nvl2({x}, array(named_struct(\"id\", uuid(), \"_value\", {x})), NULL) AS {x}'.format(x=x) for x in vertex_properties_col_name])\n"," if \"label\" in df.columns:\n"," 
df=df.withColumn(\"label\",df[vertex_label])\n"," else:\n"," df=df.withColumn(\"label\",f.lit(vertex_label))\n"," \n"," return df.selectExpr(*columns).withColumnRenamed(id,\"id\")\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bbcae6a8-9f97-4af2-b282-642670db3fdf","showTitle":false,"title":""}},"outputs":[],"source":["def create_edge_df(srcdf: DataFrame, destdf: DataFrame, label: str, partition_col: str, \n"," vertexidcol: str, sinkcol: str, sinklabel: str, vertexlabel: str, sinkpartitioncol: str,srcjoincol: str,destjoincol: str,isedgetable: bool):\n"," if(isedgetable):\n"," #we have edge table\n"," if(sinklabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",srcdf[sinklabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",f.lit(sinklabel))\n"," if(vertexlabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",srcdf[vertexlabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",f.lit(vertexlabel))\n"," srcdf=srcdf.selectExpr(\"_sinkLabel\",\"_vertexLabel\",srcjoincol,partition_col)\n"," destdf=destdf.selectExpr(label,destjoincol,vertexidcol,sinkcol,sinkpartitioncol)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," if(\"label\" in df.columns):\n"," df=df.withColumn(\"label\",df[label])\n"," else:\n"," df=df.withColumn(\"label\",f.lit(label))\n"," df=df.withColumn(\"_sink\",df[sinkcol]).withColumn(\"_sinkPartition\",df[sinkpartitioncol]).withColumn(\"_vertexId\",df[vertexidcol])\\\n"," .withColumn(\"id\",f_uuid()).withColumn(\"_isEdge\",f.lit(True))\n"," else:\n"," destdf=destdf.withColumn(\"_sink\",destdf[sinkcol]).withColumn(\"_sinkPartition\",destdf[sinkpartitioncol]).select(destjoincol,\"_sink\",\"_sinkPartition\")\n"," srcdf=srcdf.withColumn(\"_vertexId\",srcdf[vertexidcol]).select(srcjoincol,\"_vertexId\",partition_col)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," 
df=df.withColumn(\"label\",f.lit(label)).withColumn(\"id\",f_uuid()).withColumn(\"_sinkLabel\",f.lit(sinklabel))\\\n"," .withColumn(\"_vertexLabel\",f.lit(vertexlabel)).withColumn(\"_isEdge\",f.lit(True))\n"," \n"," columns=[\"label\",\"_sink\",\"_sinkLabel\",\"_vertexId\",\"_vertexLabel\",\"_isEdge\",\"_sinkPartition\",partition_col,\"id\"]\n"," return df.selectExpr(*columns)\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"23bc9460-9911-4b8e-ba06-7b2ace9234e5","showTitle":false,"title":""}},"outputs":[],"source":["#vertex_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-nodes.csv\"))\n","\n","airroutes=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"code:string\",\"code\")\\\n"," .withColumnRenamed(\"desc:string\",\"desc\").withColumnRenamed(\"country:string\",\"country\").withColumnRenamed(\"city:string\",\"city\")\\\n"," .selectExpr(\"cast(srno as string) srno\",\"cast(id as string) id\",\"label\",\"code\",\"desc\",\"country\",\"city\")\n","\n","airroutes.show()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"70199341-695b-4361-8dbd-113e531061b0","showTitle":false,"title":""}},"outputs":[],"source":["#edges_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-edges.csv\"))\n","\n","airroutesedges=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"~from\",\"from\")\\\n"," .withColumnRenamed(\"~to\",\"to\").withColumnRenamed(\"dist:int\",\"dist\")\\\n"," .selectExpr(\"id\",\"cast(from as string) from\",\"cast(to as string) 
to\",\"label\",\"dist\",\"srno\")\n","\n","airroutesedges.show()\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cf8cd1dc-04b0-49fd-8bf0-9b27b0a3bf94","showTitle":false,"title":""}},"outputs":[],"source":["#Vertex\n","vertex_airroutes = create_vertex_df(\n"," df=airroutes,\n"," vertex_properties_col_name=[\"code\",\"desc\",\"country\",\"code\"],\n"," vertex_label = \"label\",id=\"id\",partition_col=\"srno\"\n",")\n","\n","vertex_airroutes.display()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"83670e9e-a63b-4988-b547-a312ab133a2e","showTitle":false,"title":""}},"outputs":[],"source":["edges_airroutes=create_edge_df(airroutes,airroutesedges,\"label\",\"srno\",\"from\",\"to\",\"label\",\"label\",\"to\",\"srno\",\"from\",True)\n","\n","edges_airroutes.schema\n","\n","#edges_airroutes.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"0db5fcf3-822f-4313-8782-9ee3eceddf66","showTitle":false,"title":""}},"outputs":[],"source":["#Write Vertex\n","write_to_cosmos_graph(vertex_airroutes)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"28a54f54-d08e-459e-ae60-6efdf74bcc23","showTitle":false,"title":""}},"outputs":[],"source":["#Write Edges\n","write_to_cosmos_graph(edges_airroutes)"]}],"metadata":{"application/vnd.databricks.v1+notebook":{"dashboards":[],"language":"python","notebookMetadata":{"pythonIndentUnit":2},"notebookName":"Airportsgraphdatasample","notebookOrigID":2336516133702252,"widgets":{}},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb index 8408e1b..b495164 100644 --- a/load_data/Load_Bank_transact_data.ipynb +++ 
b/load_data/Load_Bank_transact_data.ipynb @@ -22,10 +22,10 @@ "cell_type": "code", "source": [ "# update the values from your infra\r\n", - "cosmosEndpoint = \"https://cosmos-7w6s3xqb4piyi.documents.azure.com:443/\"\r\n", - "cosmosMasterKey = \"cosmos-key\" \r\n", - "cosmosDatabaseName = \"database01\"\r\n", - "cosmosContainerName = \"graph01\"" + "cosmosEndpoint = \"https://ontologypoc.documents.azure.com:443/\"\r\n", + "cosmosMasterKey = \"<cosmos-master-key>\" \r\n", + "cosmosDatabaseName = \"ontology_nn\"\r\n", + "cosmosContainerName = \"graphnn7\"" ], "outputs": [], "execution_count": null, @@ -45,7 +45,7 @@ "cell_type": "code", "source": [ "# update csv file path based on your infra\n", - "df = spark.read.load('abfss://data@7w6s3xqb4piyi.dfs.core.windows.net/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n", + "df = spark.read.load('wasbs://cosmosdbgp@owl2jsonmanish.blob.core.windows.net/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n", "display(df.limit(10))" ], "outputs": [], @@ -297,4 +297,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/load_data/data/PS_20174392719_1491204439457_log.csv b/load_data/data/PS_20174392719_14912041439457_log.csv similarity index 99% rename from load_data/data/PS_20174392719_1491204439457_log.csv rename to load_data/data/PS_20174392719_14912041439457_log.csv index 1a253be..5290a04 100644 --- a/load_data/data/PS_20174392719_1491204439457_log.csv +++ b/load_data/data/PS_20174392719_14912041439457_log.csv @@ -1,3 +1,4 @@ version https://git-lfs.github.com/spec/v1 oid sha256:16910f90577b0d981bf8ff289714510bb89bc71bff7d3f220f024e287e4eea6b size 493534783 + diff --git a/visualize/app.py b/visualize/app.py index 01590b7..a824e1d 100644 --- a/visualize/app.py +++ b/visualize/app.py @@ -11,27 +11,27 @@ # Load environment variables from .env file load_dotenv() -cosmos_database = 
os.getenv("COSMOS_DATABASE") -cosmos_graph_collection = os.getenv("COSMOS_GRAPH_COLLECTION") -cosmos_key = os.getenv("COSMOS_KEY") -cosmos_endpoint = os.getenv("COSMOS_ENDPOINT") -search_key = os.getenv("SEARCH_KEY") -search_index = os.getenv("SEARCH_INDEX") -search_endpoint = os.getenv("SEARCH_ENDPOINT") +cosmos_database = os.getenv("COSMOS_DATABASE", "ontology_nn") +cosmos_graph_collection = os.getenv("COSMOS_GRAPH_COLLECTION", "graphnn7") +cosmos_key = os.getenv("COSMOS_KEY") +cosmos_endpoint = os.getenv("COSMOS_ENDPOINT", "wss://ontologypoc.gremlin.cosmos.azure.com:443/") +# search_key = os.getenv("SEARCH_KEY") +# search_index = os.getenv("SEARCH_INDEX") +# search_endpoint = os.getenv("SEARCH_ENDPOINT") class Transaction: def __init__(self) -> None: # Limit the max number of results returned by a raw gremlin query to avoid excess RU's and timeouts self.GREMLIN_QUERY_LIMIT = 100 - self.credential = AzureKeyCredential(search_key) + # self.credential = AzureKeyCredential(search_key) # Create cognitive search client - self.search_client = SearchClient( - endpoint=search_endpoint, - index_name=search_index, - credential=self.credential, - ) + # self.search_client = SearchClient( + # endpoint=search_endpoint, + # index_name=search_index, + # credential=self.credential, + # ) # Create cosmos client self.cql = client.Client( @@ -133,14 +133,14 @@ def execute_gremlin_query(self, query: str) -> None: st.error(e) # Execute Azure search to find accounts either sent or received - def execute_search(self, search_text: str, filter=None) -> None: - accountId_list = [] - response = self.search_client.search( - search_text=search_text, - include_total_count=True, - filter=filter, - search_fields=["sink", "vertexId"], - ) + # def execute_search(self, search_text: str, filter=None) -> None: + # accountId_list = [] + # response = self.search_client.search( + # search_text=search_text, + # include_total_count=True, + # filter=filter, + # search_fields=["sink", 
"vertexId"], + # ) - for r in response: - accountId_list.append(r["vertexId"]) - accountId_list.append(r["sink"]) + # for r in response: + #     accountId_list.append(r["vertexId"]) + #     accountId_list.append(r["sink"])