Skip to content

Commit

Permalink
Format all working *.py and *.ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
ehsanmok committed Feb 11, 2021
1 parent a7f2836 commit 21631fa
Show file tree
Hide file tree
Showing 200 changed files with 16,361 additions and 16,815 deletions.
4 changes: 2 additions & 2 deletions 00_quickstart/01_Setup_Dependencies.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
"metadata": {},
"outputs": [],
"source": [
"!conda install -y pytorch==1.6.0 -c pytorch "
"!conda install -y pytorch==1.6.0 -c pytorch"
]
},
{
Expand Down Expand Up @@ -260,7 +260,7 @@
"metadata": {},
"outputs": [],
"source": [
"setup_dependencies_passed=True"
"setup_dependencies_passed = True"
]
},
{
Expand Down
24 changes: 15 additions & 9 deletions 00_quickstart/02_Copy_TSV_To_S3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@
"import sagemaker\n",
"import pandas as pd\n",
"\n",
"sess = sagemaker.Session()\n",
"sess = sagemaker.Session()\n",
"bucket = sess.default_bucket()\n",
"role = sagemaker.get_execution_role()\n",
"region = boto3.Session().region_name\n",
"account_id = boto3.client('sts').get_caller_identity().get('Account')\n",
"account_id = boto3.client(\"sts\").get_caller_identity().get(\"Account\")\n",
"\n",
"sm = boto3.Session().client(service_name='sagemaker', region_name=region)"
"sm = boto3.Session().client(service_name=\"sagemaker\", region_name=region)"
]
},
{
Expand All @@ -99,9 +99,9 @@
"try:\n",
" setup_dependencies_passed\n",
"except NameError:\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++')\n",
" print('[ERROR] YOU HAVE TO RUN ALL PREVIOUS NOTEBOOKS')\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++')"
" print(\"++++++++++++++++++++++++++++++++++++++++++++++\")\n",
" print(\"[ERROR] YOU HAVE TO RUN ALL PREVIOUS NOTEBOOKS\")\n",
" print(\"++++++++++++++++++++++++++++++++++++++++++++++\")"
]
},
{
Expand All @@ -126,7 +126,7 @@
"metadata": {},
"outputs": [],
"source": [
"s3_public_path_tsv = 's3://amazon-reviews-pds/tsv'"
"s3_public_path_tsv = \"s3://amazon-reviews-pds/tsv\""
]
},
{
Expand All @@ -151,7 +151,7 @@
"metadata": {},
"outputs": [],
"source": [
"s3_private_path_tsv = 's3://{}/amazon-reviews-pds/tsv'.format(bucket)\n",
"s3_private_path_tsv = \"s3://{}/amazon-reviews-pds/tsv\".format(bucket)\n",
"print(s3_private_path_tsv)"
]
},
Expand Down Expand Up @@ -223,7 +223,13 @@
"source": [
"from IPython.core.display import display, HTML\n",
"\n",
"display(HTML('<b>Review <a target=\"blank\" href=\"https://s3.console.aws.amazon.com/s3/buckets/sagemaker-{}-{}/amazon-reviews-pds/?region={}&tab=overview\">S3 Bucket</a></b>'.format(region, account_id, region)))\n"
"display(\n",
" HTML(\n",
" '<b>Review <a target=\"blank\" href=\"https://s3.console.aws.amazon.com/s3/buckets/sagemaker-{}-{}/amazon-reviews-pds/?region={}&tab=overview\">S3 Bucket</a></b>'.format(\n",
" region, account_id, region\n",
" )\n",
" )\n",
")"
]
},
{
Expand Down
27 changes: 14 additions & 13 deletions 00_quickstart/03_Create_Athena_Database.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"import boto3\n",
"import sagemaker\n",
"\n",
"sess = sagemaker.Session()\n",
"sess = sagemaker.Session()\n",
"bucket = sess.default_bucket()\n",
"role = sagemaker.get_execution_role()\n",
"region = boto3.Session().region_name"
Expand Down Expand Up @@ -62,10 +62,10 @@
"try:\n",
" s3_public_path_tsv\n",
"except NameError:\n",
" print('*****************************************************************************')\n",
" print('[ERROR] PLEASE RE-RUN THE PREVIOUS COPY TSV TO S3 NOTEBOOK ******************')\n",
" print('[ERROR] THIS NOTEBOOK WILL NOT RUN PROPERLY. ********************************')\n",
" print('*****************************************************************************')"
" print(\"*****************************************************************************\")\n",
" print(\"[ERROR] PLEASE RE-RUN THE PREVIOUS COPY TSV TO S3 NOTEBOOK ******************\")\n",
" print(\"[ERROR] THIS NOTEBOOK WILL NOT RUN PROPERLY. ********************************\")\n",
" print(\"*****************************************************************************\")"
]
},
{
Expand Down Expand Up @@ -95,10 +95,10 @@
"try:\n",
" s3_private_path_tsv\n",
"except NameError:\n",
" print('*****************************************************************************')\n",
" print('[ERROR] PLEASE RE-RUN THE PREVIOUS COPY TSV TO S3 NOTEBOOK ******************')\n",
" print('[ERROR] THIS NOTEBOOK WILL NOT RUN PROPERLY. ********************************')\n",
" print('*****************************************************************************')"
" print(\"*****************************************************************************\")\n",
" print(\"[ERROR] PLEASE RE-RUN THE PREVIOUS COPY TSV TO S3 NOTEBOOK ******************\")\n",
" print(\"[ERROR] THIS NOTEBOOK WILL NOT RUN PROPERLY. ********************************\")\n",
" print(\"*****************************************************************************\")"
]
},
{
Expand Down Expand Up @@ -141,7 +141,7 @@
"metadata": {},
"outputs": [],
"source": [
"database_name = 'dsoaws'"
"database_name = \"dsoaws\""
]
},
{
Expand All @@ -160,7 +160,7 @@
"outputs": [],
"source": [
"# Set S3 staging directory -- this is a temporary directory used for Athena queries\n",
"s3_staging_dir = 's3://{0}/athena/staging'.format(bucket)"
"s3_staging_dir = \"s3://{0}/athena/staging\".format(bucket)"
]
},
{
Expand All @@ -178,7 +178,7 @@
"metadata": {},
"outputs": [],
"source": [
"statement = 'CREATE DATABASE IF NOT EXISTS {}'.format(database_name)\n",
"statement = \"CREATE DATABASE IF NOT EXISTS {}\".format(database_name)\n",
"print(statement)"
]
},
Expand All @@ -189,6 +189,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"pd.read_sql(statement, conn)"
]
},
Expand All @@ -205,7 +206,7 @@
"metadata": {},
"outputs": [],
"source": [
"statement = 'SHOW DATABASES'\n",
"statement = \"SHOW DATABASES\"\n",
"\n",
"df_show = pd.read_sql(statement, conn)\n",
"df_show.head(5)"
Expand Down
59 changes: 35 additions & 24 deletions 00_quickstart/04_Register_S3_TSV_With_Athena.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"import boto3\n",
"import sagemaker\n",
"\n",
"sess = sagemaker.Session()\n",
"sess = sagemaker.Session()\n",
"bucket = sess.default_bucket()\n",
"role = sagemaker.get_execution_role()\n",
"region = boto3.Session().region_name"
Expand Down Expand Up @@ -64,9 +64,9 @@
"try:\n",
" ingest_create_athena_db_passed\n",
"except NameError:\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++')\n",
" print('[ERROR] YOU HAVE TO RUN ALL PREVIOUS NOTEBOOKS. You did not create the Athena Database.')\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++')"
" print(\"++++++++++++++++++++++++++++++++++++++++++++++\")\n",
" print(\"[ERROR] YOU HAVE TO RUN ALL PREVIOUS NOTEBOOKS. You did not create the Athena Database.\")\n",
" print(\"++++++++++++++++++++++++++++++++++++++++++++++\")"
]
},
{
Expand All @@ -85,11 +85,11 @@
"outputs": [],
"source": [
"if not ingest_create_athena_db_passed:\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++')\n",
" print('[ERROR] YOU HAVE TO RUN ALL PREVIOUS NOTEBOOKS. You did not create the Athena Database.')\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++')\n",
" print(\"++++++++++++++++++++++++++++++++++++++++++++++\")\n",
" print(\"[ERROR] YOU HAVE TO RUN ALL PREVIOUS NOTEBOOKS. You did not create the Athena Database.\")\n",
" print(\"++++++++++++++++++++++++++++++++++++++++++++++\")\n",
"else:\n",
" print('[OK]') "
" print(\"[OK]\")"
]
},
{
Expand All @@ -110,10 +110,10 @@
"try:\n",
" s3_private_path_tsv\n",
"except NameError:\n",
" print('*****************************************************************************')\n",
" print('[ERROR] PLEASE RE-RUN THE PREVIOUS COPY TSV TO S3 NOTEBOOK ******************')\n",
" print('[ERROR] THIS NOTEBOOK WILL NOT RUN PROPERLY. ********************************')\n",
" print('*****************************************************************************')"
" print(\"*****************************************************************************\")\n",
" print(\"[ERROR] PLEASE RE-RUN THE PREVIOUS COPY TSV TO S3 NOTEBOOK ******************\")\n",
" print(\"[ERROR] THIS NOTEBOOK WILL NOT RUN PROPERLY. ********************************\")\n",
" print(\"*****************************************************************************\")"
]
},
{
Expand Down Expand Up @@ -179,7 +179,7 @@
"outputs": [],
"source": [
"# Set S3 staging directory -- this is a temporary directory used for Athena queries\n",
"s3_staging_dir = 's3://{0}/athena/staging'.format(bucket)"
"s3_staging_dir = \"s3://{0}/athena/staging\".format(bucket)"
]
},
{
Expand All @@ -189,8 +189,8 @@
"outputs": [],
"source": [
"# Set Athena parameters\n",
"database_name = 'dsoaws'\n",
"table_name_tsv = 'amazon_reviews_tsv'"
"database_name = \"dsoaws\"\n",
"table_name_tsv = \"amazon_reviews_tsv\""
]
},
{
Expand Down Expand Up @@ -226,7 +226,9 @@
" review_body string,\n",
" review_date string\n",
") ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\\\t' LINES TERMINATED BY '\\\\n' LOCATION '{}'\n",
"TBLPROPERTIES ('compressionType'='gzip', 'skip.header.line.count'='1')\"\"\".format(database_name, table_name_tsv, s3_private_path_tsv)\n",
"TBLPROPERTIES ('compressionType'='gzip', 'skip.header.line.count'='1')\"\"\".format(\n",
" database_name, table_name_tsv, s3_private_path_tsv\n",
")\n",
"\n",
"print(statement)"
]
Expand All @@ -238,6 +240,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"pd.read_sql(statement, conn)"
]
},
Expand All @@ -254,7 +257,7 @@
"metadata": {},
"outputs": [],
"source": [
"statement = 'SHOW TABLES in {}'.format(database_name)\n",
"statement = \"SHOW TABLES in {}\".format(database_name)\n",
"\n",
"df_show = pd.read_sql(statement, conn)\n",
"df_show.head(5)"
Expand Down Expand Up @@ -292,10 +295,12 @@
"metadata": {},
"outputs": [],
"source": [
"product_category = 'Digital_Software'\n",
"product_category = \"Digital_Software\"\n",
"\n",
"statement = \"\"\"SELECT * FROM {}.{}\n",
" WHERE product_category = '{}' LIMIT 100\"\"\".format(database_name, table_name_tsv, product_category)\n",
" WHERE product_category = '{}' LIMIT 100\"\"\".format(\n",
" database_name, table_name_tsv, product_category\n",
")\n",
"\n",
"print(statement)"
]
Expand All @@ -317,11 +322,11 @@
"outputs": [],
"source": [
"if not df.empty:\n",
" print('[OK]')\n",
" print(\"[OK]\")\n",
"else:\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++++++++++')\n",
" print('[ERROR] YOUR DATA HAS NOT BEEN REGISTERED WITH ATHENA. LOOK IN PREVIOUS CELLS TO FIND THE ISSUE.')\n",
" print('++++++++++++++++++++++++++++++++++++++++++++++++++++++')"
" print(\"++++++++++++++++++++++++++++++++++++++++++++++++++++++\")\n",
" print(\"[ERROR] YOUR DATA HAS NOT BEEN REGISTERED WITH ATHENA. LOOK IN PREVIOUS CELLS TO FIND THE ISSUE.\")\n",
" print(\"++++++++++++++++++++++++++++++++++++++++++++++++++++++\")"
]
},
{
Expand All @@ -339,7 +344,13 @@
"source": [
"from IPython.core.display import display, HTML\n",
"\n",
"display(HTML('<b>Review <a target=\"top\" href=\"https://console.aws.amazon.com/glue/home?region={}#\">AWS Glue Catalog</a></b>'.format(region)))\n"
"display(\n",
" HTML(\n",
" '<b>Review <a target=\"top\" href=\"https://console.aws.amazon.com/glue/home?region={}#\">AWS Glue Catalog</a></b>'.format(\n",
" region\n",
" )\n",
" )\n",
")"
]
},
{
Expand Down
Loading

0 comments on commit 21631fa

Please sign in to comment.