From 02845f4fd6d1d121f868af560de37bdeb8b8ecdf Mon Sep 17 00:00:00 2001 From: Roshanjoseph468 <163798475+Roshanjoseph468@users.noreply.github.com> Date: Tue, 5 Nov 2024 02:02:12 -0500 Subject: [PATCH] Revert "Add files via upload" --- json to csv.ipynb | 1 - 1 file changed, 1 deletion(-) delete mode 100644 json to csv.ipynb diff --git a/json to csv.ipynb b/json to csv.ipynb deleted file mode 100644 index 5757cd2..0000000 --- a/json to csv.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8930,"status":"ok","timestamp":1726186485507,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"},"user_tz":240},"id":"PygWqEtwbIvV","outputId":"ffd8329c-c3e8-4c3b-f4b1-adf026a0af09"},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting pyspark\n"," Using cached pyspark-3.5.2-py2.py3-none-any.whl\n","Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n","Installing collected packages: pyspark\n","Successfully installed pyspark-3.5.2\n"]}],"source":["# install PySpark\n","!pip install pyspark"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2636,"status":"ok","timestamp":1726186508819,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"},"user_tz":240},"id":"QO8TWPuCiw6R","outputId":"be50dcb8-15aa-4d9c-9e2a-355b9c9db545"},"outputs":[{"name":"stdout","output_type":"stream","text":["Requirement already satisfied: openpyxl in /usr/local/lib/python3.10/dist-packages (3.1.5)\n","Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl) (1.1.0)\n"]}],"source":["# install openpyxl\n","!pip install openpyxl"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"vAIR0kaEbVW5"},"outputs":[],"source":["# importing necessary libraries\n","\n","from pyspark.sql import SparkSession\n","from pyspark.sql.functions import *\n","from pyspark.sql.types import *"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"J2mGteZRbc3V"},"outputs":[],"source":["# Spark session\n","spark = SparkSession.builder.appName(\"Read_JSON\").getOrCreate()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"4yGPoJYHZwXb"},"outputs":[],"source":["# Reading the json file\n","\n","def read_json(file_path):\n"," df = spark.read.json(file_path)\n"," # df.show(10)\n"," return df"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xvlcSXkVa_QE"},"outputs":[],"source":["# df.show()\n","# filepath = '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_tip.json' #'/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json' #'/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_checkin.json' #'/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_business.json'\n","# read_json(filepath)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"BfWduucPcbOa"},"outputs":[],"source":["# from pyspark.sql.functions import to_json\n","# from pyspark.sql.functions import col, to_json\n","\n","# # Convert the 'attributes' column to a JSON string\n","# df = df.withColumn(\"attributes\", to_json(col(\"attributes\")))\n","\n","# # Convert the 'hours' column to a JSON string\n","# df = df.withColumn(\"hours\", to_json(col(\"hours\")))\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QQY2yGQYcHs9"},"outputs":[],"source":["# df.show(5)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"3IgN0cCAcpy4"},"outputs":[],"source":["# write the dataframe as an excel file\n","\n","def write_As_xl(df, root_path, csvfilename):\n"," # spark = SparkSession.builder.appName(\"YourAppName\").getOrCreate()\n"," # Convert the DataFrame to Pandas DataFrame for easier writing to Excel\n"," pandas_df = df.toPandas()\n","\n"," # Write the Pandas DataFrame to an Excel file in Google Drive\n"," pandas_df.to_excel(csv_root_path + csvfilename + '.xlsx', index=False)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"MjGrkUJoc5kw"},"outputs":[],"source":["# fp = '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_tip.json'\n","\n","# p1 = fp.split('/')[-1].split('.')[0]\n","# print(p1)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":426},"executionInfo":{"elapsed":5683,"status":"error","timestamp":1726187212131,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"},"user_tz":240},"id":"05X1G2udjez4","outputId":"3603eb17-2e4c-4a09-9321-89224a268645"},"outputs":[{"name":"stdout","output_type":"stream","text":["Requirement already satisfied: pyarrow in /usr/local/lib/python3.10/dist-packages (14.0.2)\n","Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow) (1.26.4)\n","Requirement already satisfied: pyspark in /usr/local/lib/python3.10/dist-packages (3.5.2)\n","Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n"]},{"ename":"ConnectionRefusedError","evalue":"[Errno 111] Connection refused","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;31m# Create a SparkSession\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mspark\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappName\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"YourAppName\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetOrCreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;31m# Enable Arrow optimization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pyspark/sql/session.py\u001b[0m in \u001b[0;36mgetOrCreate\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 501\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 502\u001b[0m getattr(\n\u001b[0;32m--> 503\u001b[0;31m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"SparkSession$\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"MODULE$\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 504\u001b[0m ).applyModifiableSettings(session._jsparkSession, self._options)\n\u001b[1;32m 505\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1710\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mUserHelpAutoCompletion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1711\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1712\u001b[0;31m answer = self._gateway_client.send_command(\n\u001b[0m\u001b[1;32m 1713\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mREFLECTION_COMMAND_NAME\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1714\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mREFL_GET_UNKNOWN_SUB_COMMAND_NAME\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"\\n\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_id\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command, retry, binary)\u001b[0m\n\u001b[1;32m 1034\u001b[0m \u001b[0;32mif\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mbinary\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0;32mis\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1035\u001b[0m \"\"\"\n\u001b[0;32m-> 1036\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1037\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_get_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconnection\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_new_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_create_new_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_parameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython_parameters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m self.gateway_property, self)\n\u001b[0;32m--> 291\u001b[0;31m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect_to_java_server\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_thread_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36mconnect_to_java_server\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 436\u001b[0m self.socket = self.ssl_context.wrap_socket(\n\u001b[1;32m 437\u001b[0m self.socket, server_hostname=self.java_address)\n\u001b[0;32m--> 438\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_address\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_port\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 439\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmakefile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_connected\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 111] Connection refused"]}],"source":["# main code\n","\n","file_paths = ['/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_tip.json',\n"," '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_checkin.json',\n"," '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_business.json']#,\n"," '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json',\n"," '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json']\n","\n","# reading an coverting each json file to csv\n","\n","for filepath in file_paths:\n","\n"," csv_root_path = '/content/drive/MyDrive/CSV/'\n","\n"," rt_df = read_json(filepath)\n","\n"," csvfilename = filepath.split('/')[-1].split('.')[0]\n","\n"," print(filepath, ' -----> ', csvfilename, '.xlsx')\n","\n"," write_As_xl(rt_df, csv_root_path, csvfilename)\n","\n","\n","\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3111,"status":"ok","timestamp":1726186965601,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"},"user_tz":240},"id":"rZI0NVxRmrWk","outputId":"56fd8965-6377-44c9-b4b3-9efa1824bb5a"},"outputs":[{"name":"stdout","output_type":"stream","text":["Requirement already satisfied: pyarrow in /usr/local/lib/python3.10/dist-packages (14.0.2)\n","Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow) (1.26.4)\n"]}],"source":["!pip install pyarrow"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":446},"id":"R0SMycOnwry1","executionInfo":{"status":"error","timestamp":1726196630545,"user_tz":240,"elapsed":6514,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"}},"outputId":"6c8c4468-9d88-45e7-8706-b8f1731aa6ab"},"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: pyarrow in /usr/local/lib/python3.10/dist-packages (14.0.2)\n","Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow) (1.26.4)\n","Requirement already satisfied: pyspark in /usr/local/lib/python3.10/dist-packages (3.5.2)\n","Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n"]},{"output_type":"error","ename":"ConnectionRefusedError","evalue":"[Errno 111] Connection refused","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;31m# Create a SparkSession\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mspark\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappName\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"YourAppName\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetOrCreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;31m# Enable Arrow optimization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pyspark/sql/session.py\u001b[0m in \u001b[0;36mgetOrCreate\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 501\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 502\u001b[0m getattr(\n\u001b[0;32m--> 503\u001b[0;31m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"SparkSession$\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"MODULE$\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 504\u001b[0m ).applyModifiableSettings(session._jsparkSession, self._options)\n\u001b[1;32m 505\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1710\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mUserHelpAutoCompletion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1711\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1712\u001b[0;31m answer = self._gateway_client.send_command(\n\u001b[0m\u001b[1;32m 1713\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mREFLECTION_COMMAND_NAME\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1714\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mREFL_GET_UNKNOWN_SUB_COMMAND_NAME\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"\\n\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_id\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command, retry, binary)\u001b[0m\n\u001b[1;32m 1034\u001b[0m \u001b[0;32mif\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mbinary\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0;32mis\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1035\u001b[0m \"\"\"\n\u001b[0;32m-> 1036\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1037\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_get_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconnection\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_new_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_create_new_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_parameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython_parameters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m self.gateway_property, self)\n\u001b[0;32m--> 291\u001b[0;31m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect_to_java_server\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_thread_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36mconnect_to_java_server\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 436\u001b[0m self.socket = self.ssl_context.wrap_socket(\n\u001b[1;32m 437\u001b[0m self.socket, server_hostname=self.java_address)\n\u001b[0;32m--> 438\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_address\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_port\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 439\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmakefile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_connected\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 111] Connection refused"]}],"source":["# Install necessary packages\n","!pip install pyarrow\n","!pip install pyspark\n","\n","# Import necessary libraries\n","import pyarrow\n","from pyspark.sql import SparkSession\n","\n","# Create a SparkSession\n","spark = SparkSession.builder.appName(\"YourAppName\").getOrCreate()\n","\n","# Enable Arrow optimization\n","# spark.conf.set(\"spark.sql.execution.arrow.pyspark.enabled\", \"true\")\n","\n","# file_paths = ['/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json', '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json']\n","\n","df = spark.read.json('/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json')\n","\n","df"]},{"cell_type":"code","source":["# write this as a csv\n","\n","# Define the output path for the CSV file\n","output_path = '/content/drive/MyDrive/CSV/yelp_academic_dataset_user.csv'\n","\n","# Write the DataFrame to a CSV file\n","df.write.csv(output_path, header=True)\n"],"metadata":{"id":"7131CBiPVL9C"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["df.show(10)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PR2Lx3cuRRcs","executionInfo":{"status":"ok","timestamp":1726195538451,"user_tz":240,"elapsed":2482,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"}},"outputId":"a87bae05-ea9a-4861-c0a8-7a891611eae4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["+-------------+---------------+---------------+----------------+--------------+---------------+---------------+---------------+-----------------+----------------+------------------+-----------------+-----+--------------------+----+--------------------+-----+--------+------------+------+--------------------+-------------------+\n","|average_stars|compliment_cool|compliment_cute|compliment_funny|compliment_hot|compliment_list|compliment_more|compliment_note|compliment_photos|compliment_plain|compliment_profile|compliment_writer| cool| elite|fans| friends|funny| name|review_count|useful| user_id| yelping_since|\n","+-------------+---------------+---------------+----------------+--------------+---------------+---------------+---------------+-----------------+----------------+------------------+-----------------+-----+--------------------+----+--------------------+-----+--------+------------+------+--------------------+-------------------+\n","| 3.91| 467| 56| 467| 250| 18| 65| 232| 180| 844| 55| 239| 5994| 2007| 267|NSCy54eWehBJyZdG2...| 1259| Walker| 585| 7217|qVc8ODYU5SZjKXVBg...|2007-01-25 16:47:26|\n","| 3.74| 3131| 157| 3131| 1145| 251| 264| 1847| 1946| 7054| 184| 1521|27281|2009,2010,2011,20...|3138|ueRPE0CX75ePGMqOF...|13066| Daniel| 4333| 43091|j14WgRoU_-2ZE1aw1...|2009-01-25 04:35:42|\n","| 3.32| 119| 17| 119| 89| 3| 13| 66| 18| 96| 10| 35| 1003|2009,2010,2011,20...| 52|LuO3Bn4f3rlhyHIaN...| 1010| Steph| 665| 2086|2WnXYQFK0hXEoTxPt...|2008-07-25 10:41:00|\n","| 4.27| 26| 6| 26| 24| 2| 4| 12| 9| 16| 1| 10| 299| 2009,2010,2011| 28|enx1vVPnfdNUdPho6...| 330| Gwen| 224| 512|SZDeASXq7o05mMNLs...|2005-11-29 04:38:33|\n","| 3.54| 0| 0| 0| 1| 0| 1| 1| 0| 1| 0| 0| 7| | 1|PBK4q9KEEBHhFvSXC...| 15| Karen| 79| 29|hA5lMy-EnncsH4JoR...|2007-01-05 19:40:59|\n","| 3.85| 2543| 361| 2543| 1713| 147| 163| 1212| 323| 5696| 191| 815|11211|2006,2007,2008,20...|1357|xBDpTUbai0DXrvxCe...| 9940| Jane| 1221| 14953|q_QQ5kBBwlCcbL1s4...|2005-03-14 20:26:35|\n","| 2.75| 0| 0| 0| 0| 0| 0| 0| 0| 1| 0| 0| 0| | 1|HDAQ74AEznP-YsMk1...| 1| Rob| 12| 6|cxuxXkcihfCbqt5By...|2009-02-24 03:09:06|\n","| 3.73| 12| 0| 12| 4| 0| 7| 8| 0| 6| 2| 5| 143| | 23|y2GyxJF5VQWohxgw_...| 102| Mike| 358| 399|E9kcWJdJUHuTKfQur...|2008-12-11 22:11:56|\n","| 4.04| 5| 3| 5| 2| 0| 0| 3| 1| 4| 0| 3| 46| | 7|tOQDlz36rI__SOsbL...| 40|Rachelle| 40| 109|lO1iq-f75hnPNZkTy...|2008-12-29 22:40:56|\n","| 3.4| 3| 0| 3| 0| 0| 0| 1| 0| 6| 0| 0| 23| | 4|gy5fWeSv3Gamuq9Ox...| 20| John| 109| 154|AUi8MPWJ0mLkMfwbu...|2010-01-07 18:32:04|\n","+-------------+---------------+---------------+----------------+--------------+---------------+---------------+---------------+-----------------+----------------+------------------+-----------------+-----+--------------------+----+--------------------+-----+--------+------------+------+--------------------+-------------------+\n","only showing top 10 rows\n","\n"]}]},{"cell_type":"code","source":["# file_paths = ['/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json', '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json']\n","\n","df2 = spark.read.json('/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json')\n","df2.show()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"yQ1lV_GaRV82","executionInfo":{"status":"ok","timestamp":1726195633013,"user_tz":240,"elapsed":69880,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"}},"outputId":"d6fbbd6f-77a9-4b2f-e758-aaafee6cafbd"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["+--------------------+----+-------------------+-----+--------------------+-----+--------------------+------+--------------------+\n","| business_id|cool| date|funny| review_id|stars| text|useful| user_id|\n","+--------------------+----+-------------------+-----+--------------------+-----+--------------------+------+--------------------+\n","|XQfwVwDr-v0ZS3_Cb...| 0|2018-07-07 22:09:11| 0|KU_O5udG6zpxOg-Vc...| 3.0|If you decide to ...| 0|mh_-eMZ6K5RLWhZyI...|\n","|7ATYjTIgM3jUlt4UM...| 1|2012-01-03 15:28:18| 0|BiTunyQ73aT9WBnpR...| 5.0|I've taken a lot ...| 1|OyoGAe7OKpv6SyGZT...|\n","|YjUWPpI6HXG530lwP...| 0|2014-02-05 20:30:30| 0|saUsX_uimxRlCVr67...| 3.0|Family diner. Had...| 0|8g_iMtfSiwikVnbP2...|\n","|kxX2SOes4o-D3ZQBk...| 1|2015-01-04 00:01:03| 0|AqPFMleE6RsU23_au...| 5.0|Wow! Yummy, diff...| 1|_7bHUi9Uuf5__HHc_...|\n","|e4Vwtrqf-wpJfwesg...| 1|2017-01-14 20:54:15| 0|Sx8TMOWLNuJBWer-0...| 4.0|Cute interior and...| 1|bcjbaE6dDog4jkNY9...|\n","|04UD14gamNjLY0IDY...| 1|2015-09-23 23:10:31| 2|JrIxlS1TzJ-iCu79u...| 1.0|I am a long term ...| 1|eUta8W_HdHMXPzLBB...|\n","|gmjsEdUsKpj9Xxu6p...| 0|2015-01-03 23:21:18| 2|6AxgBCNX_PNTOxmbR...| 5.0|Loved this tour! ...| 0|r3zeYsv1XFBRA4dJp...|\n","|LHSTtnW3YHCeUkRDG...| 0|2015-08-07 02:29:16| 0|_ZeMknuYdlQcUqng_...| 5.0|Amazingly amazing...| 2|yfFzsLmaWF2d4Sr0U...|\n","|B5XSoSG3SfvQGtKEG...| 0|2016-03-30 22:46:33| 1|ZKvDG2sBvHVdF5oBN...| 3.0|This easter inste...| 1|wSTuiTk-sKNdcFypr...|\n","|gebiRewfieSdtt17P...| 0|2016-07-25 07:31:06| 0|pUycOfUwM8vqX7KjR...| 3.0|Had a party of 6 ...| 0|59MxRhNVhU9MYndMk...|\n","|uMvVYRgGNXf5boolA...| 0|2015-06-21 14:48:06| 0|rGQRf8UafX7OTlMNN...| 5.0|My experience wit...| 2|1WHRWwQmZOZDAhp2Q...|\n","|EQ-TZ2eeD_E0BHuvo...| 0|2015-08-19 14:31:45| 0|l3Wk_mvAog6XANIuG...| 4.0|Locals recommende...| 0|ZbqSHbgCjzVAqaa7N...|\n","|lj-E32x9_FA7GmUrB...| 0|2014-06-27 22:44:01| 0|XW_LfMv0fV21l9c6x...| 4.0|Love going here f...| 0|9OAtfnWag-ajVxRbU...|\n","|RZtGWDLCAtuipwaZ-...| 0|2009-10-14 19:57:14| 0|8JFGBuHMoiNDyfcxu...| 4.0|Good food--loved ...| 0|smOvOajNG0lS4Pq7d...|\n","|otQS34_MymijPTdNB...| 0|2011-10-27 17:12:05| 2|UBp0zWyH60Hmw6Fsa...| 4.0|The bun makes the...| 0|4Uh27DgGzsp6PqrH9...|\n","|BVndHaLihEYbr76Z0...| 0|2014-10-11 16:22:06| 0|OAhBYw8IQ6wlfw1ow...| 5.0|Great place for b...| 0|1C2lxzUo1Hyye4RFI...|\n","|YtSqYv1Q_pOltsVPS...| 0|2013-06-24 11:21:25| 0|oyaMhzBSwfGgemSGu...| 5.0|Tremendous servic...| 0|Dd1jQj7S-BFGqRbAp...|\n","|rBdG_23USc7DletfZ...| 0|2014-08-10 19:41:43| 0|LnGZB0fjfgeVDVz5I...| 4.0|The hubby and I h...| 1|j2wlzrntrbKwyOcOi...|\n","|CLEWowfkj-wKYJlQD...| 1|2016-03-07 00:02:18| 0|u2vzZaOqJ2feRshaa...| 5.0|I go to blow bar ...| 2|NDZvyYHTUWWu-kqgQ...|\n","|eFvzHawVJofxSnD7T...| 0|2014-11-12 15:30:27| 0|Xs8Z8lmKkosqW5mw_...| 5.0|My absolute favor...| 0|IQsF3Rc6IgCzjVV9D...|\n","+--------------------+----+-------------------+-----+--------------------+-----+--------------------+------+--------------------+\n","only showing top 20 rows\n","\n"]}]},{"cell_type":"code","source":["# Import necessary libraries\n","import pyarrow\n","from pyspark.sql import SparkSession\n","\n","# Create a SparkSession\n","# spark = SparkSession.builder.appName(\"YourAppName\").getOrCreate()\n","\n","# Enable Arrow optimization\n","# spark.conf.set(\"spark.sql.execution.arrow.pyspark.enabled\", \"true\")\n","\n","# file_paths = ['/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json', '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json']\n","\n","df = spark.read.json('/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json')\n","\n","\n","pandas_df = df.toPandas()\n","\n","pandas_df.to_excel('/content/drive/MyDrive/CSV/yelp_academic_dataset_user.xlsx', index=False)\n","\n","# main code\n","\n","# file_paths = ['/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json',\n","# '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json']\n","\n","# reading an coverting each json file to csv\n","\n","# for filepath in file_paths:\n","\n","# csv_root_path = '/content/drive/MyDrive/CSV/'\n","\n","# rt_df = read_json(filepath)\n","\n","# csvfilename = filepath.split('/')[-1].split('.')[0]\n","\n","# print(filepath, ' -----> ', csvfilename, '.xlsx')\n","\n","# write_As_xl1(rt_df, csv_root_path, csvfilename)\n","\n","# Stop the SparkSession when done to release resources\n","spark.stop()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":373},"id":"shIBbE00S4K_","executionInfo":{"status":"error","timestamp":1726195989619,"user_tz":240,"elapsed":181,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"}},"outputId":"9956378d-43b5-44af-d496-2dc099d00040"},"execution_count":null,"outputs":[{"output_type":"error","ename":"ConnectionRefusedError","evalue":"[Errno 111] Connection refused","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;31m# file_paths = ['/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json', '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json']\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pyspark/sql/session.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1704\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1705\u001b[0m \"\"\"\n\u001b[0;32m-> 1706\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDataFrameReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1708\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pyspark/sql/readwriter.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, spark)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"SparkSession\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jsparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_spark\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1319\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEND_COMMAND_PART\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1320\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1321\u001b[0;31m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1322\u001b[0m return_value = get_return_value(\n\u001b[1;32m 1323\u001b[0m answer, self.gateway_client, self.target_id, self.name)\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command, retry, binary)\u001b[0m\n\u001b[1;32m 1034\u001b[0m \u001b[0;32mif\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mbinary\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0;32mis\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1035\u001b[0m \"\"\"\n\u001b[0;32m-> 1036\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1037\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_get_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconnection\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_new_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_create_new_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_parameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython_parameters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m self.gateway_property, self)\n\u001b[0;32m--> 291\u001b[0;31m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect_to_java_server\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_thread_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36mconnect_to_java_server\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 436\u001b[0m self.socket = self.ssl_context.wrap_socket(\n\u001b[1;32m 437\u001b[0m self.socket, server_hostname=self.java_address)\n\u001b[0;32m--> 438\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_address\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_port\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 439\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmakefile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_connected\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 111] Connection refused"]}]},{"cell_type":"code","execution_count":null,"metadata":{"id":"7A7-ucndxFu7","colab":{"base_uri":"https://localhost:8080/","height":356},"executionInfo":{"status":"error","timestamp":1726195892914,"user_tz":240,"elapsed":186,"user":{"displayName":"BZRecProject","userId":"17728562531777262703"}},"outputId":"78806a49-057e-46c8-ad9b-87f66d3664d8"},"outputs":[{"output_type":"error","ename":"ConnectionRefusedError","evalue":"[Errno 111] Connection refused","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpandas_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoPandas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpandas_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_excel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/content/drive/MyDrive/CSV/yelp_academic_dataset_user.xlsx'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# main code\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pyspark/sql/pandas/conversion.py\u001b[0m in \u001b[0;36mtoPandas\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 90\u001b[0;31m \u001b[0mjconf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jconf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mjconf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marrowPySparkEnabled\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pyspark/sql/session.py\u001b[0m in \u001b[0;36m_jconf\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 622\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_jconf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"JavaObject\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 623\u001b[0m \u001b[0;34m\"\"\"Accessor for the JVM SQL-specific configurations\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 624\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jsparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msessionState\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 625\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 626\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mnewSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"SparkSession\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1319\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEND_COMMAND_PART\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1320\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1321\u001b[0;31m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1322\u001b[0m return_value = get_return_value(\n\u001b[1;32m 1323\u001b[0m answer, self.gateway_client, self.target_id, self.name)\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command, retry, binary)\u001b[0m\n\u001b[1;32m 1034\u001b[0m \u001b[0;32mif\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mbinary\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0;32mis\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1035\u001b[0m \"\"\"\n\u001b[0;32m-> 1036\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1037\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_get_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconnection\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_new_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36m_create_new_connection\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_parameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython_parameters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m self.gateway_property, self)\n\u001b[0;32m--> 291\u001b[0;31m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect_to_java_server\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_thread_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/py4j/clientserver.py\u001b[0m in \u001b[0;36mconnect_to_java_server\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 436\u001b[0m self.socket = self.ssl_context.wrap_socket(\n\u001b[1;32m 437\u001b[0m self.socket, server_hostname=self.java_address)\n\u001b[0;32m--> 438\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_address\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_port\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 439\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmakefile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_connected\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 111] Connection refused"]}],"source":["pandas_df = df.toPandas()\n","\n","pandas_df.to_excel('/content/drive/MyDrive/CSV/yelp_academic_dataset_user.xlsx', index=False)\n","\n","# main code\n","\n","# file_paths = ['/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_user.json',\n","# '/content/drive/MyDrive/yelp_dataset___/yelp_academic_dataset_review.json']\n","\n","# reading an coverting each json file to csv\n","\n","# for filepath in file_paths:\n","\n","# csv_root_path = '/content/drive/MyDrive/CSV/'\n","\n","# rt_df = read_json(filepath)\n","\n","# csvfilename = filepath.split('/')[-1].split('.')[0]\n","\n","# print(filepath, ' -----> ', csvfilename, '.xlsx')\n","\n","# write_As_xl1(rt_df, csv_root_path, csvfilename)\n","\n","# # Stop the SparkSession when done to release resources\n","# spark.stop()"]}],"metadata":{"colab":{"provenance":[],"gpuType":"T4","mount_file_id":"1LtxiSfF9eW_LSRlSL0F7jdFIGlIOupvt","authorship_tag":"ABX9TyNmr6c5rIOo2N6dgTWc1nqU"},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"accelerator":"GPU"},"nbformat":4,"nbformat_minor":0} \ No newline at end of file