diff --git a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb
index 03e1fc603..f058f9043 100644
--- a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb
+++ b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb
@@ -19,12 +19,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 135,
"metadata": {
"collapsed": false
},
"outputs": [],
- "source": []
+ "source": [
+ "import pandas as pd"
+ ]
},
{
"cell_type": "markdown",
@@ -42,12 +44,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 136,
"metadata": {
"collapsed": false
},
"outputs": [],
- "source": []
+ "source": [
+ "chipo = pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv', sep='\\t')"
+ ]
},
{
"cell_type": "markdown",
@@ -58,13 +62,159 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 137,
"metadata": {
"collapsed": false,
"scrolled": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " item_name | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Fresh Tomato Salsa | \n",
+ " NaN | \n",
+ " $2.39 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Izze | \n",
+ " [Clementine] | \n",
+ " $3.39 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Nantucket Nectar | \n",
+ " [Apple] | \n",
+ " $3.39 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Tomatillo-Green Chili Salsa | \n",
+ " NaN | \n",
+ " $2.39 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Chicken Bowl | \n",
+ " [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | \n",
+ " $16.98 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " Chicken Bowl | \n",
+ " [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... | \n",
+ " $10.98 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " Side of Chips | \n",
+ " NaN | \n",
+ " $1.69 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Tomatillo Red Chili Salsa, [Fajita Vegetables... | \n",
+ " $11.75 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " Steak Soft Tacos | \n",
+ " [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... | \n",
+ " $9.25 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... | \n",
+ " $9.25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity item_name \\\n",
+ "0 1 1 Chips and Fresh Tomato Salsa \n",
+ "1 1 1 Izze \n",
+ "2 1 1 Nantucket Nectar \n",
+ "3 1 1 Chips and Tomatillo-Green Chili Salsa \n",
+ "4 2 2 Chicken Bowl \n",
+ "5 3 1 Chicken Bowl \n",
+ "6 3 1 Side of Chips \n",
+ "7 4 1 Steak Burrito \n",
+ "8 4 1 Steak Soft Tacos \n",
+ "9 5 1 Steak Burrito \n",
+ "\n",
+ " choice_description item_price \n",
+ "0 NaN $2.39 \n",
+ "1 [Clementine] $3.39 \n",
+ "2 [Apple] $3.39 \n",
+ "3 NaN $2.39 \n",
+ "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n",
+ "5 [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... $10.98 \n",
+ "6 NaN $1.69 \n",
+ "7 [Tomatillo Red Chili Salsa, [Fajita Vegetables... $11.75 \n",
+ "8 [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... $9.25 \n",
+ "9 [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... $9.25 "
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.head(10)"
+ ]
},
{
"cell_type": "markdown",
@@ -75,26 +225,89 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 153,
"metadata": {
"collapsed": false
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4622"
+ ]
+ },
+ "execution_count": 153,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Solution 1\n",
- "\n"
+ "len(chipo)\n",
+ "chipo.shape[0]\n"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 154,
"metadata": {
"collapsed": false
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 154,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Solution 2\n",
- "\n"
+ "chipo.info()  # must be called; a bare `chipo.info` only echoes the bound-method repr\n",
+ "chipo.count()\n"
]
},
{
@@ -106,12 +319,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 155,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6"
+ ]
+ },
+ "execution_count": 155,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(chipo.columns)\n",
+ "chipo.shape[1]"
+ ]
},
{
"cell_type": "markdown",
@@ -122,12 +349,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index(['order_id', 'quantity', 'item_name', 'choice_description',\n",
+ " 'item_price'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(chipo.columns)"
+ ]
},
{
"cell_type": "markdown",
@@ -138,12 +377,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=4622, step=1)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.index"
+ ]
},
{
"cell_type": "markdown",
@@ -154,12 +406,107 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 123,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Most-ordered item: Chicken Bowl \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "chipo_order = chipo.groupby('item_name')[['quantity']].sum().sort_values(by='quantity', ascending=False)  # sum only quantity; the other columns are ids/text\n",
+ "chipo_most_order = chipo_order.index[0]\n",
+ "\n",
+ "print(\"Most-ordered item:\", chipo_most_order,'\\n')\n",
+ "# print(chipo_order)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 156,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ " chipo_revenue | \n",
+ "
\n",
+ " \n",
+ " item_name | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Chicken Bowl | \n",
+ " 713926 | \n",
+ " 761 | \n",
+ " [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | \n",
+ " 7342.73 | \n",
+ " 8044.63 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity \\\n",
+ "item_name \n",
+ "Chicken Bowl 713926 761 \n",
+ "\n",
+ " choice_description item_price \\\n",
+ "item_name \n",
+ "Chicken Bowl [Tomatillo-Red Chili Salsa (Hot), [Black Beans... 7342.73 \n",
+ "\n",
+ " chipo_revenue \n",
+ "item_name \n",
+ "Chicken Bowl 8044.63 "
+ ]
+ },
+ "execution_count": 156,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c = chipo.groupby('item_name')\n",
+ "c = c.sum()\n",
+ "c = c.sort_values(['quantity'], ascending=False)\n",
+ "c.head(1)"
+ ]
},
{
"cell_type": "markdown",
@@ -170,12 +517,110 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 126,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "761"
+ ]
+ },
+ "execution_count": 126,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo_most_order_time = chipo_order.quantity.iloc[0]\n",
+ "# Selecting a value from a Series:\n",
+ "# 1. chipo_most_order_time = chipo_order['Chicken Bowl']\n",
+ "# 2. chipo_most_order_time = chipo_order.iloc[0]\n",
+ "# 3. chipo_most_order_time = chipo_order.loc['Chicken Bowl']\n",
+ "chipo_most_order_time\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 157,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ " chipo_revenue | \n",
+ "
\n",
+ " \n",
+ " item_name | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Chicken Bowl | \n",
+ " 713926 | \n",
+ " 761 | \n",
+ " [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | \n",
+ " 7342.73 | \n",
+ " 8044.63 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity \\\n",
+ "item_name \n",
+ "Chicken Bowl 713926 761 \n",
+ "\n",
+ " choice_description item_price \\\n",
+ "item_name \n",
+ "Chicken Bowl [Tomatillo-Red Chili Salsa (Hot), [Black Beans... 7342.73 \n",
+ "\n",
+ " chipo_revenue \n",
+ "item_name \n",
+ "Chicken Bowl 8044.63 "
+ ]
+ },
+ "execution_count": 157,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c = chipo.groupby('item_name')\n",
+ "c = c.sum()\n",
+ "c = c.sort_values(['quantity'], ascending=False)\n",
+ "c.head(1)"
+ ]
},
{
"cell_type": "markdown",
@@ -186,12 +631,106 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 128,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Most-ordered item in choice description: [Diet Coke] \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "chipo_order_desc = chipo.groupby('choice_description')[['quantity']].sum().sort_values(by='quantity', ascending=False)  # sum only quantity; the other columns are ids/text\n",
+ "chipo_most_order_desc = chipo_order_desc.index[0]\n",
+ "print('Most-ordered item in choice description: ', chipo_most_order_desc, '\\n')\n",
+ "# print(chipo_order_desc)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 158,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " item_name | \n",
+ " item_price | \n",
+ " chipo_revenue | \n",
+ "
\n",
+ " \n",
+ " choice_description | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " [Diet Coke] | \n",
+ " 123455 | \n",
+ " 159 | \n",
+ " Canned SodaCanned SodaCanned Soda6 Pack Soft D... | \n",
+ " 326.71 | \n",
+ " 408.41 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity \\\n",
+ "choice_description \n",
+ "[Diet Coke] 123455 159 \n",
+ "\n",
+ " item_name \\\n",
+ "choice_description \n",
+ "[Diet Coke] Canned SodaCanned SodaCanned Soda6 Pack Soft D... \n",
+ "\n",
+ " item_price chipo_revenue \n",
+ "choice_description \n",
+ "[Diet Coke] 326.71 408.41 "
+ ]
+ },
+ "execution_count": 158,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c = chipo.groupby('choice_description').sum()\n",
+ "c = c.sort_values(['quantity'], ascending=False)\n",
+ "c.head(1)\n",
+ "# Diet Coke 159"
+ ]
},
{
"cell_type": "markdown",
@@ -202,12 +741,46 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 133,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4972"
+ ]
+ },
+ "execution_count": 133,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.quantity.sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 159,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4972"
+ ]
+ },
+ "execution_count": 159,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "total_items_orders = chipo.quantity.sum()\n",
+ "total_items_orders"
+ ]
},
{
"cell_type": "markdown",
@@ -225,12 +798,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 56,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "object\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(chipo.item_price.dtype)"
+ ]
},
{
"cell_type": "markdown",
@@ -241,12 +824,222 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 2.39\n",
+ "1 3.39\n",
+ "2 3.39\n",
+ "3 2.39\n",
+ "4 16.98\n",
+ " ... \n",
+ "4617 11.75\n",
+ "4618 11.75\n",
+ "4619 11.25\n",
+ "4620 8.75\n",
+ "4621 8.75\n",
+ "Name: item_price, Length: 4622, dtype: float64"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo_price_edit = chipo.item_price.str[1:]\n",
+ "chipo_price_edit.astype('float')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
"metadata": {
"collapsed": true
},
"outputs": [],
- "source": []
+ "source": [
+ "chipo['item_price'] = chipo['item_price'].astype(str).str.replace('$', '', regex=False).astype(float)  # re-runnable: also accepts already-converted floats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dollarizer = lambda x: float(str(x).strip().lstrip('$'))  # x[1:-1] raised on floats and dropped the last character\n",
+ "chipo.item_price = chipo.item_price.apply(dollarizer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " item_name | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Fresh Tomato Salsa | \n",
+ " NaN | \n",
+ " 2.39 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Izze | \n",
+ " [Clementine] | \n",
+ " 3.39 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Nantucket Nectar | \n",
+ " [Apple] | \n",
+ " 3.39 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Tomatillo-Green Chili Salsa | \n",
+ " NaN | \n",
+ " 2.39 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Chicken Bowl | \n",
+ " [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | \n",
+ " 16.98 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4617 | \n",
+ " 1833 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Fresh Tomato Salsa, [Rice, Black Beans, Sour ... | \n",
+ " 11.75 | \n",
+ "
\n",
+ " \n",
+ " 4618 | \n",
+ " 1833 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese... | \n",
+ " 11.75 | \n",
+ "
\n",
+ " \n",
+ " 4619 | \n",
+ " 1834 | \n",
+ " 1 | \n",
+ " Chicken Salad Bowl | \n",
+ " [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... | \n",
+ " 11.25 | \n",
+ "
\n",
+ " \n",
+ " 4620 | \n",
+ " 1834 | \n",
+ " 1 | \n",
+ " Chicken Salad Bowl | \n",
+ " [Fresh Tomato Salsa, [Fajita Vegetables, Lettu... | \n",
+ " 8.75 | \n",
+ "
\n",
+ " \n",
+ " 4621 | \n",
+ " 1834 | \n",
+ " 1 | \n",
+ " Chicken Salad Bowl | \n",
+ " [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... | \n",
+ " 8.75 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4622 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity item_name \\\n",
+ "0 1 1 Chips and Fresh Tomato Salsa \n",
+ "1 1 1 Izze \n",
+ "2 1 1 Nantucket Nectar \n",
+ "3 1 1 Chips and Tomatillo-Green Chili Salsa \n",
+ "4 2 2 Chicken Bowl \n",
+ "... ... ... ... \n",
+ "4617 1833 1 Steak Burrito \n",
+ "4618 1833 1 Steak Burrito \n",
+ "4619 1834 1 Chicken Salad Bowl \n",
+ "4620 1834 1 Chicken Salad Bowl \n",
+ "4621 1834 1 Chicken Salad Bowl \n",
+ "\n",
+ " choice_description item_price \n",
+ "0 NaN 2.39 \n",
+ "1 [Clementine] 3.39 \n",
+ "2 [Apple] 3.39 \n",
+ "3 NaN 2.39 \n",
+ "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... 16.98 \n",
+ "... ... ... \n",
+ "4617 [Fresh Tomato Salsa, [Rice, Black Beans, Sour ... 11.75 \n",
+ "4618 [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese... 11.75 \n",
+ "4619 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... 11.25 \n",
+ "4620 [Fresh Tomato Salsa, [Fajita Vegetables, Lettu... 8.75 \n",
+ "4621 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... 8.75 \n",
+ "\n",
+ "[4622 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo"
+ ]
},
{
"cell_type": "markdown",
@@ -257,12 +1050,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 81,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(chipo.item_price.dtype)"
+ ]
},
{
"cell_type": "markdown",
@@ -273,12 +1076,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 143,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "39237.02"
+ ]
+ },
+ "execution_count": 143,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo_revenue = (chipo.quantity * chipo.item_price).sum()\n",
+ "chipo_revenue"
+ ]
},
{
"cell_type": "markdown",
@@ -289,12 +1106,47 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 93,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1834"
+ ]
+ },
+ "execution_count": 93,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo_order_count = len(chipo.groupby('order_id'))\n",
+ "chipo_order_count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1834"
+ ]
+ },
+ "execution_count": 161,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "orders = chipo.order_id.nunique()  # number of distinct order ids\n",
+ "orders"
+ ]
},
{
"cell_type": "markdown",
@@ -305,26 +1157,118 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 145,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21.39423118865867"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Solution 1\n",
+ "chipo_mean = chipo_revenue/chipo_order_count\n",
+ "chipo_mean"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 152,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21.39423118865867"
+ ]
+ },
+ "execution_count": 152,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Solution 2\n",
+ "chipo['chipo_revenue'] = chipo.quantity * chipo.item_price\n",
+ "chipo_mean = chipo.groupby(['order_id']).chipo_revenue.sum().mean()\n",
+ "chipo_mean"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
"metadata": {
"collapsed": false
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "order_id\n",
+ "1 2.890000\n",
+ "2 33.960000\n",
+ "3 6.335000\n",
+ "4 10.500000\n",
+ "5 6.850000\n",
+ " ... \n",
+ "1830 11.500000\n",
+ "1831 4.300000\n",
+ "1832 6.600000\n",
+ "1833 11.750000\n",
+ "1834 9.583333\n",
+ "Name: chipo_revenue, Length: 1834, dtype: float64\n"
+ ]
+ }
+ ],
"source": [
- "# Solution 1\n",
- "\n"
+ "# Solution 1 (misinterpretation): the average revenue within each order, i.e. revenue distribution across individual orders\n",
+ "chipo_revenue_avg = chipo.groupby(['order_id']).chipo_revenue.mean()\n",
+ "print(chipo_revenue_avg)"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 116,
"metadata": {
"collapsed": false
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "order_id\n",
+ "1 2.890000\n",
+ "2 16.980000\n",
+ "3 6.335000\n",
+ "4 10.500000\n",
+ "5 6.850000\n",
+ " ... \n",
+ "1830 11.500000\n",
+ "1831 4.300000\n",
+ "1832 6.600000\n",
+ "1833 11.750000\n",
+ "1834 9.583333\n",
+ "Length: 1834, dtype: float64"
+ ]
+ },
+ "execution_count": 116,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Solution 2\n",
- "\n"
+ "# Solution 2 (misinterpretation)\n",
+ "left = chipo.groupby(['order_id']).quantity.sum()\n",
+ "right = chipo.groupby(['order_id']).chipo_revenue.sum()\n",
+ "avg_avenue_per_order = right/left\n",
+ "avg_avenue_per_order"
]
},
{
@@ -336,32 +1280,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 119,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "50\n"
+ ]
+ }
+ ],
+ "source": [
+ "item = chipo.groupby(['item_name']).item_name.count()\n",
+ "print(len(item))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 162,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "50"
+ ]
+ },
+ "execution_count": 162,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.item_name.nunique()"
+ ]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
- "display_name": "Python [default]",
+ "display_name": "Python 3",
"language": "python",
- "name": "python2"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
- "version": 2
+ "version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
}
},
"nbformat": 4,
diff --git a/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb
index f32d9ce9f..bf383121e 100644
--- a/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb
+++ b/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb
@@ -19,12 +19,15 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
- "source": []
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
},
{
"cell_type": "markdown",
@@ -42,12 +45,156 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " user_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 85711 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 53 | \n",
+ " F | \n",
+ " other | \n",
+ " 94043 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 23 | \n",
+ " M | \n",
+ " writer | \n",
+ " 32067 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 43537 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 33 | \n",
+ " F | \n",
+ " other | \n",
+ " 15213 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 939 | \n",
+ " 26 | \n",
+ " F | \n",
+ " student | \n",
+ " 33319 | \n",
+ "
\n",
+ " \n",
+ " 940 | \n",
+ " 32 | \n",
+ " M | \n",
+ " administrator | \n",
+ " 02215 | \n",
+ "
\n",
+ " \n",
+ " 941 | \n",
+ " 20 | \n",
+ " M | \n",
+ " student | \n",
+ " 97229 | \n",
+ "
\n",
+ " \n",
+ " 942 | \n",
+ " 48 | \n",
+ " F | \n",
+ " librarian | \n",
+ " 78209 | \n",
+ "
\n",
+ " \n",
+ " 943 | \n",
+ " 22 | \n",
+ " M | \n",
+ " student | \n",
+ " 77841 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
943 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age gender occupation zip_code\n",
+ "user_id \n",
+ "1 24 M technician 85711\n",
+ "2 53 F other 94043\n",
+ "3 23 M writer 32067\n",
+ "4 24 M technician 43537\n",
+ "5 33 F other 15213\n",
+ "... ... ... ... ...\n",
+ "939 26 F student 33319\n",
+ "940 32 M administrator 02215\n",
+ "941 20 M student 97229\n",
+ "942 48 F librarian 78209\n",
+ "943 22 M student 77841\n",
+ "\n",
+ "[943 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user'\n",
+ "users = pd.read_csv(url, sep='|', index_col='user_id')\n",
+ "users"
+ ]
},
{
"cell_type": "markdown",
@@ -58,13 +205,264 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {
"collapsed": false,
"scrolled": true
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " user_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 85711 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 53 | \n",
+ " F | \n",
+ " other | \n",
+ " 94043 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 23 | \n",
+ " M | \n",
+ " writer | \n",
+ " 32067 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 43537 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 33 | \n",
+ " F | \n",
+ " other | \n",
+ " 15213 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 42 | \n",
+ " M | \n",
+ " executive | \n",
+ " 98101 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 57 | \n",
+ " M | \n",
+ " administrator | \n",
+ " 91344 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 36 | \n",
+ " M | \n",
+ " administrator | \n",
+ " 05201 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 29 | \n",
+ " M | \n",
+ " student | \n",
+ " 01002 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 53 | \n",
+ " M | \n",
+ " lawyer | \n",
+ " 90703 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 39 | \n",
+ " F | \n",
+ " other | \n",
+ " 30329 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 28 | \n",
+ " F | \n",
+ " other | \n",
+ " 06405 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 47 | \n",
+ " M | \n",
+ " educator | \n",
+ " 29206 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 45 | \n",
+ " M | \n",
+ " scientist | \n",
+ " 55106 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 49 | \n",
+ " F | \n",
+ " educator | \n",
+ " 97301 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 21 | \n",
+ " M | \n",
+ " entertainment | \n",
+ " 10309 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 30 | \n",
+ " M | \n",
+ " programmer | \n",
+ " 06355 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 35 | \n",
+ " F | \n",
+ " other | \n",
+ " 37212 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 40 | \n",
+ " M | \n",
+ " librarian | \n",
+ " 02138 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 42 | \n",
+ " F | \n",
+ " homemaker | \n",
+ " 95660 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 26 | \n",
+ " M | \n",
+ " writer | \n",
+ " 30068 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 25 | \n",
+ " M | \n",
+ " writer | \n",
+ " 40206 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 30 | \n",
+ " F | \n",
+ " artist | \n",
+ " 48197 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 21 | \n",
+ " F | \n",
+ " artist | \n",
+ " 94533 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 39 | \n",
+ " M | \n",
+ " engineer | \n",
+ " 55107 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age gender occupation zip_code\n",
+ "user_id \n",
+ "1 24 M technician 85711\n",
+ "2 53 F other 94043\n",
+ "3 23 M writer 32067\n",
+ "4 24 M technician 43537\n",
+ "5 33 F other 15213\n",
+ "6 42 M executive 98101\n",
+ "7 57 M administrator 91344\n",
+ "8 36 M administrator 05201\n",
+ "9 29 M student 01002\n",
+ "10 53 M lawyer 90703\n",
+ "11 39 F other 30329\n",
+ "12 28 F other 06405\n",
+ "13 47 M educator 29206\n",
+ "14 45 M scientist 55106\n",
+ "15 49 F educator 97301\n",
+ "16 21 M entertainment 10309\n",
+ "17 30 M programmer 06355\n",
+ "18 35 F other 37212\n",
+ "19 40 M librarian 02138\n",
+ "20 42 F homemaker 95660\n",
+ "21 26 M writer 30068\n",
+ "22 25 M writer 40206\n",
+ "23 30 F artist 48197\n",
+ "24 21 F artist 94533\n",
+ "25 39 M engineer 55107"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.head(25)"
+ ]
},
{
"cell_type": "markdown",
@@ -75,13 +473,146 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {
"collapsed": false,
"scrolled": true
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " user_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 934 | \n",
+ " 61 | \n",
+ " M | \n",
+ " engineer | \n",
+ " 22902 | \n",
+ "
\n",
+ " \n",
+ " 935 | \n",
+ " 42 | \n",
+ " M | \n",
+ " doctor | \n",
+ " 66221 | \n",
+ "
\n",
+ " \n",
+ " 936 | \n",
+ " 24 | \n",
+ " M | \n",
+ " other | \n",
+ " 32789 | \n",
+ "
\n",
+ " \n",
+ " 937 | \n",
+ " 48 | \n",
+ " M | \n",
+ " educator | \n",
+ " 98072 | \n",
+ "
\n",
+ " \n",
+ " 938 | \n",
+ " 38 | \n",
+ " F | \n",
+ " technician | \n",
+ " 55038 | \n",
+ "
\n",
+ " \n",
+ " 939 | \n",
+ " 26 | \n",
+ " F | \n",
+ " student | \n",
+ " 33319 | \n",
+ "
\n",
+ " \n",
+ " 940 | \n",
+ " 32 | \n",
+ " M | \n",
+ " administrator | \n",
+ " 02215 | \n",
+ "
\n",
+ " \n",
+ " 941 | \n",
+ " 20 | \n",
+ " M | \n",
+ " student | \n",
+ " 97229 | \n",
+ "
\n",
+ " \n",
+ " 942 | \n",
+ " 48 | \n",
+ " F | \n",
+ " librarian | \n",
+ " 78209 | \n",
+ "
\n",
+ " \n",
+ " 943 | \n",
+ " 22 | \n",
+ " M | \n",
+ " student | \n",
+ " 77841 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age gender occupation zip_code\n",
+ "user_id \n",
+ "934 61 M engineer 22902\n",
+ "935 42 M doctor 66221\n",
+ "936 24 M other 32789\n",
+ "937 48 M educator 98072\n",
+ "938 38 F technician 55038\n",
+ "939 26 F student 33319\n",
+ "940 32 M administrator 02215\n",
+ "941 20 M student 97229\n",
+ "942 48 F librarian 78209\n",
+ "943 22 M student 77841"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users[-10:]\n",
+ "# users.iloc[-10:]\n",
+ "# users.tail(10)"
+ ]
},
{
"cell_type": "markdown",
@@ -92,12 +623,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "943"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.shape[0]\n",
+ "# len(users)"
+ ]
},
{
"cell_type": "markdown",
@@ -108,12 +653,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.shape[1]"
+ ]
},
{
"cell_type": "markdown",
@@ -124,12 +682,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['age', 'gender', 'occupation', 'zip_code'], dtype='object')"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.columns"
+ ]
},
{
"cell_type": "markdown",
@@ -140,12 +711,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,\n",
+ " ...\n",
+ " 934, 935, 936, 937, 938, 939, 940, 941, 942, 943],\n",
+ " dtype='int64', name='user_id', length=943)"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.index"
+ ]
},
{
"cell_type": "markdown",
@@ -156,12 +743,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "age int64\n",
+ "gender object\n",
+ "occupation object\n",
+ "zip_code object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.dtypes"
+ ]
},
{
"cell_type": "markdown",
@@ -172,12 +776,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "user_id\n",
+ "1 technician\n",
+ "2 other\n",
+ "3 writer\n",
+ "4 technician\n",
+ "5 other\n",
+ " ... \n",
+ "939 student\n",
+ "940 administrator\n",
+ "941 student\n",
+ "942 librarian\n",
+ "943 student\n",
+ "Name: occupation, Length: 943, dtype: object"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users['occupation']"
+ ]
},
{
"cell_type": "markdown",
@@ -188,12 +817,39 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 85,
"metadata": {
"collapsed": false
},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21"
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of distinct occupations in the dataset.\n",
+ "# nunique() counts the unique non-null values directly, so it returns the\n",
+ "# same number as value_counts().count() without first building a frequency\n",
+ "# table whose only use would be to have its rows counted.\n",
+ "\n",
+ "users.occupation.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# occupation_list"
+ ]
},
{
"cell_type": "markdown",
@@ -204,12 +860,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 57,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'student'"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.occupation.value_counts().idxmax()  # label with the highest count, computed from users so the cell runs on a fresh kernel"
+ ]
},
{
"cell_type": "markdown",
@@ -220,12 +889,92 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 61,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 943.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 34.051962 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 12.192740 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 7.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 25.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 31.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 43.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 73.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age\n",
+ "count 943.000000\n",
+ "mean 34.051962\n",
+ "std 12.192740\n",
+ "min 7.000000\n",
+ "25% 25.000000\n",
+ "50% 31.000000\n",
+ "75% 43.000000\n",
+ "max 73.000000"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.describe()"
+ ]
},
{
"cell_type": "markdown",
@@ -236,12 +985,143 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 78,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 943.000000 | \n",
+ " 943 | \n",
+ " 943 | \n",
+ " 943 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " 21 | \n",
+ " 795 | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " NaN | \n",
+ " M | \n",
+ " student | \n",
+ " 55414 | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " NaN | \n",
+ " 670 | \n",
+ " 196 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 34.051962 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 12.192740 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 7.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 25.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 31.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 43.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 73.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age gender occupation zip_code\n",
+ "count 943.000000 943 943 943\n",
+ "unique NaN 2 21 795\n",
+ "top NaN M student 55414\n",
+ "freq NaN 670 196 9\n",
+ "mean 34.051962 NaN NaN NaN\n",
+ "std 12.192740 NaN NaN NaN\n",
+ "min 7.000000 NaN NaN NaN\n",
+ "25% 25.000000 NaN NaN NaN\n",
+ "50% 31.000000 NaN NaN NaN\n",
+ "75% 43.000000 NaN NaN NaN\n",
+ "max 73.000000 NaN NaN NaN"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.describe(include='all')"
+ ]
},
{
"cell_type": "markdown",
@@ -252,12 +1132,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 65,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 943\n",
+ "unique 21\n",
+ "top student\n",
+ "freq 196\n",
+ "Name: occupation, dtype: object"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.occupation.describe()"
+ ]
},
{
"cell_type": "markdown",
@@ -268,12 +1165,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 71,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "34.05196182396607"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users['age'].mean()"
+ ]
},
{
"cell_type": "markdown",
@@ -284,32 +1194,55 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 87,
"metadata": {
"collapsed": false
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "age\n",
+ "7 1\n",
+ "66 1\n",
+ "11 1\n",
+ "10 1\n",
+ "73 1\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Ages that occur least often. tail() would always show the last five rows,\n",
+ "# whatever their counts are; filtering on the minimum count returns every\n",
+ "# age tied for least frequent, and nothing else.\n",
+ "age_counts = users.age.value_counts()\n",
+ "age_counts[age_counts == age_counts.min()]"
+ ]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
- "display_name": "Python [default]",
+ "display_name": "Python 3",
"language": "python",
- "name": "python2"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
- "version": 2
+ "version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
}
},
"nbformat": 4,
diff --git a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb
index ea507fa1d..0f219fab3 100644
--- a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb
+++ b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb
@@ -30,12 +30,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {
"collapsed": true
},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "ename": "FileNotFoundError",
+ "evalue": "[Errno 2] No such file or directory: '/Users/41uo/Desktop/products.tsv'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[14], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/Users/41uo/Desktop/products.tsv\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 4\u001b[0m food \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 
1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles 
\u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/Users/41uo/Desktop/products.tsv'"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "data_path = 'products.tsv'  # downloaded dataset, expected next to this notebook (no user-specific absolute path)\n",
+ "food = pd.read_csv(data_path, sep='\\t')  # the file is tab-separated, so the default comma separator would mis-parse it"
+ ]
},
{
"cell_type": "markdown",
@@ -169,21 +191,21 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
- "display_name": "Python [default]",
+ "display_name": "Python 3",
"language": "python",
- "name": "python2"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
- "version": 2
+ "version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
}
},
"nbformat": 4,