diff --git a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb index 03e1fc603..f058f9043 100644 --- a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb @@ -19,12 +19,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 135, "metadata": { "collapsed": false }, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -42,12 +44,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 136, "metadata": { "collapsed": false }, "outputs": [], - "source": [] + "source": [ + "chipo = pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv', sep='\\t')" + ] }, { "cell_type": "markdown", @@ -58,13 +62,159 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 137, "metadata": { "collapsed": false, "scrolled": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
order_idquantityitem_namechoice_descriptionitem_price
011Chips and Fresh Tomato SalsaNaN$2.39
111Izze[Clementine]$3.39
211Nantucket Nectar[Apple]$3.39
311Chips and Tomatillo-Green Chili SalsaNaN$2.39
422Chicken Bowl[Tomatillo-Red Chili Salsa (Hot), [Black Beans...$16.98
531Chicken Bowl[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...$10.98
631Side of ChipsNaN$1.69
741Steak Burrito[Tomatillo Red Chili Salsa, [Fajita Vegetables...$11.75
841Steak Soft Tacos[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...$9.25
951Steak Burrito[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...$9.25
\n", + "
" + ], + "text/plain": [ + " order_id quantity item_name \\\n", + "0 1 1 Chips and Fresh Tomato Salsa \n", + "1 1 1 Izze \n", + "2 1 1 Nantucket Nectar \n", + "3 1 1 Chips and Tomatillo-Green Chili Salsa \n", + "4 2 2 Chicken Bowl \n", + "5 3 1 Chicken Bowl \n", + "6 3 1 Side of Chips \n", + "7 4 1 Steak Burrito \n", + "8 4 1 Steak Soft Tacos \n", + "9 5 1 Steak Burrito \n", + "\n", + " choice_description item_price \n", + "0 NaN $2.39 \n", + "1 [Clementine] $3.39 \n", + "2 [Apple] $3.39 \n", + "3 NaN $2.39 \n", + "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n", + "5 [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... $10.98 \n", + "6 NaN $1.69 \n", + "7 [Tomatillo Red Chili Salsa, [Fajita Vegetables... $11.75 \n", + "8 [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... $9.25 \n", + "9 [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... $9.25 " + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo.head(10)" + ] }, { "cell_type": "markdown", @@ -75,26 +225,89 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 153, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "4622" + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Solution 1\n", - "\n" + "len(chipo)\n", + "chipo.shape[0]\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 154, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Solution 2\n", - "\n" + "chipo.count()\n", + "chipo.info\n" ] }, { @@ -106,12 +319,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 155, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(chipo.columns)\n", + "chipo.shape[1]" + ] }, { "cell_type": "markdown", @@ -122,12 +349,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['order_id', 'quantity', 'item_name', 'choice_description',\n", + " 'item_price'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(chipo.columns)" + ] }, { "cell_type": "markdown", @@ -138,12 +377,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=4622, step=1)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo.index" + ] }, { "cell_type": "markdown", @@ -154,12 +406,107 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 123, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Most-ordered item: Chicken Bowl \n", + "\n" + ] + } + ], + "source": [ + "chipo_order = chipo.groupby(['item_name']).sum().sort_values(by='quantity',ascending=False)\n", + "chipo_most_order = chipo_order.index[0]\n", + "\n", + "print(\"Most-ordered item:\", chipo_most_order,'\\n')\n", + "# print(chipo_order)" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
order_idquantitychoice_descriptionitem_pricechipo_revenue
item_name
Chicken Bowl713926761[Tomatillo-Red Chili Salsa (Hot), [Black Beans...7342.738044.63
\n", + "
" + ], + "text/plain": [ + " order_id quantity \\\n", + "item_name \n", + "Chicken Bowl 713926 761 \n", + "\n", + " choice_description item_price \\\n", + "item_name \n", + "Chicken Bowl [Tomatillo-Red Chili Salsa (Hot), [Black Beans... 7342.73 \n", + "\n", + " chipo_revenue \n", + "item_name \n", + "Chicken Bowl 8044.63 " + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = chipo.groupby('item_name')\n", + "c = c.sum()\n", + "c = c.sort_values(['quantity'], ascending=False)\n", + "c.head(1)" + ] }, { "cell_type": "markdown", @@ -170,12 +517,110 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 126, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "761" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo_most_order_time = chipo_order.quantity.iloc[0]\n", + "# Series取数:\n", + "# 1. chipo_most_order_time = chipo_order['Chicken Bowl']\n", + "# 2. chipo_most_order_time = chipo_order.iloc[0]\n", + "# 3. chipo_most_order_time = chipo_order.loc['Chicken Bowl']\n", + "chipo_most_order_time\n" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
order_idquantitychoice_descriptionitem_pricechipo_revenue
item_name
Chicken Bowl713926761[Tomatillo-Red Chili Salsa (Hot), [Black Beans...7342.738044.63
\n", + "
" + ], + "text/plain": [ + " order_id quantity \\\n", + "item_name \n", + "Chicken Bowl 713926 761 \n", + "\n", + " choice_description item_price \\\n", + "item_name \n", + "Chicken Bowl [Tomatillo-Red Chili Salsa (Hot), [Black Beans... 7342.73 \n", + "\n", + " chipo_revenue \n", + "item_name \n", + "Chicken Bowl 8044.63 " + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = chipo.groupby('item_name')\n", + "c = c.sum()\n", + "c = c.sort_values(['quantity'], ascending=False)\n", + "c.head(1)" + ] }, { "cell_type": "markdown", @@ -186,12 +631,106 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 128, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Most-ordered item in choice description: [Diet Coke] \n", + "\n" + ] + } + ], + "source": [ + "chipo_order_desc = chipo.groupby(['choice_description']).sum().sort_values(by='quantity', ascending=False)\n", + "chipo_most_order_desc = chipo_order_desc.index[0]\n", + "print('Most-ordered item in choice description: ', chipo_most_order_desc, '\\n')\n", + "# print(chipo_order_desc)" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
order_idquantityitem_nameitem_pricechipo_revenue
choice_description
[Diet Coke]123455159Canned SodaCanned SodaCanned Soda6 Pack Soft D...326.71408.41
\n", + "
" + ], + "text/plain": [ + " order_id quantity \\\n", + "choice_description \n", + "[Diet Coke] 123455 159 \n", + "\n", + " item_name \\\n", + "choice_description \n", + "[Diet Coke] Canned SodaCanned SodaCanned Soda6 Pack Soft D... \n", + "\n", + " item_price chipo_revenue \n", + "choice_description \n", + "[Diet Coke] 326.71 408.41 " + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = chipo.groupby('choice_description').sum()\n", + "c = c.sort_values(['quantity'], ascending=False)\n", + "c.head(1)\n", + "# Diet Coke 159" + ] }, { "cell_type": "markdown", @@ -202,12 +741,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 133, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "4972" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo.quantity.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4972" + ] + }, + "execution_count": 159, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_items_orders = chipo.quantity.sum()\n", + "total_items_orders" + ] }, { "cell_type": "markdown", @@ -225,12 +798,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "object\n" + ] + } + ], + "source": [ + "print(chipo.item_price.dtype)" + ] }, { "cell_type": "markdown", @@ -241,12 +824,222 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2.39\n", + "1 3.39\n", + "2 3.39\n", + "3 2.39\n", + "4 16.98\n", + " ... \n", + "4617 11.75\n", + "4618 11.75\n", + "4619 11.25\n", + "4620 8.75\n", + "4621 8.75\n", + "Name: item_price, Length: 4622, dtype: float64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo_price_edit = chipo.item_price.str[1:]\n", + "chipo_price_edit.astype('float')" + ] + }, + { + "cell_type": "code", + "execution_count": 140, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "chipo['item_price'] = chipo['item_price'].apply(lambda p: float(p.replace('$','')))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dollarizer = lambda x: float(x[1:-1])\n", + "chipo.item_price = chipo.item_price.apply(dollarizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
order_idquantityitem_namechoice_descriptionitem_price
011Chips and Fresh Tomato SalsaNaN2.39
111Izze[Clementine]3.39
211Nantucket Nectar[Apple]3.39
311Chips and Tomatillo-Green Chili SalsaNaN2.39
422Chicken Bowl[Tomatillo-Red Chili Salsa (Hot), [Black Beans...16.98
..................
461718331Steak Burrito[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...11.75
461818331Steak Burrito[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...11.75
461918341Chicken Salad Bowl[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...11.25
462018341Chicken Salad Bowl[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...8.75
462118341Chicken Salad Bowl[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...8.75
\n", + "

4622 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " order_id quantity item_name \\\n", + "0 1 1 Chips and Fresh Tomato Salsa \n", + "1 1 1 Izze \n", + "2 1 1 Nantucket Nectar \n", + "3 1 1 Chips and Tomatillo-Green Chili Salsa \n", + "4 2 2 Chicken Bowl \n", + "... ... ... ... \n", + "4617 1833 1 Steak Burrito \n", + "4618 1833 1 Steak Burrito \n", + "4619 1834 1 Chicken Salad Bowl \n", + "4620 1834 1 Chicken Salad Bowl \n", + "4621 1834 1 Chicken Salad Bowl \n", + "\n", + " choice_description item_price \n", + "0 NaN 2.39 \n", + "1 [Clementine] 3.39 \n", + "2 [Apple] 3.39 \n", + "3 NaN 2.39 \n", + "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... 16.98 \n", + "... ... ... \n", + "4617 [Fresh Tomato Salsa, [Rice, Black Beans, Sour ... 11.75 \n", + "4618 [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese... 11.75 \n", + "4619 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... 11.25 \n", + "4620 [Fresh Tomato Salsa, [Fajita Vegetables, Lettu... 8.75 \n", + "4621 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... 8.75 \n", + "\n", + "[4622 rows x 5 columns]" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo" + ] }, { "cell_type": "markdown", @@ -257,12 +1050,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 81, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "float64\n" + ] + } + ], + "source": [ + "print(chipo.item_price.dtype)" + ] }, { "cell_type": "markdown", @@ -273,12 +1076,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 143, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "39237.02" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo_revenue = (chipo.quantity * chipo.item_price).sum()\n", + "chipo_revenue" + ] }, { "cell_type": "markdown", @@ -289,12 +1106,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 93, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "1834" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo_order_count = len(chipo.groupby('order_id'))\n", + "chipo_order_count" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1834" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orders = chipo.order_id.value_counts().count()\n", + "orders" + ] }, { "cell_type": "markdown", @@ -305,26 +1157,118 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "21.39423118865867" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Solution 1\n", + "chipo_mean = chipo_revenue/chipo_order_count\n", + "chipo_mean" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "21.39423118865867" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Solution 2\n", + "chipo['chipo_revenue'] = chipo.quantity * chipo.item_price\n", + "chipo_mean = chipo.groupby(['order_id']).chipo_revenue.sum().mean()\n", + "chipo_mean" + ] + }, + { + "cell_type": "code", + "execution_count": 104, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "order_id\n", + "1 2.890000\n", + "2 33.960000\n", + "3 6.335000\n", + "4 10.500000\n", + "5 6.850000\n", + " ... \n", + "1830 11.500000\n", + "1831 4.300000\n", + "1832 6.600000\n", + "1833 11.750000\n", + "1834 9.583333\n", + "Name: chipo_revenue, Length: 1834, dtype: float64\n" + ] + } + ], "source": [ - "# Solution 1\n", - "\n" + "# Solution 1 错误的理解为了各个订单的平均收入,revenue distribution across individual orders\n", + "chipo_revenue_avg = chipo.groupby(['order_id']).chipo_revenue.mean()\n", + "print(chipo_revenue_avg)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 116, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "order_id\n", + "1 2.890000\n", + "2 16.980000\n", + "3 6.335000\n", + "4 10.500000\n", + "5 6.850000\n", + " ... \n", + "1830 11.500000\n", + "1831 4.300000\n", + "1832 6.600000\n", + "1833 11.750000\n", + "1834 9.583333\n", + "Length: 1834, dtype: float64" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Solution 2\n", - "\n" + "# Solution 2 错误的理解\n", + "left = chipo.groupby(['order_id']).quantity.sum()\n", + "right = chipo.groupby(['order_id']).chipo_revenue.sum()\n", + "avg_avenue_per_order = right/left\n", + "avg_avenue_per_order" ] }, { @@ -336,32 +1280,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 119, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "50\n" + ] + } + ], + "source": [ + "item = chipo.groupby(['item_name']).item_name.count()\n", + "print(len(item))" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chipo.item_name.value_counts().count()" + ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.10.13" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb index f32d9ce9f..bf383121e 100644 --- a/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb @@ -19,12 +19,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], - "source": [] + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -42,12 +45,156 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agegenderoccupationzip_code
user_id
124Mtechnician85711
253Fother94043
323Mwriter32067
424Mtechnician43537
533Fother15213
...............
93926Fstudent33319
94032Madministrator02215
94120Mstudent97229
94248Flibrarian78209
94322Mstudent77841
\n", + "

943 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " age gender occupation zip_code\n", + "user_id \n", + "1 24 M technician 85711\n", + "2 53 F other 94043\n", + "3 23 M writer 32067\n", + "4 24 M technician 43537\n", + "5 33 F other 15213\n", + "... ... ... ... ...\n", + "939 26 F student 33319\n", + "940 32 M administrator 02215\n", + "941 20 M student 97229\n", + "942 48 F librarian 78209\n", + "943 22 M student 77841\n", + "\n", + "[943 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user'\n", + "users = pd.read_csv(url, sep='|', index_col='user_id')\n", + "users" + ] }, { "cell_type": "markdown", @@ -58,13 +205,264 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "collapsed": false, "scrolled": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agegenderoccupationzip_code
user_id
124Mtechnician85711
253Fother94043
323Mwriter32067
424Mtechnician43537
533Fother15213
642Mexecutive98101
757Madministrator91344
836Madministrator05201
929Mstudent01002
1053Mlawyer90703
1139Fother30329
1228Fother06405
1347Meducator29206
1445Mscientist55106
1549Feducator97301
1621Mentertainment10309
1730Mprogrammer06355
1835Fother37212
1940Mlibrarian02138
2042Fhomemaker95660
2126Mwriter30068
2225Mwriter40206
2330Fartist48197
2421Fartist94533
2539Mengineer55107
\n", + "
" + ], + "text/plain": [ + " age gender occupation zip_code\n", + "user_id \n", + "1 24 M technician 85711\n", + "2 53 F other 94043\n", + "3 23 M writer 32067\n", + "4 24 M technician 43537\n", + "5 33 F other 15213\n", + "6 42 M executive 98101\n", + "7 57 M administrator 91344\n", + "8 36 M administrator 05201\n", + "9 29 M student 01002\n", + "10 53 M lawyer 90703\n", + "11 39 F other 30329\n", + "12 28 F other 06405\n", + "13 47 M educator 29206\n", + "14 45 M scientist 55106\n", + "15 49 F educator 97301\n", + "16 21 M entertainment 10309\n", + "17 30 M programmer 06355\n", + "18 35 F other 37212\n", + "19 40 M librarian 02138\n", + "20 42 F homemaker 95660\n", + "21 26 M writer 30068\n", + "22 25 M writer 40206\n", + "23 30 F artist 48197\n", + "24 21 F artist 94533\n", + "25 39 M engineer 55107" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.head(25)" + ] }, { "cell_type": "markdown", @@ -75,13 +473,146 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": { "collapsed": false, "scrolled": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agegenderoccupationzip_code
user_id
93461Mengineer22902
93542Mdoctor66221
93624Mother32789
93748Meducator98072
93838Ftechnician55038
93926Fstudent33319
94032Madministrator02215
94120Mstudent97229
94248Flibrarian78209
94322Mstudent77841
\n", + "
" + ], + "text/plain": [ + " age gender occupation zip_code\n", + "user_id \n", + "934 61 M engineer 22902\n", + "935 42 M doctor 66221\n", + "936 24 M other 32789\n", + "937 48 M educator 98072\n", + "938 38 F technician 55038\n", + "939 26 F student 33319\n", + "940 32 M administrator 02215\n", + "941 20 M student 97229\n", + "942 48 F librarian 78209\n", + "943 22 M student 77841" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users[-10:]\n", + "# users.iloc[-10:]\n", + "# users.tail(10)" + ] }, { "cell_type": "markdown", @@ -92,12 +623,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "943" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.shape[0]\n", + "# len(users)" + ] }, { "cell_type": "markdown", @@ -108,12 +653,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.shape[1]" + ] }, { "cell_type": "markdown", @@ -124,12 +682,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['age', 'gender', 'occupation', 'zip_code'], dtype='object')" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.columns" + ] }, { "cell_type": "markdown", @@ -140,12 +711,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "Index([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,\n", + " ...\n", + " 934, 935, 936, 937, 938, 939, 940, 941, 942, 943],\n", + " dtype='int64', name='user_id', length=943)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.index" + ] }, { "cell_type": "markdown", @@ -156,12 +743,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "age int64\n", + "gender object\n", + "occupation object\n", + "zip_code object\n", + "dtype: object" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.dtypes" + ] }, { "cell_type": "markdown", @@ -172,12 +776,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "user_id\n", + "1 technician\n", + "2 other\n", + "3 writer\n", + "4 technician\n", + "5 other\n", + " ... \n", + "939 student\n", + "940 administrator\n", + "941 student\n", + "942 librarian\n", + "943 student\n", + "Name: occupation, Length: 943, dtype: object" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users['occupation']" + ] }, { "cell_type": "markdown", @@ -188,12 +817,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 85, "metadata": { "collapsed": false }, + "outputs": [ + { + "data": { + "text/plain": [ + "21" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# occupation_list = users.groupby('occupation').occupation.count()\n", + "# len(occupation_list)\n", + "\n", + "users.occupation.value_counts().count()\n", + "\n", + "# users.occupation.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# occupation_list" + ] }, { "cell_type": "markdown", @@ -204,12 +860,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'student'" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "occupation_list.sort_values(ascending=False).index[0]" + ] }, { "cell_type": "markdown", @@ -220,12 +889,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
age
count943.000000
mean34.051962
std12.192740
min7.000000
25%25.000000
50%31.000000
75%43.000000
max73.000000
\n", + "
" + ], + "text/plain": [ + " age\n", + "count 943.000000\n", + "mean 34.051962\n", + "std 12.192740\n", + "min 7.000000\n", + "25% 25.000000\n", + "50% 31.000000\n", + "75% 43.000000\n", + "max 73.000000" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.describe()" + ] }, { "cell_type": "markdown", @@ -236,12 +985,143 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agegenderoccupationzip_code
count943.000000943943943
uniqueNaN221795
topNaNMstudent55414
freqNaN6701969
mean34.051962NaNNaNNaN
std12.192740NaNNaNNaN
min7.000000NaNNaNNaN
25%25.000000NaNNaNNaN
50%31.000000NaNNaNNaN
75%43.000000NaNNaNNaN
max73.000000NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " age gender occupation zip_code\n", + "count 943.000000 943 943 943\n", + "unique NaN 2 21 795\n", + "top NaN M student 55414\n", + "freq NaN 670 196 9\n", + "mean 34.051962 NaN NaN NaN\n", + "std 12.192740 NaN NaN NaN\n", + "min 7.000000 NaN NaN NaN\n", + "25% 25.000000 NaN NaN NaN\n", + "50% 31.000000 NaN NaN NaN\n", + "75% 43.000000 NaN NaN NaN\n", + "max 73.000000 NaN NaN NaN" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.describe(include='all')" + ] }, { "cell_type": "markdown", @@ -252,12 +1132,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "count 943\n", + "unique 21\n", + "top student\n", + "freq 196\n", + "Name: occupation, dtype: object" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.occupation.describe()" + ] }, { "cell_type": "markdown", @@ -268,12 +1165,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "34.05196182396607" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users['age'].mean()" + ] }, { "cell_type": "markdown", @@ -284,32 +1194,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 87, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "age\n", + "7 1\n", + "66 1\n", + "11 1\n", + "10 1\n", + "73 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# users_age_list = users.groupby(['age']).age.count().sort_values()\n", + "# users_age_least = users_age_list.iloc[0]\n", + "# users_age_list[users_age_list==users_age_least]\n", + "\n", + "users.age.value_counts().tail()" + ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.10.13" } }, "nbformat": 4, diff --git a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb index ea507fa1d..0f219fab3 100644 --- a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb +++ b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises.ipynb @@ -30,12 +30,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/Users/41uo/Desktop/products.tsv'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[14], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/Users/41uo/Desktop/products.tsv\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 4\u001b[0m food \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/Users/41uo/Desktop/products.tsv'" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "url = r'/Users/41uo/Desktop/products.tsv'\n", + "food = pd.read_csv(url)" + ] }, { "cell_type": "markdown", @@ -169,21 +191,21 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.10.13" } }, "nbformat": 4,