50
50
"execution_count" : 2 ,
51
51
"metadata" : {},
52
52
"outputs" : [],
53
- "source" : []
53
+ "source" : [
54
+ " url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n " ,
55
+ " \n " ,
56
+ " chipo = pd.read_csv(url, sep = '\\ t')"
57
+ ]
54
58
},
55
59
{
56
60
"cell_type" : " markdown" ,
210
214
"output_type" : " execute_result"
211
215
}
212
216
],
213
- "source" : []
217
+ "source" : [
218
+ " chipo.head(10)"
219
+ ]
214
220
},
215
221
{
216
222
"cell_type" : " markdown" ,
237
243
"output_type" : " display_data"
238
244
}
239
245
],
240
- "source" : []
246
+ "source" : [
247
+ " # get the Series of the names\n " ,
248
+ " x = chipo.item_name\n " ,
249
+ " \n " ,
250
+ " # use the Counter class from collections to build a dictionary mapping each item name to how many times it appears\n " ,
251
+ " letter_counts = Counter(x)\n " ,
252
+ " \n " ,
253
+ " # convert the dictionary to a DataFrame\n " ,
254
+ " df = pd.DataFrame.from_dict(letter_counts, orient='index')\n " ,
255
+ " \n " ,
256
+ " # sort the counts in ascending order and slice positions 45-49 (these are the 5 most-ordered items only when there are exactly 50 distinct items)\n " ,
257
+ " df = df[0].sort_values(ascending = True)[45:50]\n " ,
258
+ " \n " ,
259
+ " # create the plot\n " ,
260
+ " df.plot(kind='bar')\n " ,
261
+ " \n " ,
262
+ " # Set the title and labels\n " ,
263
+ " plt.xlabel('Items')\n " ,
264
+ " plt.ylabel('Number of Times Ordered')\n " ,
265
+ " plt.title('Most ordered Chipotle\\ 's Items')\n " ,
266
+ " \n " ,
267
+ " # show the plot\n " ,
268
+ " plt.show()"
269
+ ]
241
270
},
242
271
{
243
272
"cell_type" : " markdown" ,
275
304
"output_type" : " display_data"
276
305
}
277
306
],
278
- "source" : []
307
+ "source" : [
308
+ " # convert the item_price column from strings to floats\n " ,
309
+ " chipo.item_price = [float(value[1:-1]) for value in chipo.item_price] # strip the dollar sign and trailing space\n " ,
310
+ " \n " ,
311
+ " # then groupby the orders and sum\n " ,
312
+ " orders = chipo.groupby('order_id').sum()\n " ,
313
+ " \n " ,
314
+ " # creates the scatterplot\n " ,
315
+ " # plt.scatter(orders.quantity, orders.item_price, s = 50, c = 'green')\n " ,
316
+ " plt.scatter(x = orders.item_price, y = orders.quantity, s = 50, c = 'green')\n " ,
317
+ " \n " ,
318
+ " # Set the title and labels\n " ,
319
+ " plt.xlabel('Order Price')\n " ,
320
+ " plt.ylabel('Items ordered')\n " ,
321
+ " plt.title('Number of items ordered per order price')\n " ,
322
+ " plt.ylim(0)"
323
+ ]
279
324
},
280
325
{
281
326
"cell_type" : " markdown" ,
308
353
"name" : " python" ,
309
354
"nbconvert_exporter" : " python" ,
310
355
"pygments_lexer" : " ipython3" ,
311
- "version" : " 3.9.1 "
356
+ "version" : " 3.6.8 "
312
357
}
313
358
},
314
359
"nbformat" : 4 ,
315
360
"nbformat_minor" : 1
316
- }
361
+ }
0 commit comments