added fruits project

Andres Scharifker · Andres Scharifker · commit f651642c5460 · 2024-09-07T08:11:08.000-04:00
diff --git a/.DS_Store b/.DS_Store
diff --git a/fruits/desired_fruits.csv b/fruits/desired_fruits.csv
@@ -0,0 +1,8 @@
+fruit
+apple
+avocado
+tomato
+peach
+grape
+papaya
+apricot
diff --git a/fruits/fruit_transactions.csv b/fruits/fruit_transactions.csv
@@ -0,0 +1,20 @@
+Name,Date,Fruit
+Tiffany Sosa,2024/08/28,Grape
+Tiffany Sosa,2024/08/28,Grape
+Richard Blackwell,2024/11/20,Apple
+Izaiah Donovan,2024/08/20,Apple
+Tara Murphy,2024/02/23,Grape
+Dashawn Alexander,2024/03/17,Banana
+Dashawn Alexander,2023/03/17,Banana
+Issac Gregory,2024/02/08,Avocado
+Keira Strickland,2024/02/04,Banana
+Dangelo Dyer,2023/09/17,Apple
+Reese Schultz,2023/06/21,Avocado
+Thaddeus Schneider,2024/12/09,Grape
+Anton Valentine,2023/10/09,Avocado
+Trevin Maxwell,2023/08/18,Pomegranate
+Annalise Cook,2024/08/17,Apple
+Devyn Mcbride,2023/11/16,Avocado
+Blaine Maldonado,2023/05/18,Apple
+Aden Phillips,2024/08/16,Pomegranate
+Brooke Bolton,2023/03/27,Avocado
diff --git a/fruits/fruits1.py b/fruits/fruits1.py
@@ -0,0 +1,29 @@
+"""Print the fruits that rank in the top 10 for both calories and sugar."""
+import requests
+import json
+import pandas as pd
+
+url = 'https://www.fruityvice.com/api/fruit/all'
+
+response = requests.get(url)
+
+if response.status_code != 200:
+    # Without a successful response there is no data to build `df` from, so
+    # stop here instead of failing later with a NameError on `df`.
+    print("somethings off")
+    raise SystemExit(1)
+
+# One row per returned record, indexed by fruit name; the "nutritions" column
+# is expected to hold one dict of nutrition values per fruit.
+data = response.json()
+df = pd.DataFrame(data)
+df.set_index("name", inplace=True)
+
+# .str[key] looks the key up in each row's nutrition dict.
+top_calories = df["nutritions"].str["calories"].nlargest(10)
+top_sugar = df["nutritions"].str["sugar"].nlargest(10)
+
+# The inner join keeps only the fruits present in both top-10 lists.
+joined = pd.concat([top_calories, top_sugar], axis = 'columns',keys=["Calories","Sugar"], join='inner').sort_values("Calories", ascending=False)
+
+print(joined)
diff --git a/fruits/fruits2.py b/fruits/fruits2.py
@@ -0,0 +1,44 @@
+"""Report the person with the highest total sugar intake from the desired fruits."""
+import pandas as pd
+import requests
+import json
+
+transactions = pd.read_csv("fruit_transactions.csv", parse_dates=["Date"])
+
+url = 'https://www.fruityvice.com/api/fruit/all'
+response = requests.get(url)
+# Title-case the wanted names so they match the capitalisation used in
+# fruit_transactions.csv (the API's names are presumed to be cased the same way).
+desired_fruits = pd.read_csv("desired_fruits.csv")["fruit"].str.title()
+
+if not response.ok:
+    # Without a payload `df` is never created; stop here rather than crash
+    # with a NameError further down.
+    print("something wrong")
+    raise SystemExit(1)
+
+data = response.json()
+df = pd.DataFrame(data)
+df.set_index('name', inplace=True)
+# Restrict the API data to the fruits listed in desired_fruits.csv.
+desired_df = df[df.index.isin(desired_fruits)]
+
+# Left merge keeps every transaction; a fruit outside the desired list gets
+# NaN nutrition values, which the per-person sums below skip.
+person_fruit_attributes = transactions.merge(desired_df, how='left', left_on="Fruit", right_index=True).sort_values("Name")
+
+person_fruit_attributes['Calories'] = person_fruit_attributes['nutritions'].str['calories']
+person_fruit_attributes['Sugar'] = person_fruit_attributes['nutritions'].str['sugar']
+person_fruit_attributes.drop(['nutritions'],axis = 1,inplace=True)
+person_fruit_attributes = person_fruit_attributes[['Name','Date','Fruit','Calories','Sugar']]
+
+# Create the group by object
+person = person_fruit_attributes.groupby('Name')
+# Create the aggregation
+intake_per_person = person[['Sugar','Calories']].sum().sort_values("Sugar", ascending=False)
+
+# NOTE(review): projects.txt asks for the person who ate the most calories,
+# but this ranks by sugar -- confirm which metric is intended.
+top_eater = intake_per_person.nlargest(1,columns=['Sugar'])
+
+print(top_eater)
diff --git a/fruits/fruits3.py b/fruits/fruits3.py
@@ -0,0 +1,35 @@
+"""Print each person's total calories and sugar per year-month."""
+import pandas as pd
+import requests
+
+url = 'https://www.fruityvice.com/api/fruit/all'
+response = requests.get(url)
+
+
+def extract_values(dictionary):
+    """Return the (calories, sugar) pair from one nutrition dict."""
+    return dictionary['calories'], dictionary['sugar']
+
+
+if not response.ok:
+    # Report the failure and exit non-zero instead of silently printing nothing.
+    print("something wrong")
+    raise SystemExit(1)
+
+#Fruits
+json_data = response.json()
+fruits = pd.DataFrame(json_data)
+fruits.set_index('name', inplace=True)
+# Split each nutrition dict into separate Calories and Sugar columns.
+fruits[['Calories', 'Sugar']] = fruits['nutritions'].apply(lambda x: pd.Series(extract_values(x)))
+fruits = fruits[["Calories","Sugar"]]
+
+person = pd.read_csv("fruit_transactions.csv", parse_dates=["Date"])
+# Year-month key such as "202408", used to group each person's transactions.
+person['YrMonth'] = person['Date'].dt.strftime('%Y%m')
+person_fruit = person.merge(fruits, how='left', left_on='Fruit', right_index=True)
+person_fruit_agg = person_fruit.groupby(['Name','YrMonth'])
+person_fruit_agg = person_fruit_agg[['Calories','Sugar']].sum()
+# measure = input(" Which Metric do you want to filter by: ")
+print(person_fruit_agg)
+# print(person_fruit_agg.nlargest(1,measure))
diff --git a/fruits/projects.txt b/fruits/projects.txt
@@ -0,0 +1,33 @@
+1) 
+- Bring in all of them
+- put them in a dataframe
+- filter to the top 10 by calories
+- filter to the top 10 by sugar
+- find the ones in common
+
+
+2) 
+- Have a CSV with the desired fruits
+- Only bring in the desired fruits
+- Have a CSV with transactions (John Jan 1 Apple for lots of names)
+- Find the person who ate the most calories
+
+3) 
+- Have a CSV with the desired fruits
+- Only bring in the desired fruits
+- Have a CSV with transactions (John Jan 1 Apple for lots of names)
+- Have a class with two attributes, "largest consumer" and "never consumed": the user inputs a fruit and the program returns the corresponding list of people. 
+
+
+4) 
+- Connect to PostgreSQL 
+- Load the data from 2) 
+- Do some querying 
+
+
+5) 
+- Use Airflow to load the data to Postgres on a schedule
+
+
+6) 
+- Do it with Ubuntu in GitHub?

-Original file line number
+Diff line change
@@ @@ -0,0 +1,8 @@ @@
 +fruit
 +apple
 +avocado
 +tomato
 +peach
 +grape
 +papaya
 +apricot