Skip to content

Commit 8db913e

Browse files
p2 files added
1 parent 770ab56 commit 8db913e

File tree

3 files changed

+145
-0
lines changed

3 files changed

+145
-0
lines changed
+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import pandas as pd
2+
3+
4+
def calculate_demographic_data(print_data: bool = True) -> dict:
    """Summarise the census records stored in ``adult.data.csv``.

    The CSV is read from the current working directory on every call and must
    provide the columns ``age``, ``sex``, ``race``, ``education``, ``salary``,
    ``hours-per-week``, ``native-country`` and ``occupation``.

    Args:
        print_data: When true, print every statistic before returning.

    Returns:
        A dict with the keys ``race_count``, ``average_age_men``,
        ``percentage_bachelors``, ``higher_education_rich``,
        ``lower_education_rich``, ``min_work_hours``, ``rich_percentage``,
        ``highest_earning_country``, ``highest_earning_country_percentage``
        and ``top_IN_occupation``. ``race_count`` is a pandas Series of row
        counts indexed by race; averages and percentages are rounded to one
        decimal place.

    Raises:
        ZeroDivisionError: If a group a percentage is taken over has no rows.
    """

    def _percent(part, whole):
        # Share of the rows of `whole` that are also in `part`, as a
        # percentage rounded to one decimal. Plain Python ints are divided so
        # an empty `whole` raises instead of silently producing NaN.
        return round(len(part) / len(whole) * 100, 1)

    # Read data from file
    df = pd.read_csv('adult.data.csv')

    # Row mask shared by every ">50K" statistic below.
    high_salary = df['salary'] == '>50K'

    # How many of each race are represented in this dataset?
    race_count = df['race'].value_counts()

    # What is the average age of men?
    average_age_men = round(df.loc[df['sex'] == 'Male', 'age'].mean(), 1)

    # What is the percentage of people who have a Bachelor's degree?
    percentage_bachelors = _percent(df[df['education'] == 'Bachelors'], df)

    # What percentage of people with / without advanced education
    # (`Bachelors`, `Masters`, or `Doctorate`) make more than 50K?
    advanced = df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])
    higher_education = df[advanced]
    lower_education = df[~advanced]
    higher_education_rich = _percent(df[advanced & high_salary], higher_education)
    lower_education_rich = _percent(df[~advanced & high_salary], lower_education)

    # What is the minimum number of hours a person works per week?
    min_work_hours = df['hours-per-week'].min()

    # What percentage of the people who work the minimum number of hours per
    # week have a salary of >50K?
    works_min_hours = df['hours-per-week'] == min_work_hours
    rich_percentage = _percent(df[works_min_hours & high_salary], df[works_min_hours])

    # What country has the highest percentage of people that earn >50K?
    # Countries without any >50K earner become NaN in the division; idxmax
    # and max skip NaN by default.
    rich_per_country = df.loc[high_salary, 'native-country'].value_counts()
    people_per_country = df['native-country'].value_counts()
    percentage = rich_per_country / people_per_country * 100

    highest_earning_country = percentage.idxmax()
    highest_earning_country_percentage = round(percentage.max(), 1)

    # Identify the most popular occupation for those who earn >50K in India.
    rich_in_india = high_salary & (df['native-country'] == 'India')
    top_IN_occupation = df.loc[rich_in_india, 'occupation'].value_counts().idxmax()

    # DO NOT MODIFY BELOW THIS LINE

    if print_data:
        print("Number of each race:\n", race_count)
        print("Average age of men:", average_age_men)
        print(f"Percentage with Bachelors degrees: {percentage_bachelors}%")
        print(f"Percentage with higher education that earn >50K: {higher_education_rich}%")
        print(f"Percentage without higher education that earn >50K: {lower_education_rich}%")
        print(f"Min work time: {min_work_hours} hours/week")
        print(f"Percentage of rich among those who work fewest hours: {rich_percentage}%")
        print("Country with highest percentage of rich:", highest_earning_country)
        print(f"Highest percentage of rich people in country: {highest_earning_country_percentage}%")
        print("Top occupations in India:", top_IN_occupation)

    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'higher_education_rich': higher_education_rich,
        'lower_education_rich': lower_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage':
        highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation
    }

Diff for: Demographic Data Analyzer/main.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# This entrypoint file is to be used in development. Start by reading README.md
2+
import demographic_data_analyzer
3+
from unittest import main
4+
5+
# Test your function by calling it here.
# Called with its defaults, so the analyzer decides whether to print.
demographic_data_analyzer.calculate_demographic_data()

# Run unit tests automatically.
# exit=False makes unittest return after the run instead of calling sys.exit().
main(module='test_module', exit=False)

Diff for: Demographic Data Analyzer/test_module.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import unittest
2+
import demographic_data_analyzer
3+
4+
class DemographicAnalyzerTestCase(unittest.TestCase):
    """Compare calculate_demographic_data() with the expected figures below."""

    @classmethod
    def setUpClass(cls):
        # The tests only read the result, so run the analysis once for the
        # whole class instead of repeating it before every test method.
        cls.data = demographic_data_analyzer.calculate_demographic_data(print_data=False)

    def test_race_count(self):
        actual = self.data['race_count'].tolist()
        expected = [27816, 3124, 1039, 311, 271]
        # Lists are compared exactly: assertAlmostEqual subtracts its operands
        # when they differ, which raises TypeError for lists instead of
        # reporting a normal test failure.
        self.assertEqual(actual, expected, msg="Expected race count values to be [27816, 3124, 1039, 311, 271]")

    def test_average_age_men(self):
        actual = self.data['average_age_men']
        expected = 39.4
        self.assertAlmostEqual(actual, expected, msg="Expected different value for average age of men.")

    def test_percentage_bachelors(self):
        actual = self.data['percentage_bachelors']
        expected = 16.4
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage with Bachelors degrees.")

    def test_higher_education_rich(self):
        actual = self.data['higher_education_rich']
        expected = 46.5
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage with higher education that earn >50K.")

    def test_lower_education_rich(self):
        actual = self.data['lower_education_rich']
        expected = 17.4
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage without higher education that earn >50K.")

    def test_min_work_hours(self):
        actual = self.data['min_work_hours']
        expected = 1
        self.assertAlmostEqual(actual, expected, msg="Expected different value for minimum work hours.")

    def test_rich_percentage(self):
        actual = self.data['rich_percentage']
        expected = 10
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage of rich among those who work fewest hours.")

    def test_highest_earning_country(self):
        actual = self.data['highest_earning_country']
        expected = 'Iran'
        self.assertEqual(actual, expected, "Expected different value for highest earning country.")

    def test_highest_earning_country_percentage(self):
        actual = self.data['highest_earning_country_percentage']
        expected = 41.9
        self.assertAlmostEqual(actual, expected, msg="Expected different value for highest earning country percentage.")

    def test_top_IN_occupation(self):
        actual = self.data['top_IN_occupation']
        expected = 'Prof-specialty'
        self.assertEqual(actual, expected, "Expected different value for top occupations in India.")


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)