-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
99 lines (82 loc) · 3.51 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Announce start-up; the import statements that follow load the libraries.
startup_message = "Execution started. \n Collecting the libraries required."
print(startup_message)
import os
import warnings
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py
import seaborn as sns
from plotly.offline import init_notebook_mode, iplot
import plotly.offline as offline
# Silence every warning category for the rest of the run.
warnings.filterwarnings("ignore")

# Show what is in the data directory, then load the dataset from it.
# Both paths are relative to the current working directory.
print(os.listdir("../data visualization"))
world = pd.read_csv("../data visualization/countries of the world.csv")

# Quick look at the raw data: the first ten rows, then the column summary.
print(world.head(10))
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
world.info()
# Cleaning the data
# Replace the CSV headers with short identifiers. The assignment is positional,
# so the frame must have exactly these 20 columns in this order.
world.columns = [
    "country", "region", "population", "area", "density", "coastline",
    "migration", "infant_mortality", "gdp", "literacy", "phones", "arable",
    "crops", "other", "climate", "birthrate", "deathrate", "agriculture",
    "industry", "service",
]

# The two label columns are stored as categoricals.
for label_column in ("country", "region"):
    world[label_column] = world[label_column].astype("category")

# These columns are handled as strings that use a comma as the decimal mark:
# swap the comma for a point and parse the result as float.
decimal_comma_columns = [
    "density", "coastline", "migration", "infant_mortality", "literacy",
    "phones", "arable", "crops", "other", "climate", "birthrate",
    "deathrate", "agriculture", "industry", "service",
]
for column_name in decimal_comma_columns:
    world[column_name] = world[column_name].str.replace(",", ".").astype(float)

# Column summary after the conversion. info() prints its own report and
# returns None, so it is not wrapped in print().
world.info()
# Count the missing values in each column and report them.
missing = world.isnull().sum()
print(missing)

# Fill gaps in the numeric columns with that column's mean. numeric_only=True
# keeps the non-numeric columns (country, region) out of the mean; without it
# recent pandas versions raise instead of silently skipping them.
world.fillna(world.mean(numeric_only=True), inplace=True)

# Trim surrounding whitespace from the region labels.
world.region = world.region.str.strip()

# Per-region averages of the numeric columns. The result is printed: as a bare
# expression in a script it would be computed and then thrown away.
group = world.groupby("region")
print(group.mean(numeric_only=True))

print(world.head(10))
# Plotting the graph
# Bar chart of how many rows (countries) fall into each region.
region = world["region"].value_counts()

plt.figure(figsize=(10, 7))
sns.barplot(x=region.index, y=region.values)

# Titles and axis labels; the x tick labels are rotated by 45 degrees.
plt.title('Number of Countries by REGİON', color='red', fontsize=20)
plt.xlabel('Region')
plt.ylabel('Number of countries')
plt.xticks(rotation=45)

plt.show()
# Pairwise correlations between the numeric columns only. The frame also holds
# non-numeric columns (country, region), which corr() rejects on recent pandas
# versions; select_dtypes keeps this working on old and new releases alike.
# The matrix is computed once and reused for both the printout and the plot.
correlations = world.select_dtypes(include="number").corr()
print(correlations)

# Draw the matrix as an annotated heatmap, one decimal place per cell.
f, ax = plt.subplots(figsize=(18, 16))
sns.heatmap(correlations, annot=True, linewidths=.8, fmt='.1f', ax=ax)
plt.show()
# Order the rows by GDP, highest first, and keep the first 100 of them.
gdp = world.sort_values(["gdp"], ascending=False)
df = gdp.iloc[:100, :]

# Birthrate against GDP, drawn as a plain line; the text field is the country.
trace1 = go.Scatter(
    x=df["gdp"],
    y=df["birthrate"],
    mode="lines",
    name="Birthrate",
    marker={"color": 'rgba(235,66,30, 0.8)'},
    text=df["country"],
)

# Deathrate against GDP, drawn as a line with markers.
trace2 = go.Scatter(
    x=df["gdp"],
    y=df["deathrate"],
    mode="lines+markers",
    name="Deathrate",
    marker={"color": 'rgba(10,10,180, 0.8)'},
    text=df["country"],
)

# Assemble the figure as a plain dict: both traces plus a title and x axis.
z = [trace1, trace2]
layout = {
    "title": 'Birthrate and Deathrate of World Countries (Top 100)',
    "xaxis": {"title": 'GDP', "ticklen": 5, "zeroline": False},
}
fig = {"data": z, "layout": layout}

# Hand the figure to both plotly offline helpers, in this order.
iplot(fig)
offline.plot(fig)