-
Notifications
You must be signed in to change notification settings - Fork 0
/
COVID-19.R
130 lines (85 loc) · 3.98 KB
/
COVID-19.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#################### STEP 1 ########################
# Load the readr, ggplot2, and dplyr packages
library(readr)
library(ggplot2)
library(dplyr)
# Read confirmed_cases_worldwide.csv into confirmed_cases_worldwide.
# NOTE(review): the path is an absolute Windows location, so the script only
# runs on a machine that has this exact drive layout.
confirmed_cases_worldwide <- read_csv(
  file = "F:/datas/datasets/confirmed_cases_worldwide.csv"
)
# Echo the loaded table so its contents can be inspected
confirmed_cases_worldwide
#################### STEP 2 ########################
# Line plot of cum_cases (y) against date (x) for the worldwide data,
# with a descriptive y-axis label
ggplot(
  data = confirmed_cases_worldwide,
  mapping = aes(x = date, y = cum_cases)
) +
  geom_line() +
  ylab("Cumulative confirmed cases")
#################### STEP 3 ########################
# Read confirmed_cases_china_vs_world.csv into confirmed_cases_china_vs_world
confirmed_cases_china_vs_world <- read_csv(
  file = "F:/datas/datasets/confirmed_cases_china_vs_world.csv"
)
# Compact overview of the columns that were read
glimpse(confirmed_cases_china_vs_world)
# Line plot of cum_cases vs. date with one line per is_china value.
# The aesthetics live in the line geom rather than in ggplot(), so layers
# added to this plot later do not inherit them.
plt_cum_confirmed_cases_china_vs_world <-
  ggplot(data = confirmed_cases_china_vs_world) +
  geom_line(
    mapping = aes(x = date, y = cum_cases, color = is_china, group = is_china)
  ) +
  labs(y = "Cumulative confirmed cases")
# Render the stored plot
plt_cum_confirmed_cases_china_vs_world
#################### STEP 4 ########################
# Event dates (as Date values) paired with the label to draw for each one
who_events <- tibble(
  date = as.Date(c("2020-01-30", "2020-03-11", "2020-02-13")),
  event = c(
    "Global health\nemergency declared",
    "Pandemic\ndeclared",
    "China reporting\nchange"
  )
)
# Annotate the China-vs-world plot: a dashed vertical line at each event
# date, plus the event label placed at that date and at 100000 on the y-axis
plt_cum_confirmed_cases_china_vs_world +
  geom_vline(
    data = who_events,
    mapping = aes(xintercept = date),
    linetype = "dashed"
  ) +
  geom_text(
    data = who_events,
    mapping = aes(x = date, label = event),
    y = 1e5
  )
#################### STEP 5 ########################
# Keep only the China rows dated 2020-02-15 or later
china_after_feb15 <- confirmed_cases_china_vs_world %>%
  filter(is_china == "China") %>%
  filter(date >= "2020-02-15")
# Line plot of cum_cases vs. date for that subset, overlaid with a
# linear-regression trend line drawn without its error band
ggplot(data = china_after_feb15, mapping = aes(x = date, y = cum_cases)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(y = "Cumulative confirmed cases")
#################### STEP 6 ########################
# Keep only the rows labelled "Not China"
not_china <- filter(confirmed_cases_china_vs_world, is_china == "Not China")
# Line plot of cum_cases vs. date for that subset, overlaid with a
# linear-regression trend line drawn without its error band
plt_not_china_trend_lin <-
  ggplot(data = not_china, mapping = aes(x = date, y = cum_cases)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(y = "Cumulative confirmed cases")
# Render the stored plot
plt_not_china_trend_lin
#################### STEP 7 ########################
# Same plot again, this time with a base-10 logarithmic y-axis
plt_not_china_trend_lin + scale_y_log10()
#################### STEP 8 ########################
# Read confirmed_cases_by_country.csv into confirmed_cases_by_country
confirmed_cases_by_country <- read_csv(
  "F:/datas/datasets/confirmed_cases_by_country.csv"
)
glimpse(confirmed_cases_by_country)
# Per country, take the largest cum_cases value as its total, then keep the
# 7 countries with the highest totals.
# The ranking column is passed to top_n() explicitly: when wt is omitted,
# top_n() falls back to the last column of its input and prints a
# "Selecting by ..." message, so the result would silently depend on
# column order.
top_countries_by_total_cases <- confirmed_cases_by_country %>%
  group_by(country) %>%
  summarize(total_cases = max(cum_cases)) %>%
  top_n(n = 7, wt = total_cases)
# See the result
top_countries_by_total_cases
#################### STEP 9 ########################
# Read confirmed_cases_top7_outside_china.csv into
# confirmed_cases_top7_outside_china (assigned with `<-`, matching every
# other assignment in this script)
confirmed_cases_top7_outside_china <- read_csv(
  "F:/datas/datasets/confirmed_cases_top7_outside_china.csv"
)
# Compact overview of the columns that were read
glimpse(confirmed_cases_top7_outside_china)
# Line plot of cum_cases vs. date with one colored line per country
ggplot(
  confirmed_cases_top7_outside_china,
  aes(date, cum_cases, color = country, group = country)
) +
  geom_line() +
  ylab("Cumulative confirmed cases")