-
Notifications
You must be signed in to change notification settings - Fork 0
/
task4.r
125 lines (124 loc) · 5.76 KB
/
task4.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
> # Attach data.table with library() so a missing package stops the session
> # with an error instead of require()'s silent FALSE return value.
> library(data.table)
> load("task1.RData")
> aways<-matches$AwayGoal
> homes<-matches$HomeGoal
> sort(unique(homes))
[1] 0 1 2 3 4 5 6 7 8
> sort(unique(aways))
[1] 0 1 2 3 4 5 6 7
> awaym<-mean(aways,na.rm=TRUE)
> awaym
[1] 1.190332
> homem<-mean(homes,na.rm=TRUE)
> homem
[1] 1.553776
> # Because the Poisson distribution has unbounded support (any non-negative
> # goal count has positive probability), we also evaluate counts far above
> # the largest observed value of 8.
> awayp<-dpois(0:99,awaym)
> homep<-dpois(0:99,homem)
> awayp
[1] 3.041202e-01 3.620041e-01 2.154526e-01 8.548672e-02 2.543940e-02
[6] 6.056269e-03 1.201495e-03 2.043113e-04 3.039979e-05 4.020650e-06
[11] 4.785910e-07 5.178930e-08 5.137206e-09 4.703833e-10 3.999375e-11
[16] 3.173723e-12 2.361116e-13 1.653243e-14 1.093282e-15 6.849312e-17
[21] 4.076479e-18 2.310650e-19 1.250200e-20 6.470235e-22 3.209054e-23
[26] 1.527936e-24 6.995200e-26 3.083931e-27 1.311037e-28 5.381273e-30
[31] 2.135168e-31 8.198578e-33 3.049698e-34 1.100047e-35 3.851238e-37
[36] 1.309787e-38 4.330782e-40 1.393262e-41 4.364329e-43 1.332052e-44
[41] 3.963960e-46 1.150837e-47 3.261614e-49 9.028849e-51 2.442575e-52
[46] 6.461058e-54 1.671914e-55 4.234327e-57 1.050053e-58 2.550842e-60
[51] 6.072699e-62 1.417359e-63 3.244477e-65 7.286804e-67 1.606244e-68
[56] 3.476299e-70 7.389197e-72 1.543088e-73 3.166875e-75 6.389209e-77
[61] 1.267547e-78 2.473446e-80 4.748747e-82 8.972360e-84 1.668764e-85
[66] 3.055975e-87 5.511554e-89 9.791913e-91 1.714063e-92 2.956964e-94
[71] 5.028242e-96 8.429970e-98 1.393676e-99 2.272517e-101 3.655474e-103
[76] 5.801638e-105 9.086680e-107 1.404697e-108 2.143662e-110 3.229963e-112
[81] 4.805911e-114 7.062508e-116 1.025211e-117 1.470292e-119 2.083495e-121
[86] 2.917707e-123 4.038420e-125 5.525359e-127 7.473879e-129 9.995954e-131
[91] 1.322056e-132 1.729326e-134 2.237470e-136 2.863799e-138 3.626460e-140
[96] 4.543887e-142 5.634099e-144 6.913866e-146 8.397754e-148 1.009709e-149
> N<-sum(!(is.na(homes)))
> N
[1] 3310
> # Expected number of matches for each goal count: scale the Poisson
> # probabilities by N, the number of matches with a recorded home score
> # computed above, instead of repeating its value as a literal.
> awayp <- awayp * N
> homep <- homep * N
> awayp
[1] 1.006638e+03 1.198234e+03 7.131480e+02 2.829611e+02 8.420442e+01
[6] 2.004625e+01 3.976950e+00 6.762703e-01 1.006233e-01 1.330835e-02
[11] 1.584136e-03 1.714226e-04 1.700415e-05 1.556969e-06 1.323793e-07
[16] 1.050502e-08 7.815293e-10 5.472233e-11 3.618764e-12 2.267122e-13
[21] 1.349314e-14 7.648251e-16 4.138164e-17 2.141648e-18 1.062197e-19
[26] 5.057469e-21 2.315411e-22 1.020781e-23 4.339531e-25 1.781201e-26
[31] 7.067405e-28 2.713729e-29 1.009450e-30 3.641154e-32 1.274760e-33
[36] 4.335394e-35 1.433489e-36 4.611697e-38 1.444593e-39 4.409091e-41
[41] 1.312071e-42 3.809269e-44 1.079594e-45 2.988549e-47 8.084924e-49
[46] 2.138610e-50 5.534037e-52 1.401562e-53 3.475677e-55 8.443287e-57
[51] 2.010063e-58 4.691458e-60 1.073922e-61 2.411932e-63 5.316668e-65
[56] 1.150655e-66 2.445824e-68 5.107621e-70 1.048236e-71 2.114828e-73
[61] 4.195581e-75 8.187107e-77 1.571835e-78 2.969851e-80 5.523609e-82
[66] 1.011528e-83 1.824325e-85 3.241123e-87 5.673549e-89 9.787550e-91
[71] 1.664348e-92 2.790320e-94 4.613067e-96 7.522031e-98 1.209962e-99
[76] 1.920342e-101 3.007691e-103 4.649548e-105 7.095522e-107 1.069118e-108
[81] 1.590757e-110 2.337690e-112 3.393449e-114 4.866665e-116 6.896368e-118
[86] 9.657611e-120 1.336717e-121 1.828894e-123 2.473854e-125 3.308661e-127
[91] 4.376007e-129 5.724068e-131 7.406026e-133 9.479174e-135 1.200358e-136
[96] 1.504026e-138 1.864887e-140 2.288490e-142 2.779656e-144 3.342136e-146
> # Observed number of matches for each goal count 0..99, aligned with the
> # 100 expected counts in awayp/homep. Both vectors are built from scratch
> # so the result does not depend on awayobs/homeobs already existing in the
> # loaded workspace; missing scores are ignored via na.rm = TRUE.
> awayobs <- vapply(0:99, function(k) sum(aways == k, na.rm = TRUE), integer(1))
> homeobs <- vapply(0:99, function(k) sum(homes == k, na.rm = TRUE), integer(1))
> awayobs
[1] 1104 1098 665 311 93 28 10 1 0 0 0 0 0 0 0
[16] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[31] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[46] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[61] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[76] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[91] 0 0 0 0 0 0 0 0 0 0
> homeobs
[1] 748 1063 810 426 174 59 22 5 3 0 0 0 0 0 0
[16] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[31] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[46] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[61] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[76] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[91] 0 0 0 0 0 0 0 0 0 0
> # We compute the chi-square goodness-of-fit statistics,
> # sum((observed - expected)^2 / expected), for home and away goals.
> chisqhome<-sum((homeobs-homep)^2/homep)
> chisqaway<-sum((awayobs-awayp)^2/awayp)
> chisqhome
[1] 22.72505
> chisqaway
[1] 37.29772
> # We compare the statistic with chi-square distributions for several
> # candidate degrees of freedom.
> pchisq(chisqhome,df=99)
[1] 6.149315e-17
> pchisq(chisqhome,df=8)
[1] 0.9962642
> qchisq(.95,df=99)
[1] 123.2252
> qchisq(.95,df=8)
[1] 15.50731
> qchisq(.95,df=7)
[1] 14.06714
> qchisq(.95,df=6)
[1] 12.59159
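> # The conclusion depends on the degrees of freedom we choose.
> # For goal counts higher than 8 both the observed and the expected
> # counts are (practically) zero: nothing is observed and the Poisson
> # model expects something very close to zero, so those cells add
> # almost nothing to the statistic while inflating the degrees of freedom.
> # When we restrict attention to the sensible range of goals, [0,8],
> # which is what we actually observe and where the expected counts do
> # not round to zero, we should reject the Poisson hypothesis.
> # pchisq() returns the lower-tail probability, so the p-value is
> # 1 - pchisq(...); for home goals it is about 0.004, i.e. we reject
> # for any alpha larger than roughly 0.004: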
> pchisq(chisqhome,df=8)
[1] 0.9962642
> pchisq(chisqaway,df=8)
[1] 0.9999899
> pchisq(chisqaway,df=7)
[1] 0.9999959
> # The rejection is even clearer for away goals: the upper-tail
> # p-value, 1 - pchisq(...), is about 1e-05 or smaller.
>