-
Notifications
You must be signed in to change notification settings - Fork 0
/
LeesHogueDavisHwk3.html
781 lines (584 loc) · 113 KB
/
LeesHogueDavisHwk3.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- saved from url=(0014)about:internet -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Confidence Intervals Homework</title>
<base target="_blank"/>
<style type="text/css">
/* ---- Screen styles for the rendered report ---- */

/* Base typography and page background. */
body,
td {
  font-family: sans-serif;
  background-color: white;
  font-size: 12px;
  margin: 8px;
}

/* Monospace stack shared by inline code and code blocks. */
tt,
code,
pre {
  font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace;
}

/* Heading scale, largest to smallest. */
h1 { font-size: 2.2em; }
h2 { font-size: 1.8em; }
h3 { font-size: 1.4em; }
h4 { font-size: 1.0em; }
h5 { font-size: 0.9em; }
h6 { font-size: 0.8em; }

a:visited {
  color: rgb(50%, 0%, 50%);
}

/* Code/output boxes. */
pre {
  margin-top: 0;
  max-width: 95%;
  border: 1px solid #ccc;
}

pre code {
  display: block;
  padding: 0.5em;
}

/* R source chunks get a light grey background. */
code.r {
  background-color: #F8F8F8;
}

table,
td,
th {
  border: none;
}

/* Blockquotes are rendered as grey text with a left rule. */
blockquote {
  color: #666;
  margin: 0;
  padding-left: 1em;
  border-left: 0.5em solid #EEE;
}

/* Horizontal rule drawn as a thin dotted top border only. */
hr {
  height: 0px;
  border-bottom: none;
  border-top: thin dotted #999;
}

/* ---- Print styles ---- */
@media print {
  /* Force black-on-transparent for every element. */
  * {
    background: transparent !important;
    color: black !important;
    filter: none !important;
    -ms-filter: none !important;
  }

  body {
    font-size: 12pt;
    max-width: 100%;
  }

  a,
  a:visited {
    text-decoration: underline;
  }

  /* An <hr> is hidden in print and forces a page break before it. */
  hr {
    visibility: hidden;
    page-break-before: always;
  }

  pre,
  blockquote {
    padding-right: 1em;
    page-break-inside: avoid;
  }

  tr,
  img {
    page-break-inside: avoid;
  }

  img {
    max-width: 100% !important;
  }

  /* Mirrored margins for left- and right-hand pages. */
  @page :left {
    margin: 15mm 20mm 15mm 10mm;
  }

  @page :right {
    margin: 15mm 10mm 15mm 20mm;
  }

  p,
  h2,
  h3 {
    orphans: 3;
    widows: 3;
  }

  h2,
  h3 {
    page-break-after: avoid;
  }
}
</style>
<!-- Styles for R syntax highlighter -->
<style type="text/css">
/* Token colours applied to the <span class="..."> elements that the
   syntax highlighter inserts inside <pre> blocks. */
pre .operator,
pre .paren      { color: #687687; }
pre .literal    { color: #5848F6; }
pre .number     { color: #0000CD; }
pre .comment    { color: #4C886B; }
pre .keyword    { color: #0000FF; }
pre .identifier { color: #000000; }
pre .string     { color: #036A07; }
</style>
<!-- R syntax highlighter -->
<script type="text/javascript">
// Minified syntax highlighter. The code below:
//   1. builds the global `hljs` object, exposing LANGUAGES, highlight,
//      highlightAuto, fixMarkup, highlightBlock, initHighlighting,
//      initHighlightingOnLoad, inherit, regex-source constants
//      (IR, UIR, NR, CNR, BNR, RSR, ER) and reusable modes
//      (BE, ASM, QSM, CLCM, CBLCLM, HCM, NM, CNM, BNM);
//   2. registers two language definitions, hljs.LANGUAGES.cpp and
//      hljs.LANGUAGES.r;
//   3. calls hljs.initHighlightingOnLoad(), which hooks initHighlighting to
//      DOMContentLoaded and load (falling back to attachEvent / window.onload).
//      initHighlighting runs once, visits every <pre> element and highlights
//      its leading <code> child when a class name on that element or its
//      parent matches a registered language.
//
// NOTE(review): the helper m() replaces "&" with "&" and "<" with "<", which
//   is a no-op as written; it presumably should emit HTML entities. Verify
//   against the upstream highlight.js source before relying on escaping.
// NOTE(review): the R language modes reference hljs.IMMEDIATE_RE, which is not
//   assigned anywhere in the visible code (only hljs.ER is). With an undefined
//   end pattern the mode compiler substitutes "\\B|\\b". Confirm this is the
//   intended behaviour.
// NOTE(review): the minified text is physically wrapped; one wrap in the R
//   definition falls directly after a backslash inside a string literal.
//   Confirm the resulting pattern is the intended one.
var hljs=new function(){function m(p){return p.replace(/&/gm,"&").replace(/</gm,"<")}function f(r,q,p){return RegExp(q,"m"+(r.cI?"i":"")+(p?"g":""))}function b(r){for(var p=0;p<r.childNodes.length;p++){var q=r.childNodes[p];if(q.nodeName=="CODE"){return q}if(!(q.nodeType==3&&q.nodeValue.match(/\s+/))){break}}}function h(t,s){var p="";for(var r=0;r<t.childNodes.length;r++){if(t.childNodes[r].nodeType==3){var q=t.childNodes[r].nodeValue;if(s){q=q.replace(/\n/g,"")}p+=q}else{if(t.childNodes[r].nodeName=="BR"){p+="\n"}else{p+=h(t.childNodes[r])}}}if(/MSIE [678]/.test(navigator.userAgent)){p=p.replace(/\r/g,"\n")}return p}function a(s){var r=s.className.split(/\s+/);r=r.concat(s.parentNode.className.split(/\s+/));for(var q=0;q<r.length;q++){var p=r[q].replace(/^language-/,"");if(e[p]){return p}}}function c(q){var p=[];(function(s,t){for(var r=0;r<s.childNodes.length;r++){if(s.childNodes[r].nodeType==3){t+=s.childNodes[r].nodeValue.length}else{if(s.childNodes[r].nodeName=="BR"){t+=1}else{if(s.childNodes[r].nodeType==1){p.push({event:"start",offset:t,node:s.childNodes[r]});t=arguments.callee(s.childNodes[r],t);p.push({event:"stop",offset:t,node:s.childNodes[r]})}}}}return t})(q,0);return p}function k(y,w,x){var q=0;var z="";var s=[];function u(){if(y.length&&w.length){if(y[0].offset!=w[0].offset){return(y[0].offset<w[0].offset)?y:w}else{return w[0].event=="start"?y:w}}else{return y.length?y:w}}function t(D){var A="<"+D.nodeName.toLowerCase();for(var B=0;B<D.attributes.length;B++){var C=D.attributes[B];A+=" "+C.nodeName.toLowerCase();if(C.value!==undefined&&C.value!==false&&C.value!==null){A+='="'+m(C.value)+'"'}}return A+">"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("</"+p.nodeName.toLowerCase()+">")}while(p!=v.node);s.splice(r,1);while(r<s.length){z+=t(s[r]);r++}}}}return z+m(x.substr(q))}function j(){function 
q(x,y,v){if(x.compiled){return}var u;var s=[];if(x.k){x.lR=f(y,x.l||hljs.IR,true);for(var w in x.k){if(!x.k.hasOwnProperty(w)){continue}if(x.k[w] instanceof Object){u=x.k[w]}else{u=x.k;w="keyword"}for(var r in u){if(!u.hasOwnProperty(r)){continue}x.k[r]=[w,u[r]];s.push(r)}}}if(!v){if(x.bWK){x.b="\\b("+s.join("|")+")\\s"}x.bR=f(y,x.b?x.b:"\\B|\\b");if(!x.e&&!x.eW){x.e="\\B|\\b"}if(x.e){x.eR=f(y,x.e)}}if(x.i){x.iR=f(y,x.i)}if(x.r===undefined){x.r=1}if(!x.c){x.c=[]}x.compiled=true;for(var t=0;t<x.c.length;t++){if(x.c[t]=="self"){x.c[t]=x}q(x.c[t],y,false)}if(x.starts){q(x.starts,y,false)}}for(var p in e){if(!e.hasOwnProperty(p)){continue}q(e[p].dM,e[p],true)}}function d(B,C){if(!j.called){j();j.called=true}function q(r,M){for(var L=0;L<M.c.length;L++){if((M.c[L].bR.exec(r)||[null])[0]==r){return M.c[L]}}}function v(L,r){if(D[L].e&&D[L].eR.test(r)){return 1}if(D[L].eW){var M=v(L-1,r);return M?M+1:0}return 0}function w(r,L){return L.i&&L.iR.test(r)}function K(N,O){var M=[];for(var L=0;L<N.c.length;L++){M.push(N.c[L].b)}var r=D.length-1;do{if(D[r].e){M.push(D[r].e)}r--}while(D[r+1].eW);if(N.i){M.push(N.i)}return f(O,M.join("|"),true)}function p(M,L){var N=D[D.length-1];if(!N.t){N.t=K(N,E)}N.t.lastIndex=L;var r=N.t.exec(M);return r?[M.substr(L,r.index-L),r[0],false]:[M.substr(L),"",true]}function z(N,r){var L=E.cI?r[0].toLowerCase():r[0];var M=N.k[L];if(M&&M instanceof Array){return M}return false}function F(L,P){L=m(L);if(!P.k){return L}var r="";var O=0;P.lR.lastIndex=0;var M=P.lR.exec(L);while(M){r+=L.substr(O,M.index-O);var N=z(P,M);if(N){x+=N[1];r+='<span class="'+N[0]+'">'+M[0]+"</span>"}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'<span class="'+M.cN+'">':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var 
R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"</span>":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L>1){O=D[D.length-2].cN?"</span>":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length>1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.r>r.keyword_count+r.r){r=s}if(s.keyword_count+s.r>p.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((<[^>]+>|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"<br>")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML="<pre><code>"+y.value+"</code></pre>";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var 
r=document.getElementsByTagName("pre");for(var p=0;p<r.length;p++){var q=b(r[p]);if(q){n(q,hljs.tabReplace)}}}function l(){if(window.addEventListener){window.addEventListener("DOMContentLoaded",o,false);window.addEventListener("load",o,false)}else{if(window.attachEvent){window.attachEvent("onload",o)}else{window.onload=o}}}var e={};this.LANGUAGES=e;this.highlight=d;this.highlightAuto=g;this.fixMarkup=i;this.highlightBlock=n;this.initHighlighting=o;this.initHighlightingOnLoad=l;this.IR="[a-zA-Z][a-zA-Z0-9_]*";this.UIR="[a-zA-Z_][a-zA-Z0-9_]*";this.NR="\\b\\d+(\\.\\d+)?";this.CNR="\\b(0[xX][a-fA-F0-9]+|(\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)";this.BNR="\\b(0b[01]+)";this.RSR="!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\*",e:"\\*/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return p}}();hljs.LANGUAGES.cpp=function(){var 
a={keyword:{"false":1,"int":1,"float":1,"while":1,"private":1,"char":1,"catch":1,"export":1,virtual:1,operator:2,sizeof:2,dynamic_cast:2,typedef:2,const_cast:2,"const":1,struct:1,"for":1,static_cast:2,union:1,namespace:1,unsigned:1,"long":1,"throw":1,"volatile":2,"static":1,"protected":1,bool:1,template:1,mutable:1,"if":1,"public":1,friend:2,"do":1,"return":1,"goto":1,auto:1,"void":2,"enum":1,"else":1,"break":1,"new":1,extern:1,using:1,"true":1,"class":1,asm:1,"case":1,typeid:1,"short":1,reinterpret_cast:2,"default":1,"double":1,register:1,explicit:1,signed:1,typename:1,"try":1,"this":1,"switch":1,"continue":1,wchar_t:1,inline:1,"delete":1,alignof:1,char16_t:1,char32_t:1,constexpr:1,decltype:1,noexcept:1,nullptr:1,static_assert:1,thread_local:1,restrict:1,_Bool:1,complex:1},built_in:{std:1,string:1,cin:1,cout:1,cerr:1,clog:1,stringstream:1,istringstream:1,ostringstream:1,auto_ptr:1,deque:1,list:1,queue:1,stack:1,vector:1,map:1,set:1,bitset:1,multiset:1,multimap:1,unordered_set:1,unordered_map:1,unordered_multiset:1,unordered_multimap:1,array:1,shared_ptr:1}};return{dM:{k:a,i:"</",c:[hljs.CLCM,hljs.CBLCLM,hljs.QSM,{cN:"string",b:"'\\\\?.",e:"'",i:"."},{cN:"number",b:"\\b(\\d+(\\.\\d*)?|\\.\\d+)(u|U|l|L|ul|UL|f|F)"},hljs.CNM,{cN:"preprocessor",b:"#",e:"$"},{cN:"stl_container",b:"\\b(deque|list|queue|stack|vector|map|set|bitset|multiset|multimap|unordered_map|unordered_set|unordered_multiset|unordered_multimap|array)\\s*<",e:">",k:a,r:10,c:["self"]}]}}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d*)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch|library|setGeneric|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\
\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if|in|break|next|repeat|else|for|return|switch|while|try|stop|warning|require|attach|detach|source|setMethod|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA|NA_integer_|NA_real_|NA_character_|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL|TRUE|FALSE|T|F|Inf|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]*\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"<\\-(?!\\s*\\d)",e:hljs.IMMEDIATE_RE,r:2},{cN:"operator",b:"\\->|<\\-",e:hljs.IMMEDIATE_RE,r:1},{cN:"operator",b:"%%|~",e:hljs.IMMEDIATE_RE},{cN:"operator",b:">=|<=|==|!=|\\|\\||&&|=|\\+|\\-|\\*|/|\\^|>|<|!|&|\\||\\$|:",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"%",e:"%",i:"\\n",r:1},{cN:"identifier",b:"`",e:"`",r:0},{cN:"string",b:'"',e:'"',c:[hljs.BE],r:0},{cN:"string",b:"'",e:"'",c:[hljs.BE],r:0},{cN:"paren",b:"[[({\\])}]",e:hljs.IMMEDIATE_RE,r:0}]}};
hljs.initHighlightingOnLoad();
</script>
<!-- MathJax scripts -->
<script type="text/javascript" src="https://c328740.ssl.cf1.rackcdn.com/mathjax/2.0-latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
</script>
</head>
<body>
<h1>Confidence Intervals Homework</h1>
<h3>NAMES: Eitan Lees, Susannah Hogue, Tyler Davis — Partial Solution</h3>
<p><em>This is entirely our own work except as noted at the end of the document.</em></p>
<p><strong>Due No Later than 5pm, November 15, 2012</strong></p>
<pre><code>[1] "2012-11-15 13:51:50 EST"
</code></pre>
<p><strong>Prob1</strong> - Import the data set <code>Spruce</code> into <code>R</code>.</p>
<pre><code class="r">Spruce <- importData("Spruce")
head(Spruce)
</code></pre>
<pre><code> Tree Competition Fertilizer Height0 Height5 Diameter0 Diameter5
1 1 NC F 15.0 60.0 1.984 7.4
2 2 NC F 9.0 45.2 1.191 5.2
3 3 NC F 12.0 42.0 1.786 5.7
4 4 NC F 13.7 49.5 1.587 6.4
5 5 NC F 12.0 47.3 1.587 6.2
6 6 NC F 12.0 56.4 1.587 7.4
Ht.change Di.change
1 45.0 5.416
2 36.2 4.009
3 30.0 3.914
4 35.8 4.812
5 35.3 4.613
6 44.4 5.812
</code></pre>
<ul>
<li>Create exploratory plots to check the distribution of the variable <code>Ht.change</code>.</li>
</ul>
<pre><code class="r">EDA.hist(Spruce$Ht.change)
EDA.qq(Spruce$Ht.change)
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-3"/> <img src="" alt="plot of chunk unnamed-chunk-3"/> </p>
<p>The data has an approximately normal distribution.</p>
<ul>
<li>Find a 95% \( t \) confidence interval for the mean height change over the 5-year period of the study and give a sentence interpreting your interval.</li>
</ul>
<blockquote>
<p>SOLUTION:</p>
</blockquote>
<pre><code class="r">spruce_t <- t.test(Spruce$Ht.change)
</code></pre>
<p>We are 95% confident that the mean change in height for a spruce tree over the 5-year period is within (<code>28.3368</code>, <code>33.5298</code>).</p>
<ul>
<li>Create exploratory plots to compare the distributions of the variable <code>Ht.change</code> for the seedlings in the fertilized and nonfertilized plots.</li>
</ul>
<p><strong>With Fertilizer</strong></p>
<pre><code class="r">EDA.hist(Spruce$Ht.change[Spruce$Fertilizer == "F"])
EDA.qq(Spruce$Ht.change[Spruce$Fertilizer == "F"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-5"/> <img src="" alt="plot of chunk unnamed-chunk-5"/> </p>
<p><strong>Without Fertilizer</strong></p>
<pre><code class="r">EDA.hist(Spruce$Ht.change[Spruce$Fertilizer != "F"])
EDA.qq(Spruce$Ht.change[Spruce$Fertilizer != "F"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-6"/> <img src="" alt="plot of chunk unnamed-chunk-6"/> </p>
<ul>
<li>Find the 95% one-sided lower \( t \) confidence bound for the difference in mean heights (\( \mu_F - \mu_{NF} \)) over the 5-year period of the study and give a sentence interpreting your interval.</li>
</ul>
<blockquote>
<p>SOLUTION:</p>
</blockquote>
<pre><code class="r">spruce_t2 <- t.test(Spruce$Ht.change ~ Spruce$Fertilizer, alternative = "greater")
</code></pre>
<p>We are 95% confident that the difference in mean height change (\( \mu_F - \mu_{NF} \)) between spruce trees that were fertilized and those that were not over the 5-year period is at least <code>11.4666</code>.</p>
<p><strong>Prob2</strong> - Consider the data set <code>Girls2004</code> with birth weights of baby girls born in Wyoming or Alaska.</p>
<ul>
<li>Create exploratory plots and compare the distribution of weight between the babies born in the two states.</li>
</ul>
<pre><code class="r">Girls <- importData("Girls2004")
head(Girls)
</code></pre>
<pre><code> ID State MothersAge Smoker Weight Gestation
1 1 WY 15-19 No 3085 40
2 2 WY 35-39 No 3515 39
3 3 WY 25-29 No 3775 40
4 4 WY 20-24 No 3265 39
5 5 WY 25-29 No 2970 40
6 6 WY 20-24 No 2850 38
</code></pre>
<blockquote>
<p>SOLUTION:</p>
</blockquote>
<p><strong>Wyoming</strong></p>
<pre><code class="r">EDA.hist(Girls$Weight[Girls$State == "WY"])
EDA.qq(Girls$Weight[Girls$State == "WY"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-9"/> <img src="" alt="plot of chunk unnamed-chunk-9"/> </p>
<p><strong>Alaska</strong></p>
<pre><code class="r">EDA.hist(Girls$Weight[Girls$State == "AK"])
EDA.qq(Girls$Weight[Girls$State == "AK"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-10"/> <img src="" alt="plot of chunk unnamed-chunk-10"/> </p>
<ul>
<li>Find a 95% \( t \) confidence interval for the mean difference in weights for girls born in these two states. Give a sentence interpreting this interval.</li>
</ul>
<blockquote>
<p>SOLUTION:</p>
</blockquote>
<pre><code class="r">girls_t <- t.test(Girls$Weight[Girls$State == "AK"], Girls$Weight[Girls$State ==
"WY"])
</code></pre>
<p>We are 95% confident that the difference in mean weights between girls born in Alaska and girls born in Wyoming (Alaska minus Wyoming) is within (<code>83.294</code>, <code>533.606</code>).</p>
<ul>
<li>Create exploratory plots and compare the distribution of weights between babies born to nonsmokers and babies born to smokers.</li>
</ul>
<p><strong>Smokers</strong></p>
<pre><code class="r">EDA.hist(Girls$Weight[Girls$Smoker == "Yes"])
EDA.qq(Girls$Weight[Girls$Smoker == "Yes"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-12"/> <img src="" alt="plot of chunk unnamed-chunk-12"/> </p>
<p><strong>Non-Smokers</strong></p>
<pre><code class="r">EDA.hist(Girls$Weight[Girls$Smoker == "No"])
EDA.qq(Girls$Weight[Girls$Smoker == "No"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-13"/> <img src="" alt="plot of chunk unnamed-chunk-13"/> </p>
<ul>
<li>Find a 95% \( t \) confidence interval for the difference in mean weights between babies born to nonsmokers and smokers. Give a sentence interpreting this interval.</li>
</ul>
<blockquote>
<p>SOLUTION:</p>
</blockquote>
<pre><code class="r">girls_t2 <- t.test(Girls$Weight[Girls$Smoker == "Yes"], Girls$Weight[Girls$Smoker ==
"No"])
</code></pre>
<p>We are 95% confident that the difference in mean weights between girls born to smokers and girls born to non-smokers (smokers minus non-smokers) is within (<code>-617.9197</code>, <code>44.033</code>).</p>
<p><strong>Prob3</strong> - Import the <code>FlightDelays</code> data set into <code>R</code>. Although the data represent all flights for United Airlines and American Airlines in May and June 2009, assume for this exercise that these flights are a sample from all flights flown by the two airlines under similar conditions. We will compare the lengths of flight delays between the two airlines.</p>
<pre><code class="r">Flight <- importData("FlightDelays")
head(Flight)
</code></pre>
<pre><code> ID Carrier FlightNo Destination DepartTime Day Month FlightLength Delay
1 1 UA 403 DEN 4-8am Fri May 281 -1
2 2 UA 405 DEN 8-Noon Fri May 277 102
3 3 UA 409 DEN 4-8pm Fri May 279 4
4 4 UA 511 ORD 8-Noon Fri May 158 -2
5 5 UA 667 ORD 4-8am Fri May 143 -3
6 6 UA 669 ORD 4-8am Fri May 150 0
Delayed30
1 No
2 Yes
3 No
4 No
5 No
6 No
</code></pre>
<ul>
<li>Create exploratory plots of the lengths of delays for the two airlines.</li>
</ul>
<p><strong>United Airlines</strong></p>
<pre><code class="r">EDA.hist(Flight$Delay[Flight$Carrier == "UA"])
EDA.qq(Flight$Delay[Flight$Carrier == "UA"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-16"/> <img src="" alt="plot of chunk unnamed-chunk-16"/> </p>
<p><strong>American Airlines</strong></p>
<pre><code class="r">EDA.hist(Flight$Delay[Flight$Carrier == "AA"])
EDA.qq(Flight$Delay[Flight$Carrier == "AA"])
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-17"/> <img src="" alt="plot of chunk unnamed-chunk-17"/> </p>
<ul>
<li>Find a 95% \( t \) confidence interval for the difference in mean flight delays between the two airlines and interpret this interval.</li>
</ul>
<blockquote>
<p>SOLUTION: </p>
</blockquote>
<pre><code class="r">flight_t <- t.test(Flight$Delay[Flight$Carrier == "UA"], Flight$Delay[Flight$Carrier ==
"AA"])
</code></pre>
<p>We are 95% confident that the difference in mean delay times between United Airlines and American Airlines (United minus American) is within (<code>2.8682</code>, <code>8.9032</code>).</p>
<p><strong>Prob4</strong> - Run a simulation to see if the \( t \) ratio \( T = (\bar{X} -\mu)/(S/\sqrt{n}) \) has a \( t \) distribution or even an approximate \( t \) distribution when the samples are drawn from a nonnormal distribution. Be sure to superimpose the appropriate \( t \) density curve over the density of your simulated \( T \). Try two different nonnormal distributions \( \left( Unif(a = 0, b = 1), Exp(\lambda = 1) \right) \) and remember to see if sample size makes a difference (use \( n = 15 \) and \( n=500 \)).</p>
<pre><code class="r">########################## HELP!!!! ################################
n <- 15
mu <- 0
N <- rexp(n)
# Tb <- mean(N)-mu/sqrt(n)
curve(dt(x, n - 1), -5, 5)
curve(dexp(x, 1), add = T)
########################## HELP!!!! ################################
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-19"/> </p>
<p><strong>Prob5</strong> - One question in the 2002 General Social Survey asked participants whom they voted for in the 2000 election. Of the 980 women who voted, 459 voted for Bush. Of the 759 men who voted, 426 voted for Bush.</p>
<ul>
<li>Find a 95% confidence interval for the proportion of women who voted for Bush.</li>
</ul>
<blockquote>
<p>SOLUTION: We are 95% confident that the proportion of women who voted for Bush from the 2002 General Social Survey is within (<code>0.4368</code>, <code>0.5002</code>).</p>
</blockquote>
<ul>
<li>Find a 95% confidence interval for the proportion of men who voted for Bush. Do the intervals for the men and women overlap? What, if anything, can you conclude about gender difference in voter preference?</li>
</ul>
<blockquote>
<p>SOLUTION: We are 95% confident that the proportion of men who voted for Bush from the 2002 General Social Survey is within (<code>0.5251</code>, <code>0.5968</code>). The intervals for the proportion of men who voted for Bush and the proportion of women who voted for Bush do not overlap, but since we computed the intervals for the two proportions separately, we cannot conclude anything about their apparent difference. To be able to conclude something, we would need to test a difference of proportions explicitly.</p>
</blockquote>
<p><strong>Prob6</strong> - A retail store wishes to conduct a marketing survey of its customers to see if customers would favor longer store hours. How many people should be in their sample if the marketers want their margin of error to be at most 3% with 95% confidence, assuming</p>
<ul>
<li>they have no preconceived idea of how customers will respond, and</li>
</ul>
<pre><code class="r">error <- 0.03
p_sqiggle <- 0.5
sample_size <- ceiling((p_sqiggle * (1 - p_sqiggle))/(error/1.96)^2)
</code></pre>
<blockquote>
<p>SOLUTION: To provide a confidence level of 95% and a margin of error of at most 3%, the marketers should get responses from a minimum of <code>1068</code> people.</p>
</blockquote>
<ul>
<li>a previous survey indicated that about 65% of customers favor longer store hours.</li>
</ul>
<pre><code class="r">p_sqiggle <- 0.65
sample_size <- ceiling((p_sqiggle * (1 - p_sqiggle))/(error/1.96)^2)
</code></pre>
<blockquote>
<p>SOLUTION: Given the knowledge of the survey where 65% of respondents favored longer hours, the store should use a sample size of <code>972</code> people.</p>
</blockquote>
<p><strong>Prob7</strong> - Suppose researchers wish to study the effectiveness of a new drug to alleviate hives due to math anxiety. Seven hundred math students are randomly assigned to take either this drug or a placebo. Suppose 34 of the 350 students who took the drug break out in hives compared to 56 of the 350 students who took the placebo.</p>
<ul>
<li>Compute a 95% confidence interval for the proportion of students taking the drug who break out in hives.</li>
</ul>
<pre><code class="r">test <- prop.test(34, 350)
</code></pre>
<blockquote>
<p>SOLUTION: We are 95% confident that the proportion of students taking the drug who break out in hives is within (<code>0.0691</code>, <code>0.1343</code>)</p>
</blockquote>
<ul>
<li>Compute a 95% confidence interval for the proportion of students taking the placebo who break out in hives.</li>
</ul>
<pre><code class="r">test <- prop.test(56, 350)
</code></pre>
<blockquote>
<p>SOLUTION: We are 95% confident that the proportion of students taking the placebo who break out in hives is within (<code>0.124</code>, <code>0.2036</code>)</p>
</blockquote>
<ul>
<li>Do the intervals overlap? What, if anything, can you conclude about the effectiveness of the drug?</li>
</ul>
<blockquote>
<p>SOLUTION: The intervals overlap, so from these two intervals alone we cannot conclude anything about the effectiveness of the drug.</p>
</blockquote>
<ul>
<li>Compute 95% confidence interval for the difference in proportions of students who break out in hives by using or not using this drug and give a sentence interpreting this interval.</li>
</ul>
<pre><code class="r">x <- c(34, 56)
n <- c(350, 350)
test <- prop.test(x, n)
</code></pre>
<blockquote>
<p>SOLUTION: We are 95% confident that the difference between the proportion of students taking the drug who break out in hives and the proportion of students taking the placebo who break out in hives (drug minus placebo) is within (<code>-0.1151</code>, <code>-0.0106</code>). Since zero is not included in the interval, we have sufficient statistical evidence to reject the null hypothesis that the drug and the placebo affect students with equal probability.</p>
</blockquote>
<p><strong>Prob8</strong> - An article in the March 2003 <em>New England Journal of Medicine</em> describes a study to see if aspirin is effective in reducing the incidence of colorectal adenomas, a precursor to most colorectal cancers (Sandler et al. (2003)). Of 517 patients in the study, 259 were randomly assigned to receive aspirin and the remaining 258 received a placebo. One or more adenomas were found in 44 of the aspirin group and 70 in the placebo group. Find a 95% one-sided upper bound for the difference in proportions \( (p_A - p_P) \) and interpret your interval.</p>
<pre><code class="r">x <- c(44, 70)
n <- c(259, 258)
test <- prop.test(x, n, alternative = "less")
</code></pre>
<blockquote>
<p>SOLUTION: We are 95% confident that the difference in the proportions of patients with one or more adenomas between the aspirin group and the placebo group (\( p_A - p_P \)) is less than or equal to <code>-0.038</code>.</p>
</blockquote>
<p><strong>Prob9</strong> - The data set <code>Bangladesh</code> has measurements on water quality from 271 wells in Bangladesh. There are two missing values in the chlorine variable. Use the following <code>R</code> code to remove these two observations.</p>
<p><code>> chlorine <- with(Bangladesh, Chlorine[!is.na(Chlorine)])</code></p>
<pre><code class="r">Banga <- importData("Bangladesh")
chlorine <- with(Banga, Chlorine[!is.na(Chlorine)])
</code></pre>
<ul>
<li>Compute the numeric summaries of the chlorine levels and create a plot and comment on the distribution.</li>
</ul>
<pre><code class="r">EDA.hist(chlorine)
EDA.qq(chlorine)
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-27"/> <img src="" alt="plot of chunk unnamed-chunk-27"/> </p>
<blockquote>
<p>SOLUTION: The average chlorine level in the 269 Bangladesh wells with recorded chlorine values (of the 271 sampled) is <code>78.084</code>, with a standard deviation of <code>210.0192</code>. The distribution is skewed to the right, and appears to be exponential.</p>
</blockquote>
<ul>
<li>Find a 95% \( t \) confidence interval for the mean \( \mu \) of chlorine levels in Bangladesh wells.</li>
</ul>
<pre><code class="r">test <- t.test(chlorine)
</code></pre>
<blockquote>
<p>SOLUTION: We are 95% confident that the mean \( \mu \) of chlorine levels in Bangladesh wells is within (<code>52.8726</code>, <code>103.2954</code>).</p>
</blockquote>
<ul>
<li>Find a 95% bootstrap percentile and bootstrap \( t \) confidence intervals for the mean chlorine level and compare results. Which confidence interval will you report?</li>
</ul>
<pre><code class="r">test <- tboot(chlorine)
test
</code></pre>
<pre><code> boot.conf percentile.conf
97.5% 56.43 54.66
2.5% 112.28 105.39
</code></pre>
<blockquote>
<p>SOLUTION: We would report the bootstrap confidence interval (<code>56.4291, 112.278</code>) because it accounts for skewness.</p>
</blockquote>
<ul>
<li><em>Johnson's \( t \) confidence interval</em> adjusts for skewness by shifting endpoints right or left for positive or negative skewness, respectively. The interval is \( \bar{X} + \frac{\hat{\kappa}_3}{6\sqrt{n}}(1 + 2q^2) \pm q\,\frac{S}{\sqrt{n}} \), where \( \hat{\kappa}_3 \) is a sample estimate of the population skewness \( E[(X - \mu)^3]/\sigma^3 \) and \( q \) denotes the \( 1 - \alpha/2 \) quantile for a \( t \) distribution with \( n-1 \) degrees of freedom. Calculate Johnson's \( t \) interval for the arsenic data (in <code>Bangladesh</code>) and compare with the formula \( t \) and bootstrap \( t \) intervals.</li>
</ul>
<blockquote>
<p>SOLUTION:</p>
</blockquote>
<pre><code class="r">skew <- skewness(Banga$Arsenic, na.rm = T)
test <- tboot(Banga$Arsenic)
</code></pre>
<p><strong>Prob10</strong> - The data set <code>MnGroundwater</code> has measurements on water quality of 895 randomly selected wells in Minnesota.</p>
<pre><code class="r">Water <- importData("MnGroundwater")
head(Water)
</code></pre>
<pre><code> County Aquifer.Group Water.Level Alkalinity Aluminum Arsenic
1 Aitkin surficial Quaternary 55 137000 0.059 1.810
2 Aitkin buried Quaternary 30 214000 2.380 0.059
3 Aitkin buried Quaternary 20 120000 0.410 1.440
4 Aitkin buried Quaternary 3 283000 158.190 6.340
5 Aitkin buried Quaternary 0 236000 0.059 10.170
6 Aitkin buried Quaternary 30 229000 0.059 6.900
Chloride Lead pH Basin.Name
1 490 0.17 7.1 Upper Mississippi River
2 89250 0.18 7.6 Upper Mississippi River
3 300 0.52 6.9 Upper Mississippi River
4 780 1.15 8.2 Upper Mississippi River
5 5090 0.02 7.9 Upper Mississippi River
6 2590 0.12 7.8 Upper Mississippi River
</code></pre>
<ul>
<li>Create a histogram, a density, and a normal quantile plot of the alkalinity and comment on the distribution.</li>
</ul>
<pre><code class="r">EDA.hist(Water$Alkalinity)
EDA.qq(Water$Alkalinity)
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-32"/> <img src="" alt="plot of chunk unnamed-chunk-32"/> </p>
<blockquote>
<p>SOLUTION: The distribution of Alkalinity across groundwater wells in Minnesota is approximately normal with a mean of <code>2.9068 &times; 10<sup>5</sup></code> and a standard deviation of <code>1.0833 &times; 10<sup>5</sup></code>.</p>
</blockquote>
<ul>
<li>Find the 95% \( t \) confidence interval for the mean \( \mu \) of alkalinity levels in Minnesota wells.</li>
</ul>
<pre><code class="r">test <- t.test(Water$Alkalinity)
</code></pre>
<blockquote>
<p>SOLUTION: We are 95% confident that the mean \( \mu \) of alkalinity levels in Minnesota wells is within (<code>2.8358 &times; 10<sup>5</sup></code>, <code>2.9779 &times; 10<sup>5</sup></code>).</p>
</blockquote>
<ul>
<li>Find the 95% bootstrap percentile and bootstrap \( t \) confidence intervals for the mean alkalinity level and compare the results. Which confidence interval will you report?</li>
</ul>
<blockquote>
<p>SOLUTION:</p>
</blockquote>
<p><strong>Prob11</strong> Consider the babies born in Texas in 2004 (<code>TXBirths2004</code>). We will compare the weights of babies born to nonsmokers and smokers.</p>
<pre><code class="r">Texas <- importData("TXBirths2004")
head(Texas)
</code></pre>
<pre><code> ID MothersAge Smoker Gender Weight Gestation Number Multiple
1 1 20-24 No Male 3033 39 1 No
2 2 20-24 No Male 3232 40 1 No
3 3 25-29 No Female 3317 37 1 No
4 4 25-29 No Female 2560 36 1 No
5 5 15-19 No Female 2126 37 1 No
6 6 30-34 No Female 2948 38 1 No
</code></pre>
<ul>
<li>How many nonsmokers and smokers are there in this data set?</li>
</ul>
<pre><code class="r">smokers <- sum(Texas$Smoker == "Yes")
nonsmokers <- sum(Texas$Smoker == "No")
</code></pre>
<blockquote>
<p>SOLUTION: In this data set of <code>1587</code> Texas births, <code>90</code> babies were born to smokers and <code>1497</code> to nonsmokers.</p>
</blockquote>
<ul>
<li>Create exploratory plots of the weights for the two groups and comment on the distributions.</li>
</ul>
<pre><code class="r">ggplot(data = Texas, aes(x = Weight)) + geom_histogram() + facet_grid(Smoker ~
.)
</code></pre>
<pre><code>stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
</code></pre>
<pre><code>stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
</code></pre>
<pre><code class="r">ggplot(data = Texas, aes(sample = Weight)) + stat_qq() + facet_grid(Smoker ~
.)
smokersEDA <- c(mean(Texas$Weight[Texas$Smoker == "Yes"]), sd(Texas$Weight[Texas$Smoker ==
"Yes"]))
nonsmokersEDA <- c(mean(Texas$Weight[Texas$Smoker == "No"]), sd(Texas$Weight[Texas$Smoker ==
"No"]))
</code></pre>
<p><img src="" alt="plot of chunk unnamed-chunk-36"/> <img src="" alt="plot of chunk unnamed-chunk-36"/> </p>
<blockquote>
<p>SOLUTION: The distribution of weights among babies born to smokers is approximately normal, with a mean of <code>3205.9889</code> and a standard deviation of <code>504.2439</code>. The distribution of weights among babies born to nonsmokers is slightly skewed to the left, with a mean of <code>3287.4937</code> and a standard deviation of <code>554.4829</code>.</p>
</blockquote>
<ul>
<li>Compute the 95% confidence interval for the difference in means using the formula \( t \), bootstrap percentile, and bootstrap \( t \) methods and compare your results. Which interval would you report?</li>
</ul>
<pre><code class="r">Weight.Smokers <- subset(Texas, select = Weight, Smoker == "Yes", drop = TRUE)
Weight.Non <- subset(Texas, select = Weight, Smoker == "No", drop = TRUE)
thetahat <- mean(Weight.Smokers) - mean(Weight.Non)
nx <- length(Weight.Smokers)
ny <- length(Weight.Non)
SE <- sqrt(var(Weight.Smokers)/nx + var(Weight.Non)/ny)
N <- 10^4
Tstar <- numeric(N)
DM <- numeric(N)
set.seed(1)
for (i in 1:N) {
bootx <- sample(Weight.Smokers, nx, replace = TRUE)
booty <- sample(Weight.Non, ny, replace = TRUE)
Tstar[i] <- (mean(bootx) - mean(booty) - thetahat)/sqrt(var(bootx)/nx +
var(booty)/ny)
DM[i] <- mean(bootx) - mean(booty)
}
CItboot <- thetahat - quantile(Tstar, c(0.975, 0.025)) * SE
names(CItboot) <- NULL
CItboot
</code></pre>
<pre><code>[1] -189.4 28.8
</code></pre>
<pre><code class="r">CIperct <- quantile(DM, c(0.025, 0.975))
CIperct
</code></pre>
<pre><code> 2.5% 97.5%
-188.63 26.02
</code></pre>
<pre><code class="r">t.test(Weight.Smokers, Weight.Non)$conf
</code></pre>
<pre><code>[1] -190.69 27.68
attr(,"conf.level")
[1] 0.95
</code></pre>
<blockquote>
<p>SOLUTION: The 95% bootstrap \( t \) interval for the difference in means is (<code>-189.4169</code>, <code>28.8041</code>). For comparison, the formula \( t \) interval is (<code>-190.6915</code>, <code>27.682</code>) and the bootstrap percentile interval is (<code>-188.6289</code>, <code>26.0167</code>).</p>
</blockquote>
<ul>
<li>Modify your result from the previous question to obtain a one-sided 95% \( t \) confidence interval (hypothesizing that babies born to nonsmokers weigh more than babies born to smokers).</li>
</ul>
<pre><code class="r">CItboot <- thetahat - quantile(Tstar, 0.975) * SE
names(CItboot) <- NULL
CItboot
</code></pre>
<pre><code>[1] -189.4
</code></pre>
<pre><code class="r">test <- t.test(Weight.Smokers, Weight.Non, alternative = "g")
</code></pre>
<blockquote>
<p>SOLUTION: Using a one-sided bootstrap \( t \) interval, we find that the difference in mean weights of Texan babies (smokers minus nonsmokers) is at least <code>-189.4169</code>. For comparison, the lower limit of the one-sided formula \( t \) interval is <code>-172.8809</code>.</p>
</blockquote>
</body>
</html>