-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path0_rpqa_massage.html
executable file
·752 lines (690 loc) · 79.1 KB
/
0_rpqa_massage.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />
<title>Québec Data Wrangling</title>
<script src="library/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="library/bootstrap-3.3.5/css/paper.min.css" rel="stylesheet" />
<script src="library/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="library/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="library/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="library/navigation-1.1/tabsets.js"></script>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
div.sourceCode { overflow-x: auto; }
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; }
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
code > span.dt { color: #902000; } /* DataType */
code > span.dv { color: #40a070; } /* DecVal */
code > span.bn { color: #40a070; } /* BaseN */
code > span.fl { color: #40a070; } /* Float */
code > span.ch { color: #4070a0; } /* Char */
code > span.st { color: #4070a0; } /* String */
code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
code > span.ot { color: #007020; } /* Other */
code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
code > span.fu { color: #06287e; } /* Function */
code > span.er { color: #ff0000; font-weight: bold; } /* Error */
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
code > span.cn { color: #880000; } /* Constant */
code > span.sc { color: #4070a0; } /* SpecialChar */
code > span.vs { color: #4070a0; } /* VerbatimString */
code > span.ss { color: #bb6688; } /* SpecialString */
code > span.im { } /* Import */
code > span.va { color: #19177c; } /* Variable */
code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code > span.op { color: #666666; } /* Operator */
code > span.bu { } /* BuiltIn */
code > span.ex { } /* Extension */
code > span.pp { color: #bc7a00; } /* Preprocessor */
code > span.at { color: #7d9029; } /* Attribute */
code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
</style>
<style type="text/css">
pre:not([class]) {
background-color: white;
}
</style>
<style type = "text/css">
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
code {
color: inherit;
background-color: rgba(0, 0, 0, 0.04);
}
img {
max-width:100%;
height: auto;
}
</style>
<script src="library/auto_tab_first_section.js"></script>
</head>
<body>
<div style="width:100%;height:200px;background-image:url('library/header/rpqa.jpg');background-size:cover;"></div>
<div class="container-fluid main-container"><div class="row"><div class="col-md-4"><a href="index.html"><i class="glyphicon glyphicon-th-list"></i> Back to index</a></div></div></div>
<div class="container-fluid main-container">
<div id="rpqa-data-wrangling" class="section level1 tab-content">
<h1>RPQA Data wrangling</h1>
<div id="description-of-data" class="section level2">
<h2>Description of data</h2>
<p>Data from the « Registre de la Population du Québec Ancien » of the Programme de Recherche en Démographie Historique, Demography Department, Université de Montréal</p>
<p>January 2012</p>
<p>The data is «Family reconstitution data» based on the systematic transcription of names, places and dates of Catholic baptisms, marriages and burials registered in Quebec for the period 1621-1799. The data also include death information from 1800-1850 for people born before 1750. There are two distinct numbering schemes: one for couples, one for individuals.</p>
<p>The data is presented in two files:</p>
<p>1- A couple’s file, giving for each couple</p>
<ul>
<li>its id number and the id numbers of the spouses</li>
<li>a date followed by a one digit code (1 = marriage date; 2 = marriage contract date, used to replace a missing marriage date, contracts being normally signed a few days before the actual marriage; 4 = first mention of the couple in the documents (baptism of a child, remarriage of a spouse, etc.); the couple can have married outside Quebec and migrated in, or it can be a couple formed in Quebec for which the marriage record was lost and no contract was found to replace it)</li>
<li>a place: the parish where the marriage was celebrated, “000” if a marriage contract (meaning somewhere in Quebec), a place outside Quebec, or no place at all</li>
</ul>
<p>2- A file of individuals, giving for each:</p>
<ul>
<li>Id number and id numbers of parents</li>
<li>A code of one’s ethnic origin (for certain immigrants: 911 = German, 795 = born in USA, 901 = British, 794 = “English” (American or British))</li>
<li>A flag identifying Indians</li>
<li>A flag identifying Immigrants (= anyone born outside Quebec; mostly born in France but could be born out West (fur trading country) to Quebec parents…)</li>
<li>A flag identifying those having died outside Quebec (out-migrants)</li>
<li>Date of birth, followed by a one digit code (1 = date of birth; 2 = date of baptism, which can be used as equivalent to date of birth; 3 or 4 = child died soon after birth, before having received formal baptism; 5 or 6 = approximate year of birth, usually from an age declaration; 7 = missing info)</li>
<li>Place of birth: usually parish of baptism</li>
<li>Date of death, followed by a one digit code (1 = date of death; 2 = date of burial; 3 or 4 = date found from another source; 5 = missing info)</li>
<li>Place of death: normally the parish of burial</li>
<li>Flag identifying, among those who married, those who could sign (= 1) and those who could not (= 2)</li>
</ul>
<p>For all place codes: numbers from 0000 to 6909 correspond to Quebec parishes. 3901 = Montreal; 4501 = Quebec city; 6001 = Trois-Rivieres. They are the only urban areas, with Trois-Rivieres being more a village than a town. The Administration, the merchants and such lived either in Montreal or Quebec. All other parishes are rural. 5-digit codes (10011…) correspond to places in France. Codes 7801 to 789<!-- NOTE(review): the upper bound "789" looks truncated in the original text; confirm against the PRDH codebook --> correspond to areas out West (“Pays-d’en-Haut”, outposts for military reasons and fur trade).</p>
<div id="loading-details" class="section level3 accordion">
<h3>Loading details</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">source</span>(<span class="st">"0__helpers.R"</span>)
opts_chunk$<span class="kw">set</span>(<span class="dt">cache=</span><span class="ot">FALSE</span>,<span class="dt">tidy=</span><span class="ot">FALSE</span>,<span class="dt">autodep=</span><span class="ot">TRUE</span>,<span class="dt">fig.width=</span><span class="dv">12</span>,<span class="dt">fig.height=</span><span class="fl">7.5</span>)
<span class="co"># rpqa_fam</span>
<span class="co"># gebkk 1, 2 gebfk, gebmk. 1 = date known exactly, 2 = 15 June of a year, accurate to the year only</span>
<span class="co"># rpqa_unions # all marriages</span>
<span class="co"># unknown fate = Nbirth - infantD/1y - childD/1-15y - adultD/15+</span>
<span class="co"># eheF == 1 and eheM == 1: first marriages</span>
<span class="co"># auswahl_id_f == all marriages of the woman, and vice versa</span>
<span class="co"># ehebekannt: start and end of the marriage are known; with ehebekannt==1 there are more births per marriage</span>
<span class="co"># 1670-1750</span></code></pre></div>
</div>
</div>
<div id="transforming-data" class="section level2">
<h2>Transforming data</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa.individuals =<span class="st"> </span>foreign::<span class="kw">read.dta</span>(<span class="st">"data/RPQA_29Oct2014/RPQA-family.dta"</span>)
rpqa.unions =<span class="st"> </span>foreign::<span class="kw">read.dta</span>(<span class="st">"data/RPQA_29Oct2014/RPQA-union4.dta"</span>)
rpqa.unions =<span class="st"> </span>rpqa.unions %>%<span class="st"> </span><span class="kw">select</span>(id, id_m, id_f, dat, dateMannee, dateMmois, dateMjour, qualiteDateM, codeLieuM, ehebekannt)
rpqa.unions$idParents =<span class="st"> </span><span class="kw">str_c</span>(rpqa.unions$id_m, <span class="st">'_'</span>, rpqa.unions$id_f)
rpqa.unions$idParents[<span class="kw">which</span>(rpqa.unions$idParents==<span class="st">"NA_NA"</span>)] =<span class="st"> </span><span class="ot">NA</span>
rpqa.individuals$idParents =<span class="st"> </span><span class="kw">str_c</span>(rpqa.individuals$idPere, <span class="st">'_'</span>, rpqa.individuals$idMere)
rpqa.individuals$idParents[<span class="kw">which</span>(rpqa.individuals$idParents==<span class="st">"NA_NA"</span>)] =<span class="st"> </span><span class="ot">NA</span>
rpqa =<span class="st"> </span>rpqa.individuals
rpqa$immigrant =<span class="st"> </span><span class="kw">factor</span>(rpqa$immigrant,<span class="dt">levels=</span><span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>),<span class="dt">labels=</span><span class="kw">c</span>(<span class="st">'NO'</span>,<span class="st">''</span>))
rpqa$emigrant =<span class="st"> </span><span class="kw">factor</span>(rpqa$emigrant,<span class="dt">levels=</span><span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>),<span class="dt">labels=</span><span class="kw">c</span>(<span class="st">'NO'</span>,<span class="st">''</span>))
rpqa$amerindien =<span class="st"> </span><span class="kw">factor</span>(rpqa$amerindien,<span class="dt">levels=</span><span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>),<span class="dt">labels=</span><span class="kw">c</span>(<span class="st">'NO'</span>,<span class="st">''</span>))
rpqa$sexe =<span class="st"> </span>car::<span class="kw">Recode</span>(rpqa$sexe,<span class="st">"'M'='m';'x'=NA"</span>)
rpqa$male =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$sexe ==<span class="st"> 'm'</span>,<span class="dv">1</span>,<span class="dv">0</span>)
rpqa$bdate =<span class="st"> </span><span class="kw">as.Date</span>(rpqa$gebk)
rpqa$byear =<span class="st"> </span><span class="kw">year</span>(rpqa$bdate)
rpqa$ddate =<span class="st"> </span><span class="kw">as.Date</span>(rpqa$todk)
rpqa$dyear =<span class="st"> </span><span class="kw">year</span>(rpqa$ddate)
rpqa$bdate.Father =<span class="st"> </span><span class="kw">as.Date</span>(rpqa$gebm)
rpqa$byear.Father =<span class="st"> </span><span class="kw">year</span>(rpqa$bdate.Father)
rpqa$ddate.Father =<span class="st"> </span><span class="kw">as.Date</span>(rpqa$todm)
rpqa$dyear.Father =<span class="st"> </span><span class="kw">year</span>(rpqa$ddate.Father)
rpqa$bdate.Mother =<span class="st"> </span><span class="kw">as.Date</span>(rpqa$gebf)
rpqa$byear.Mother =<span class="st"> </span><span class="kw">year</span>(rpqa$bdate.Mother)
rpqa$ddate.Mother =<span class="st"> </span><span class="kw">as.Date</span>(rpqa$todf)
rpqa$dyear.Mother =<span class="st"> </span><span class="kw">year</span>(rpqa$ddate.Mother)
rpqa$age.days =<span class="st"> </span><span class="kw">as.numeric</span>(rpqa$ddate -<span class="st"> </span>rpqa$bdate)
rpqa$age.days =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age.days <<span class="st"> </span><span class="dv">0</span>, <span class="dv">0</span>, rpqa$age.days)
<span class="co"># head(data.frame(as.numeric(rpqa$ddate - rpqa$bdate)/365, rpqa$ddate, rpqa$bdate))</span>
rpqa$age.days.Father =<span class="st"> </span><span class="kw">as.numeric</span>(rpqa$ddate.Father -<span class="st"> </span>rpqa$bdate.Father)
rpqa$age.days.Mother =<span class="st"> </span><span class="kw">as.numeric</span>(rpqa$ddate.Mother -<span class="st"> </span>rpqa$bdate.Mother)
rpqa$age =<span class="st"> </span>rpqa$age.days /<span class="st"> </span><span class="dv">365</span>/<span class="dv">10</span>
<span class="kw">qplot</span>(rpqa$age *<span class="st"> </span><span class="dv">10</span>)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/transform.data-1.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$age *<span class="st"> </span><span class="dv">10</span>) +<span class="st"> </span><span class="kw">xlim</span>(<span class="dv">1</span>,<span class="ot">NA</span>)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/transform.data-2.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># qplot(rpqa.individuals$age.days)</span>
rpqa$paternalage =<span class="st"> </span><span class="kw">as.numeric</span>(rpqa$bdate -<span class="st"> </span>rpqa$bdate.Father)/<span class="dv">365</span>/<span class="dv">10</span>
<span class="kw">qplot</span>(rpqa$paternalage *<span class="dv">10</span>)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/transform.data-3.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">sort</span>(rpqa$paternalage *<span class="dv">10</span>),<span class="dv">20</span>)</code></pre></div>
<pre><code>## [1] 14.84 15.05 15.47 15.90 15.92 15.92 16.01 16.01 16.13 16.18 16.30
## [12] 16.33 16.35 16.45 16.45 16.52 16.57 16.60 16.60 16.61</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">tail</span>(<span class="kw">sort</span>(rpqa$paternalage *<span class="dv">10</span>),<span class="dv">20</span>)</code></pre></div>
<pre><code>## [1] 78.92 79.98 80.00 80.00 80.76 80.87 81.08 81.18 81.20 81.76 81.91
## [12] 81.99 82.00 82.69 83.04 83.43 83.46 84.79 85.20 85.44</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$maternalage =<span class="st"> </span><span class="kw">as.numeric</span>(rpqa$bdate -<span class="st"> </span>rpqa$bdate.Mother)/<span class="dv">365</span>/<span class="dv">10</span>
<span class="kw">qplot</span>(rpqa$maternalage *<span class="dv">10</span>)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/transform.data-4.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">table</span>(rpqa$age.Mother <<span class="st"> </span>(rpqa$maternalage -<span class="st"> </span><span class="fl">0.1</span>)) <span class="co"># zombie moms</span></code></pre></div>
<pre><code>## < table of extent 0 ></code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">table</span>(rpqa$age.Father <<span class="st"> </span>(rpqa$paternalage -<span class="st"> </span><span class="fl">0.1</span>)) <span class="co"># zombie dads</span></code></pre></div>
<pre><code>## < table of extent 0 ></code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">sort</span>(rpqa$maternalage *<span class="dv">10</span>),<span class="dv">20</span>) <span class="co"># 10y old may be possible</span></code></pre></div>
<pre><code>## [1] 9.953 12.008 12.482 13.008 13.049 13.140 13.156 13.238 13.247 13.247
## [11] 13.299 13.337 13.381 13.405 13.408 13.441 13.458 13.510 13.559 13.581</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">tail</span>(<span class="kw">sort</span>(rpqa$maternalage *<span class="dv">10</span>),<span class="dv">40</span>) <span class="co"># but 69y old. something wrong with that.</span></code></pre></div>
<pre><code>## [1] 51.01 51.03 51.04 51.05 51.09 51.16 51.24 51.40 51.56 51.62 51.63
## [12] 51.72 51.72 51.92 52.03 52.12 52.12 52.16 52.35 52.35 52.38 52.49
## [23] 52.85 52.92 53.03 53.23 53.60 54.35 54.73 54.85 54.90 55.03 55.04
## [34] 55.68 55.93 56.04 56.70 57.58 59.60 69.27</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(maternalage*<span class="dv">10</span>,paternalage*<span class="dv">10</span>,<span class="dt">data=</span>rpqa,<span class="dt">geom=</span><span class="st">"jitter"</span>,<span class="dt">alpha=</span><span class="kw">I</span>(<span class="fl">0.1</span>),<span class="dt">shape=</span><span class="kw">I</span>(<span class="st">"."</span>)) +<span class="st"> </span><span class="kw">xlim</span>(<span class="dv">12</span>,<span class="dv">50</span>) +<span class="st"> </span><span class="kw">geom_smooth</span>()</code></pre></div>
<pre><code>## `geom_smooth()` using method = 'gam'</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/transform.data-5.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">cor.test</span>(rpqa$maternalage,rpqa$paternalage)</code></pre></div>
<pre><code>##
## Pearson's product-moment correlation
##
## data: rpqa$maternalage and rpqa$paternalage
## t = 510, df = 390000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6309 0.6346
## sample estimates:
## cor
## 0.6328</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">xtabs</span>(~<span class="st"> </span><span class="kw">is.na</span>(idMere) +<span class="st"> </span><span class="kw">is.na</span>(idPere), <span class="dt">data =</span> rpqa.individuals)</code></pre></div>
<pre><code>## is.na(idPere)
## is.na(idMere) FALSE TRUE
## FALSE 427016 509
## TRUE 265 31801</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># rpqa = merge(rpqa, spouses, by.x = "idIndividu",by.y="id", all.x =T)</span></code></pre></div>
</div>
<div id="count-kids-and-spouses" class="section level2">
<h2>count kids and spouses</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">count_and_merge =<span class="st"> </span>function(df, what, wt_var) {
counted.dad =<span class="st"> </span><span class="kw">dcast</span>(<span class="dt">data=</span> df[,<span class="kw">c</span>(<span class="st">'idPere'</span>,wt_var)],<span class="dt">formula =</span> idPere ~<span class="st"> </span>.,<span class="dt">fun.aggregate =</span> sum, <span class="dt">na.rm=</span>T,<span class="dt">value.var =</span> wt_var)
counted.mom =<span class="st"> </span><span class="kw">dcast</span>(<span class="dt">data=</span> df[,<span class="kw">c</span>(<span class="st">'idMere'</span>,wt_var)],<span class="dt">formula =</span> idMere ~<span class="st"> </span>.,<span class="dt">fun.aggregate =</span> sum, <span class="dt">na.rm=</span>T, <span class="dt">value.var =</span> wt_var)
<span class="kw">names</span>(counted.dad) =<span class="st"> </span><span class="kw">names</span>(counted.mom) =<span class="st"> </span><span class="kw">c</span>(<span class="st">'idIndividu'</span>,what)
counted =<span class="st"> </span><span class="kw">rbind</span>(counted.dad,counted.mom)
df =<span class="st"> </span><span class="kw">merge</span>(df,counted,<span class="dt">by=</span><span class="st">'idIndividu'</span>,<span class="dt">all.x=</span>T)
df[,what] =<span class="st"> </span>car::<span class="kw">Recode</span>(df[,what],<span class="st">'NA=0'</span>)
df
}
rpqa.unions =<span class="st"> </span>rpqa.unions[<span class="kw">order</span>(rpqa.unions$id_m,rpqa.unions$dat),]
rpqa.unions$marriage.order.Father =<span class="st"> </span><span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(rpqa.unions)), rpqa.unions$id_m, <span class="dt">FUN =</span> seq_along)
rpqa.unions =<span class="st"> </span>rpqa.unions[<span class="kw">order</span>(rpqa.unions$id_f,rpqa.unions$dat),]
rpqa.unions$marriage.order.Mother =<span class="st"> </span><span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(rpqa.unions)), rpqa.unions$id_f, <span class="dt">FUN =</span> seq_along)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, rpqa.unions, <span class="dt">by=</span><span class="st">"idParents"</span>,<span class="dt">all.x=</span>T, <span class="dt">suffixes =</span> <span class="kw">c</span>(<span class="st">""</span>, <span class="st">".Parents"</span>))
rpqa$ehebekannt =<span class="st"> </span><span class="kw">ifelse</span>(<span class="kw">is.na</span>(rpqa$ehebekannt), <span class="dv">0</span>, <span class="dv">1</span>)
rpqa$first.marriage =<span class="st"> </span>(rpqa$marriage.order.Mother +<span class="st"> </span>rpqa$marriage.order.Father) ==<span class="st"> </span><span class="dv">2</span>
<span class="kw">table</span>(rpqa$first.marriage,<span class="dt">exclude=</span><span class="ot">NULL</span>)</code></pre></div>
<pre><code>##
## FALSE TRUE <NA>
## 63206 350838 45547</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">count_spouses =<span class="st"> </span>function(df, df2, what, wt_var) {
counted.husband =<span class="st"> </span><span class="kw">dcast</span>(<span class="dt">data=</span> df2[,<span class="kw">c</span>(<span class="st">'id_m'</span>,wt_var)],<span class="dt">formula =</span> id_m ~<span class="st"> </span>.,<span class="dt">fun.aggregate =</span> sum, <span class="dt">na.rm=</span>T, <span class="dt">value.var =</span> wt_var)
counted.wive =<span class="st"> </span><span class="kw">dcast</span>(<span class="dt">data=</span> df2[,<span class="kw">c</span>(<span class="st">'id_f'</span>,wt_var)],<span class="dt">formula =</span> id_f ~<span class="st"> </span>.,<span class="dt">fun.aggregate =</span> sum, <span class="dt">na.rm=</span>T, <span class="dt">value.var =</span> wt_var)
<span class="kw">names</span>(counted.husband) =<span class="st"> </span><span class="kw">names</span>(counted.wive) =<span class="st"> </span><span class="kw">c</span>(<span class="st">'idIndividu'</span>,what)
counted =<span class="st"> </span><span class="kw">rbind</span>(counted.husband,counted.wive)
df =<span class="st"> </span><span class="kw">merge</span>(df,counted,<span class="dt">by=</span><span class="st">'idIndividu'</span>,<span class="dt">all.x=</span>T)
df[,what] =<span class="st"> </span>car::<span class="kw">Recode</span>(df[,what],<span class="st">'NA=0'</span>)
df
}
rpqa$born =<span class="st"> </span><span class="dv">1</span>; rpqa.unions$born =<span class="st"> </span><span class="dv">1</span>
rpqa =<span class="st"> </span><span class="kw">count_spouses</span>(rpqa,rpqa.unions, <span class="st">'spouses'</span>, <span class="st">"born"</span>)
rpqa$survive1d =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age.days ><span class="st"> </span><span class="dv">1</span>, <span class="dv">1</span>, <span class="dv">0</span>)
rpqa$survive1m =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age.days ><span class="st"> </span><span class="dv">28</span>, <span class="dv">1</span>, <span class="dv">0</span>)
rpqa$dead1d =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age.days <=<span class="st"> </span><span class="dv">1</span>, <span class="dv">1</span>, <span class="dv">0</span>)
rpqa$dead1m =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age.days ><span class="st"> </span><span class="dv">28</span>, <span class="dv">0</span>, <span class="dv">1</span>)
rpqa$dead1y =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age ><span class="st"> </span><span class="fl">0.1</span>, <span class="dv">0</span>, <span class="dv">1</span>)
rpqa$dead5y =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age ><span class="st"> </span><span class="fl">0.5</span>, <span class="dv">0</span>, <span class="dv">1</span>)
rpqa$deadR =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age ><span class="st"> </span><span class="fl">1.5</span>, <span class="dv">0</span>, <span class="dv">1</span>)
rpqa$survive5y =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age ><span class="st"> </span><span class="fl">0.5</span>, <span class="dv">1</span>, <span class="dv">0</span>)
rpqa$survive1y =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age ><span class="st"> </span><span class="fl">0.1</span>, <span class="dv">1</span>, <span class="dv">0</span>)
rpqa$surviveR =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$age ><span class="st"> </span><span class="fl">1.5</span>, <span class="dv">1</span>, <span class="dv">0</span>)
<span class="co"># rpqa$survive1m = car::Recode(rpqa$survive1m, 'NA=0')</span>
<span class="kw">table</span>(rpqa$survive1m,<span class="dt">exclude=</span><span class="ot">NULL</span>)</code></pre></div>
<pre><code>##
## 0 1 <NA>
## 41499 198139 219953</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children'</span>, <span class="dt">wt_var =</span> <span class="st">"born"</span>)
rpqa$children.per.spouse =<span class="st"> </span>rpqa$children/rpqa$spouses
rpqa$children.per.spouse[<span class="kw">which</span>(rpqa$spouses==<span class="dv">0</span>)] =<span class="st"> </span><span class="ot">NA</span>
<span class="kw">qplot</span>(rpqa$children.per.spouse)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-1.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># we know that people who reproduced or married usually reached age 15</span>
changeNAto1 =<span class="st"> </span>function(x) { <span class="kw">colwise</span>(function(x) { <span class="kw">ifelse</span>(<span class="kw">is.na</span>(x), <span class="dv">1</span>, x)})(x) }
rpqa[<span class="kw">which</span>(rpqa$children><span class="dv">0</span> |<span class="st"> </span>rpqa$spouses><span class="dv">0</span>), <span class="kw">c</span>(<span class="st">'surviveR'</span>, <span class="st">'survive1y'</span>, <span class="st">'survive1m'</span>, <span class="st">'survive1d'</span>)] =<span class="st"> </span><span class="kw">changeNAto1</span>(rpqa[<span class="kw">which</span>(rpqa$children><span class="dv">0</span> |<span class="st"> </span>rpqa$spouses><span class="dv">0</span>), <span class="kw">c</span>(<span class="st">'surviveR'</span>, <span class="st">'survive1y'</span>, <span class="st">'survive1m'</span>, <span class="st">'survive1d'</span>)])
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children.dead1d'</span>, <span class="dt">wt_var =</span> <span class="st">'dead1d'</span>)
rpqa$children.surviving1d =<span class="st"> </span>rpqa$children -<span class="st"> </span>rpqa$children.dead1d
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children.dead1m'</span>, <span class="dt">wt_var =</span> <span class="st">'dead1m'</span>)
rpqa$children.surviving1m =<span class="st"> </span>rpqa$children -<span class="st"> </span>rpqa$children.dead1m
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children.dead1y'</span>, <span class="dt">wt_var =</span> <span class="st">'dead1y'</span>)
rpqa$children.surviving1y =<span class="st"> </span>rpqa$children -<span class="st"> </span>rpqa$children.dead1y
<span class="co"># xtabs(~ rpqa$children.dead1y + rpqa$NinfantD, exclude = NULL, na.action = na.pass)</span>
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children.dead5y'</span>, <span class="dt">wt_var =</span> <span class="st">'dead5y'</span>)
rpqa$children.surviving5y =<span class="st"> </span>rpqa$children -<span class="st"> </span>rpqa$children.dead5y
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children.deadR'</span>, <span class="dt">wt_var =</span> <span class="st">'deadR'</span>)
rpqa$children.survivingR =<span class="st"> </span>rpqa$children -<span class="st"> </span>rpqa$children.deadR
<span class="co"># changeNAto0(x): return x with every NA replaced by 0; other elements keep their value.</span>
changeNAto0 =<span class="st"> </span>function(x) {
    is_missing =<span class="st"> </span><span class="kw">is.na</span>(x)
    <span class="kw">ifelse</span>(is_missing, <span class="dv">0</span>, x)
}
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children.spouses'</span>, <span class="dt">wt_var =</span> <span class="st">'spouses'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren.per.spouse'</span>, <span class="dt">wt_var =</span> <span class="st">'children.per.spouse'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren'</span>,<span class="dt">wt_var=</span><span class="st">'children'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren.surviving1d'</span>, <span class="dt">wt_var =</span> <span class="st">'children.surviving1d'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren.surviving1m'</span>, <span class="dt">wt_var =</span> <span class="st">'children.surviving1m'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren.surviving1y'</span>, <span class="dt">wt_var =</span> <span class="st">'children.surviving1y'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren.surviving5y'</span>, <span class="dt">wt_var =</span> <span class="st">'children.surviving5y'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren.survivingR'</span>, <span class="dt">wt_var =</span> <span class="st">'children.survivingR'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'grandchildren.dead1m'</span>, <span class="dt">wt_var =</span> <span class="st">'children.dead1m'</span>)
<span class="kw">xtabs</span>(~<span class="st"> </span>(grandchildren><span class="dv">0</span>) +(children><span class="dv">0</span>) +<span class="st"> </span>(spouses><span class="dv">0</span>) +<span class="st"> </span>surviveR,<span class="dt">data=</span>rpqa,<span class="dt">exclude=</span><span class="ot">NULL</span>, <span class="dt">na.action=</span> na.pass)</code></pre></div>
<pre><code>## , , spouses > 0 = FALSE, surviveR = 0
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 137531 0
## TRUE 0 0
##
## , , spouses > 0 = TRUE, surviveR = 0
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 3 2
## TRUE 0 0
##
## , , spouses > 0 = FALSE, surviveR = 1
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 17753 4233
## TRUE 0 16266
##
## , , spouses > 0 = TRUE, surviveR = 1
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 11499 58260
## TRUE 0 57547
##
## , , spouses > 0 = FALSE, surviveR = NA
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 156497 0
## TRUE 0 0
##
## , , spouses > 0 = TRUE, surviveR = NA
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 0 0
## TRUE 0 0</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">counted.parents =<span class="st"> </span><span class="kw">dcast</span>(<span class="dt">data=</span> rpqa[,<span class="kw">c</span>(<span class="st">'idParents'</span>,<span class="st">"born"</span>)],<span class="dt">formula =</span> idParents ~<span class="st"> </span>.,<span class="dt">fun.aggregate =</span> sum, <span class="dt">na.rm=</span>T, <span class="dt">value.var =</span> <span class="st">"born"</span>)
<span class="kw">names</span>(counted.parents) =<span class="st"> </span><span class="kw">c</span>(<span class="st">'idParents'</span>,<span class="st">"children"</span>)
rpqa.unions =<span class="st"> </span><span class="kw">merge</span>(rpqa.unions, counted.parents,<span class="dt">by=</span><span class="st">"idParents"</span>,<span class="dt">all.x=</span>T) <span class="co"># find childless marriages</span>
<span class="kw">table</span>(rpqa.unions$children,<span class="dt">exclude=</span><span class="ot">NULL</span>)</code></pre></div>
<pre><code>##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 7252 6086 5806 5378 5093 4738 4518 4310 4193 3999 3524 2818 2076 1493 990
## 16 17 18 19 20 21 22 23 <NA>
## 623 342 205 90 45 16 4 6 9535</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa.unions =<span class="st"> </span>rpqa.unions[<span class="kw">which</span>(rpqa.unions$children ><span class="st"> </span><span class="dv">0</span>), ]
rpqa.unions =<span class="st"> </span>rpqa.unions[<span class="kw">order</span>(rpqa.unions$id_m,rpqa.unions$dat),]
rpqa.unions$fertile.marriage.order.Father =<span class="st"> </span><span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(rpqa.unions)), rpqa.unions$id_m, <span class="dt">FUN =</span> seq_along)
rpqa.unions =<span class="st"> </span>rpqa.unions[<span class="kw">order</span>(rpqa.unions$id_f,rpqa.unions$dat),]
rpqa.unions$fertile.marriage.order.Mother =<span class="st"> </span><span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(rpqa.unions)), rpqa.unions$id_f, <span class="dt">FUN =</span> seq_along)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, rpqa.unions[,<span class="kw">c</span>(<span class="st">'idParents'</span>,<span class="st">'fertile.marriage.order.Mother'</span>,<span class="st">'fertile.marriage.order.Father'</span>),],<span class="dt">by=</span><span class="st">"idParents"</span>,<span class="dt">all.x=</span>T)
rpqa$first.fertile.marriage =<span class="st"> </span>(rpqa$fertile.marriage.order.Mother +<span class="st"> </span>rpqa$fertile.marriage.order.Father) ==<span class="st"> </span><span class="dv">2</span>
<span class="kw">table</span>(rpqa$first.fertile.marriage,<span class="dt">exclude=</span><span class="ot">NULL</span>)</code></pre></div>
<pre><code>##
## FALSE TRUE <NA>
## 56874 357170 45547</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$children)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-2.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa[<span class="kw">which</span>(rpqa$age ><span class="st"> </span><span class="fl">1.5</span>),]$children )</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-3.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$grandchildren)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-4.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa[<span class="kw">which</span>(rpqa$age ><span class="st"> </span><span class="fl">1.5</span>),]$grandchildren )</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-5.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$children.surviving1m)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-6.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$children.dead1m)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-7.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$spouses)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-8.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(children,children.surviving1m, <span class="dt">data=</span>rpqa,<span class="dt">geom=</span><span class="st">"jitter"</span>,<span class="dt">alpha=</span><span class="kw">I</span>(<span class="fl">0.02</span>))</code></pre></div>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-9.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(children.surviving1d, children.surviving1m, <span class="dt">data=</span>rpqa,<span class="dt">geom=</span><span class="st">"jitter"</span>,<span class="dt">alpha=</span><span class="kw">I</span>(<span class="fl">0.2</span>))</code></pre></div>
<p><img src="0_rpqa_massage_files/figure-html/kids.and.spouses-10.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Parental age relative to a family mean, plus an indicator for two specific birth-place codes.</span>
rpqa =<span class="st"> </span>rpqa[<span class="kw">order</span>(rpqa$idParents),]
<span class="co"># mean paternal age over all rows sharing idParents (NAs ignored), and each row's deviation from that mean</span>
rpqa$paternalage.mean =<span class="st"> </span><span class="kw">ave</span>(rpqa$paternalage ,rpqa$idParents,<span class="dt">FUN=</span> function(x) { <span class="kw">mean</span>(x,<span class="dt">na.rm=</span>T) } )
rpqa$paternalage.diff =<span class="st"> </span>rpqa$paternalage -<span class="st"> </span>rpqa$paternalage.mean
<span class="co"># same for maternal age, but averaged over rows sharing idMere. NOTE(review): the paternal mean is taken within idParents - confirm the different grouping is intended.</span>
rpqa$maternalage.mean =<span class="st"> </span><span class="kw">ave</span>(rpqa$maternalage,rpqa$idMere,<span class="dt">FUN=</span> function(x) { <span class="kw">mean</span>(x,<span class="dt">na.rm=</span>T) } )
rpqa$maternalage.diff =<span class="st"> </span>rpqa$maternalage -<span class="st"> </span>rpqa$maternalage.mean
<span class="co"># urban: 1 if codeLieuNaiss is 3901 or 4501, otherwise 0 (a missing code also yields 0, because %in% never returns NA)</span>
rpqa$urban =<span class="st"> </span><span class="kw">factor</span>(rpqa$codeLieuNaiss %in%<span class="st"> </span><span class="kw">c</span>(<span class="dv">3901</span>, <span class="dv">4501</span>),<span class="kw">c</span>(<span class="st">"FALSE"</span>,<span class="st">"TRUE"</span>),<span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>))
<span class="kw">table</span>(rpqa$urban)</code></pre></div>
<pre><code>##
## 0 1
## 400287 59304</code></pre>
</div>
<div id="calculate-predictors" class="section level2">
<h2>Calculate predictors</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Sort by parent couple and birth date so that the position of a row within idParents follows birth order.</span>
rpqa =<span class="st"> </span>rpqa[<span class="kw">order</span>(rpqa$idParents,rpqa$bdate), ]
rpqa <-<span class="st"> </span><span class="kw">transform</span>(rpqa, <span class="dt">siblings =</span> <span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(rpqa)), rpqa$idParents, <span class="dt">FUN =</span> length)-<span class="dv">1</span>) <span class="co"># sibling count: number of rows sharing idParents, minus the individual itself</span>
rpqa <-<span class="st"> </span><span class="kw">transform</span>(rpqa, <span class="dt">birthorder =</span> <span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(rpqa)), rpqa$idParents, <span class="dt">FUN =</span> seq_along)) <span class="co"># running position within idParents after the sort above, i.e. birth order. NOTE(review): the effect on rows with a missing father has not been verified.</span>
<span class="kw">qplot</span>(rpqa$birthorder)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/add.some.indicators-1.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$younger_siblings =<span class="st"> </span>rpqa$siblings +<span class="st"> </span><span class="dv">1</span> -<span class="st"> </span>rpqa$birthorder
<span class="kw">table</span>(rpqa$younger_siblings)</code></pre></div>
<pre><code>##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 74394 57339 50743 44738 39259 34103 29321 24777 20447 16242 12237 8710
## 12 13 14 15 16 17 18 19 20 21 22
## 5891 3815 2321 1331 708 366 161 71 26 10 6</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">table</span>( rpqa$qualiteDateNaiss ==<span class="st"> </span><span class="dv">3</span> ) <span class="co"># born dead or died before baptism</span></code></pre></div>
<pre><code>##
## FALSE TRUE
## 450756 8835</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$age[rpqa$qualiteDateNaiss ==<span class="st"> </span><span class="dv">3</span>] *<span class="dv">10</span>)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/add.some.indicators-2.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$age[rpqa$qualiteDateNaiss !=<span class="st"> </span><span class="dv">3</span>] *<span class="dv">10</span>) +<span class="st"> </span><span class="kw">xlim</span>(<span class="dv">0</span>,<span class="dv">1</span>)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/add.some.indicators-3.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># age.years: dyear minus byear (presumably death year minus birth year - confirm)</span>
rpqa$age.years =<span class="st"> </span>rpqa$dyear-<span class="st"> </span>rpqa$byear
rpqa =<span class="st"> </span>rpqa[<span class="kw">order</span>(rpqa$idParents,rpqa$birthorder), ]
rpqa$nr.siblings =<span class="st"> </span><span class="kw">ave</span>(rpqa$born,rpqa$idParents, <span class="dt">FUN =</span> function(x) { <span class="kw">sum</span>(x,<span class="dt">na.rm=</span>T) } ) -<span class="dv">1</span> <span class="co"># sum of born over rows sharing idParents, minus 1 so the individual itself is not counted</span>
<span class="kw">qplot</span>(rpqa$nr.siblings,<span class="dt">binwidth=</span><span class="dv">1</span>)</code></pre></div>
<p><img src="0_rpqa_massage_files/figure-html/add.some.indicators-4.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$nr.dead.siblings1m =<span class="st"> </span><span class="kw">ave</span>(rpqa$dead1m,rpqa$idParents,<span class="dt">FUN=</span> function(x) { <span class="kw">sum</span>(x,<span class="dt">na.rm=</span>T) } ) -<span class="st"> </span>rpqa$dead1m
<span class="kw">qplot</span>(rpqa$nr.dead.siblings1m,<span class="dt">binwidth=</span><span class="dv">1</span>)</code></pre></div>
<p><img src="0_rpqa_massage_files/figure-html/add.some.indicators-5.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$infant.death.cluster =<span class="st"> </span>rpqa$nr.dead.siblings1m/rpqa$nr.siblings <span class="co"># ratio of nr.dead.siblings1m to nr.siblings; both counts exclude the individual itself (0/0 gives NaN when nr.siblings is 0)</span>
<span class="kw">qplot</span>(rpqa$infant.death.cluster)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/add.some.indicators-6.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa[<span class="kw">which</span>(rpqa$nr.siblings><span class="dv">1</span>),]$infant.death.cluster)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<p><img src="0_rpqa_massage_files/figure-html/add.some.indicators-7.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># lag.0(x): shift x one position towards the end and put 0 in the first slot,</span>
<span class="co"># so element i receives the value of element i-1. The result has the same length as x.</span>
<span class="co"># Groups of length 0 or 1 yield only zeros; an empty group used to fail on the index 1:(-1).</span>
lag<span class="fl">.0</span> =<span class="st"> </span>function(x) {
    n =<span class="st"> </span><span class="kw">length</span>(x)
    if(n ><span class="st"> </span><span class="dv">1</span>) <span class="kw">c</span>(<span class="dv">0</span>, x[<span class="kw">seq_len</span>(n -<span class="st"> </span><span class="dv">1</span>)])
    else <span class="kw">rep</span>(<span class="dv">0</span>, n)
}
<span class="co"># inv.lag.0(x): shift x one position towards the start and put 0 in the last slot,</span>
<span class="co"># so element i receives the value of element i+1. The result has the same length as x.</span>
<span class="co"># Groups of length 0 or 1 yield only zeros; an empty group used to come back with length 3.</span>
inv.lag<span class="fl">.0</span> =<span class="st"> </span>function(x) {
    n =<span class="st"> </span><span class="kw">length</span>(x)
    if(n ><span class="st"> </span><span class="dv">1</span>) <span class="kw">c</span>(x[-<span class="dv">1</span>], <span class="dv">0</span>)
    else <span class="kw">rep</span>(<span class="dv">0</span>, n)
}
<span class="co"># For every row, take surviveR of the neighbouring row within the same father (idPere):</span>
<span class="co"># the previous row for older.sib.made.15y, the next row for younger.sib.made.15y; 0 when there is no such row.</span>
<span class="co"># NOTE(review): rows were last ordered by idParents and birthorder, but the grouping here is by idPere - confirm the row order within a father is the intended one.</span>
rpqa =<span class="st"> </span><span class="kw">transform</span>(rpqa, <span class="dt">older.sib.made.15y =</span> <span class="kw">ave</span>(surviveR, idPere, <span class="dt">FUN =</span> lag<span class="fl">.0</span>))
rpqa =<span class="st"> </span><span class="kw">transform</span>(rpqa, <span class="dt">younger.sib.made.15y =</span> <span class="kw">ave</span>(surviveR, idPere, <span class="dt">FUN =</span> inv.lag<span class="fl">.0</span>))
<span class="co"># older_siblings: birthorder - 1 as a factor, with values above 4 collapsed into "5+"</span>
rpqa$older_siblings =<span class="st"> </span><span class="kw">factor</span>(<span class="kw">ifelse</span>((rpqa$birthorder -<span class="st"> </span><span class="dv">1</span>) ><span class="st"> </span><span class="dv">4</span>,<span class="st">"5+"</span>, rpqa$birthorder -<span class="st"> </span><span class="dv">1</span>))
<span class="co"># siblings does not count the individual, so the last-born child of a couple has birthorder == siblings + 1.</span>
<span class="co"># Comparing birthorder with nr.siblings (which also excludes self) is off by one and can never flag an only child.</span>
rpqa$last_born =<span class="st"> </span><span class="kw">ifelse</span>(rpqa$birthorder ==<span class="st"> </span>rpqa$siblings +<span class="st"> </span><span class="dv">1</span>, <span class="dv">1</span>, <span class="dv">0</span>)</code></pre></div>
</div>
<div id="get-grandparents" class="section level2">
<h2>Get grandparents</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">grandparents =<span class="st"> </span>rpqa[, <span class="kw">c</span>(<span class="st">'idIndividu'</span>,<span class="st">'idPere'</span>,<span class="st">'idMere'</span>, <span class="st">'paternalage'</span>, <span class="st">'maternalage'</span>)]
<span class="kw">names</span>(grandparents) =<span class="st"> </span><span class="kw">c</span>(<span class="st">'idMere'</span>, <span class="st">'idMaternalGrandfather'</span>, <span class="st">'idMaternalGrandmother'</span>, <span class="st">'maternal.grandpaternalage'</span>, <span class="st">'maternal.grandmaternalage'</span>)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, grandparents, <span class="dt">by =</span> <span class="st">"idMere"</span>, <span class="dt">all.x =</span>T)
<span class="kw">names</span>(grandparents) =<span class="st"> </span><span class="kw">c</span>(<span class="st">'idPere'</span>, <span class="st">'idPaternalGrandfather'</span>, <span class="st">'idPaternalGrandmother'</span>, <span class="st">'paternal.grandpaternalage'</span>, <span class="st">'paternal.grandmaternalage'</span>)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, grandparents, <span class="dt">by =</span> <span class="st">"idPere"</span>, <span class="dt">all.x =</span>T)
rpqa$idPaternalGrandparents =<span class="st"> </span><span class="kw">paste</span>(rpqa$idPaternalGrandfather, rpqa$idPaternalGrandmother)
rpqa$idMaternalGrandparents =<span class="st"> </span><span class="kw">paste</span>(rpqa$idMaternalGrandfather, rpqa$idMaternalGrandmother)
grandparent =<span class="st"> </span>rpqa[, <span class="kw">c</span>(<span class="st">'idIndividu'</span>, <span class="st">'ddate'</span>, <span class="st">'bdate'</span>)]
<span class="kw">names</span>(grandparent) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idPaternalGrandfather"</span>, <span class="st">"ddate.paternalGrandfather"</span>, <span class="st">"bdate.paternalGrandfather"</span>)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, grandparent, <span class="dt">by =</span> <span class="st">"idPaternalGrandfather"</span>, <span class="dt">all.x =</span> T)
<span class="kw">names</span>(grandparent) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idMaternalGrandfather"</span>, <span class="st">"ddate.maternalGrandfather"</span>, <span class="st">"bdate.maternalGrandfather"</span>)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, grandparent, <span class="dt">by =</span> <span class="st">"idMaternalGrandfather"</span>, <span class="dt">all.x =</span> T)
<span class="kw">names</span>(grandparent) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idPaternalGrandmother"</span>, <span class="st">"ddate.paternalGrandmother"</span>, <span class="st">"bdate.paternalGrandmother"</span>)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, grandparent, <span class="dt">by =</span> <span class="st">"idPaternalGrandmother"</span>, <span class="dt">all.x =</span> T)
<span class="kw">names</span>(grandparent) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idMaternalGrandmother"</span>, <span class="st">"ddate.maternalGrandmother"</span>, <span class="st">"bdate.maternalGrandmother"</span>)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, grandparent, <span class="dt">by =</span> <span class="st">"idMaternalGrandmother"</span>, <span class="dt">all.x =</span> T)
<span class="kw">library</span>(lubridate)
rpqa$maternalgrandfatherloss =<span class="st"> </span><span class="kw">factor</span>(rpqa$bdate +<span class="st"> </span><span class="kw">years</span>(<span class="dv">5</span>) ><span class="st"> </span>rpqa$ddate.maternalGrandfather,<span class="dt">exclude =</span> <span class="ot">NULL</span>)
rpqa$paternalgrandfatherloss =<span class="st"> </span><span class="kw">factor</span>(rpqa$bdate +<span class="st"> </span><span class="kw">years</span>(<span class="dv">5</span>) ><span class="st"> </span>rpqa$ddate.paternalGrandfather,<span class="dt">exclude =</span> <span class="ot">NULL</span>)
rpqa$maternalgrandmotherloss =<span class="st"> </span><span class="kw">factor</span>(rpqa$bdate +<span class="st"> </span><span class="kw">years</span>(<span class="dv">5</span>) ><span class="st"> </span>rpqa$ddate.maternalGrandmother,<span class="dt">exclude =</span> <span class="ot">NULL</span>)
rpqa$paternalgrandmotherloss =<span class="st"> </span><span class="kw">factor</span>(rpqa$bdate +<span class="st"> </span><span class="kw">years</span>(<span class="dv">5</span>) ><span class="st"> </span>rpqa$ddate.paternalGrandmother,<span class="dt">exclude =</span> <span class="ot">NULL</span>)</code></pre></div>
</div>
<div id="compute-high-level-predictors" class="section level2">
<h2>Compute high-level predictors</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$born =<span class="st"> </span><span class="ot">NULL</span> <span class="co"># helper column, no longer needed from here on</span>
<span class="co"># calendar year of bdate and ddate</span>
rpqa$byear.years =<span class="st"> </span><span class="kw">year</span>(rpqa$bdate)
rpqa$dyear.years =<span class="st"> </span><span class="kw">year</span>(rpqa$ddate)
<span class="co"># Add three columns computed separately for each idParents group by helpers defined elsewhere, then convert the result to a data.table.</span>
rpqa =<span class="st"> </span>rpqa %>%
<span class="st"> </span><span class="kw">group_by</span>(idParents) %>%
<span class="st"> </span><span class="kw">mutate</span>(
<span class="dt">younger_sibs_ad_5y =</span> <span class="kw">younger_sibs_alive_and_dependent</span>(<span class="dt">survive5y=</span>survive5y, <span class="dt">byear=</span>byear.years, <span class="dt">dyear=</span>dyear.years) ,
<span class="dt">older_sibs_ad_5y =</span> <span class="kw">older_sibs_alive_and_dependent</span>(<span class="dt">survive5y=</span>survive5y, <span class="dt">byear=</span>byear.years, <span class="dt">dyear=</span>dyear.years),
<span class="co"># NOTE(review): this call passes byear/dyear, whereas the two calls above pass byear.years/dyear.years - confirm that is intended.</span>
<span class="dt">dependent_sibs_f5y =</span> <span class="kw">dependent_sibs_f5y</span>(<span class="dt">survive1y=</span>survive1y, <span class="dt">byear=</span>byear, <span class="dt">dyear=</span>dyear)
) %>%<span class="st"> </span><span class="kw">data.table</span>()</code></pre></div>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># min_na(x) / max_na(x): minimum / maximum of x ignoring NAs; NA when x holds no non-missing value.</span>
min_na =<span class="st"> </span>function(x) {
    nothing_observed =<span class="st"> </span><span class="kw">all</span>(<span class="kw">is.na</span>(x))
    <span class="kw">ifelse</span>(nothing_observed, <span class="ot">NA</span>, <span class="kw">min</span>(x, <span class="dt">na.rm=</span>T))
}
max_na =<span class="st"> </span>function(x) {
    nothing_observed =<span class="st"> </span><span class="kw">all</span>(<span class="kw">is.na</span>(x))
    <span class="kw">ifelse</span>(nothing_observed, <span class="ot">NA</span>, <span class="kw">max</span>(x, <span class="dt">na.rm=</span>T))
}
rpqa[, paternalage_at_1st_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(paternalage, idPere, <span class="dt">FUN =</span> min_na)]
rpqa[, paternalage_at_last_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(paternalage, idPere, <span class="dt">FUN =</span> max_na)]
rpqa[, maternalage_at_1st_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(maternalage, idMere, <span class="dt">FUN =</span> min_na)]
rpqa[, maternalage_at_last_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(maternalage, idMere, <span class="dt">FUN =</span> max_na)]
fathers =<span class="st"> </span>rpqa[!<span class="kw">duplicated</span>(idPere), <span class="kw">list</span>(idPere, paternalage_at_1st_sib, paternalage_at_last_sib)]
<span class="kw">names</span>(fathers) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idIndividu"</span>,<span class="st">"age_at_1st_child"</span>, <span class="st">"age_at_last_child"</span>)
mothers =<span class="st"> </span>rpqa[!<span class="kw">duplicated</span>(idMere), <span class="kw">list</span>(idMere, maternalage_at_1st_sib, maternalage_at_last_sib)]
<span class="kw">names</span>(mothers) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idIndividu"</span>,<span class="st">"age_at_1st_child"</span>, <span class="st">"age_at_last_child"</span>)
parents =<span class="st"> </span><span class="kw">rbind</span>(fathers, mothers)
rpqa =<span class="st"> </span><span class="kw">merge</span>(rpqa, parents, <span class="dt">by =</span> <span class="st">"idIndividu"</span>, <span class="dt">all.x =</span> T)
rpqa$maternalage_c =<span class="st"> </span>QuantPsyc::<span class="kw">meanCenter</span>(rpqa$maternalage)
rpqa$paternalage_c =<span class="st"> </span>QuantPsyc::<span class="kw">meanCenter</span>(rpqa$paternalage)
rpqa$nr.siblings =<span class="st"> </span>rpqa$siblings
rpqa %>%<span class="st"> </span>
<span class="kw">mutate</span>(<span class="dt">maternal_loss_age =</span> dyear.Mother -<span class="st"> </span>byear
,<span class="dt">maternal_loss_age =</span> <span class="kw">as.numeric</span>(<span class="kw">ifelse</span>(maternal_loss_age >=<span class="st"> </span>-<span class="dv">1</span> &<span class="st"> </span>maternal_loss_age <<span class="st"> </span><span class="dv">0</span>, <span class="dv">0</span>, maternal_loss_age))
,<span class="dt">maternal_loss =</span> <span class="kw">as.character</span>(<span class="kw">cut</span>(maternal_loss_age, <span class="dt">breaks =</span> <span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>,<span class="dv">5</span>,<span class="dv">10</span>,<span class="dv">15</span>,<span class="dv">20</span>,<span class="dv">25</span>,<span class="dv">30</span>,<span class="dv">35</span>,<span class="dv">40</span>,<span class="dv">45</span>), <span class="dt">include.lowest =</span> T ))
,<span class="dt">maternal_loss =</span> <span class="kw">ifelse</span>( maternal_loss_age >=<span class="st"> </span><span class="dv">45</span>, <span class="st">"later"</span>, maternal_loss)
,<span class="dt">maternal_loss =</span> <span class="kw">ifelse</span>(<span class="kw">is.na</span>(maternal_loss_age) |<span class="st"> </span>maternal_loss_age <<span class="st"> </span><span class="dv">0</span>, <span class="st">"unclear"</span>, maternal_loss)
,<span class="dt">maternal_loss =</span> <span class="kw">factor</span>(maternal_loss, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="st">"later"</span>,<span class="st">"[0,1]"</span>, <span class="st">"(1,5]"</span>, <span class="st">"(5,10]"</span>, <span class="st">"(10,15]"</span>, <span class="st">"(15,20]"</span>, <span class="st">"(20,25]"</span>, <span class="st">"(25,30]"</span>, <span class="st">"(30,35]"</span>, <span class="st">"(35,40]"</span>, <span class="st">"(40,45]"</span>, <span class="st">"unclear"</span>))
,<span class="dt">paternal_loss_age =</span> dyear.Father -<span class="st"> </span>byear
,<span class="dt">paternal_loss_age =</span> <span class="kw">as.numeric</span>(<span class="kw">ifelse</span>(paternal_loss_age >=<span class="st"> </span>-<span class="dv">1</span> &<span class="st"> </span>paternal_loss_age <<span class="st"> </span><span class="dv">0</span>, <span class="dv">0</span>, paternal_loss_age))
,<span class="dt">paternal_loss =</span> <span class="kw">as.character</span>(<span class="kw">cut</span>(paternal_loss_age, <span class="dt">breaks =</span> <span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>,<span class="dv">5</span>,<span class="dv">10</span>,<span class="dv">15</span>,<span class="dv">20</span>,<span class="dv">25</span>,<span class="dv">30</span>,<span class="dv">35</span>,<span class="dv">40</span>,<span class="dv">45</span>), <span class="dt">include.lowest =</span> T ))
,<span class="dt">paternal_loss =</span> <span class="kw">ifelse</span>( paternal_loss_age >=<span class="st"> </span><span class="dv">45</span>, <span class="st">"later"</span>, paternal_loss)
,<span class="dt">paternal_loss =</span> <span class="kw">ifelse</span>(<span class="kw">is.na</span>(paternal_loss_age) |<span class="st"> </span>paternal_loss_age <<span class="st"> </span><span class="dv">0</span>, <span class="st">"unclear"</span>, paternal_loss)
,<span class="dt">paternal_loss =</span> <span class="kw">factor</span>(paternal_loss, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="st">"later"</span>,<span class="st">"[0,1]"</span>, <span class="st">"(1,5]"</span>, <span class="st">"(5,10]"</span>, <span class="st">"(10,15]"</span>, <span class="st">"(15,20]"</span>, <span class="st">"(20,25]"</span>, <span class="st">"(25,30]"</span>, <span class="st">"(30,35]"</span>, <span class="st">"(35,40]"</span>, <span class="st">"(40,45]"</span>, <span class="st">"unclear"</span>))
) %>%
<span class="st"> </span><span class="kw">data.table</span>() ->
<span class="st"> </span>rpqa
<span class="kw">table</span>(rpqa$maternal_loss, <span class="dt">exclude =</span> <span class="ot">NULL</span>)</code></pre></div>
<pre><code>##
## later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35]
## 96785 8922 17870 19592 18031 18390 21247 25498 29276
## (35,40] (40,45] unclear <NA>
## 32748 27878 143354 0</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">table</span>(rpqa$paternal_loss, <span class="dt">exclude =</span> <span class="ot">NULL</span>)</code></pre></div>
<pre><code>##
## later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35]
## 67685 5903 12520 17507 21294 26344 32911 36337 37820
## (35,40] (40,45] unclear <NA>
## 37750 28036 135484 0</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># recenter_all(x): run recenter.pat() twice among "idParents" - first without `what` (its default applies), then for "maternalage".</span>
recenter_all =<span class="st"> </span>function(x) {
    first_pass =<span class="st"> </span><span class="kw">recenter.pat</span>(x, <span class="dt">among_who=</span><span class="st">"idParents"</span>)
    <span class="kw">recenter.pat</span>(first_pass, <span class="dt">what =</span> <span class="st">"maternalage"</span>, <span class="dt">among_who =</span> <span class="st">"idParents"</span>)
}
rpqa[, ever_married :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(spouses ><span class="st"> </span><span class="dv">0</span>,<span class="dv">1</span>,<span class="dv">0</span>) ]
rpqa[, birth.cohort :<span class="er">=</span><span class="st"> </span><span class="kw">year_bins</span>(byear)]
rpqa[byear <<span class="st"> </span><span class="dv">1635</span>, birth.cohort :<span class="er">=</span><span class="st"> "1630-1635"</span>]
<span class="kw">crosstabs</span>(rpqa$birth.cohort)</code></pre></div>
<pre><code>## rpqa$birth.cohort
## 1630-1635 1635-1640 1640-1645 1645-1650 1650-1655 1655-1660 1660-1665
## 1348 735 954 921 805 866 1235
## 1665-1670 1670-1675 1675-1680 1680-1685 1685-1690 1690-1695 1695-1700
## 1705 2527 2651 2694 2835 3457 4488
## 1700-1705 1705-1710 1710-1715 1715-1720 1720-1725 1725-1730 1730-1735
## 5373 5713 6449 7220 8550 10247 12112
## 1735-1740 1740-1745 1745-1750 1750-1755 1755-1760 1760-1765 1765-1770
## 12810 14107 15585 18836 20213 22616 26447
## 1770-1775 1775-1780 1780-1785 1785-1790 1790-1795 1795-1800 <NA>
## 27417 30140 32575 36765 40126 47163 31906</code></pre>
</div>
<div id="subset-and-save" class="section level2">
<h2>Subset and save</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa =<span class="st"> </span><span class="kw">recenter_all</span>(rpqa)
rpqa =<span class="st"> </span><span class="kw">recenter.pat</span>(rpqa, <span class="dt">what =</span> <span class="st">"paternal.grandpaternalage"</span>, <span class="dt">among_who =</span> <span class="st">"idPaternalGrandparents"</span>)
rpqa =<span class="st"> </span><span class="kw">recenter.pat</span>(rpqa, <span class="dt">what =</span> <span class="st">"maternal.grandpaternalage"</span>, <span class="dt">among_who =</span> <span class="st">"idMaternalGrandparents"</span>)
rpqa[, any_surviving_children :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(children.survivingR ><span class="st"> </span><span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">0</span>)]
rpqa[, children.wddate :<span class="er">=</span><span class="st"> </span>children.dead1y +<span class="st"> </span>children.surviving1y]
rpqa[, maternalage.factor :<span class="er">=</span><span class="st"> </span><span class="kw">cut</span>((<span class="dv">10</span>*maternalage), <span class="dt">breaks =</span> <span class="kw">c</span>(<span class="dv">14</span>, <span class="dv">20</span>, <span class="dv">35</span>, <span class="dv">50</span>))]
rpqa$maternalage.factor =<span class="st"> </span><span class="kw">relevel</span>(rpqa$maternalage.factor, <span class="dt">ref =</span> <span class="st">"(20,35]"</span>)
<span class="kw">crosstabs</span>(~<span class="st"> </span><span class="kw">is.na</span>(codeLieuNaiss) +<span class="st"> </span>(codeLieuNaiss <<span class="st"> </span><span class="dv">10000</span>), <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## codeLieuNaiss < 10000
## is.na(codeLieuNaiss) FALSE TRUE <NA>
## FALSE 10814 416629 0
## TRUE 0 0 32148</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$known_to_be_born_in_quebec =<span class="st"> </span>!<span class="kw">is.na</span>(rpqa$codeLieuNaiss) &<span class="st"> </span>rpqa$codeLieuNaiss <<span class="st"> </span><span class="dv">10000</span>
<span class="kw">crosstabs</span>(rpqa$known_to_be_born_in_quebec)</code></pre></div>
<pre><code>## rpqa$known_to_be_born_in_quebec
## FALSE TRUE
## 42962 416629</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$known_to_have_died_in_quebec =<span class="st"> </span>!<span class="kw">is.na</span>(rpqa$codeLieuDeces) &<span class="st"> </span>rpqa$codeLieuDeces <<span class="st"> </span><span class="dv">10000</span>
<span class="kw">crosstabs</span>(~<span class="st"> </span><span class="kw">is.na</span>(rpqa$codeLieuDeces) +<span class="st"> </span><span class="kw">is.na</span>(dyear), <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## is.na(dyear)
## is.na(rpqa$codeLieuDeces) FALSE TRUE
## FALSE 244806 667
## TRUE 8 214110</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$might_have_died_in_quebec =<span class="st"> </span><span class="kw">is.na</span>(rpqa$codeLieuDeces) |<span class="st"> </span>rpqa$codeLieuDeces <<span class="st"> </span><span class="dv">10000</span>
<span class="kw">crosstabs</span>(~<span class="st"> </span>might_have_died_in_quebec +<span class="st"> </span>known_to_have_died_in_quebec, <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## known_to_have_died_in_quebec
## might_have_died_in_quebec FALSE TRUE
## FALSE 39 0
## TRUE 214118 245434</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(rpqa$might_have_died_in_quebec)</code></pre></div>
<pre><code>## rpqa$might_have_died_in_quebec
## FALSE TRUE
## 39 459552</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$parents_known_to_have_married_in_quebec =<span class="st"> </span>!<span class="kw">is.na</span>(rpqa$codeLieuM.Parents) &<span class="st"> </span>rpqa$codeLieuM.Parents <<span class="st"> </span><span class="dv">10000</span>
rpqa$parents_might_have_married_in_quebec =<span class="st"> </span><span class="kw">is.na</span>(rpqa$codeLieuM.Parents) |<span class="st"> </span>rpqa$codeLieuM.Parents <<span class="st"> </span><span class="dv">10000</span>
<span class="kw">crosstabs</span>(~<span class="st"> </span><span class="kw">is.na</span>(codeLieuM.Parents) +<span class="st"> </span>(codeLieuM.Parents <<span class="st"> </span><span class="dv">10000</span>), <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## codeLieuM.Parents < 10000
## is.na(codeLieuM.Parents) FALSE TRUE <NA>
## FALSE 302 409187 0
## TRUE 0 0 50102</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(~<span class="st"> </span>parents_known_to_have_married_in_quebec +<span class="st"> </span>might_have_died_in_quebec +<span class="st"> </span>known_to_be_born_in_quebec, <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## , , known_to_be_born_in_quebec = FALSE
##
## might_have_died_in_quebec
## parents_known_to_have_married_in_quebec FALSE TRUE
## FALSE 19 34990
## TRUE 0 7953
##
## , , known_to_be_born_in_quebec = TRUE
##
## might_have_died_in_quebec
## parents_known_to_have_married_in_quebec FALSE TRUE
## FALSE 8 15387
## TRUE 12 401222</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(~<span class="st"> </span>parents_might_have_married_in_quebec +<span class="st"> </span>might_have_died_in_quebec +<span class="st"> </span>known_to_be_born_in_quebec, <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## , , known_to_be_born_in_quebec = FALSE
##
## might_have_died_in_quebec
## parents_might_have_married_in_quebec FALSE TRUE
## FALSE 0 106
## TRUE 19 42837
##
## , , known_to_be_born_in_quebec = TRUE
##
## might_have_died_in_quebec
## parents_might_have_married_in_quebec FALSE TRUE
## FALSE 0 196
## TRUE 20 416413</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa =<span class="st"> </span><span class="kw">data.frame</span>(rpqa)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children_born_in_quebec'</span>,<span class="dt">wt_var =</span> <span class="st">'known_to_be_born_in_quebec'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children_died_in_quebec'</span>,<span class="dt">wt_var =</span> <span class="st">'known_to_have_died_in_quebec'</span>)
rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'children_probably_died_in_quebec'</span>,<span class="dt">wt_var =</span> <span class="st">'might_have_died_in_quebec'</span>)
rpqa$all_kids_born_and_died_in_quebec =<span class="st"> </span>rpqa$children_born_in_quebec ==<span class="st"> </span>rpqa$children_died_in_quebec &<span class="st"> </span>rpqa$children_born_in_quebec ==<span class="st"> </span>rpqa$children
rpqa$all_kids_born_and_probably_died_in_quebec =<span class="st"> </span>rpqa$children_born_in_quebec ==<span class="st"> </span>rpqa$children_probably_died_in_quebec &<span class="st"> </span>rpqa$children_born_in_quebec ==<span class="st"> </span>rpqa$children
<span class="kw">crosstabs</span>(~<span class="st"> </span>all_kids_born_and_died_in_quebec +<span class="st"> </span>all_kids_born_and_probably_died_in_quebec, <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## all_kids_born_and_probably_died_in_quebec
## all_kids_born_and_died_in_quebec FALSE TRUE
## FALSE 29204 89926
## TRUE 0 340461</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(~<span class="st"> </span>all_kids_born_and_probably_died_in_quebec +<span class="st"> </span>parents_known_to_have_married_in_quebec, <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## parents_known_to_have_married_in_quebec
## all_kids_born_and_probably_died_in_quebec FALSE TRUE
## FALSE 18659 10545
## TRUE 31745 398642</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(rpqa$all_kids_born_and_died_in_quebec &<span class="st"> </span>rpqa$parents_known_to_have_married_in_quebec)</code></pre></div>
<pre><code>## rpqa$all_kids_born_and_died_in_quebec & rpqa$parents_known_to_have_married_in_quebec
## FALSE TRUE
## 140295 319296</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(rpqa$all_kids_born_and_probably_died_in_quebec &<span class="st"> </span>rpqa$parents_known_to_have_married_in_quebec)</code></pre></div>
<pre><code>## rpqa$all_kids_born_and_probably_died_in_quebec & rpqa$parents_known_to_have_married_in_quebec
## FALSE TRUE
## 60949 398642</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$sibs_born_in_quebec =<span class="st"> </span><span class="kw">ave</span>(rpqa$known_to_be_born_in_quebec, rpqa$idParents, <span class="dt">FUN =</span> function(x) { <span class="kw">sum</span>(x, <span class="dt">na.rm =</span> T) })
rpqa$sibs_died_in_quebec =<span class="st"> </span><span class="kw">ave</span>(rpqa$might_have_died_in_quebec, rpqa$idParents, <span class="dt">FUN =</span> function(x) { <span class="kw">sum</span>(x, <span class="dt">na.rm =</span> T) })
<span class="kw">crosstabs</span>(<span class="kw">I</span>(rpqa$sibs_born_in_quebec -<span class="st"> </span>rpqa$sibs_died_in_quebec))</code></pre></div>
<pre><code>## I(rpqa$sibs_born_in_quebec - rpqa$sibs_died_in_quebec)
## -9 -8 -7 -6 -5 -4 -3 -2 -1 0
## 12 11 88 245 445 1029 2867 10092 74394 370282
## 1
## 126</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">qplot</span>(rpqa$sibs_born_in_quebec -<span class="st"> </span>rpqa$sibs_died_in_quebec, <span class="dt">binwidth =</span> <span class="dv">1</span>)</code></pre></div>
<p><img src="0_rpqa_massage_files/figure-html/sbset-1.png" width="1440px" height="900px" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa =<span class="st"> </span><span class="kw">count_and_merge</span>(rpqa, <span class="st">'all_sibs_born_and_died_in_quebec'</span>,<span class="dt">wt_var =</span> <span class="st">'all_kids_born_and_died_in_quebec'</span>)
<span class="kw">crosstabs</span>(~<span class="st"> </span>all_sibs_born_and_died_in_quebec +<span class="st"> </span>parents_known_to_have_married_in_quebec, <span class="dt">data =</span> rpqa)</code></pre></div>
<pre><code>## parents_known_to_have_married_in_quebec
## all_sibs_born_and_died_in_quebec FALSE TRUE
## 0 29424 308036
## 1 9947 10995
## 2 2196 10535
## 3 1707 10675
## 4 1486 10456
## 5 1235 10169
## 6 1169 9289
## 7 913 8413
## 8 702 7475
## 9 519 6310
## 10 394 4895
## 11 254 4018
## 12 174 2794
## 13 81 1878
## 14 83 1287
## 15 39 847
## 16 29 479
## 17 21 314
## 18 11 151
## 19 8 72
## 20 6 42
## 21 2 27
## 22 2 19
## 23 1 3
## 24 0 3
## 25 0 2
## 27 1 2
## 29 0 1</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$probably_complete_records =<span class="st"> </span>rpqa$all_kids_born_and_probably_died_in_quebec &<span class="st"> </span>rpqa$parents_might_have_married_in_quebec
<span class="kw">table</span>(rpqa$probably_complete_records)</code></pre></div>
<pre><code>##
## FALSE TRUE
## 29493 430098</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rpqa$birth_cohort =<span class="st"> </span><span class="kw">factor</span>(rpqa$birth.cohort)
rpqa$male =<span class="st"> </span><span class="kw">factor</span>(rpqa$male)
rpqa$last_born =<span class="st"> </span><span class="kw">factor</span>(rpqa$last_born)
rpqa<span class="fl">.1</span> =<span class="st"> </span>rpqa %>%<span class="st"> </span><span class="kw">filter</span>(byear <<span class="st"> </span><span class="dv">1740</span> &<span class="st"> </span>byear >=<span class="st"> </span><span class="dv">1670</span> &<span class="st"> </span>probably_complete_records)
<span class="kw">save</span>(rpqa, rpqa<span class="fl">.1</span>,<span class="dt">file =</span> <span class="st">"rpqa.rdata"</span>)</code></pre></div>
</div>
</div>
</div>
<script>
// Once the DOM is ready, give every pandoc-rendered table (identified by a
// `tr.header` row sitting directly inside a `thead`) the Bootstrap table classes.
$(function () {
  var headerRows = $('tr.header');
  var pandocTables = headerRows.parent('thead').parent('table');
  pandocTables.addClass('table table-condensed');
});
</script>
</body>
</html>