-
Notifications
You must be signed in to change notification settings - Fork 0
/
process_json.Rmd
93 lines (79 loc) · 1.73 KB
/
process_json.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
```{r}
library(tidyverse)
library(jsonlite)
library(lubridate)
```
```{r}
# Read the raw input: one string per line of data/epi.json. Each element is
# parsed on its own with fromJSON() further down.
# Passing the path (rather than a hand-opened connection) lets readLines()
# open and close the file itself, so nothing is left open if the read fails.
lines <- readLines("data/epi.json")
```
```{r}
# Extract one top-level field of a JSON-encoded record as a tibble.
#
# Args:
#   line:  A single JSON document as a string.
#   field: The top-level field to extract, given as a bare name or a string.
#
# Returns:
#   A tibble with an `id` column (the record's `url`, or NA when the record
#   has none) and a column named after `field` holding the field's value;
#   `id` is recycled to the size of that value. An empty tibble (no rows, no
#   columns) is returned when the field is missing or has length zero.
process_field <- function(line, field) {
  field_name <- rlang::as_string(ensym(field))
  obj <- fromJSON(line)

  values <- obj[[field_name]]
  if (length(values) == 0) {
    return(tibble())
  }

  # Look the values up explicitly rather than evaluating names inside
  # `with(obj, ...)`: with scoping-based lookup a record without a `url` key
  # resolves `url` to the base function of that name, and a JSON key that
  # collides with a local variable would shadow it.
  id <- obj[["url"]]
  if (is.null(id)) {
    id <- NA_character_
  }

  tibble(id = id, !!field_name := values)
}
# Collect the recipeIngredient entries of every record into one table
# (columns: id, ingredients), drop exact duplicates, discard blank entries and
# entries starting with "*", then normalise the remaining text.
ingr <- lines %>%
  map(function(line) process_field(line, recipeIngredient)) %>%
  bind_rows() %>%
  distinct() %>%
  rename(ingredients = recipeIngredient) %>%
  filter(
    !(str_detect(ingredients, "^\\s*$") | str_detect(ingredients, "^\\*"))
  ) %>%
  mutate(
    # Each step below overwrites the column with the next stage of cleaning.
    ingredients = str_to_lower(ingredients),
    ingredients = str_replace_all(ingredients, "\n", " "),
    ingredients = stringi::stri_trans_general(ingredients, "latin-ascii"),
    # Remove everything up to and including the last ": " in the string.
    ingredients = str_remove(ingredients, ".*: "),
    # Keep only the final path segment of the URL as the identifier.
    id = str_match(id, "/([^/]+)$")[, 2]
  )
# Write the cleaned ingredient strings to outputs/ingr, one per line and in
# row order. Given a path, writeLines() opens and closes the file itself, so
# no connection is left open if the write fails.
writeLines(ingr$ingredients, "outputs/ingr")
```
```{r}
# parsed.json comes from running https://github.com/mtlynch/ingredient-phrase-tagger
# on the ingr output
ingr2 <- fromJSON("outputs/parsed.json")

# The parsed names are attached to `ingr` purely by position, so the two must
# have the same length. Without this check a missing `name` field (NULL) would
# silently produce a CSV with no `base` column, and a single parsed name would
# be silently recycled across every row.
if (length(ingr2[["name"]]) != nrow(ingr)) {
  stop(
    "outputs/parsed.json has ", length(ingr2[["name"]]),
    " parsed names but ingr has ", nrow(ingr), " rows",
    call. = FALSE
  )
}

# `[[` is used instead of `$` so the lookup matches the name exactly.
ingr %>%
  mutate(base = ingr2[["name"]]) %>%
  write_csv("outputs/ingredients.csv")
```
```{r}
# Summarise one JSON-encoded record as a tibble of recipe-level facts.
#
# Args:
#   l: A single JSON document as a string.
#
# Returns:
#   A tibble with columns id (the record's `url`), name, date
#   (`datePublished`), rating and count (from `aggregateRating`), author
#   (author names joined with ";") and tags (`keywords` joined with ";").
#   Scalar fields that are missing or empty are filled with NA so that every
#   record yields the same columns in the same order.
get_facts <- function(l) {
  obj <- fromJSON(l)

  # tibble() drops NULL columns and shrinks to zero rows when a column has
  # length zero; substituting NA keeps the schema stable and the record
  # present. A logical NA combines with any column type in bind_rows().
  or_na <- function(x) if (length(x) == 0) NA else x

  # `[[` matches names exactly, whereas `$` on a list allows partial matches.
  # Indexing NULL with `[[` yields NULL, so a missing aggregateRating or
  # author is handled without a separate check.
  rating <- obj[["aggregateRating"]]

  tibble(
    id = or_na(obj[["url"]]),
    name = or_na(obj[["name"]]),
    date = or_na(obj[["datePublished"]]),
    rating = or_na(rating[["ratingValue"]]),
    count = or_na(rating[["ratingCount"]]),
    # paste(NULL, collapse = ";") is "", so these two never need or_na().
    author = paste(obj[["author"]][["name"]], collapse = ";"),
    tags = paste(obj[["keywords"]], collapse = ";")
  )
}
# Build the per-record facts table: one get_facts() tibble per input line,
# stacked together with exact duplicate rows removed.
facts <- bind_rows(map(lines, get_facts)) %>%
  distinct()

# Reduce the URL to its final path segment and parse the first ten characters
# of the date string as a year-month-day date.
facts <- mutate(
  facts,
  id = str_match(id, "/([^/]+)$")[, 2],
  date = ymd(str_sub(date, 1, 10))
)

write_csv(facts, "outputs/facts.csv")
```
```{r}
# Print the structure of the first parsed record, for exploring the schema.
str(fromJSON(lines[[1]]))
```