---
# Source: MI_Flat_File_April23.Rmd (forked from mrverhoeven/CASC_Fish_Climate)
title: "MI_Flat_File_April23"
author: "Holly Kundel"
date: "`r Sys.Date()`"
output: html_document
---
# Michigan Flat File Work

- Able to read in all files and rename columns as of 4/3/23.

## Load libraries
```{r, warning = FALSE}
library(readr)
library(readxl)
library(dplyr)
library(stringr)
library(arrow)
library(data.table)
library(googledrive)
library(janitor)
library(tidyr)
library(data.table)
library(tidyr)
```
# Testing Mike's WI code on MI data
```{r}
# Read every raw Michigan .csv from the shared drive into the workspace as a
# data.table named after the file (extension removed), then rename its columns
# to the standardized names listed in the data explainer (`cde`).
#
# NOTE(review): `cde` is not created anywhere in the visible part of this
# file; it must already exist in the workspace before this chunk runs.
# The code below reads its `new_file_name` column and treats every other
# non-NA cell of the matching row as "standard name -> raw column name".

# find files on Google Drive Desktop
mi_raw_dir <- "D:/Shared drives/Hansen Lab/RESEARCH PROJECTS/Fish Survey Data/MI_Data/mi_raw_disaggregated_data"
# "$" anchors the pattern so only names that END in .csv are grabbed
MI_files_list <- list.files(path = mi_raw_dir, pattern = ".+\\.csv$")
MI_files_list # check that file names look correct
n <- length(MI_files_list)

# seq_len() makes the loop a no-op when no files are found (1:0 would run twice)
for (i in seq_len(n)) {
  # i = 1
  file_name <- MI_files_list[i]
  # strip only the trailing ".csv"; the dot is escaped so it is a literal dot
  object_name <- sub("\\.csv$", "", file_name)

  dt <- data.table(read_csv_arrow(file.path(mi_raw_dir, file_name)))
  assign(object_name, dt)

  # consider moving renaming into here!
  # note we want to review a sorted list of column names to check misspelling etc.

  # keep only the data explainer row relevant to this file
  explainer_row <- filter(cde, new_file_name == object_name)
  if (nrow(explainer_row) == 0) {
    stop(
      "No row in the data explainer has new_file_name == '", object_name, "'",
      call. = FALSE
    )
  }

  # drop the explainer columns that are NA for this file, then flip the row so
  # each record is one (newname, oldname) pair
  name_key <- explainer_row %>%
    select(where(function(col) !anyNA(col))) %>%
    transpose(keep.names = "newname") %>%
    rename("oldname" = V1)
  # kept for backward compatibility: the lookup table has always been exposed
  # in the global environment under the name `names`
  assign("names", name_key, envir = .GlobalEnv)

  # the unique_row_key entries are not used for matching or renaming
  rename_key <- name_key[!str_detect(name_key$newname, "unique_row_key"), ]

  # see if any column names will not have a match!
  # IF any pop FALSE, stop and revisit the data explainer
  # - e.g., named something "total catch" when actual column name was "total_catch"
  current_cols <- colnames(dt)
  in_explainer <- current_cols %in% rename_key$oldname
  print(cbind(column = current_cols, in_explainer = in_explainer))

  # break the loop if the current file has column names not in the data explainer
  if (!all(in_explainer)) {
    warning(
      "Stopped at '", object_name, "': column(s) not in the data explainer: ",
      paste(current_cols[!in_explainer], collapse = ", "),
      call. = FALSE
    )
    break
  }

  # now rename that file's colnames; setnames() works by reference, so the
  # object already assigned into the workspace is renamed as well
  setnames(
    dt,
    current_cols,
    rename_key$newname[match(current_cols, rename_key$oldname)]
  )

  # confirm import of files:
  print(paste(object_name, "added to workspace"))
}
```
Minor fixes to two of the files: drop the unnamed first column (row numbers) from each and re-save the CSV.
```{r}
# Two of the raw files carry an unnamed leading column that only holds row
# numbers. Each file is read, trimmed to its data columns by position, and
# written back out under the same file name. The output path is relative, so
# the cleaned copy lands in the current working directory.
# NOTE(review): this chunk reads from G: while the import loop reads from D:
# -- confirm which drive letter is correct on the machine running this.

# catch / length-class file: keep columns 2 through 7
MI_catch_length <- read_csv(
  "G:/Shared drives/Hansen Lab/RESEARCH PROJECTS/Fish Survey Data/MI_Data/mi_raw_disaggregated_data/mi_statustrends_catchlengthclass_16June2022.csv"
)
MI_catch_length_fixed <- select(MI_catch_length, 2:7)
write_csv(
  MI_catch_length_fixed,
  "mi_statustrends_catchlengthclass_16June2022.csv"
)

# effort file: keep columns 2 through 14
MI_effort <- read_csv(
  "G:/Shared drives/Hansen Lab/RESEARCH PROJECTS/Fish Survey Data/MI_Data/mi_raw_disaggregated_data/mi_statustrends_effort_16Mar2021.csv"
)
MI_effort_fixed <- select(MI_effort, 2:14)
write_csv(
  MI_effort_fixed,
  "mi_statustrends_effort_16Mar2021.csv"
)
```