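"""Import QField CSV exports into Directus.

Walks every CSV file under ``out_csv_path``, splits observation layers
(files ending in ``_obs_EPSG:4326.csv``) from sample layers, and pushes
each row to the matching Directus collection (``QField_Data_Obs`` or
``Qfield_Data``). If a row already exists, the script falls back to a
PATCH request to update it.

Expected environment variables (loaded from a .env file):
    directus_email     Directus account email
    directus_password  Directus account password
    out_csv_path       root folder containing the QField CSV exports
"""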
import os
from dotenv import load_dotenv
import requests
import pandas as pd
load_dotenv()
# Import environment variables
directus_email = os.getenv('directus_email')
directus_password = os.getenv('directus_password')
# Define the Directus base URL
base_url = 'http://directus.dbgi.org'
# Define the login endpoint URL
login_url = base_url + '/auth/login'
# Create a session object for making requests
session = requests.Session()
# Send a POST request to the login endpoint
response = session.post(login_url, json={'email': directus_email, 'password': directus_password})
# Fail early with a clear error if authentication was rejected
response.raise_for_status()
data = response.json()['data']
directus_access_token = data['access_token']
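# Item endpoints of the two target collections (samples and observations)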
collection_url = base_url + '/items/Qfield_Data/'
collection_url_obs = base_url + '/items/QField_Data_Obs/'
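# Authenticate every subsequent request with the bearer token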
session.headers.update({'Authorization': f'Bearer {directus_access_token}'})
# Add headers
headers = {
    'Content-Type': 'application/json'
}
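# Root folder containing the QField CSV exports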
out_csv_path = os.getenv('out_csv_path')
# Iterate over all CSV files in the input folder and its subdirectories
for root, dirs, files in os.walk(out_csv_path):
    for filename in files:
        # Ignore old layer without sample_id
        if filename.endswith('.csv') and filename != "SBL_20004_2022_EPSG:4326.csv":
            # Read each CSV into a dataframe
            constructed_path = os.path.join(root, filename)
            df = pd.read_csv(constructed_path)
if filename.endswith("_obs_EPSG:4326.csv"):
treated_df = df.loc[:, ['collector_fullname', 'inat_upload', 'taxon_name', 'latitude', 'longitude', 'ipen', 'no_name_on_list', 'name_proposition', 'herbivory_(percent)', 'comment_eco', 'soil_type', 'weather', 'temperature_(°C)', 'comment_env', 'collector_orcid', 'collector_inat', 'date', 'is_wild']]
# Replace NAs by nothing, otherwise directus raises an error
treated_df.fillna('', inplace=True)
                # Send each row individually to Directus
                for index, row in treated_df.iterrows():
                    # Use the taxon name from the list unless the collector flagged
                    # "no_name_on_list" and typed a name proposition instead
                    if row["no_name_on_list"] != 1:
                        field_sample_name = row["taxon_name"]
                    else:
                        field_sample_name = row["name_proposition"]
                    # Convert the 0/1 flags to proper booleans
                    is_wild = row["is_wild"] == 1
                    inat_upload = row["inat_upload"] == 1
                    # Create the JSON payload for the data import
                    data = {'latitude': str(row["latitude"]),
                            'field_sample_name': field_sample_name,
                            'longitude': row["longitude"],
                            'ipen': row["ipen"],
                            'herbivory_percent': row["herbivory_(percent)"],
                            'comment_eco': row["comment_eco"],
                            'soil_type': row["soil_type"],
                            'weather': row["weather"],
                            'temperature_celsius': row["temperature_(°C)"],
                            'comment_env': row["comment_env"],
                            'inat_upload': inat_upload,
                            'collector_fullname': row["collector_fullname"],
                            'collector_orcid': row["collector_orcid"],
                            'collector_inat': row["collector_inat"],
                            'collection_date': row["date"],
                            'is_wild': is_wild}
                    # Request
                    response = session.post(url=collection_url_obs, headers=headers, json=data)
                    # If the creation failed, try a PATCH to update an already existing observation
                    if response.status_code != 200:
                        # Modify the URL to target the correct observation (keyed by latitude)
                        collection_url_patch = collection_url_obs + str(row["latitude"])
                        # Request
                        response = session.patch(url=collection_url_patch, headers=headers, json=data)
                        # If still unsuccessful, print information on the sample.
                        # Should be replaced by another Directus request to track unsuccessful imports elsewhere than in the log.
                        if response.status_code != 200:
                            print(field_sample_name)
                            print(response.status_code)
                            #print(response.json())
            else:
                # Homogenize data for the Directus import
                treated_df = df.loc[:, ['collector_fullname', 'observation_subject', 'inat_upload', 'sample_id', 'taxon_name', 'latitude', 'longitude', 'ipen', 'no_name_on_list', 'name_proposition', 'herbivory_(percent)', 'comment_eco', 'soil_type', 'weather', 'temperature_(°C)', 'comment_env', 'date', 'collector_orcid', 'collector_inat', 'is_wild']].copy()
                treated_df.rename(columns={'sample_id': 'field_sample_id'}, inplace=True)
                # Remove possible whitespace
                treated_df["field_sample_id"] = treated_df["field_sample_id"].str.strip()
                # Replace NAs with empty strings, otherwise Directus raises an error
                treated_df.fillna('', inplace=True)
                # Send each row individually to Directus
                for index, row in treated_df.iterrows():
                    # Skip rows without a sample id
                    if row["field_sample_id"] != '':
                        # Use the taxon name from the list unless the collector flagged
                        # "no_name_on_list" and typed a name proposition instead
                        if row["no_name_on_list"] != 1:
                            field_sample_name = row["taxon_name"]
                        else:
                            field_sample_name = row["name_proposition"]
                        # Convert the 0/1 flags to proper booleans
                        is_wild = row["is_wild"] == 1
                        inat_upload = row["inat_upload"] == 1
                        # Create the JSON payload for the data import
                        data = {'field_sample_id_pk': row["field_sample_id"],
                                'field_sample_id_fk': row["field_sample_id"],
                                'field_sample_name': field_sample_name,
                                'latitude': row["latitude"],
                                'longitude': row["longitude"],
                                'ipen': row["ipen"],
                                'herbivory_percent': row["herbivory_(percent)"],
                                'comment_eco': row["comment_eco"],
                                'soil_type': row["soil_type"],
                                'weather': row["weather"],
                                'temperature_celsius': row["temperature_(°C)"],
                                'comment_env': row["comment_env"],
                                'inat_upload': inat_upload,
                                'collector_fullname': row["collector_fullname"],
                                'collector_orcid': row["collector_orcid"],
                                'collector_inat': row["collector_inat"],
                                'collection_date': row["date"],
                                'is_wild': is_wild}
                        # Request
                        response = session.post(url=collection_url, headers=headers, json=data)
                        # If the creation failed, try a PATCH to update an already existing sample
                        if response.status_code != 200:
                            # Modify the URL to target the correct sample (keyed by field_sample_id)
                            collection_url_patch = collection_url + row["field_sample_id"]
                            # Request
                            response = session.patch(url=collection_url_patch, headers=headers, json=data)
                            # If still unsuccessful, print information on the sample.
                            # Should be replaced by another Directus request to track unsuccessful imports elsewhere than in the log.
                            if response.status_code != 200:
                                print(row["field_sample_id"])
                                print(response.status_code)
                                #print(response.json())
                    else:
                        print("no field sample id")