A_compiler.py
# -*- coding: utf-8 -*-
"""
Code for the paper "Rock mass structure characterization considering finite
and folded discontinuities"
Dr. Georg H. Erharter - 2023
DOI: XXXXXXXXXXX

Script that compiles the recorded data from samples of the discrete
discontinuity networks and creates one Excel file for further processing.
"""
from os import listdir, remove

import numpy as np
import pandas as pd

from X_library import parameters

params = parameters()
pd.options.mode.chained_assignment = None
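# note: the chained-assignment warning is silenced above because several steps
# below write into single columns via df[col].loc[...] / .iloc[...], which
# pandas would otherwise flag as potential chained assignment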
##########################################
# load text files with geometry parameters as output by Grasshopper
# clean up files in case Grasshopper produced incomplete samples
all_files = list(listdir(r'../combinations'))
for file_name in all_files:
    id_ = file_name[:12]  # sample identifier = first 12 characters of the file name
    values = f'{id_}.txt'
    mesh = f'{id_}_discontinuities.stl'
    boxes = f'{id_}_boxcount.txt'
    if values in all_files and mesh in all_files:
        pass
    else:
        print('delete incomplete files')
        for f in [fr'../combinations/{values}', fr'../combinations/{mesh}',
                  fr'../combinations/{boxes}']:
            try:
                remove(f)
            except FileNotFoundError:
                pass
# load data from the text files into a pandas dataframe
contents = []
for file_name in listdir(r'../combinations'):
    if '_box' in file_name or 'FAIL' in file_name or '_Rast' in file_name:
        pass
    elif '.txt' in file_name:
        with open(fr'../combinations/{file_name}', 'r') as f:
            content = f.read()
        # strip parentheses, blanks and 'L' characters (presumably Python 2
        # style long-integer suffixes from Grasshopper) before parsing
        content = content.replace('(', '')
        content = content.replace(')', '')
        content = content.replace(' ', '')
        content = content.replace('L', '')
        content = [eval(num) for num in content.split(',')]
        contents.append(content)
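# note: each remaining .txt file is assumed to hold one comma-separated record
# per sample (input parameters followed by measured outputs); eval() simply
# turns every token into the corresponding Python number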
inputs = ['bounding box size [m]', 'Jv boxes edge size [m]', 'seed',
          'set 1 - n joints',
          'set 1 - radius [m]', 'set 1 - radius std [m]',
          'set 1 - dip direction [°]', 'set 1 - dip direction std [°]',
          'set 1 - dip [°]', 'set 1 - dip std [°]', 'set 1 - type',
          'F_rand_sin', 'F_rand_n_planes', 'F_rand_angle', 'F_rand_axis_x',
          'F_rand_axis_y', 'F_rand_axis_z',
          'set 2 - n joints', 'set 2 - radius [m]', 'set 2 - radius std [m]',
          'set 2 - dip direction [°]', 'set 2 - dip direction std [°]',
          'set 2 - dip [°]', 'set 2 - dip std [°]',
          'set 3 - n joints',
          'set 3 - radius [m]', 'set 3 - radius std [m]',
          'set 3 - dip direction [°]', 'set 3 - dip direction std [°]',
          'set 3 - dip [°]', 'set 3 - dip std [°]',
          'random set - n joints', 'random set - radius [m]',
          'random set - radius std [m]',
          'identifier']
outputs = ['meas. spacing set 1 [m]', 'meas. spacing set 2 [m]',
           'meas. spacing set 3 [m]', 'RQD Y', 'RQD X', 'RQD Z',
           'apparent spacing Y [m]', 'apparent spacing X [m]',
           'apparent spacing Z [m]',
           'P10 Y', 'P10 X', 'P10 Z', 'P20 X', 'P21 X', 'P20 Y', 'P21 Y',
           'P20 Z', 'P21 Z', 'Jv measured [discs/m³]', 'P32',
           'n blocks', 'avg. block volume [m³]',
           'max block volume [m³]', 'min block volume [m³]',
           'avg. block edge length [m]',
           'avg. block surface area [m²]', 'a3', 'a2', 'a1',
           'set 1 total area [m2]', 'set 2 total area [m2]',
           'set 3 total area [m2]', 'random set total area [m2]']
columns = inputs + outputs
df = pd.DataFrame(columns=columns, data=np.array(contents))
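# note: every record read above is assumed to hold exactly
# len(inputs) + len(outputs) values in the same order as the column
# definitions; otherwise the DataFrame construction would fail or misalign
# the data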
# # remove samples where one of the discontinuity sets was not stored
# for i in range(len(df)):
#     for set_ in [2, 3]:
#         if df.iloc[i]['set 1 - type'] == 1 and pd.isnull(df.iloc[i][f'meas. spacing set {set_} [m]']):
#             print(f'{list(df.index)[i]} set: {set_}')
# remove unused input data from discontinuity set 1, which is either a set of
# planar, finite discontinuities (type 0) or folded discontinuities (type 1)
id_plane = np.where(df['set 1 - type'] == 0)[0]
id_folds = np.where(df['set 1 - type'] == 1)[0]
df.loc[id_folds, ['set 1 - n joints', 'set 1 - radius [m]',
                  'set 1 - radius std [m]', 'set 1 - dip direction [°]',
                  'set 1 - dip direction std [°]', 'set 1 - dip [°]',
                  'set 1 - dip std [°]']] = np.nan
df.loc[id_plane, ['F_rand_sin', 'F_rand_n_planes', 'F_rand_angle',
                  'F_rand_axis_x', 'F_rand_axis_y', 'F_rand_axis_z']] = np.nan
df.set_index('identifier', inplace=True)
print('data frame set up')
# set up empty columns for later population
for col in ['Minkowski dimension', 'similarity n zeros', 'similarity max',
            'similarity min', 'similarity mean', 'similarity median',
            'structural complexity']:
    df[col] = np.nan
##########################################
# add fractal dimensions where they are computed
for sample in df.index:
    try:
        df_boxcount = pd.read_csv(fr'../combinations/{sample}_boxcount.txt')
        df['Minkowski dimension'].loc[sample] = params.Minkowski(
            df_boxcount['n boxes'], df_boxcount['box edge length [m]'])
        if np.isnan(df['Minkowski dimension'].loc[sample]):
            raise ValueError(f'{sample} has no computed boxes')
    except FileNotFoundError:
        pass
n_processed = len(df) - df['Minkowski dimension'].isna().sum()
print(f'{n_processed} / {len(df)} fractal dimensions computed')
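# note: the Minkowski (box-counting) dimension follows from N(eps) ~ eps**-D,
# i.e. D is the slope of log(n boxes) over log(1 / box edge length). The exact
# implementation lives in X_library.parameters.Minkowski and is not shown here;
# a typical estimate would be something like
# D, _ = np.polyfit(np.log(1 / df_boxcount['box edge length [m]']),
#                   np.log(df_boxcount['n boxes']), 1)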
##########################################
# add similarity and structural complexity information where it is computed
for sample in df.index:
    try:
        with open(fr'../combinations/{sample}_RasterAnalysis.txt', 'r') as f:
            content = []
            for v in f.read().split(','):
                if v == 'nan':
                    content.append(np.nan)
                else:
                    content.append(eval(v))
        for i, col in enumerate(['similarity n zeros', 'similarity max',
                                 'similarity min', 'similarity mean',
                                 'similarity median',
                                 'structural complexity']):
            if col == 'structural complexity' and content[i] == 0:
                remove(fr'../combinations/{sample}_RasterAnalysis.txt')
            else:
                df[col].loc[sample] = content[i]
    except FileNotFoundError:
        pass
n_processed = len(df) - df['structural complexity'].isna().sum()
print(f'{n_processed} / {len(df)} similarities computed')
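# note: a structural complexity value of exactly 0 is treated above as a failed
# raster analysis; its _RasterAnalysis.txt file is deleted, presumably so that
# the analysis gets recomputed in a later run, and 'structural complexity'
# stays NaN for that sample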
##########################################
# some basic data preprocessing
# convert object dtypes to numeric
for param in df.columns:
    df[param] = pd.to_numeric(df[param])
# set joint properties to NaN for joint sets that have no joints
# (joint_set 4 refers to the random set)
for joint_set in [1, 2, 3, 4]:
    if joint_set == 4:
        id_0 = np.where(df['random set - n joints'] == 0)[0]
        df['random set - radius [m]'].iloc[id_0] = np.nan
    else:
        id_0 = np.where(df[f'set {joint_set} - n joints'] == 0)[0]
        df[f'set {joint_set} - radius [m]'].iloc[id_0] = np.nan
        df[f'set {joint_set} - dip [°]'].iloc[id_0] = np.nan
        df[f'set {joint_set} - dip direction [°]'].iloc[id_0] = np.nan
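# note: blanking out the parameters of empty joint sets is presumably meant to
# keep unused radius / orientation values from entering later analyses as
# spurious features; the 'n joints' columns themselves are kept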
# idx_neg_vol = np.where(df['avg. block volume [m³]'] < 0)[0]
# df = df.drop(index=idx_neg_vol)
# drop all columns of features that are still under development, such as block
# volumes, block shape factors and the similarity / structural complexity
# measures
df.drop(['n blocks', 'avg. block volume [m³]', 'max block volume [m³]',
         'min block volume [m³]', 'avg. block edge length [m]',
         'avg. block surface area [m²]', 'a3', 'a2', 'a1',
         'similarity n zeros', 'similarity max', 'similarity min',
         'similarity mean', 'similarity median', 'structural complexity'],
        axis=1, inplace=True)
# save the compiled data to an Excel file
df.to_excel(r'../output/PDD1_0.xlsx')
print('data compiled')