'''
Logic and supporting functions to convert a section summary and sparse splice
to an affine and splice interval table (SIT), and to export measurement
data based on an affine and SIT.
'''
from datetime import date, datetime
import logging as log
import os
import unittest
import numpy
import pandas
import coring.identity as ci
import coring.affine as aff
import coring.spliceInterval as si
import coring.measurement as meas
from coring.sectionSummary import SectionSummary
from coring.sparseSplice import SparseSplice
from coring.manualCorrelation import ManualCorrelationTable, loadManualCorrelation
from tabular.csvio import writeToCSV, FormatError
import tabular.pandasutils as PU
FeldmanVersion = "1.0.4"
OutputVocabulary = 'IODP'
ProgressListener = None
def setProgressListener(pl):
    global ProgressListener
    ProgressListener = pl
    if ProgressListener: # allow listener to be cleared by passing None
        ProgressListener.clear()
def reportProgress(value, text):
global ProgressListener
if ProgressListener:
ProgressListener.setValueAndText(value, text)
# pandas call to open Correlator's inexplicable " \t" delimited file formats
def openCorrelatorFunkyFormatFile(filename):
    headers = ["Exp", "Site", "Hole", "Core", "CoreType", "Section", "TopOffset", "BottomOffset", "Depth", "Data", "RunNo"]
    # pass the path directly to pandas; the old 'rU' open mode is gone in Python 3
    df = pandas.read_csv(filename, header=None, names=headers, sep=" \t", skipinitialspace=True, comment="#", engine='python')
    # df can now be written to a normal CSV
    return df
# get total depth of a section offset using SectionSummary data and curated lengths if available
def getOffsetDepth(secsumm, site, hole, core, section, offset, scaledDepth=False):
secTop = secsumm.getSectionTop(site, hole, core, section) if not scaledDepth else secsumm.getScaledSectionTop(site, hole, core, section)
secBot = secsumm.getSectionBot(site, hole, core, section) if not scaledDepth else secsumm.getScaledSectionBot(site, hole, core, section)
scaledTxt = "scaled " if scaledDepth else ""
sectionId = "{}{}-{}-{}".format(site, hole, core, section)
log.debug(" {}section: {}, top = {}m, bot = {}m".format(scaledTxt, sectionId, secTop, secBot))
log.debug(" {}section offset = {}cm + {}m = {}m".format(scaledTxt, offset, secTop, secTop + offset/100.0))
curatedLength = secsumm.getSectionLength(site, hole, core, section)
if offset/100.0 > curatedLength:
log.warning(" section {}: offset {}cm is beyond curated length of section {}m".format(sectionId, offset, curatedLength))
depth = secTop + (offset/100.0) - (secsumm.getTotalGapAboveSectionDepth(site, hole, core, section, offset)/100.0)
    # if using scaled depths, compress depth to drilled interval
    drilledLength = secBot - secTop # meters, matching curatedLength's units
    if scaledDepth and curatedLength > drilledLength:
        compressionFactor = drilledLength / curatedLength
        compressedDepth = secTop + (offset/100.0 * compressionFactor)
        log.warning(" section {}: curated length {}m exceeds drilled length {}m, compressing depth {}m to {}m".format(sectionId, curatedLength, drilledLength, depth, compressedDepth))
        depth = compressedDepth
return depth
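# Worked example with illustrative numbers (not from any real section summary):
# for section top = 10.00m, offset = 50cm, and 4cm of curated gap above the
# offset, the total depth is 10.00 + 0.50 - 0.04 = 10.46m. With scaled depths,
# if a 150cm curated section was drilled as 120cm, the offset is compressed by
# 120/150 = 0.8 before being added to the section top.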
# Check a Section column's values: return True if all values
# are either integers or the string 'CC', otherwise False
def validSectionColumn(df, colname):
    try:
        for val in df[colname]:
            # int() works for strings e.g. int("124") == 124 and ints e.g. int(124) == 124,
            # but raises ValueError for non-integer strings and TypeError for other types
            if val != 'CC':
                int(val)
    except (ValueError, TypeError):
        return False
    return True
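# e.g. a Section column containing 1, "2", and "CC" validates; one containing
# "1A" does not.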
# options: LazyAppend, UseScaledDepths, Manual Correlation File
def convertSparseSplice(secSummPath, sparsePath, affineOutPath, sitOutPath, useScaledDepths=False, lazyAppend=False, manualCorrelationPath=None):
log.info("--- Converting Sparse Splice to Affine and SIT ---")
log.info("{}".format(datetime.now()))
log.info("Using Section Summary {}".format(secSummPath))
log.info("Using Sparse Splice {}".format(sparsePath))
log.info("Options: Use Scaled Depths = {}, Lazy Append = {}, Manual Correlation File = {}".format(useScaledDepths, lazyAppend, manualCorrelationPath))
log.info("Using {} output vocabulary".format(OutputVocabulary))
ss = SectionSummary.createWithFile(secSummPath)
sp = SparseSplice.createWithFile(sparsePath)
# validate that all Section columns contain only integers and 'CC'
for secCol in ['TopSection', 'BottomSection']:
if not validSectionColumn(sp.dataframe, secCol):
raise FormatError("{} column in Sparse Splice contains one or more non-integer values.".format(secCol))
if not validSectionColumn(ss.dataframe, 'Section'):
raise FormatError("Section column in Section Summary contains one or more non-integer values.")
onSpliceAffRows = sparseSpliceToSIT(sp, ss, sitOutPath, useScaledDepths, lazyAppend)
# load just-created SIT and find affines for off-splice cores
sit = si.SpliceIntervalTable.createWithFile(sitOutPath)
mancorr = loadManualCorrelation(manualCorrelationPath) if manualCorrelationPath else None
if mancorr:
        log.debug(mancorr.df.dtypes)
elif manualCorrelationPath: # manual correlation was provided by user but couldn't be loaded
errstr = "The manual correlation file {} could not be loaded.".format(manualCorrelationPath)
log.error(errstr)
raise FormatError(errstr)
offSpliceAffRows = gatherOffSpliceAffines(sit, ss, mancorr)
allAff = onSpliceAffRows + offSpliceAffRows
allAff = fillAffineRows(allAff)
arDicts = [ar.asDict() for ar in allAff]
reportProgress(100, "Writing affine and SIT to file...")
affDF = pandas.DataFrame(arDicts, columns=aff.AffineFormat.getColumnNames())
log.info("writing affine table to {}".format(os.path.abspath(affineOutPath)))
log.debug("affine table column types:\n{}".format(affDF.dtypes))
roundValues(affDF, aff.AffineFormat)
prettyColumns(affDF, aff.AffineFormat)
writeToCSV(affDF, affineOutPath)
log.info("Conversion complete.")
# Generates an affine table and SIT from provided SectionSummary and SparseSplice.
# parameters:
# - sparse: input SparseSplice
# - secsumm: input SectionSummary
# - sitOutPath: path to which generated SIT will be written
# - useScaledDepths: convert section depths to total depth using ScaledTopDepth and ScaledBottomDepth
# in SectionSummary instead of (unscaled) TopDepth and BottomDepth
# - lazyAppend: use previous core's affine shift even if it's from a different hole
def sparseSpliceToSIT(sparse, secsumm, sitOutPath, useScaledDepths=False, lazyAppend=False):
seenCores = [] # list of cores that have already been added to affine
affineRows = [] # list of dicts, each representing a generated affine table row
topCSFs = []
topCCSFs = []
botCSFs = []
botCCSFs = []
prevAffine = 0.0 # previous affine shift (used for APPEND shift)
prevBotCCSF = None
prevRow = {} # previous interval's row, data needed for inter-hole default APPEND gap method
sptype = None
gap = None
rowsToProcess = len(sparse.dataframe)
for index, row in sparse.dataframe.iterrows():
reportProgress(float(index) / rowsToProcess * 50, "Processing sparse splice interval {}...".format(index + 1))
log.debug("Interval {}".format(index + 1))
site = row['Site']
hole = row['Hole']
core = row['Core']
top = row['TopSection']
topOff = row['TopOffset']
log.debug("top section = {}, top offset = {}".format(top, topOff))
shiftTop = secsumm.getOffsetDepth(site, hole, core, top, topOff, useScaledDepths)
bot = row['BottomSection']
botOff = row['BottomOffset']
log.debug("bottom section = {}, bottom offset = {}".format(bot, botOff))
shiftBot = secsumm.getOffsetDepth(site, hole, core, bot, botOff, useScaledDepths)
# bail on inverted or zero-length intervals
        if shiftTop >= shiftBot:
            errstr = "Interval is inverted or zero-length: computed top depth {} >= computed bottom depth {}".format(shiftTop, shiftBot)
            log.error(errstr)
            raise FormatError(errstr) # a bare return here would make the caller fail later with a confusing TypeError
affine = 0.0
        if sptype is None: # first row - splice type doesn't matter yet, it affects the next interval
affine = 0.0
log.debug("First interval, splice type irrelevant")
elif sptype == "APPEND":
if gap is not None: # user-specified gap
gapEndDepth = prevBotCCSF + gap
affine = gapEndDepth - shiftTop
log.debug("User specified gap of {}m between previous bottom ({}m) and current top ({}m), affine = {}m".format(gap, prevBotCCSF, shiftTop, affine))
else: # default gap
assert len(prevRow) > 0
if hole == prevRow['Hole'] or lazyAppend: # hole hasn't changed, use same affine shift
affine = prevAffine
log.debug("APPENDing {} at depth {} based on previous affine {}".format(shiftTop, shiftTop + affine, affine))
else: # different hole, use scaled depths to determine gap
prevBotScaledDepth = secsumm.getOffsetDepth(prevRow['Site'], prevRow['Hole'], prevRow['Core'],
prevRow['BottomSection'], prevRow['BottomOffset'], scaledDepth=True)
topScaledDepth = secsumm.getOffsetDepth(site, hole, core, top, topOff, scaledDepth=True)
scaledGap = topScaledDepth - prevBotScaledDepth
if scaledGap < 0.0:
log.warning("Bottom of previous interval is {}m *above* top of next interval in CSF-B space".format(scaledGap))
affine = (prevBotCCSF - shiftTop) + scaledGap
log.debug("Inter-hole APPENDing {} at depth {} to preserve scaled (CSF-B) gap of {}m".format(shiftTop, shiftTop + affine, scaledGap))
elif sptype == "TIE":
# affine = difference between prev bottom MCD and MBLF of current top
affine = prevBotCCSF - shiftTop
log.debug("TIEing {} to previous bottom depth {}, affine shift of {}".format(shiftTop, prevBotCCSF, affine))
        else:
            errstr = "Encountered unknown splice type {}, bailing out!".format(sptype)
            log.error(errstr)
            raise FormatError(errstr)
if prevBotCCSF is not None and prevBotCCSF > shiftTop + affine:
log.warning("previous interval bottom MCD {} is below current interval top MCD {}".format(prevBotCCSF, shiftTop + affine))
# increase affine to prevent overlap in case of APPEND - this should never happen for a TIE
if sptype == "APPEND":
overlap = prevBotCCSF - (shiftTop + affine)
affine += overlap
log.warning("interval type APPEND, adjusting affine to {}m to avoid {}m overlap".format(affine, overlap))
# create data for corresponding affine - growth rate and differential offset will be filled by fillAffineRows()
coreid = str(site) + str(hole) + "-" + str(core)
if coreid not in seenCores:
seenCores.append(coreid)
coreTop = secsumm.getCoreTop(site, hole, core) # use core's top for depths in affine table, not depth of TIE in splice
affineShiftType = _spliceShiftToAffine(sptype, gap)
fixedCore = prevRow['Hole'] + prevRow['Core'] if sptype == "TIE" else ""
            fixedTieCsf = botCSFs[-1] if sptype == "TIE" else numpy.nan
            shiftedTieCsf = shiftTop if sptype == "TIE" else numpy.nan
affineRow = aff.AffineRow(site, hole, core, row['Tool'], coreTop, coreTop + affine, affine, shiftType=affineShiftType,
fixedCore=fixedCore, fixedTieCsf=fixedTieCsf, shiftedTieCsf=shiftedTieCsf, comment="splice")
affineRows.append(affineRow)
else:
log.error("holecore {} already seen, ignoring".format(coreid))
# create new column data
topCSFs.append(shiftTop)
topCCSFs.append(shiftTop + affine)
botCSFs.append(shiftBot)
botCCSFs.append(shiftBot + affine)
log.debug("shifted top = {}m, bottom = {}m".format(shiftTop + affine, shiftBot + affine))
prevBotCCSF = shiftBot + affine
prevAffine = affine
prevRow = row
# warnings
if shiftTop >= shiftBot:
log.warning("{}: interval top {} at or below interval bottom {} in MBLF".format(coreid, shiftTop, shiftBot))
# track splice type and (optional) gap, used to determine the next interval's depths
sptype = str.upper(row['SpliceType'])
gap = row['Gap'] if not numpy.isnan(row['Gap']) else None
# done parsing, create final dataframe for export
sitDF = sparse.dataframe.copy()
PU.insertColumns(sitDF, 6, [(si.TopDepthCSF.name, pandas.Series(topCSFs)), (si.TopDepthCCSF.name, pandas.Series(topCCSFs))])
PU.insertColumns(sitDF, 10, [(si.BottomDepthCSF.name, pandas.Series(botCSFs)), (si.BottomDepthCCSF.name, pandas.Series(botCCSFs))])
log.info("writing splice interval table to {}".format(os.path.abspath(sitOutPath)))
log.debug("splice interval table column types:{}".format(sitDF.dtypes))
roundValues(sitDF, si.SITFormat)
prettyColumns(sitDF, si.SITFormat)
writeToCSV(sitDF, sitOutPath)
return affineRows
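# Shift arithmetic, with made-up depths for illustration: if the previous
# interval's bottom is at 12.40m CCSF and the current interval's computed top
# is 11.90m CSF, a TIE yields affine = 12.40 - 11.90 = 0.50m, placing the new
# top exactly at the previous bottom. An APPEND with a user-specified gap of
# 0.10m instead yields affine = (12.40 + 0.10) - 11.90 = 0.60m.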
# attempt to map the shift type from the sparse splice to a valid affine shift type
def _spliceShiftToAffine(spliceShift, gap):
affineShiftType = 'REL'
if spliceShift == 'TIE': # sparse splice TIEs naturally become affine TIEs
affineShiftType = "TIE"
elif spliceShift == 'APPEND' and gap is not None:
# if user defined a gap, use SET since they actively chose to position the core
affineShiftType = "SET"
return affineShiftType
# todo: MeasDataDB class that hides multi-file (broken into holes) vs single-file data
# - depthColumn: name of column with depths to be used for splicing data
# - includeOffSplice: if True, all off-splice rows in mdPath will be included in export with 'On-Splice' value = 'off-splice'
# - wholeSpliceSection: if True, all rows in all sections included in a splice interval are exported as 'On-Splice' = 'splice'
def exportMeasurementData(affinePath, sitPath, mdPath, exportPath, depthColumn, includeOffSplice=True, wholeSpliceSection=False):
log.info("--- Splicing Measurement Data ---")
log.info("{}".format(datetime.now()))
log.info("Using Affine Table {}".format(affinePath))
log.info("Using Splice Interval Table {}".format(sitPath))
log.info("Splicing {}".format(mdPath))
log.info("Using '{}' as depth column".format(depthColumn))
log.info("Options: includeOffSplice = {}, wholeSpliceSection = {}".format(includeOffSplice, wholeSpliceSection))
reportProgress(0, "Splicing {}...".format(os.path.basename(mdPath)))
affine = aff.AffineTable.createWithFile(affinePath)
sit = si.SpliceIntervalTable.createWithFile(sitPath)
md = meas.MeasurementData.createWithFile(mdPath, depthColumn)
log.info("Loaded {} rows of data from {}".format(len(md.df.index), mdPath))
log.debug(md.df.dtypes)
onSpliceRows = []
for index, sirow in enumerate(sit.getIntervals()):
progressAmount = 50 if includeOffSplice else 100
reportProgress(progressAmount * float(index)/len(sit.df), "Gathering data for interval {}...".format(index + 1))
log.debug("Interval {}: {}".format(index, sirow))
sections = [sirow.topSection]
if sirow.topSection != sirow.botSection:
intTop = int(sirow.topSection)
intBot = int(sirow.botSection)
sections = [str(x + intTop) for x in range(1 + intBot - intTop)]
log.debug(" Searching section(s) {}...".format(sections))
if wholeSpliceSection:
mdrows = md.getByFullID(sirow.site, sirow.hole, sirow.core, sections)
else:
mdrows = md.getByRangeFullID(sirow.topCSF, sirow.botCSF, sirow.site, sirow.hole, sirow.core, sections)
#print mdrows
#print " found {} rows, top depth = {}, bottom depth = {}".format(len(mdrows), mdrows.iloc[0][depthColumn], mdrows.iloc[-1][depthColumn])
if len(mdrows) > 0:
affineOffset = sirow.topCCSF - sirow.topCSF
_prepSplicedRowsForExport(md.df, mdrows, depthColumn, affineOffset, onSplice=True)
onSpliceRows.append(mdrows)
onSpliceDF = pandas.concat(onSpliceRows)
log.info("Total spliced rows: {}".format(len(onSpliceDF)))
if includeOffSplice:
offSpliceDF = md.df[~(md.df.index.isin(onSpliceDF.index))] # off-splice rows
totalOffSplice = len(offSpliceDF)
log.info("Total off-splice rows: {}".format(totalOffSplice))
#print affine.dataframe.dtypes
#print offSpliceDF.dtypes
        # Iterating over the affine table's rows and bulk-selecting each row's
        # matching measurement rows should be faster than iterating over all of
        # offSpliceDF's rows and looking up the affine shift of each.
offSpliceRows = []
totalOffSpliceWritten = 0
for index, ar in enumerate(affine.allRows()):
reportProgress(50 + 50 * float(index) / len(affine.dataframe), "Gathering data for off-splice rows...")
shiftedRows = offSpliceDF[(offSpliceDF.Site == ar.site) & (offSpliceDF.Hole == ar.hole) & (offSpliceDF.Core == ar.core)]
log.debug(" found {} off-splice rows for affine row {}".format(len(shiftedRows.index), ar))
_prepSplicedRowsForExport(md.df, shiftedRows, depthColumn, ar.cumOffset, onSplice=False)
onSpliceRows.append(shiftedRows)
offSpliceRows.append(shiftedRows)
totalOffSpliceWritten += len(shiftedRows)
log.info("Total off-splice rows included in export: {}".format(totalOffSpliceWritten))
unwritten = offSpliceDF[~(offSpliceDF.index.isin(pandas.concat(offSpliceRows).index))].copy() # rows that still haven't been written!
if len(unwritten.index) > 0:
log.warn("Of {} off-splice rows, {} were not included in the export.".format(totalOffSplice, len(unwritten)))
unwrittenPath = os.path.splitext(mdPath)[0] + "-unwritten.csv"
log.warn("Those rows will be saved to {}".format(unwrittenPath))
prettyColumns(unwritten, meas.MeasurementFormat)
writeToCSV(unwritten, unwrittenPath)
exportdf = pandas.concat(onSpliceRows)
prettyColumns(exportdf, meas.MeasurementFormat)
writeToCSV(exportdf, exportPath)
log.info("Wrote spliced data to {}".format(exportPath))
# rename and add columns in spliced measurement data per LacCore requirements
def _prepSplicedRowsForExport(dataframe, rows, depthColumn, offset, onSplice):
    idIndex = PU.getLastColumnStartingWith(dataframe, "Sediment Depth")
    if idIndex is None: # no columns starting with Sediment Depth were found, insert at the beginning
        idIndex = 0
    else:
        idIndex += 1 # insert after Sediment Depth column (a falsy check here would mistreat a match at index 0)
onSpliceStr = 'splice' if onSplice else 'off-splice'
nameValuesList = [('Splice Depth', pandas.Series(rows[depthColumn] + offset)), ('Offset', offset), ('On-Splice', onSpliceStr)]
PU.insertColumns(rows, idIndex, nameValuesList)
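# e.g. with offset = 0.70 and onSplice=True, a row whose depth column reads
# 12.00 gains 'Splice Depth' = 12.70, 'Offset' = 0.70, and 'On-Splice' =
# 'splice', inserted after the last Sediment Depth column.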
class OffSpliceCore:
def __init__(self, row):
self.osc = row
def __repr__(self):
return "{}{}-{}".format(self.osc.Site, self.osc.Hole, self.osc.Core)
def gatherOffSpliceAffines(sit, secsumm, mancorr):
# find all off-splice cores: those in section summary that are *not* in SIT
skippedCoreCount = 0
offSpliceCores = []
onSpliceCores = []
ssCores = secsumm.getCores()
for _, row in ssCores.iterrows():
        # brg 7/10/2018: Unsure why I did this and why I thought it would do anything.
        # Shouldn't all rows from secsumm have their site in secsumm.getSites()?
        # May be a vestige of the days when only one site was allowed at a time.
if row.Site not in secsumm.getSites(): # skip section summary rows from non-site cores
skippedCoreCount += 1
continue
if not sit.containsCore(row.Site, row.Hole, row.Core):
offSpliceCores.append(row)
else:
onSpliceCores.append(row)
log.info("Found {} off-splice cores in {} section summary cores for sites {} - skipped {} non-site cores".format(len(offSpliceCores), len(ssCores), secsumm.getSites(), skippedCoreCount))
osAffineShifts = {}
affineRows = []
# for each of the off-splice cores:
for index, osc in enumerate(offSpliceCores):
reportProgress(50 + 50 * float(index)/len(offSpliceCores), "Determining affine shifts for off-splice core {}...".format(OffSpliceCore(osc)))
oscid = ci.CoreIdentity("[Project Name]", osc.Site, osc.Hole, osc.Core, osc.Tool)
# is that core manually correlated?
hasManual = False
if mancorr is not None:
hasManual = mancorr.hasOffSpliceCore(osc.Site, osc.Hole, osc.Core)
if hasManual:
if oscid not in osAffineShifts:
log.debug("Found manual correlation for {}".format(OffSpliceCore(osc)))
else:
warnstr = "Found additional manual correlation for {}: {} (new) vs. {} (existing) - ignoring new!"
log.warning(warnstr.format(oscid, mancorr.getOffset(osc.Site, osc.Hole, osc.Core), osAffineShifts[oscid]))
else:
log.debug("no manual correlation for {}".format(OffSpliceCore(osc)))
offSpliceMbsf = 0.0
offset = 0.0
shiftType = "REL"
        fixedCore = fixedTieCsf = shiftedTieCsf = None # used only in the case of a TIE
# If so, apply the manual correlation
if hasManual:
if mancorr.includesOnSpliceCore(): # ManualCorrelationTable
mcc = mancorr.findByOffSpliceCore(osc.Site, osc.Hole, osc.Core)
if sit.containsCore(mcc.Site2, mcc.Hole2, mcc.Core2): # is correlation core actually on-splice?
log.debug("SIT contains on-splice core")
# use sparse splice to SIT logic to determine affine for off-splice core based on alignment of section depths
offSpliceMbsf = secsumm.getOffsetDepth(mcc.Site1, mcc.Hole1, mcc.Core1, mcc.Section1, mcc.SectionDepth1) # TODO: UPDATE
log.debug("off-splice: {}@{} = {} MBSF".format(oscid, mcc.SectionDepth1, offSpliceMbsf))
onSpliceMbsf = secsumm.getOffsetDepth(mcc.Site2, mcc.Hole2, mcc.Core2, mcc.Section2, mcc.SectionDepth2)
log.debug("on-splice: {}{}-{}@{} = {} MBSF".format(mcc.Site2, mcc.Hole2, mcc.Core2, mcc.SectionDepth2, onSpliceMbsf))
sitOffset = sit.getCoreOffset(mcc.Site2, mcc.Hole2, mcc.Core2)
onSpliceMcd = onSpliceMbsf + sitOffset
offset = onSpliceMcd - offSpliceMbsf
log.debug(" + SIT offset of {} = {} MCD".format(sitOffset, onSpliceMcd))
log.debug(" off-splice MBSF {} + {} offset = {} on-splice MCD".format(offSpliceMbsf, offset, onSpliceMcd))
# track affine for off-splice core - additional correlations for that core will be ignored if present
osAffineShifts[oscid] = offset
shiftType = "TIE"
fixedCore = "{}{}".format(mcc.Hole2, mcc.Core2)
fixedTieCsf = onSpliceMbsf
shiftedTieCsf = offSpliceMbsf
else:
# warn that "correlation core" is NOT on-splice and fall back on default top MBSF approach
log.warning("Alleged correlation core {}{}-{} is NOT on-splice, using default method to determine offset".format(mcc.Site2, mcc.Hole2, mcc.Core2))
else: # ManualOffsetTable
offset = mancorr.getOffset(osc.Site, osc.Hole, osc.Core)
osAffineShifts[oscid] = offset
shiftType = "SET"
# Otherwise, use default shift method: find the on-splice core with top MBSF
# closest to that of the current core, and use its affine shift.
if oscid not in osAffineShifts:
log.debug("No manual shift for {}, seeking closest top...".format(oscid))
closestCore = secsumm.getCoreWithClosestTop(osc.Site, osc.Hole, osc.Core, onSpliceCores)
offset = sit.getCoreOffset(closestCore.Site, closestCore.Hole, closestCore.Core)
osAffineShifts[oscid] = offset
coreTop = secsumm.getCoreTop(osc.Site, osc.Hole, osc.Core) # use core's top for depths in affine table, not depth of TIE in splice
affineRow = aff.AffineRow(osc.Site, osc.Hole, osc.Core, osc.Tool, coreTop, coreTop + offset, offset, shiftType=shiftType, comment="off-splice")
if shiftType == "TIE":
affineRow.setTieData(fixedCore, fixedTieCsf, shiftedTieCsf)
affineRows.append(affineRow)
return affineRows
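# Manual correlation arithmetic, with illustrative numbers: if the off-splice
# point is at 20.00m MBSF and its correlated on-splice point is at 19.50m MBSF
# in a core with a SIT offset of 1.20m, the on-splice MCD is 19.50 + 1.20 =
# 20.70m, so the off-splice core is shifted by 20.70 - 20.00 = 0.70m.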
# sort affine rows, compute Differential Offset and Growth Rate column values
def fillAffineRows(affineRows):
sortedRows = sorted(affineRows, key = lambda ar: (ar.site, ar.hole, int(ar.core)))
holes = set([r.hole for r in sortedRows])
for h in holes:
rows = [r for r in sortedRows if r.hole == h]
mbsfVals = []
mcdVals = []
prevOffset = None
for row in rows:
if prevOffset is None: # first row
row.diffOffset = row.cumOffset
else:
row.diffOffset = row.cumOffset - prevOffset
prevOffset = row.cumOffset
mbsfVals.append(row.csf)
mcdVals.append(row.ccsf)
if len(mbsfVals) > 1:
row.growthRate = round(numpy.polyfit(mbsfVals, mcdVals, 1)[0], 3)
else:
row.growthRate = 0.0
return sortedRows
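# Illustrative numbers: for a hole whose cores have cumulative offsets of 0.50m
# and 0.80m, the differential offsets are 0.50m and 0.80 - 0.50 = 0.30m. The
# growth rate is the slope of a least-squares fit of CCSF (MCD) against CSF
# (MBSF) over the hole's cores so far; a slope above 1.0 indicates expansion.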
# Rename columns in dataframe from their format's internal name to their
# pretty name if the internal name is in the dataframe.
def prettyColumns(dataframe, fmt):
colmap = {c.name: c.prettyName(OutputVocabulary) for c in fmt.cols if c.name in dataframe}
PU.renameColumns(dataframe, colmap)
# Round values in numeric columns to 3 places
def roundValues(dataframe, fmt, digits=3):
numCols = [c.name for c in fmt.cols if c.isNumeric() and c.name in dataframe]
for col in numCols:
try:
dataframe[col] = dataframe[col].round(digits)
except Exception as e:
log.warning("Couldn't round values in column {}. {}".format(col, repr(e)))
def appendDate(text):
return text + "_{}".format(date.today().isoformat())
class Test(unittest.TestCase):
def test_sparse_to_sit(self):
sparsePath = "testdata/GLAD9_Site1_SparseSplice.csv"
secsummPath = "testdata/GLAD9_SectionSummary.csv"
affinePath = "testdata/GLAD9_Site1_TestAffine.csv"
splicePath = "testdata/GLAD9_Site1_TestSIT.csv"
convertSparseSplice(secsummPath, sparsePath, affinePath, splicePath)
affine = aff.AffineTable.createWithFile(affinePath)
sit = si.SpliceIntervalTable.createWithFile(splicePath)
        self.assertEqual(len(affine.getSites()), 7)
        self.assertEqual(len(sit.df), 58)
def test_splice_measurement(self):
affinePath = "testdata/GLAD9_Site1_TestAffine.csv"
splicePath = "testdata/GLAD9_Site1_TestSIT.csv"
measPath = "testdata/GLAD9_Site1_XRF.csv"
splicedMeasPath = "testdata/GLAD9_Site1_XRF_test-spliced.csv"
exportMeasurementData(affinePath, splicePath, measPath, splicedMeasPath, depthColumn='Sediment Depth, unscaled (MBS / CSF-A)') # include off-splice
if __name__ == "__main__":
log.basicConfig(level=log.INFO)
unittest.main()
# convert sparse splice to SIT
# ssPath = "[section summary path]"
# sparsePath = "[sparse splice path]"
# basepath = "[root export path and filename prefix]"
# affPath = basepath + appendDate("_AffineFromSparse") + ".csv"
# sitPath = basepath + appendDate("_SITFromSparse") + ".csv"
# convertSparseSplice(ssPath, sparsePath, affPath, sitPath)
# splice measurement data
# for mdPath in mdFilePaths:
#     path, ext = os.path.splitext(mdPath)
#     exportPath = path + "_spliced" + ext
#     exportMeasurementData(affPath, sitPath, mdPath, exportPath, depthColumn, includeOffSplice=False, wholeSpliceSection=False)