Skip to content

Commit

Permalink
Updates to resolve conflicts with changes related to floating point …
Browse files Browse the repository at this point in the history
…headers for indexing.
  • Loading branch information
markspec committed May 26, 2023
1 parent 9197e4e commit f382208
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 22 deletions.
8 changes: 5 additions & 3 deletions src/mdio/segy/_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def header_scan_worker(
segy_endian: Endianness of the input SEG-Y. Rev.2 allows little endian
Returns:
Numpy array of parsed headers for the current block.
dictionary with headers: keys are the index names, values are numpy
arrays of parsed headers for the current block. Array is of type
byte_type with the exception of IBM32 which is mapped to FLOAT32.
Raises:
TypeError: if segy_path_or_handle is incorrect / unsupported.
Expand All @@ -59,7 +61,6 @@ def header_scan_worker(
ignore_geometry=True,
endian=segy_endian,
) as segy_handle:

block_headers = [
segy_handle.header[trc_idx] for trc_idx in range(start, stop)
]
Expand Down Expand Up @@ -109,7 +110,8 @@ def header_scan_worker(

out_dtype.append((name, native_dtype))

out_array = np.empty(n_traces, out_dtype)
# out_array = np.empty(n_traces, out_dtype)
out_array = {}

# TODO: Add strict=True and remove noqa when minimum Python is 3.10
for name, loc, type_ in zip(index_names, byte_locs, byte_types): # noqa: B905
Expand Down
13 changes: 10 additions & 3 deletions src/mdio/segy/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ def parse_trace_headers(
progress_bar: Enable or disable progress bar. Default is True.
Returns:
Numpy array of parsed trace headers.
dictionary with headers: keys are the index names, values are numpy
arrays of parsed headers for the current block. Array is of type
byte_type with the exception of IBM32 which is mapped to FLOAT32.
"""
trace_count = get_trace_count(segy_path, segy_endian)
n_blocks = int(ceil(trace_count / block_size))
Expand Down Expand Up @@ -133,9 +136,13 @@ def parse_trace_headers(

# This executes the lazy work.
headers = list(lazy_work)

final_headers = {}
for header_name in index_names:
final_headers[header_name] = np.concatenate(
[header[header_name] for header in headers]
)
# Merge blocks before return
return np.concatenate(headers)
return final_headers


def parse_sample_axis(binary_header: dict) -> Dimension:
Expand Down
24 changes: 11 additions & 13 deletions src/mdio/segy/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ def get_grid_plan( # noqa: C901
# to prevent so many lookups
if geom_type == GeometryTemplateType.STREAMER_B:
for idx, cable in enumerate(unique_cables):
cable_idxs = np.where(index_headers[:, cable_idx] == cable)
cable_idxs = np.where(index_headers["cable"][:] == cable)
cc_min = cable_chan_min[idx]
# print(f"idx = {idx} cable = {cable} cc_min={cc_min}")
index_headers[cable_idxs, chan_idx] = (
index_headers[cable_idxs, chan_idx] - cc_min + 1
index_headers["channel"][cable_idxs] = (
index_headers["channel"][cable_idxs] - cc_min + 1
)

if "CalculateCable" in grid_overrides:
Expand Down Expand Up @@ -187,25 +187,23 @@ def qc_index_headers(
cable_chan_max: Array containing the max channel number for each cable,
geom_type: Whether type a or b (wrapped or unwrapped chans)
"""
if trace_qc_count is None:
trace_qc_count = index_headers.shape[0]
if trace_qc_count > index_headers.shape[0]:
trace_qc_count = index_headers.shape[0]

if "cable" in index_names and "channel" in index_names and "shot" in index_names:
cable_idx = index_names.index("cable")
channel_idx = index_names.index("channel")
if trace_qc_count is None:
trace_qc_count = index_headers["cable"].shape[0]
if trace_qc_count > index_headers["cable"].shape[0]:
trace_qc_count = index_headers["cable"].shape[0]

# Find unique cable ids
unique_cables = np.sort(np.unique(index_headers[0:trace_qc_count, cable_idx]))
unique_cables = np.sort(np.unique(index_headers["cable"][0:trace_qc_count]))

# Find channel min and max values for each cable
cable_chan_min = np.empty(unique_cables.shape)
cable_chan_max = np.empty(unique_cables.shape)

for idx, cable in enumerate(unique_cables):
my_chan = np.take(
index_headers[0:trace_qc_count, channel_idx],
np.where(index_headers[0:trace_qc_count, cable_idx] == cable),
index_headers["channel"][0:trace_qc_count],
np.where(index_headers["cable"][0:trace_qc_count] == cable),
)
cable_chan_min[idx] = np.min(my_chan)
cable_chan_max[idx] = np.max(my_chan)
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_segy_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def create_4d_segy(

@pytest.mark.parametrize("header_locations", [(17, 137, 13)])
@pytest.mark.parametrize("header_names", [("shot", "cable", "channel")])
@pytest.mark.parametrize("header_lengths", [(4, 2, 4)])
@pytest.mark.parametrize("header_types", [("int32", "int16", "int32")])
@pytest.mark.parametrize("endian", ["big"])
@pytest.mark.parametrize(
"grid_overrides", [{"AutoChannelWrap": True, "AutoChannelTraceQC": 100000}, None]
Expand All @@ -97,7 +97,7 @@ def test_import_4d_segy(
zarr_tmp,
header_locations,
header_names,
header_lengths,
header_types,
endian,
grid_overrides,
chan_header_type,
Expand All @@ -121,7 +121,7 @@ def test_import_4d_segy(
mdio_path_or_buffer=zarr_tmp.__str__(),
index_bytes=header_locations,
index_names=header_names,
index_lengths=header_lengths,
index_types=header_types,
chunksize=(8, 2, 128, 1024),
overwrite=True,
endian=endian,
Expand Down

0 comments on commit f382208

Please sign in to comment.