From 99d73aa9fec288638d11091b46062d924605f9e1 Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Fri, 10 May 2024 14:28:59 -0400 Subject: [PATCH 1/4] added functionality of handling an input of a file list --- src/diffpy/labpdfproc/labpdfprocapp.py | 2 +- src/diffpy/labpdfproc/tests/test_tools.py | 5 ++++- src/diffpy/labpdfproc/tools.py | 26 +++++++++++++++++++---- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index c0ed5e3..f7c2891 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -21,7 +21,7 @@ def get_args(override_cli_inputs=None): "data-files in that directory will be processed. Examples of valid " "inputs are 'file.xy', 'data/file.xy', 'file.xy, data/file.xy', " "'.' (load everything in the current directory), 'data' (load" - "everything in the folder ./data', 'data/file_list.txt' (load" + "everything in the folder ./data), 'data/file_list.txt' (load" " the list of files contained in the text-file called " "file_list.txt that can be found in the folder ./data).", ) diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index db75965..fde14d2 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -27,6 +27,9 @@ ( # glob input directory ["./input_dir"], [ + "./good_data.chi", + "./good_data.xy", + "./good_data.txt", "input_dir/good_data.chi", "input_dir/good_data.xy", "input_dir/good_data.txt", @@ -69,7 +72,7 @@ def test_set_input_lists(inputs, expected, user_filesystem): cli_inputs = ["2.5"] + inputs actual_args = get_args(cli_inputs) actual_args = set_input_lists(actual_args) - assert list(actual_args.input_paths).sort() == expected_paths.sort() + assert set(actual_args.input_paths) == set(expected_paths) # This test covers non-existing single input file or directory, in this case we raise an error with message diff 
--git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index 2bf2379..8fd50fe 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -28,6 +28,24 @@ def set_output_directory(args): return output_dir +def _parse_file_list_file(input_path): + with open(input_path, "r") as f: + lines = [line.strip() for line in f] + input_files = [Path(line).resolve() for line in lines if Path(line).is_file()] + return input_files + + +def _parse_input_paths(input_path): + # Takes a path to return either a list of files paths if it is a file list, + # a list of single file path if it is a data file, or nothing + if "file_list" in input_path.name: + return _parse_file_list_file(input_path) + elif input_path.is_file(): + return [input_path] + else: + return [] + + def set_input_lists(args): """ Set input directory and files. @@ -51,16 +69,16 @@ def set_input_lists(args): input_path = Path(input).resolve() if input_path.exists(): if input_path.is_file(): - input_paths.append(input_path) + input_paths.extend(_parse_input_paths(input_path)) elif input_path.is_dir(): input_files = input_path.glob("*") - input_files = [file.resolve() for file in input_files if file.is_file()] - input_paths.extend(input_files) + for file in input_files: + input_paths.extend(_parse_input_paths(file)) else: raise FileNotFoundError(f"Cannot find {input}. 
Please specify valid input file(s) or directories.") else: raise FileNotFoundError(f"Cannot find {input}") - setattr(args, "input_paths", input_paths) + setattr(args, "input_paths", list(set(input_paths))) return args From 93cf5b47edebdb60c74410c00b2171972ca106bc Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Fri, 10 May 2024 16:36:54 -0400 Subject: [PATCH 2/4] used sorted for tests and edited input function to include file list only if user specifies it --- src/diffpy/labpdfproc/tests/test_tools.py | 5 +---- src/diffpy/labpdfproc/tools.py | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index fde14d2..c41a4b2 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -27,9 +27,6 @@ ( # glob input directory ["./input_dir"], [ - "./good_data.chi", - "./good_data.xy", - "./good_data.txt", "input_dir/good_data.chi", "input_dir/good_data.xy", "input_dir/good_data.txt", @@ -72,7 +69,7 @@ def test_set_input_lists(inputs, expected, user_filesystem): cli_inputs = ["2.5"] + inputs actual_args = get_args(cli_inputs) actual_args = set_input_lists(actual_args) - assert set(actual_args.input_paths) == set(expected_paths) + assert sorted(actual_args.input_paths) == sorted(expected_paths) # This test covers non-existing single input file or directory, in this case we raise an error with message diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index 8fd50fe..5e2e683 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -35,17 +35,6 @@ def _parse_file_list_file(input_path): return input_files -def _parse_input_paths(input_path): - # Takes a path to return either a list of files paths if it is a file list, - # a list of single file path if it is a data file, or nothing - if "file_list" in input_path.name: - return _parse_file_list_file(input_path) 
- elif input_path.is_file(): - return [input_path] - else: - return [] - - def set_input_lists(args): """ Set input directory and files. @@ -69,11 +58,16 @@ def set_input_lists(args): input_path = Path(input).resolve() if input_path.exists(): if input_path.is_file(): - input_paths.extend(_parse_input_paths(input_path)) + if "file_list" in input_path.name: + input_paths.extend(_parse_file_list_file(input_path)) + else: + input_paths.append(input_path) elif input_path.is_dir(): input_files = input_path.glob("*") - for file in input_files: - input_paths.extend(_parse_input_paths(file)) + input_files = [ + file.resolve() for file in input_files if file.is_file() and "file_list" not in file.name + ] + input_paths.extend(input_files) else: raise FileNotFoundError(f"Cannot find {input}. Please specify valid input file(s) or directories.") else: From a9facce62baffab22344bc1223b805d7674c0568 Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Sat, 11 May 2024 11:12:58 -0400 Subject: [PATCH 3/4] initial commit on using expand_list_file, not passing tests for file_list with missing files --- src/diffpy/labpdfproc/labpdfprocapp.py | 72 ++++++++++++++------------ src/diffpy/labpdfproc/tools.py | 27 ++++++---- 2 files changed, 56 insertions(+), 43 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index f7c2891..7b9471d 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -1,9 +1,14 @@ import sys from argparse import ArgumentParser -from pathlib import Path from diffpy.labpdfproc.functions import apply_corr, compute_cve -from diffpy.labpdfproc.tools import known_sources, load_user_metadata, set_output_directory, set_wavelength +from diffpy.labpdfproc.tools import ( + known_sources, + load_user_metadata, + set_input_lists, + set_output_directory, + set_wavelength, +) from diffpy.utils.parsers.loaddata import loadData from diffpy.utils.scattering_objects.diffraction_objects 
import XQUANTITIES, Diffraction_object @@ -89,45 +94,46 @@ def get_args(override_cli_inputs=None): def main(): args = get_args() + args = set_input_lists(args) args.output_directory = set_output_directory(args) args.wavelength = set_wavelength(args) args = load_user_metadata(args) - filepath = Path(args.input_file) - outfilestem = filepath.stem + "_corrected" - corrfilestem = filepath.stem + "_cve" - outfile = args.output_directory / (outfilestem + ".chi") - corrfile = args.output_directory / (corrfilestem + ".chi") + for filepath in args.input_directory: + outfilestem = filepath.stem + "_corrected" + corrfilestem = filepath.stem + "_cve" + outfile = args.output_directory / (outfilestem + ".chi") + corrfile = args.output_directory / (corrfilestem + ".chi") - if outfile.exists() and not args.force_overwrite: - sys.exit( - f"Output file {str(outfile)} already exists. Please rerun " - f"specifying -f if you want to overwrite it." - ) - if corrfile.exists() and args.output_correction and not args.force_overwrite: - sys.exit( - f"Corrections file {str(corrfile)} was requested and already " - f"exists. Please rerun specifying -f if you want to overwrite it." - ) + if outfile.exists() and not args.force_overwrite: + sys.exit( + f"Output file {str(outfile)} already exists. Please rerun " + f"specifying -f if you want to overwrite it." + ) + if corrfile.exists() and args.output_correction and not args.force_overwrite: + sys.exit( + f"Corrections file {str(corrfile)} was requested and already " + f"exists. Please rerun specifying -f if you want to overwrite it." 
+ ) - input_pattern = Diffraction_object(wavelength=args.wavelength) - xarray, yarray = loadData(args.input_file, unpack=True) - input_pattern.insert_scattering_quantity( - xarray, - yarray, - "tth", - scat_quantity="x-ray", - name=str(args.input_file), - metadata={"muD": args.mud, "anode_type": args.anode_type}, - ) + input_pattern = Diffraction_object(wavelength=args.wavelength) + xarray, yarray = loadData(args.input_file, unpack=True) + input_pattern.insert_scattering_quantity( + xarray, + yarray, + "tth", + scat_quantity="x-ray", + name=str(args.input_file), + metadata={"muD": args.mud, "anode_type": args.anode_type}, + ) - absorption_correction = compute_cve(input_pattern, args.mud, args.wavelength) - corrected_data = apply_corr(input_pattern, absorption_correction) - corrected_data.name = f"Absorption corrected input_data: {input_pattern.name}" - corrected_data.dump(f"{outfile}", xtype="tth") + absorption_correction = compute_cve(input_pattern, args.mud, args.wavelength) + corrected_data = apply_corr(input_pattern, absorption_correction) + corrected_data.name = f"Absorption corrected input_data: {input_pattern.name}" + corrected_data.dump(f"{outfile}", xtype="tth") - if args.output_correction: - absorption_correction.dump(f"{corrfile}", xtype="tth") + if args.output_correction: + absorption_correction.dump(f"{corrfile}", xtype="tth") if __name__ == "__main__": diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index 5e2e683..aaa2d7f 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -28,11 +28,20 @@ def set_output_directory(args): return output_dir -def _parse_file_list_file(input_path): - with open(input_path, "r") as f: - lines = [line.strip() for line in f] - input_files = [Path(line).resolve() for line in lines if Path(line).is_file()] - return input_files +def _parse_file_list_file(file_list_path): + with open(file_list_path, "r") as f: + # file_paths = [Path(file_path.strip()).resolve() for 
file_path in f.readlines() + # if Path(file_path.strip()).is_file()] + file_paths = [file_path.strip() for file_path in f.readlines()] + return file_paths + + +def expand_list_file(input): + file_list_inputs = [input_name for input_name in input if "file_list" in str(input_name)] + for file_list_input in file_list_inputs: + input.remove(file_list_input) + input.extend(_parse_file_list_file(file_list_input)) + return input def set_input_lists(args): @@ -54,14 +63,12 @@ def set_input_lists(args): """ input_paths = [] - for input in args.input: + expanded_input = expand_list_file(args.input) + for input in expanded_input: input_path = Path(input).resolve() if input_path.exists(): if input_path.is_file(): - if "file_list" in input_path.name: - input_paths.extend(_parse_file_list_file(input_path)) - else: - input_paths.append(input_path) + input_paths.append(input_path) elif input_path.is_dir(): input_files = input_path.glob("*") input_files = [ From 13b1e4ffb3cc904dd8708001a34c8395718c847f Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Sat, 11 May 2024 13:44:57 -0400 Subject: [PATCH 4/4] removed nesting functions and the private function, moved file list test with missing file to erroneous case --- src/diffpy/labpdfproc/labpdfprocapp.py | 2 ++ src/diffpy/labpdfproc/tests/test_tools.py | 11 ++++--- src/diffpy/labpdfproc/tools.py | 40 ++++++++++++++--------- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index 7b9471d..6dca178 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -3,6 +3,7 @@ from diffpy.labpdfproc.functions import apply_corr, compute_cve from diffpy.labpdfproc.tools import ( + expand_list_file, known_sources, load_user_metadata, set_input_lists, @@ -94,6 +95,7 @@ def get_args(override_cli_inputs=None): def main(): args = get_args() + args = expand_list_file(args) args = set_input_lists(args) 
args.output_directory = set_output_directory(args) args.wavelength = set_wavelength(args) diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index c41a4b2..23e56bb 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -6,6 +6,7 @@ from diffpy.labpdfproc.labpdfprocapp import get_args from diffpy.labpdfproc.tools import ( + expand_list_file, known_sources, load_user_metadata, set_input_lists, @@ -49,10 +50,6 @@ "input_dir/binary.pkl", ], ), - ( # file_list.txt list of files provided - ["input_dir/file_list.txt"], - ["good_data.chi", "good_data.xy", "good_data.txt"], - ), ( # file_list_example2.txt list of files provided in different directories ["input_dir/file_list_example2.txt"], ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"], @@ -68,6 +65,7 @@ def test_set_input_lists(inputs, expected, user_filesystem): cli_inputs = ["2.5"] + inputs actual_args = get_args(cli_inputs) + actual_args = expand_list_file(actual_args) actual_args = set_input_lists(actual_args) assert sorted(actual_args.input_paths) == sorted(expected_paths) @@ -87,6 +85,10 @@ def test_set_input_lists(inputs, expected, user_filesystem): ["good_data.chi", "good_data.xy", "unreadable_file.txt", "missing_file.txt"], "Cannot find missing_file.txt. Please specify valid input file(s) or directories.", ), + ( # file_list.txt list of files provided (with missing files) + ["input_dir/file_list.txt"], + "Cannot find missing_file.txt. 
Please specify valid input file(s) or directories.", + ), ] @@ -96,6 +98,7 @@ def test_set_input_files_bad(inputs, msg, user_filesystem): os.chdir(base_dir) cli_inputs = ["2.5"] + inputs actual_args = get_args(cli_inputs) + actual_args = expand_list_file(actual_args) with pytest.raises(FileNotFoundError, match=msg[0]): actual_args = set_input_lists(actual_args) diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index aaa2d7f..a95bf92 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -28,20 +28,27 @@ def set_output_directory(args): return output_dir -def _parse_file_list_file(file_list_path): - with open(file_list_path, "r") as f: - # file_paths = [Path(file_path.strip()).resolve() for file_path in f.readlines() - # if Path(file_path.strip()).is_file()] - file_paths = [file_path.strip() for file_path in f.readlines()] - return file_paths +def expand_list_file(args): + """ + Expands the list of inputs by adding files from file lists and removing the file list. 
+ Parameters + ---------- + args argparse.Namespace + the arguments from the parser -def expand_list_file(input): - file_list_inputs = [input_name for input_name in input if "file_list" in str(input_name)] + Returns + ------- + the arguments with the modified input list + + """ + file_list_inputs = [input_name for input_name in args.input if "file_list" in input_name] for file_list_input in file_list_inputs: - input.remove(file_list_input) - input.extend(_parse_file_list_file(file_list_input)) - return input + with open(file_list_input, "r") as f: + file_inputs = [input_name.strip() for input_name in f.readlines()] + args.input.extend(file_inputs) + args.input.remove(file_list_input) + return args def set_input_lists(args): @@ -63,9 +70,8 @@ def set_input_lists(args): """ input_paths = [] - expanded_input = expand_list_file(args.input) - for input in expanded_input: - input_path = Path(input).resolve() + for input_name in args.input: + input_path = Path(input_name).resolve() if input_path.exists(): if input_path.is_file(): input_paths.append(input_path) @@ -76,9 +82,11 @@ def set_input_lists(args): ] input_paths.extend(input_files) else: - raise FileNotFoundError(f"Cannot find {input}. Please specify valid input file(s) or directories.") + raise FileNotFoundError( + f"Cannot find {input_name}. Please specify valid input file(s) or directories." + ) else: - raise FileNotFoundError(f"Cannot find {input}") + raise FileNotFoundError(f"Cannot find {input_name}") setattr(args, "input_paths", list(set(input_paths))) return args