-
Notifications
You must be signed in to change notification settings - Fork 96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support config files #169
base: main
Are you sure you want to change the base?
Support config files #169
Changes from all commits
b2d4bbd
758cebc
e5cd304
5d45bd0
c4e073c
4c500c9
92440f1
dce4163
7d02b41
0a75bc1
4a61903
c7abff0
e8ced46
08bfc67
749f431
7b065ef
56bcd02
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -109,7 +109,7 @@ | |||||
*.ipynb diff=ipynb | ||||||
""" | ||||||
|
||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter | ||||||
from argparse import ArgumentParser, Namespace, RawDescriptionHelpFormatter | ||||||
import collections | ||||||
import io | ||||||
import json | ||||||
|
@@ -120,7 +120,7 @@ | |||||
import sys | ||||||
import warnings | ||||||
|
||||||
from nbstripout._utils import strip_output, strip_zeppelin_output | ||||||
from nbstripout._utils import strip_output, strip_zeppelin_output, merge_configuration_file | ||||||
try: | ||||||
# Jupyter >= 4 | ||||||
from nbformat import read, write, NO_CONVERT | ||||||
|
@@ -351,7 +351,7 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False): | |||||
return 1 | ||||||
|
||||||
|
||||||
def main(): | ||||||
def setup_commandline() -> Namespace: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is no longer returning a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should probably be called |
||||||
parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter) | ||||||
task = parser.add_mutually_exclusive_group() | ||||||
task.add_argument('--dry-run', action='store_true', | ||||||
|
@@ -373,14 +373,17 @@ def main(): | |||||
help='Do not strip the execution count/prompt number') | ||||||
parser.add_argument('--keep-output', action='store_true', | ||||||
help='Do not strip output', default=None) | ||||||
parser.add_argument('--keep-id', action='store_true', | ||||||
help='Keep the randomly generated cell ids, ' | ||||||
'which will be different after each execution.') | ||||||
parser.add_argument('--extra-keys', default='', | ||||||
help='Space separated list of extra keys to strip ' | ||||||
'from metadata, e.g. metadata.foo cell.metadata.bar') | ||||||
parser.add_argument('--keep-metadata-keys', default='', | ||||||
help='Space separated list of metadata keys to keep' | ||||||
', e.g. metadata.foo cell.metadata.bar') | ||||||
parser.add_argument('--drop-empty-cells', action='store_true', | ||||||
help='Remove cells where `source` is empty or contains only whitepace') | ||||||
help='Remove cells where `source` is empty or contains only whitespace') | ||||||
parser.add_argument('--drop-tagged-cells', default='', | ||||||
help='Space separated list of cell-tags that remove an entire cell') | ||||||
parser.add_argument('--strip-init-cells', action='store_true', | ||||||
|
@@ -408,7 +411,13 @@ def main(): | |||||
help='Prints stripped files to STDOUT') | ||||||
|
||||||
parser.add_argument('files', nargs='*', help='Files to strip output from') | ||||||
args = parser.parse_args() | ||||||
|
||||||
return parser | ||||||
|
||||||
|
||||||
def main(): | ||||||
parser = setup_commandline() | ||||||
args = merge_configuration_file(parser) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
git_config = ['git', 'config'] | ||||||
|
||||||
|
@@ -487,7 +496,7 @@ def main(): | |||||
warnings.simplefilter("ignore", category=UserWarning) | ||||||
nb = read(f, as_version=NO_CONVERT) | ||||||
|
||||||
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells, | ||||||
nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells, | ||||||
args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size)) | ||||||
|
||||||
if args.dry_run: | ||||||
|
@@ -533,7 +542,7 @@ def main(): | |||||
warnings.simplefilter("ignore", category=UserWarning) | ||||||
nb = read(input_stream, as_version=NO_CONVERT) | ||||||
|
||||||
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells, | ||||||
nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells, | ||||||
args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size)) | ||||||
|
||||||
if args.dry_run: | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
from collections import defaultdict | ||
from argparse import ArgumentParser, Namespace, _StoreTrueAction, _StoreFalseAction | ||
import os | ||
import sys | ||
from collections import defaultdict | ||
from functools import partial | ||
from typing import Any, Dict, Optional | ||
|
||
__all__ = ["pop_recursive", "strip_output", "strip_zeppelin_output", "MetadataError"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please keep imports sorted |
||
|
||
|
@@ -94,7 +98,7 @@ def strip_zeppelin_output(nb): | |
return nb | ||
|
||
|
||
def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[], | ||
def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[], | ||
strip_init_cells=False, max_size=0): | ||
""" | ||
Strip the outputs, execution count/prompt number and miscellaneous | ||
|
@@ -124,7 +128,7 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa | |
for tag_to_drop in drop_tagged_cells: | ||
conditionals.append(lambda c: tag_to_drop not in c.get("metadata", {}).get("tags", [])) | ||
|
||
for cell in _cells(nb, conditionals): | ||
for i, cell in enumerate(_cells(nb, conditionals)): | ||
keep_output_this_cell = determine_keep_output(cell, keep_output, strip_init_cells) | ||
|
||
# Remove the outputs, unless directed otherwise | ||
|
@@ -148,7 +152,112 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa | |
cell['prompt_number'] = None | ||
if 'execution_count' in cell and not keep_count: | ||
cell['execution_count'] = None | ||
|
||
# Replace the cell id with an incremental value that will be consistent across runs | ||
if 'id' in cell and not keep_id: | ||
cell['id'] = str(i) | ||
for field in keys['cell']: | ||
pop_recursive(cell, field) | ||
return nb | ||
|
||
|
||
def process_pyproject_toml(toml_file_path: str, parser: ArgumentParser) -> Optional[Dict[str, Any]]: | ||
"""Extract config mapping from pyproject.toml file.""" | ||
try: | ||
import tomllib # python 3.11+ | ||
except ModuleNotFoundError: | ||
import tomli as tomllib | ||
|
||
with open(toml_file_path, 'rb') as f: | ||
dict_config = tomllib.load(f).get('tool', {}).get('nbstripout', None) | ||
|
||
if not dict_config: | ||
# could be {} if 'tool' not found, or None if 'nbstripout' not found | ||
return dict_config | ||
|
||
# special processing of boolean options, make sure we don't have invalid types | ||
for a in parser._actions: | ||
if a.dest in dict_config and isinstance(a, (_StoreTrueAction, _StoreFalseAction)): | ||
if not isinstance(dict_config[a.dest], bool): | ||
Comment on lines
+178
to
+179
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a big fan of reaching into the internals of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I thought that this would still be more robust than keeping an explicit list of Boolean arguments, although that wouldn't be impossible. I looked at a range of options, including shifting to an alternative arguments library, like Click, but that all got too complicated. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, agree |
||
raise ValueError(f'Argument {a.dest} in pyproject.toml must be a boolean, not {dict_config[a.dest]}') | ||
|
||
return dict_config | ||
|
||
|
||
def process_setup_cfg(cfg_file_path, parser: ArgumentParser) -> Optional[Dict[str, Any]]: | ||
"""Extract config mapping from setup.cfg file.""" | ||
import configparser | ||
|
||
reader = configparser.ConfigParser() | ||
reader.read(cfg_file_path) | ||
if not reader.has_section('nbstripout'): | ||
return None | ||
|
||
raw_config = reader['nbstripout'] | ||
dict_config = dict(raw_config) | ||
|
||
# special processing of boolean options, to convert various configparser bool types to true/false | ||
for a in parser._actions: | ||
if a.dest in raw_config and isinstance(a, (_StoreTrueAction, _StoreFalseAction)): | ||
dict_config[a.dest] = raw_config.getboolean(a.dest) | ||
Comment on lines
+199
to
+200
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto |
||
|
||
return dict_config | ||
|
||
|
||
def merge_configuration_files(parser: ArgumentParser, args_str=None) -> Namespace: | ||
"""Merge flags from config files into args.""" | ||
CONFIG_FILES = { | ||
'pyproject.toml': partial(process_pyproject_toml, parser=parser), | ||
'setup.cfg': partial(process_setup_cfg, parser=parser), | ||
} | ||
|
||
# parse args as-is to look for configuration files | ||
args = parser.parse_args(args_str) | ||
|
||
# Traverse the file tree common to all files given as argument looking for | ||
# a configuration file | ||
# TODO: make this more like Black: | ||
# By default Black looks for pyproject.toml starting from the common base directory of all files and | ||
# directories passed on the command line. If it’s not there, it looks in parent directories. It stops looking | ||
# when it finds the file, or a .git directory, or a .hg directory, or the root of the file system, whichever | ||
# comes first. | ||
# if no files are given, start from cwd | ||
config_path = os.path.commonpath([os.path.abspath(file) for file in args.files]) if args.files else os.path.abspath(os.getcwd()) | ||
config: Optional[Dict[str, Any]] = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line gets a bit long with a ternary |
||
while True: | ||
for config_file, processor in CONFIG_FILES.items(): | ||
config_file_path = os.path.join(config_path, config_file) | ||
if os.path.isfile(config_file_path): | ||
config = processor(config_file_path) | ||
if config is not None: | ||
break | ||
if config is not None: | ||
break | ||
config_path, tail = os.path.split(config_path) | ||
if not tail: | ||
break | ||
|
||
# black starts with default arguments (from click), updates that with the config file, | ||
# then applies command line arguments. this all happens in the click framework, before main() is called | ||
# we can use parser.set_defaults | ||
if config: | ||
# check all arguments are valid | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I hadn't picked up on this in my first round review: The way you implemented this, config file settings take precedence over command line arguments, however it should be the other way around. I realise this will be a bit harder to implement. You could turn the What makes this even more annoying is that you don't know if a value has been set by the caller or is simply the default :( I may need to look around for an idiomatic way to implement this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry about the delay. Have to prioritize something else atm. Hope to come back to this at some point but if anyone sees fit, feel free to take over. |
||
valid_args = vars(args).keys() | ||
for name in config.keys(): | ||
if name not in valid_args: | ||
raise ValueError(f'{name} in the config file is not a valid option') | ||
|
||
# separate into default-overrides and special treatment | ||
extra_keys: Optional[str] = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add a brief comment why There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not 100% sure that it does. I left this with the same functionality that @janosh did. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think that's a fair point and treating it specially may lead to surprising results. @janosh can you explain your rationale? Do you agree that special treatment of this flag may be surprising? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't remember for sure at this point, but I think it might be left over from the original autoflake8 config file PR. I agree minimal user surprise is usually the way to go. |
||
if 'extra_keys' in config: | ||
extra_keys = config['extra_keys'] | ||
del config['extra_keys'] | ||
|
||
# merge the configuration options as new defaults, and re-parse the arguments | ||
parser.set_defaults(**config) | ||
args = parser.parse_args(args_str) | ||
|
||
# merge extra_keys using set union | ||
if extra_keys: | ||
args.extra_keys = ' '.join(sorted(set(extra_keys.split()) | set(args.extra_keys.split()))) | ||
|
||
return args |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "0", | ||
"metadata": {}, | ||
"source": [ | ||
"This notebook tests that outputs can be cleared based on size" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "1", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"aaaaaaaaaa\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(\"a\"*10)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "2", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"print(\"a\"*100)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.4" | ||
}, | ||
"varInspector": { | ||
"cols": { | ||
"lenName": 16, | ||
"lenType": 16, | ||
"lenVar": 40 | ||
}, | ||
"kernels_config": { | ||
"python": { | ||
"delete_cmd_postfix": "", | ||
"delete_cmd_prefix": "del ", | ||
"library": "var_list.py", | ||
"varRefreshCmd": "print(var_dic_list())" | ||
}, | ||
"r": { | ||
"delete_cmd_postfix": ") ", | ||
"delete_cmd_prefix": "rm(", | ||
"library": "var_list.r", | ||
"varRefreshCmd": "cat(var_dic_list()) " | ||
} | ||
}, | ||
"types_to_exclude": [ | ||
"module", | ||
"function", | ||
"builtin_function_or_method", | ||
"instance", | ||
"_Feature" | ||
], | ||
"window_display": false | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.