Skip to content

Commit

Permalink
fix patching module that doesn't exist (#4495)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lhoestq authored Jun 15, 2022
1 parent dc41a88 commit 60b8df4
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/datasets/utils/patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ def __enter__(self):
# in this case we need to patch "os" and "os.path"

for i in range(len(submodules)):
submodule = import_module(".".join(submodules[: i + 1]))
try:
submodule = import_module(".".join(submodules[: i + 1]))
except ModuleNotFoundError:
continue
# We iterate over all the globals in self.obj in case we find "os" or "os.path"
for attr in self.obj.__dir__():
obj_attr = getattr(self.obj, attr)
Expand All @@ -79,7 +82,10 @@ def __enter__(self):
# itself if it was imported as "from os.path import join".

if submodules: # if it's an attribute of a submodule like "os.path.join"
attr_value = getattr(import_module(".".join(submodules)), target_attr)
try:
attr_value = getattr(import_module(".".join(submodules)), target_attr)
except (AttributeError, ModuleNotFoundError):
return
# We iterate over all the globals in self.obj in case we find "os.path.join"
for attr in self.obj.__dir__():
# We don't check for the name of the global, but rather if its value *is* "os.path.join".
Expand Down
8 changes: 8 additions & 0 deletions tests/test_patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,11 @@ def test_patch_submodule_successive():
assert _test_patching.os.path.join is original_join
assert _test_patching.os.path.dirname is original_dirname
assert _test_patching.os.rename is original_rename


def test_patch_submodule_doesnt_exist():
    """Check that patching an unresolvable target does not raise.

    Two targets are exercised, in this order:
    a dotted path whose module cannot be imported, and an attribute
    that is missing from the real ``os`` module. Entering and leaving
    the ``patch_submodule`` context must complete without an exception
    in both cases; nothing else is asserted.
    """
    mock = "__test_patch_submodule_doesnt_exist_mock__"
    unresolvable_targets = (
        # The module itself does not exist.
        "__module_that_doesn_exist__.__attribute_that_doesn_exist__",
        # The module exists but the attribute does not.
        "os.__attribute_that_doesn_exist__",
    )
    for target in unresolvable_targets:
        with patch_submodule(_test_patching, target, mock):
            pass

1 comment on commit 60b8df4

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Show benchmarks

PyArrow==6.0.0

Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.010023 / 0.011353 (-0.001330) 0.004793 / 0.011008 (-0.006215) 0.035313 / 0.038508 (-0.003195) 0.040919 / 0.023109 (0.017810) 0.409766 / 0.275898 (0.133868) 0.467224 / 0.323480 (0.143744) 0.007496 / 0.007986 (-0.000490) 0.004204 / 0.004328 (-0.000125) 0.008760 / 0.004250 (0.004510) 0.043119 / 0.037052 (0.006066) 0.390338 / 0.258489 (0.131849) 0.465077 / 0.293841 (0.171236) 0.036616 / 0.128546 (-0.091930) 0.011643 / 0.075646 (-0.064003) 0.301675 / 0.419271 (-0.117597) 0.062166 / 0.043533 (0.018634) 0.396968 / 0.255139 (0.141829) 0.434205 / 0.283200 (0.151005) 0.105720 / 0.141683 (-0.035962) 2.191514 / 1.452155 (0.739360) 2.231713 / 1.492716 (0.738997)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.332045 / 0.018006 (0.314039) 0.489897 / 0.000490 (0.489408) 0.052005 / 0.000200 (0.051805) 0.000753 / 0.000054 (0.000699)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.029940 / 0.037411 (-0.007471) 0.129790 / 0.014526 (0.115264) 0.138655 / 0.176557 (-0.037902) 0.181398 / 0.737135 (-0.555738) 0.138105 / 0.296338 (-0.158234)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.491138 / 0.215209 (0.275929) 4.780872 / 2.077655 (2.703217) 2.054389 / 1.504120 (0.550269) 1.801961 / 1.541195 (0.260767) 1.892002 / 1.468490 (0.423512) 0.505803 / 4.584777 (-4.078974) 5.685487 / 3.745712 (1.939775) 4.634469 / 5.269862 (-0.635392) 1.212833 / 4.565676 (-3.352843) 0.065387 / 0.424275 (-0.358889) 0.014634 / 0.007607 (0.007027) 0.644763 / 0.226044 (0.418718) 6.696636 / 2.268929 (4.427707) 2.782163 / 55.444624 (-52.662461) 2.422691 / 6.876477 (-4.453785) 2.480906 / 2.142072 (0.338833) 0.728397 / 4.805227 (-4.076830) 0.162734 / 6.500664 (-6.337930) 0.075556 / 0.075469 (0.000086)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.950136 / 1.841788 (0.108348) 17.221042 / 8.074308 (9.146734) 31.240208 / 10.191392 (21.048816) 1.069187 / 0.680424 (0.388764) 0.685084 / 0.534201 (0.150883) 0.615644 / 0.579283 (0.036361) 0.650270 / 0.434364 (0.215906) 0.396180 / 0.540337 (-0.144157) 0.401685 / 1.386936 (-0.985251)
PyArrow==latest
Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.009997 / 0.011353 (-0.001356) 0.004692 / 0.011008 (-0.006316) 0.035572 / 0.038508 (-0.002937) 0.041748 / 0.023109 (0.018639) 0.357848 / 0.275898 (0.081950) 0.393129 / 0.323480 (0.069649) 0.007481 / 0.007986 (-0.000505) 0.004120 / 0.004328 (-0.000208) 0.008701 / 0.004250 (0.004450) 0.044800 / 0.037052 (0.007747) 0.338108 / 0.258489 (0.079619) 0.413437 / 0.293841 (0.119596) 0.036942 / 0.128546 (-0.091604) 0.011397 / 0.075646 (-0.064249) 0.303434 / 0.419271 (-0.115837) 0.060389 / 0.043533 (0.016856) 0.345287 / 0.255139 (0.090148) 0.375926 / 0.283200 (0.092726) 0.105444 / 0.141683 (-0.036238) 2.173531 / 1.452155 (0.721376) 2.244764 / 1.492716 (0.752048)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.276130 / 0.018006 (0.258124) 0.493970 / 0.000490 (0.493480) 0.001463 / 0.000200 (0.001263) 0.000088 / 0.000054 (0.000033)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.033008 / 0.037411 (-0.004403) 0.131170 / 0.014526 (0.116644) 0.140870 / 0.176557 (-0.035686) 0.186784 / 0.737135 (-0.550351) 0.138909 / 0.296338 (-0.157430)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.504122 / 0.215209 (0.288913) 5.047875 / 2.077655 (2.970221) 2.295729 / 1.504120 (0.791610) 2.061735 / 1.541195 (0.520540) 2.135309 / 1.468490 (0.666818) 0.510932 / 4.584777 (-4.073845) 5.870312 / 3.745712 (2.124600) 2.630387 / 5.269862 (-2.639474) 1.180163 / 4.565676 (-3.385514) 0.071784 / 0.424275 (-0.352492) 0.014532 / 0.007607 (0.006925) 0.648587 / 0.226044 (0.422543) 6.576306 / 2.268929 (4.307378) 2.932580 / 55.444624 (-52.512044) 2.524585 / 6.876477 (-4.351892) 2.692967 / 2.142072 (0.550894) 0.711308 / 4.805227 (-4.093920) 0.148001 / 6.500664 (-6.352663) 0.074677 / 0.075469 (-0.000792)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.939034 / 1.841788 (0.097246) 17.093990 / 8.074308 (9.019682) 31.813877 / 10.191392 (21.622485) 0.999208 / 0.680424 (0.318784) 0.626231 / 0.534201 (0.092030) 0.577149 / 0.579283 (-0.002134) 0.633475 / 0.434364 (0.199111) 0.384573 / 0.540337 (-0.155764) 0.405747 / 1.386936 (-0.981189)

CML watermark

Please sign in to comment.