Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: One-pass column optimization #491

Draft
wants to merge 33 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b18b3af
start
martindurant Mar 28, 2024
3cdc778
Remove unused
martindurant Mar 28, 2024
c57693c
Pass reports around
martindurant Apr 4, 2024
0c367df
remember to commit
martindurant Apr 5, 2024
9c654b1
first working (for parquet)
martindurant Apr 9, 2024
e6c5ce3
Merge branch 'main' into one-pass
martindurant Apr 11, 2024
f56f77a
stop
martindurant Apr 12, 2024
75b4416
most pass
martindurant Apr 15, 2024
a5c319d
fix most
martindurant Apr 18, 2024
243fbc1
probably better
martindurant Apr 19, 2024
6eebf87
Reinstate necessary_columns (needs doc)
martindurant Apr 22, 2024
8bbe409
Merge branch 'main' into one-pass
martindurant Apr 22, 2024
43b2e43
pass buffer names around, not columns
martindurant Apr 25, 2024
e5828fb
Clear cache between tests
martindurant May 13, 2024
df0cf2f
Merge branch 'main' into one-pass
martindurant May 13, 2024
b593f87
Another one squashed
martindurant May 21, 2024
e410b61
Squash errors that only show when uproot.dask and hist.dask are insta…
martindurant May 22, 2024
cd537e2
fix uproot
martindurant May 23, 2024
baf6f46
fix report
martindurant May 27, 2024
e29e929
if meta fails
martindurant Jun 6, 2024
f61fdd7
rev
martindurant Jul 23, 2024
2c3abd0
concat enforce condition
martindurant Jul 24, 2024
04abbc8
temp
martindurant Jul 29, 2024
d876f00
squached some
martindurant Jul 30, 2024
8e1d507
add note
martindurant Jul 30, 2024
d1922ab
Merge branch 'main' into one-pass
martindurant Jul 30, 2024
fc9589b
Fix concat form comparison
martindurant Jul 31, 2024
961dd0c
one more squashed
martindurant Jul 31, 2024
c8b254b
fix IO report
martindurant Aug 2, 2024
d71e789
Merge branch 'main' into one-pass
pfackeldey Dec 13, 2024
d97292c
simplify loop to populate touched columns from all_layers
pfackeldey Dec 13, 2024
4e70827
Merge pull request #2 from pfackeldey/one-pass
martindurant Dec 13, 2024
25ff417
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 63 additions & 67 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,40 @@ build-backend = "hatchling.build"
name = "dask-awkward"
description = "Awkward Array meets Dask"
readme = "README.md"
license = {text = "BSD-3-Clause"}
license = { text = "BSD-3-Clause" }
requires-python = ">=3.8"
authors = [
{ name = "Doug Davis", email = "ddavis@ddavis.io" },
{ name = "Martin Durant", email = "mdurant@anaconda.com" },
{ name = "Doug Davis", email = "ddavis@ddavis.io" },
{ name = "Martin Durant", email = "mdurant@anaconda.com" },
]
maintainers = [
{ name = "Doug Davis", email = "ddavis@ddavis.io" },
{ name = "Martin Durant", email = "mdurant@anaconda.com" },
{ name = "Doug Davis", email = "ddavis@ddavis.io" },
{ name = "Martin Durant", email = "mdurant@anaconda.com" },
]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Scientific/Engineering :: Physics",
"Topic :: Software Development",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Scientific/Engineering :: Physics",
"Topic :: Software Development",
]
dependencies = [
"awkward >=2.5.1",
"dask >=2023.04.0",
"cachetools",
"typing_extensions >=4.8.0",
"awkward >=2.6.7",
"dask >=2023.04.0",
"cachetools",
"typing_extensions >=4.8.0",
]
dynamic = ["version"]

Expand All @@ -48,33 +48,29 @@ Homepage = "https://github.com/dask-contrib/dask-awkward"
"Bug Tracker" = "https://github.com/dask-contrib/dask-awkward/issues"

[project.optional-dependencies]
io = [
"pyarrow",
]
complete = [
"dask-awkward[io]",
]
io = ["pyarrow"]
complete = ["dask-awkward[io]"]
# `docs` and `test` are separate from user installs
docs = [
"dask-awkward[complete]",
"sphinx-book-theme",
"sphinx-design",
"sphinx-codeautolink",
# broken see PR 451
# "dask-sphinx-theme",
"dask-awkward[complete]",
"sphinx-book-theme",
"sphinx-design",
"sphinx-codeautolink",
# broken see PR 451
# "dask-sphinx-theme",
]
test = [
"aiohttp;python_version<\"3.12\"",
"dask[dataframe]",
"dask-awkward[complete]",
"dask-histogram",
"distributed",
"hist",
"pandas",
"pytest >=6.0,<8",
"pytest-cov >=3.0.0",
"requests",
"uproot >=5.1.0",
"aiohttp;python_version<\"3.12\"",
"dask[dataframe]",
"dask-awkward[complete]",
"dask-histogram",
"distributed",
"hist",
"pandas",
"pytest >=6.0,<8",
"pytest-cov >=3.0.0",
"requests",
"uproot >=5.1.0",
]

[project.entry-points."dask.sizeof"]
Expand Down Expand Up @@ -130,17 +126,17 @@ warn_unused_ignores = true
warn_unreachable = true

[[tool.mypy.overrides]]
module = [
module = [
"awkward.*",
"IPython.*",
"fsspec.*",
"pyarrow.*",
"tlz.*",
"uproot.*",
"cloudpickle.*",
"cachetools.*"
]
ignore_missing_imports = true
"cachetools.*",
]
ignore_missing_imports = true

[tool.pyright]
include = ["src"]
Expand All @@ -149,27 +145,27 @@ reportPrivateImportUsage = false

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"if TYPE_CHECKING:",
"except ImportError:",
"NotImplementedError",
"DaskAwkwardNotImplemented",
"_ipython_key_completions_",
"Only highlevel=True is supported",
"\\.\\.\\.$",
"pragma: no cover",
"if TYPE_CHECKING:",
"except ImportError:",
"NotImplementedError",
"DaskAwkwardNotImplemented",
"_ipython_key_completions_",
"Only highlevel=True is supported",
"\\.\\.\\.$",
]
fail_under = 90
show_missing = true

[tool.coverage.run]
omit = [
"*/dask_awkward/lib/unproject_layout.py",
"*/tests/test_*.py",
"*/tests/__init__.py",
"*/version.py",
"*/dask_awkward/lib/unproject_layout.py",
"*/tests/test_*.py",
"*/tests/__init__.py",
"*/version.py",
]
source = ["src/"]

[tool.ruff]
ignore = ["E501", "E402"]
per-file-ignores = {"__init__.py" = ["E402", "F401"]}
lint.ignore = ["E501", "E402"]
lint.per-file-ignores = { "__init__.py" = ["E402", "F401"] }
10 changes: 2 additions & 8 deletions src/dask_awkward/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,7 @@
partition_compatibility,
)
from dask_awkward.lib.describe import backend, fields
from dask_awkward.lib.inspect import (
report_necessary_buffers,
report_necessary_columns,
sample,
)

necessary_columns = report_necessary_columns # Export for backwards compatibility.

from dask_awkward.lib.inspect import sample
from dask_awkward.lib.io.io import (
from_awkward,
from_dask_array,
Expand All @@ -42,6 +35,7 @@
from dask_awkward.lib.io.parquet import from_parquet, to_parquet
from dask_awkward.lib.io.text import from_text
from dask_awkward.lib.operations import concatenate
from dask_awkward.lib.optimize import necessary_columns
from dask_awkward.lib.reducers import (
all,
any,
Expand Down
4 changes: 0 additions & 4 deletions src/dask_awkward/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
AwkwardMaterializedLayer,
AwkwardTreeReductionLayer,
ImplementsIOFunction,
ImplementsProjection,
IOFunctionWithMocking,
io_func_implements_projection,
)

Expand All @@ -14,8 +12,6 @@
"AwkwardBlockwiseLayer",
"AwkwardMaterializedLayer",
"AwkwardTreeReductionLayer",
"ImplementsProjection",
"ImplementsIOFunction",
"IOFunctionWithMocking",
"io_func_implements_projection",
)
Loading
Loading