Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add a numba demo for hist fill #293

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dev-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ dependencies:
- uncertainties >=3
- mplhep>=0.1.27
- histoprint>=2.2.0
- rich
- pip:
- -e .
317 changes: 317 additions & 0 deletions docs/examples/HistNumbaFill.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hist Design Prototype"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is the `fill` method in a Python loop:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import numba as nb\n",
"from hist import Hist\n",
"from hist import axis\n",
"\n",
"array = np.random.randn(10000)\n",
"h = Hist.new.Reg(100, -3, 3, name=\"x\", label=\"x-axis\").Double()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Numba: Hist"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To extend Numba, we first need to create a Numba type, `HistType`, for `Hist`, and then teach Numba about our type inference additions:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from numba import types\n",
"import numba as nb\n",
"\n",
"# create Numba type\n",
"class HistType(types.Type):\n",
" arraytype = nb.types.Array(nb.types.float64, 1, \"C\")\n",
"\n",
" def __init__(self):\n",
" super().__init__(name=\"Hist\")\n",
"\n",
"\n",
"hist_type = HistType()\n",
"\n",
"# infer values\n",
"@nb.extending.typeof_impl.register(Hist)\n",
"def typeof_index(val, c):\n",
" return hist_type\n",
"\n",
"\n",
"# infer annotations\n",
"nb.extending.as_numba_type.register(Hist, hist_type)\n",
"\n",
"# infer operations\n",
"@nb.extending.type_callable(Hist)\n",
"def type_hist(context):\n",
"    # Typing rule for calling Hist(...) inside compiled code.\n",
"    def typer(axes):\n",
"        for ax in axes:\n",
"            # TODO: Assumed all are Regular axes\n",
"            # Use the `axis` module imported above; the bare name `hist`\n",
"            # is never imported in this notebook.\n",
"            if not (isinstance(ax, axis.Regular)):\n",
"                return None\n",
"        # A typer must return a Numba type instance, not the type class.\n",
"        return hist_type\n",
"\n",
"    return typer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also need to teach Numba how to actually generate a native representation for the new operations:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numba as nb\n",
"from numba.core import cgutils\n",
"from numba.extending import (\n",
" models,\n",
" overload_attribute,\n",
" lower_builtin,\n",
" NativeValue,\n",
")\n",
"\n",
"# define data model\n",
"@nb.extending.register_model(HistType)\n",
"class HistModel(models.StructModel):\n",
" def __init__(self, dmm, fe_type):\n",
" members = [\n",
" (\"bins\", types.int64),\n",
" (\"lo\", types.float64),\n",
" (\"hi\", types.float64),\n",
" (\"data\", fe_type.arraytype),\n",
" ]\n",
" super().__init__(dmm, fe_type, members)\n",
"\n",
"\n",
"# expose attributes, properties and constructors\n",
"nb.extending.make_attribute_wrapper(HistType, \"bins\", \"bins\")\n",
"nb.extending.make_attribute_wrapper(HistType, \"lo\", \"lo\")\n",
"nb.extending.make_attribute_wrapper(HistType, \"hi\", \"hi\")\n",
"nb.extending.make_attribute_wrapper(HistType, \"data\", \"data\")\n",
"\n",
"\n",
"@nb.extending.lower_builtin(Hist, types.Integer, types.Float, types.Float, types.Array)\n",
"def impl_h(context, builder, sig, args):\n",
"    # Build the native HistType struct from already-lowered arguments.\n",
"    typ = sig.return_type\n",
"    # Unpack in the same order as the registered signature above:\n",
"    # (bins: Integer, lo: Float, hi: Float, data: Array).\n",
"    bins, lo, hi, data = args\n",
"    h = cgutils.create_struct_proxy(typ)(context, builder)\n",
"    h.bins = bins\n",
"    h.lo = lo\n",
"    h.hi = hi\n",
"    h.data = data\n",
"    return h._getvalue()\n",
"\n",
"\n",
"# unbox and box\n",
"@nb.extending.unbox(HistType)\n",
"def unbox_h(typ, obj, c):\n",
"    # Convert a Python Hist object into the native HistType struct.\n",
"    # Python equivalent of the values extracted below (first axis only):\n",
"    #   lower = h.axes[0][0][0]\n",
"    #   upper = h.axes[0][-1][-1]\n",
"    #   bins = len(h.axes[0])\n",
"    #   data = h.values()\n",
"\n",
"    start_obj = c.pyapi.long_from_long(c.context.get_constant(nb.long_, 0))\n",
"    stop_obj = c.pyapi.long_from_long(c.context.get_constant(nb.long_, -1))\n",
"\n",
"    data_obj = c.pyapi.call_method(obj, \"values\")\n",
"\n",
"    axis_tuple_obj = c.pyapi.object_getattr_string(obj, \"axes\")\n",
"    # tuple_getitem returns a borrowed reference: do not decref axis_obj.\n",
"    axis_obj = c.pyapi.tuple_getitem(axis_tuple_obj, 0)\n",
"    bins_obj = c.pyapi.call_method(axis_obj, \"__len__\")\n",
"\n",
"    lo1_obj = c.pyapi.object_getitem(axis_obj, start_obj)\n",
"    hi1_obj = c.pyapi.object_getitem(axis_obj, stop_obj)\n",
"\n",
"    # lo_obj is borrowed from lo1_obj (tuple_getitem); hi_obj is a new\n",
"    # reference (object_getitem). Only hi_obj may be decref'ed below.\n",
"    lo_obj = c.pyapi.tuple_getitem(lo1_obj, 0)\n",
"    hi_obj = c.pyapi.object_getitem(hi1_obj, stop_obj)\n",
"\n",
"    h = cgutils.create_struct_proxy(typ)(c.context, c.builder)\n",
"\n",
"    h.bins = c.pyapi.number_as_ssize_t(bins_obj)\n",
"    h.lo = c.pyapi.float_as_double(lo_obj)\n",
"    h.hi = c.pyapi.float_as_double(hi_obj)\n",
"    h.data = c.pyapi.to_native_value(typ.arraytype, data_obj).value\n",
"\n",
"    # Release only the references this function owns.\n",
"    c.pyapi.decref(bins_obj)\n",
"    c.pyapi.decref(hi_obj)\n",
"    c.pyapi.decref(data_obj)\n",
"\n",
"    # Releasing lo1_obj also ends the lifetime of the borrowed lo_obj.\n",
"    c.pyapi.decref(lo1_obj)\n",
"    c.pyapi.decref(hi1_obj)\n",
"\n",
"    c.pyapi.decref(axis_tuple_obj)\n",
"\n",
"    c.pyapi.decref(start_obj)\n",
"    c.pyapi.decref(stop_obj)\n",
"\n",
"    is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())\n",
"    return NativeValue(h._getvalue(), is_error=is_error)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also need to teach Numba how to run `fill`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@nb.extending.overload_method(HistType, \"fill\")\n",
"def fill_resolve(hist, val):\n",
"    # Only provide an implementation for HistType.fill(float).\n",
"    if not isinstance(hist, HistType):\n",
"        return None\n",
"    if not isinstance(val, nb.types.Float):\n",
"        return None\n",
"\n",
"    def fill(hist, val):\n",
"        # Range-check before truncating: int() rounds toward zero, so a\n",
"        # value just below hist.lo would otherwise land in bin 0. The\n",
"        # comparison is also False for NaN.\n",
"        if hist.lo <= val < hist.hi:\n",
"            delta = 1 / ((hist.hi - hist.lo) / hist.bins)\n",
"            i = int((val - hist.lo) * delta)\n",
"\n",
"            # Floating-point rounding may still yield i == bins near hi.\n",
"            if i < hist.bins:\n",
"                hist.data[i] += 1\n",
"\n",
"    return fill"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Timing the Python version:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%timeit\n",
"h_python = h.copy()\n",
"h_python.fill(array)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@nb.njit\n",
"def nb_fill_hist(h, v):\n",
"    # Iterate over the argument `v`, not the notebook-global `array`.\n",
"    for x in v:\n",
"        h.fill(x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Timing the Numba version:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%timeit\n",
"h_numba = h.copy()\n",
"nb_fill_hist(h_numba, array)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Showing the results:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"h_numba = h.copy()\n",
"nb_fill_hist(h_numba, array)\n",
"h_numba"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"h_python = h.copy()\n",
"h_python.fill(array)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hist",
"language": "python",
"name": "hist"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
7 changes: 7 additions & 0 deletions src/hist/basehist.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,13 @@ def plot_pie(
import hist.plot

return hist.plot.plot_pie(self, ax=ax, **kwargs)

def integrate(self, name: int | str, i_or_list: Loc | list[str | int] | None = None, j: Loc | None = None) -> Self:
    """
    Index ``self`` along axis ``name`` using ``sum`` as the slice step.

    If ``i_or_list`` is a list, ``self`` is first indexed with that list on
    ``name``; the result is then indexed on the same axis with
    ``slice(0, len(i_or_list), sum)``. ``j`` is not used in that case.

    Otherwise ``self`` is indexed once with ``slice(i_or_list, j, sum)``.
    """
    if not isinstance(i_or_list, list):
        return self[{name: slice(i_or_list, j, sum)}]

    # Pick the requested entries first, then collapse all of them.
    picked = self[{name: i_or_list}]
    collapse_all = slice(0, len(i_or_list), sum)
    return picked[{name: collapse_all}]


def stack(self, axis: int | str) -> hist.stack.Stack:
"""
Expand Down
14 changes: 14 additions & 0 deletions tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,3 +925,17 @@ def test_quick_construct_direct():
assert tuple(h.sort(0, key=lambda x: -x).axes[0]) == (4, 2, 1)
assert tuple(h.sort(1).axes[1]) == ("AB", "BC", "BCC")
assert tuple(h.sort(1, reverse=True).axes[1]) == ("BCC", "BC", "AB")


def test_integrate():
    """Fill a three-axis histogram and check ``integrate`` over a list of ``y`` labels."""
    h = (
        hist.new.IntCat([4, 1, 2], name="x")
        .StrCat(["AB", "BCC", "BC"], name="y")
        .Int(1, 10)  # start and stop are passed positionally to Int()
    )
    # Every fill uses x == 4; "X" is not one of the declared "y" labels.
    for y_label, third in (("AB", 1), ("BCC", 2), ("BC", 4), ("X", 8)):
        h.fill(4, y_label, third)
    h1 = h.integrate("y", ["AB", "BC"])
    assert h1[{"x": 4}] == 5