scikit-hep · LovelyBuggies · Jun 3, 2021 · Aug 4, 2021 · Aug 6, 2021 · Aug 7, 2021
diff --git a/dev-environment.yml b/dev-environment.yml
@@ -19,5 +19,6 @@ dependencies:
  - uncertainties >=3
  - mplhep>=0.1.27
  - histoprint>=2.2.0
+ - rich
  - pip:
  - -e .
diff --git a/docs/examples/HistNumbaFill.ipynb b/docs/examples/HistNumbaFill.ipynb
@@ -0,0 +1,317 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Hist Design Prototype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, create the input data and the histogram that both the pure-Python `fill` and the Numba `fill` will use:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import numba as nb\n",
+ "from hist import Hist\n",
+ "from hist import axis\n",
+ "\n",
+ "array = np.random.randn(10000)\n",
+ "h = Hist.new.Reg(100, -3, 3, name=\"x\", label=\"x-axis\").Double()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Numba: Hist"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To extend Numba, we first create a Numba type, `HistType`, for `Hist`, and then teach Numba how to infer that type from values, annotations, and constructor calls:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from numba import types\n",
+ "import numba as nb\n",
+ "\n",
+ "# create Numba type\n",
+ "class HistType(types.Type):\n",
+ "    # Numba-side type for ``Hist``; constructed with the fixed name Hist.\n",
+ "\n",
+ "    # Array type used for the bin contents: 1-D, C-ordered float64.\n",
+ "    arraytype = types.Array(types.float64, 1, \"C\")\n",
+ "\n",
+ "    def __init__(self):\n",
+ "        super().__init__(name=\"Hist\")\n",
+ "\n",
+ "\n",
+ "hist_type = HistType()\n",
+ "\n",
+ "# infer values\n",
+ "@nb.extending.typeof_impl.register(Hist)\n",
+ "def typeof_index(val, c):\n",
+ "    # Every Python ``Hist`` value is typed as the single ``hist_type`` instance;\n",
+ "    # neither the value nor the typing context is inspected.\n",
+ "    return hist_type\n",
+ "\n",
+ "\n",
+ "# infer annotations\n",
+ "nb.extending.as_numba_type.register(Hist, hist_type)\n",
+ "\n",
+ "# infer operations\n",
+ "@nb.extending.type_callable(Hist)\n",
+ "def type_hist(context):\n",
+ "    # Typing hook for calling ``Hist(...)`` inside jitted code: returns a\n",
+ "    # typer that maps the call arguments to the resulting Numba type, or\n",
+ "    # None when the arguments are not supported.\n",
+ "    # NOTE(review): the typer takes ``axes`` while the lowering registered for\n",
+ "    # ``Hist`` takes (Integer, Float, Float, Array) - confirm which is intended.\n",
+ "    def typer(axes):\n",
+ "        for ax in axes:\n",
+ "            # TODO: Assumed all are Regular axes\n",
+ "            # Use the imported ``axis`` module: only ``Hist`` and ``axis`` are\n",
+ "            # imported from hist, so the name ``hist`` is not bound here.\n",
+ "            if not isinstance(ax, axis.Regular):\n",
+ "                return None\n",
+ "        # A typer must return a Numba type instance, not the type class.\n",
+ "        return hist_type\n",
+ "\n",
+ "    return typer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We also need to teach Numba how to generate the native representation of the new type and its operations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numba as nb\n",
+ "from numba.core import cgutils\n",
+ "from numba.extending import (\n",
+ " models,\n",
+ " overload_attribute,\n",
+ " lower_builtin,\n",
+ " NativeValue,\n",
+ ")\n",
+ "\n",
+ "# define data model\n",
+ "@nb.extending.register_model(HistType)\n",
+ "class HistModel(models.StructModel):\n",
+ "    # Native struct layout behind ``HistType``: bin count, lower edge,\n",
+ "    # upper edge, and the array of bin contents.\n",
+ "    def __init__(self, dmm, fe_type):\n",
+ "        fields = [\n",
+ "            (\"bins\", types.int64),\n",
+ "            (\"lo\", types.float64),\n",
+ "            (\"hi\", types.float64),\n",
+ "            (\"data\", fe_type.arraytype),\n",
+ "        ]\n",
+ "        super().__init__(dmm, fe_type, fields)\n",
+ "\n",
+ "\n",
+ "# expose attributes, properties and constructors\n",
+ "nb.extending.make_attribute_wrapper(HistType, \"bins\", \"bins\")\n",
+ "nb.extending.make_attribute_wrapper(HistType, \"lo\", \"lo\")\n",
+ "nb.extending.make_attribute_wrapper(HistType, \"hi\", \"hi\")\n",
+ "nb.extending.make_attribute_wrapper(HistType, \"data\", \"data\")\n",
+ "\n",
+ "\n",
+ "@nb.extending.lower_builtin(Hist, types.Integer, types.Float, types.Float, types.Array)\n",
+ "def impl_h(context, builder, sig, args):\n",
+ "    # Lowering for ``Hist(bins, lo, hi, data)`` in jitted code: packs the four\n",
+ "    # native arguments into the ``HistType`` struct and returns its value.\n",
+ "    typ = sig.return_type\n",
+ "    # Unpack in the order of the registered signature\n",
+ "    # (Integer, Float, Float, Array); the struct fields are bins, lo, hi, data.\n",
+ "    bins, lo, hi, data = args\n",
+ "    h = cgutils.create_struct_proxy(typ)(context, builder)\n",
+ "    h.bins = bins\n",
+ "    h.lo = lo\n",
+ "    h.hi = hi\n",
+ "    h.data = data\n",
+ "    return h._getvalue()\n",
+ "\n",
+ "\n",
+ "# unbox and box\n",
+ "@nb.extending.unbox(HistType)\n",
+ "def unbox_h(typ, obj, c):\n",
+ "    # Convert a Python ``Hist`` into the native ``HistType`` struct.\n",
+ "    # Python equivalent of what is extracted below (first axis only):\n",
+ "    #   lo   = h.axes[0][0][0]\n",
+ "    #   hi   = h.axes[0][-1][-1]\n",
+ "    #   bins = len(h.axes[0])\n",
+ "    #   data = h.values()\n",
+ "\n",
+ "    # Python ints 0 and -1, used as subscripts below.\n",
+ "    start_obj = c.pyapi.long_from_long(c.context.get_constant(nb.long_, 0))\n",
+ "    stop_obj = c.pyapi.long_from_long(c.context.get_constant(nb.long_, -1))\n",
+ "\n",
+ "    data_obj = c.pyapi.call_method(obj, \"values\")\n",
+ "\n",
+ "    axis_tuple_obj = c.pyapi.object_getattr_string(obj, \"axes\")\n",
+ "    # tuple_getitem returns a borrowed reference: axis_obj must not be decref'd.\n",
+ "    axis_obj = c.pyapi.tuple_getitem(axis_tuple_obj, 0)\n",
+ "    bins_obj = c.pyapi.call_method(axis_obj, \"__len__\")\n",
+ "\n",
+ "    # First and last bins of the axis (new references).\n",
+ "    lo1_obj = c.pyapi.object_getitem(axis_obj, start_obj)\n",
+ "    hi1_obj = c.pyapi.object_getitem(axis_obj, stop_obj)\n",
+ "\n",
+ "    # lo_obj is borrowed from lo1_obj (tuple_getitem); hi_obj is a new\n",
+ "    # reference (object_getitem). Only hi_obj is released below.\n",
+ "    lo_obj = c.pyapi.tuple_getitem(lo1_obj, 0)\n",
+ "    hi_obj = c.pyapi.object_getitem(hi1_obj, stop_obj)\n",
+ "\n",
+ "    h = cgutils.create_struct_proxy(typ)(c.context, c.builder)\n",
+ "\n",
+ "    h.bins = c.pyapi.number_as_ssize_t(bins_obj)\n",
+ "    h.lo = c.pyapi.float_as_double(lo_obj)\n",
+ "    h.hi = c.pyapi.float_as_double(hi_obj)\n",
+ "    # NOTE(review): the native array refers to the buffer behind data_obj;\n",
+ "    # presumably the histogram keeps that buffer alive after the decref - confirm.\n",
+ "    h.data = c.pyapi.to_native_value(typ.arraytype, data_obj).value\n",
+ "\n",
+ "    # Release every reference owned here. lo_obj and axis_obj are borrowed,\n",
+ "    # so releasing them would over-decrement their refcounts.\n",
+ "    c.pyapi.decref(bins_obj)\n",
+ "    c.pyapi.decref(hi_obj)\n",
+ "    c.pyapi.decref(data_obj)\n",
+ "\n",
+ "    c.pyapi.decref(lo1_obj)\n",
+ "    c.pyapi.decref(hi1_obj)\n",
+ "\n",
+ "    c.pyapi.decref(axis_tuple_obj)\n",
+ "\n",
+ "    c.pyapi.decref(start_obj)\n",
+ "    c.pyapi.decref(stop_obj)\n",
+ "\n",
+ "    # Report any Python exception raised by the calls above.\n",
+ "    is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())\n",
+ "    return NativeValue(h._getvalue(), is_error=is_error)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we need to teach Numba how to run `fill` on the new type:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@nb.extending.overload_method(HistType, \"fill\")\n",
+ "def fill_resolve(hist, val):\n",
+ "    # Overload of ``hist.fill(val)`` for jitted code. Returns the\n",
+ "    # implementation when ``hist`` is a HistType and ``val`` is a float type,\n",
+ "    # otherwise None so that Numba rejects the call.\n",
+ "    if not isinstance(hist, HistType):\n",
+ "        return None\n",
+ "    if not isinstance(val, nb.types.Float):\n",
+ "        return None\n",
+ "\n",
+ "    def fill(hist, val):\n",
+ "        # Range-check the value itself first: int() truncates toward zero, so\n",
+ "        # a value up to one bin width below ``lo`` would otherwise land in\n",
+ "        # bin 0. The comparison is also False for NaN, which is then skipped.\n",
+ "        if hist.lo <= val < hist.hi:\n",
+ "            delta = 1 / ((hist.hi - hist.lo) / hist.bins)\n",
+ "            i = int((val - hist.lo) * delta)\n",
+ "\n",
+ "            # Kept as a guard against floating-point rounding at the edges.\n",
+ "            if 0 <= i < hist.bins:\n",
+ "                hist.data[i] += 1\n",
+ "\n",
+ "    return fill"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Timing the Python version:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%timeit\n",
+ "h_python = h.copy()\n",
+ "h_python.fill(array)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@nb.njit\n",
+ "def nb_fill_hist(h, v):\n",
+ "    # Fill histogram ``h`` with every element of ``v`` in a jitted loop.\n",
+ "    # Iterate the argument itself, not the notebook-level ``array``, so the\n",
+ "    # function fills with whatever the caller passes.\n",
+ "    for x in v:\n",
+ "        h.fill(x)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Timing the Numba version:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%timeit\n",
+ "h_numba = h.copy()\n",
+ "nb_fill_hist(h_numba, array)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Showing the results:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "h_numba = h.copy()\n",
+ "nb_fill_hist(h_numba, array)\n",
+ "h_numba"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill the copy, not the shared template ``h``, so re-running cells does not\n",
+ "# accumulate entries in the histogram the other cells copy from.\n",
+ "h_python = h.copy()\n",
+ "h_python.fill(array)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "hist",
+ "language": "python",
+ "name": "hist"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/src/hist/basehist.py b/src/hist/basehist.py
@@ -500,6 +500,13 @@ def plot_pie(
  import hist.plot
 
  return hist.plot.plot_pie(self, ax=ax, **kwargs)
+
+ def integrate(
+     self,
+     name: int | str,
+     i_or_list: Loc | list[str | int] | None = None,
+     j: Loc | None = None,
+ ) -> Self:
+     """
+     Sum the histogram over the axis ``name``.
+
+     If ``i_or_list`` is a list, the axis is first indexed with that list
+     and the result is summed over the slice ``0:len(i_or_list)``.
+     Otherwise the axis is summed over the slice ``i_or_list:j``; with
+     both left as ``None`` that slice has no bounds.
+     """
+     if not isinstance(i_or_list, list):
+         return self[{name: slice(i_or_list, j, sum)}]
+
+     selected = self[{name: i_or_list}]
+     return selected[{name: slice(0, len(i_or_list), sum)}]
+
 
  def stack(self, axis: int | str) -> hist.stack.Stack:
  """

diff --git a/tests/test_general.py b/tests/test_general.py
@@ -925,3 +925,17 @@ def test_quick_construct_direct():
  assert tuple(h.sort(0, key=lambda x: -x).axes[0]) == (4, 2, 1)
  assert tuple(h.sort(1).axes[1]) == ("AB", "BC", "BCC")
  assert tuple(h.sort(1, reverse=True).axes[1]) == ("BCC", "BC", "AB")
+
+
+def test_integrate():
+    """Integrate a category axis over a list of labels, then over ``z``."""
+    h = (
+        hist.new.IntCat([4, 1, 2], name="x")
+        .StrCat(["AB", "BCC", "BC"], name="y")
+        .Int(1, 10, name="z")
+        # A storage call finalises the builder into a histogram; without it
+        # the object has no ``fill`` method.
+        .Int64()
+    )
+    h.fill(4, "AB", 1)
+    h.fill(4, "BCC", 2)
+    h.fill(4, "BC", 4)
+    h.fill(4, "X", 8)
+
+    # Keep only "AB" and "BC" on y, then sum over z so one count per x remains.
+    h1 = h.integrate("y", ["AB", "BC"]).integrate("z")
+
+    # Two of the fills at x == 4 fall in the selected categories.
+    assert h1[{"x": hist.loc(4)}] == 2