Deploying to gh-pages from @ 3043f38 🚀

Bears-R-Us · Jun 13, 2024 · f1f3f53 · f1f3f53
1 parent e4d8ca5
commit f1f3f53
Show file tree

Hide file tree

Showing 7 changed files with 60 additions and 178 deletions.
diff --git a/_modules/arkouda/alignment.html b/_modules/arkouda/alignment.html
@@ -507,18 +507,19 @@ <h1>Source code for arkouda.alignment</h1><div class="highlight"><pre>
 <span class="sd">        occurrences as a pdarray. Defaults to only finding the first occurrence.</span>
 <span class="sd">        Finding all occurrences is not yet supported on sequences of arrays</span>
 <span class="sd">    remove_missing: bool</span>
+<span class="sd">        If all_occurrences is True, remove_missing is automatically enabled.</span>
 <span class="sd">        If remove_missing is False, return -1 for any items in query not found in space. If True,</span>
 <span class="sd">        remove these and only return indices of items that are found.</span>
 
 <span class="sd">    Returns</span>
 <span class="sd">    -------</span>
 <span class="sd">    indices : pdarray or SegArray</span>
-<span class="sd">        For each item in query, its index in space. If remove_missing is True,</span>
-<span class="sd">        exclued missing values otherwise return -1. If all_occurrences is False,</span>
+<span class="sd">        For each item in query, its index in space. If all_occurrences is False,</span>
 <span class="sd">        the return will be a pdarray of the first index where each value in the</span>
-<span class="sd">        query appears in the space. if all_occurrences is True, the return will be</span>
+<span class="sd">        query appears in the space. If all_occurrences is True, the return will be</span>
 <span class="sd">        a SegArray containing every index where each value in the query appears in</span>
-<span class="sd">        the space.</span>
+<span class="sd">        the space. If all_occurrences is True, remove_missing is automatically enabled.</span>
+<span class="sd">        If remove_missing is True, exclude missing values; otherwise return -1 for them.</span>
 
 <span class="sd">    Examples</span>
 <span class="sd">    --------</span>
@@ -539,30 +540,6 @@ <h1>Source code for arkouda.alignment</h1><div class="highlight"><pre>
 <span class="sd">     &gt;&gt;&gt; ak.find(arr1, arr2, remove_missing=True)</span>
 <span class="sd">    array([0 1 2 5 8 5 11 5 0])</span>
 
-<span class="sd">    # set all_occurrences to True, the first index of each list</span>
-<span class="sd">    # is the first occurence and should match the default</span>
-<span class="sd">    &gt;&gt;&gt; ak.find(arr1, arr2, all_occurrences=True).to_list()</span>
-<span class="sd">    [[-1],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [0, 4],</span>
-<span class="sd">     [1, 3, 10],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [2, 6, 12, 13],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [5, 7],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [8, 9, 14],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [5, 7],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [-1],</span>
-<span class="sd">     [11, 15],</span>
-<span class="sd">     [5, 7],</span>
-<span class="sd">     [0, 4]]</span>
-
 <span class="sd">    # set both remove_missing and all_occurrences to True, missing values</span>
 <span class="sd">    # will be empty segments</span>
 <span class="sd">    &gt;&gt;&gt; ak.find(arr1, arr2, remove_missing=True, all_occurrences=True).to_list()</span>
@@ -614,7 +591,21 @@ <h1>Source code for arkouda.alignment</h1><div class="highlight"><pre>
     <span class="c1"># Group on terms</span>
     <span class="n">g</span> <span class="o">=</span> <span class="n">GroupBy</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
     <span class="c1"># For each term, count how many times it appears in the search space</span>
-    <span class="n">space_multiplicity</span> <span class="o">=</span> <span class="n">g</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">i</span> <span class="o">&lt;</span> <span class="n">spacesize</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
+
+    <span class="c1"># since we reuse (i &lt; spacesize)[g.permutation] later, we call sum aggregation manually</span>
+    <span class="n">less_than</span> <span class="o">=</span> <span class="p">(</span><span class="n">i</span> <span class="o">&lt;</span> <span class="n">spacesize</span><span class="p">)[</span><span class="n">g</span><span class="o">.</span><span class="n">permutation</span><span class="p">]</span>
+    <span class="n">repMsg</span> <span class="o">=</span> <span class="n">generic_msg</span><span class="p">(</span>
+        <span class="n">cmd</span><span class="o">=</span><span class="s2">&quot;segmentedReduction&quot;</span><span class="p">,</span>
+        <span class="n">args</span><span class="o">=</span><span class="p">{</span>
+            <span class="s2">&quot;values&quot;</span><span class="p">:</span> <span class="n">less_than</span><span class="p">,</span>
+            <span class="s2">&quot;segments&quot;</span><span class="p">:</span> <span class="n">g</span><span class="o">.</span><span class="n">segments</span><span class="p">,</span>
+            <span class="s2">&quot;op&quot;</span><span class="p">:</span> <span class="s2">&quot;sum&quot;</span><span class="p">,</span>
+            <span class="s2">&quot;skip_nan&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
+            <span class="s2">&quot;ddof&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
+        <span class="p">},</span>
+    <span class="p">)</span>
+
+    <span class="n">space_multiplicity</span> <span class="o">=</span> <span class="n">create_pdarray</span><span class="p">(</span><span class="n">repMsg</span><span class="p">)</span>
     <span class="n">has_duplicates</span> <span class="o">=</span> <span class="p">(</span><span class="n">space_multiplicity</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
     <span class="c1"># handle duplicate terms in space</span>
     <span class="k">if</span> <span class="n">has_duplicates</span><span class="p">:</span>
@@ -624,28 +615,13 @@ <h1>Source code for arkouda.alignment</h1><div class="highlight"><pre>
 
             <span class="kn">from</span> <span class="nn">arkouda.segarray</span> <span class="kn">import</span> <span class="n">SegArray</span>
 
-            <span class="c1"># use segmented mink to select space_multiplicity number of elements</span>
-            <span class="c1"># and create a segarray which contains all the indices</span>
-            <span class="c1"># in our query space, instead of just the min for each segment</span>
-
-            <span class="c1"># only calculate where to place the negatives if remove_missing is false</span>
-            <span class="n">negative_at</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span> <span class="k">if</span> <span class="n">remove_missing</span> <span class="k">else</span> <span class="n">space_multiplicity</span> <span class="o">==</span> <span class="mi">0</span>
-            <span class="n">repMsg</span> <span class="o">=</span> <span class="n">generic_msg</span><span class="p">(</span>
-                <span class="n">cmd</span><span class="o">=</span><span class="s2">&quot;segmentedExtremaK&quot;</span><span class="p">,</span>
-                <span class="n">args</span><span class="o">=</span><span class="p">{</span>
-                    <span class="s2">&quot;vals&quot;</span><span class="p">:</span> <span class="n">i</span><span class="p">[</span><span class="n">g</span><span class="o">.</span><span class="n">permutation</span><span class="p">],</span>
-                    <span class="s2">&quot;segs&quot;</span><span class="p">:</span> <span class="n">g</span><span class="o">.</span><span class="n">segments</span><span class="p">,</span>
-                    <span class="s2">&quot;segLens&quot;</span><span class="p">:</span> <span class="n">g</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">1</span><span class="p">],</span>
-                    <span class="s2">&quot;kArray&quot;</span><span class="p">:</span> <span class="n">space_multiplicity</span><span class="p">,</span>
-                    <span class="s2">&quot;isMin&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
-                    <span class="s2">&quot;removeMissing&quot;</span><span class="p">:</span> <span class="n">remove_missing</span><span class="p">,</span>
-                    <span class="s2">&quot;negativeAt&quot;</span><span class="p">:</span> <span class="n">negative_at</span><span class="p">,</span>
-                <span class="p">},</span>
-            <span class="p">)</span>
-            <span class="n">min_k_vals</span> <span class="o">=</span> <span class="n">create_pdarray</span><span class="p">(</span><span class="n">repMsg</span><span class="p">)</span>
+            <span class="c1"># create a segarray which contains all the indices from query</span>
+            <span class="c1"># in our search space, instead of just the min for each segment</span>
+
+            <span class="c1"># I&#39;m not completely convinced there isn&#39;t a better way to get this given the</span>
+            <span class="c1"># amount of structure, but this is no longer the bottleneck of the computation</span>
+            <span class="n">min_k_vals</span> <span class="o">=</span> <span class="n">i</span><span class="p">[</span><span class="n">g</span><span class="o">.</span><span class="n">permutation</span><span class="p">][</span><span class="n">less_than</span><span class="p">]</span>
             <span class="n">seg_idx</span> <span class="o">=</span> <span class="n">g</span><span class="o">.</span><span class="n">broadcast</span><span class="p">(</span><span class="n">arange</span><span class="p">(</span><span class="n">g</span><span class="o">.</span><span class="n">segments</span><span class="o">.</span><span class="n">size</span><span class="p">))[</span><span class="n">i</span> <span class="o">&gt;=</span> <span class="n">spacesize</span><span class="p">]</span>
-            <span class="k">if</span> <span class="ow">not</span> <span class="n">remove_missing</span><span class="p">:</span>
-                <span class="n">space_multiplicity</span> <span class="o">+=</span> <span class="n">negative_at</span>
             <span class="n">min_k_segs</span> <span class="o">=</span> <span class="n">cumsum</span><span class="p">(</span><span class="n">space_multiplicity</span><span class="p">)</span> <span class="o">-</span> <span class="n">space_multiplicity</span>
             <span class="n">sa</span> <span class="o">=</span> <span class="n">SegArray</span><span class="p">(</span><span class="n">min_k_segs</span><span class="p">,</span> <span class="n">min_k_vals</span><span class="p">)</span>
             <span class="k">return</span> <span class="n">sa</span><span class="p">[</span><span class="n">seg_idx</span><span class="p">]</span>

diff --git a/_modules/arkouda/pdarraysetops.html b/_modules/arkouda/pdarraysetops.html
@@ -641,7 +641,6 @@ <h1>Source code for arkouda.pdarraysetops</h1><div class="highlight"><pre>
 <span class="sd">    RuntimeError</span>
 <span class="sd">        Raised if the dtype of either array is not supported</span>
 <span class="sd">    &quot;&quot;&quot;</span>
-    <span class="c1"># from arkouda.alignment import find as akfind</span>
     <span class="kn">from</span> <span class="nn">arkouda.categorical</span> <span class="kn">import</span> <span class="n">Categorical</span> <span class="k">as</span> <span class="n">Categorical_</span>
 
     <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="p">(</span><span class="n">pdarray</span><span class="p">,</span> <span class="n">Strings</span><span class="p">,</span> <span class="n">Categorical_</span><span class="p">)):</span>
@@ -650,16 +649,17 @@ <h1>Source code for arkouda.pdarraysetops</h1><div class="highlight"><pre>
         <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">pdarray</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">space</span><span class="p">,</span> <span class="n">pdarray</span><span class="p">):</span>
             <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;If keys is pdarray, arr must also be pdarray&quot;</span><span class="p">)</span>
 
-    <span class="n">repMsg</span> <span class="o">=</span> <span class="n">generic_msg</span><span class="p">(</span>
-        <span class="n">cmd</span><span class="o">=</span><span class="s2">&quot;indexof1d&quot;</span><span class="p">,</span>
-        <span class="n">args</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;keys&quot;</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span> <span class="s2">&quot;arr&quot;</span><span class="p">:</span> <span class="n">space</span><span class="p">},</span>
-    <span class="p">)</span>
-    <span class="k">return</span> <span class="n">create_pdarray</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">repMsg</span><span class="p">))</span></div>
+    <span class="c1"># repMsg = generic_msg(</span>
+    <span class="c1">#     cmd=&quot;indexof1d&quot;,</span>
+    <span class="c1">#     args={&quot;keys&quot;: query, &quot;arr&quot;: space},</span>
+    <span class="c1"># )</span>
+    <span class="c1"># return create_pdarray(cast(str, repMsg))</span>
+
+    <span class="kn">from</span> <span class="nn">arkouda.alignment</span> <span class="kn">import</span> <span class="n">find</span> <span class="k">as</span> <span class="n">akfind</span>
 
+    <span class="n">found</span> <span class="o">=</span> <span class="n">akfind</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">space</span><span class="p">,</span> <span class="n">all_occurrences</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">remove_missing</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+    <span class="k">return</span> <span class="n">found</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">found</span><span class="p">,</span> <span class="n">pdarray</span><span class="p">)</span> <span class="k">else</span> <span class="n">found</span><span class="o">.</span><span class="n">values</span></div>
 
-    <span class="c1"># TODO see issue #3229 reverted back to old implementation until we can investigate</span>
-    <span class="c1"># found = akfind(query, space, all_occurrences=True, remove_missing=True)</span>
-    <span class="c1"># return found if isinstance(found, pdarray) else found.values</span>
 
 
 <span class="c1"># fmt: off</span>

diff --git a/_sources/autoapi/arkouda/alignment/index.rst.txt b/_sources/autoapi/arkouda/alignment/index.rst.txt
@@ -3780,16 +3780,17 @@ Module Contents
                            occurrences as a pdarray. Defaults to only finding the first occurrence.
                            Finding all occurrences is not yet supported on sequences of arrays
    :type all_occurrences: bool
-   :param remove_missing: If False, return -1 for any items in query not found in space. If True,
+   :param remove_missing: If all_occurrences is True, remove_missing is automatically enabled.
+                          If remove_missing is False, return -1 for any items in query not found in space. If True,
                           remove these and only return indices of items that are found.
    :type remove_missing: bool
 
-   :returns: **indices** -- For each item in query, its index in space. If remove_missing is True,
-             exclued missing values otherwise return -1. If all_occurrences is False,
+   :returns: **indices** -- For each item in query, its index in space. If all_occurrences is False,
              the return will be a pdarray of the first index where each value in the
-             query appears in the space. if all_occurrences is True, the return will be
+             query appears in the space. If all_occurrences is True, the return will be
              a SegArray containing every index where each value in the query appears in
-             the space.
+             the space. If all_occurrences is True, remove_missing is automatically enabled.
+             If remove_missing is True, exclude missing values; otherwise return -1 for them.
    :rtype: pdarray or SegArray
 
    .. rubric:: Examples
@@ -3811,30 +3812,6 @@ Module Contents
     >>> ak.find(arr1, arr2, remove_missing=True)
    array([0 1 2 5 8 5 11 5 0])
 
-   # set all_occurrences to True, the first index of each list
-   # is the first occurence and should match the default
-   >>> ak.find(arr1, arr2, all_occurrences=True).to_list()
-   [[-1],
-    [-1],
-    [-1],
-    [0, 4],
-    [1, 3, 10],
-    [-1],
-    [-1],
-    [-1],
-    [2, 6, 12, 13],
-    [-1],
-    [5, 7],
-    [-1],
-    [8, 9, 14],
-    [-1],
-    [5, 7],
-    [-1],
-    [-1],
-    [11, 15],
-    [5, 7],
-    [0, 4]]
-
    # set both remove_missing and all_occurrences to True, missing values
    # will be empty segments
    >>> ak.find(arr1, arr2, remove_missing=True, all_occurrences=True).to_list()

diff --git a/_sources/autoapi/arkouda/index.rst.txt b/_sources/autoapi/arkouda/index.rst.txt
@@ -29841,16 +29841,17 @@ Package Contents
                            occurrences as a pdarray. Defaults to only finding the first occurrence.
                            Finding all occurrences is not yet supported on sequences of arrays
    :type all_occurrences: bool
-   :param remove_missing: If False, return -1 for any items in query not found in space. If True,
+   :param remove_missing: If all_occurrences is True, remove_missing is automatically enabled.
+                          If remove_missing is False, return -1 for any items in query not found in space. If True,
                           remove these and only return indices of items that are found.
    :type remove_missing: bool
 
-   :returns: **indices** -- For each item in query, its index in space. If remove_missing is True,
-             exclued missing values otherwise return -1. If all_occurrences is False,
+   :returns: **indices** -- For each item in query, its index in space. If all_occurrences is False,
              the return will be a pdarray of the first index where each value in the
-             query appears in the space. if all_occurrences is True, the return will be
+             query appears in the space. If all_occurrences is True, the return will be
              a SegArray containing every index where each value in the query appears in
-             the space.
+             the space. If all_occurrences is True, remove_missing is automatically enabled.
+             If remove_missing is True, exclude missing values; otherwise return -1 for them.
    :rtype: pdarray or SegArray
 
    .. rubric:: Examples
@@ -29872,30 +29873,6 @@ Package Contents
     >>> ak.find(arr1, arr2, remove_missing=True)
    array([0 1 2 5 8 5 11 5 0])
 
-   # set all_occurrences to True, the first index of each list
-   # is the first occurence and should match the default
-   >>> ak.find(arr1, arr2, all_occurrences=True).to_list()
-   [[-1],
-    [-1],
-    [-1],
-    [0, 4],
-    [1, 3, 10],
-    [-1],
-    [-1],
-    [-1],
-    [2, 6, 12, 13],
-    [-1],
-    [5, 7],
-    [-1],
-    [8, 9, 14],
-    [-1],
-    [5, 7],
-    [-1],
-    [-1],
-    [11, 15],
-    [5, 7],
-    [0, 4]]
-
    # set both remove_missing and all_occurrences to True, missing values
    # will be empty segments
    >>> ak.find(arr1, arr2, remove_missing=True, all_occurrences=True).to_list()