From dcedc30816f7194aa91fc46122b0e5de5dab1f94 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jul 2023 12:14:00 -0700 Subject: [PATCH 1/3] DOC: Supress setups less in user guide --- doc/source/user_guide/advanced.rst | 5 ----- doc/source/user_guide/basics.rst | 25 +------------------------ doc/source/user_guide/groupby.rst | 15 +-------------- doc/source/user_guide/indexing.rst | 15 --------------- doc/source/user_guide/io.rst | 4 ---- doc/source/user_guide/missing_data.rst | 22 ---------------------- doc/source/user_guide/visualization.rst | 20 -------------------- 7 files changed, 2 insertions(+), 104 deletions(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index d76c7e2bf3b03..41b0c98e339da 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -470,11 +470,6 @@ Compare the above with the result using ``drop_level=True`` (the default value). df.xs("one", level="second", axis=1, drop_level=True) -.. ipython:: python - :suppress: - - df = df.T - .. _advanced.advanced_reindex: Advanced reindexing and alignment diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 389a2d23c466d..7d60d763a89df 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -220,11 +220,6 @@ either match on the *index* or *columns* via the **axis** keyword: df.sub(column, axis="index") df.sub(column, axis=0) -.. ipython:: python - :suppress: - - df_orig = df - Furthermore you can align a level of a MultiIndexed DataFrame with a Series. .. ipython:: python @@ -272,13 +267,9 @@ case the result will be NaN (you can later replace NaN with some other value using ``fillna`` if you wish). .. ipython:: python - :suppress: df2 = df.copy() df2["three"]["a"] = 1.0 - -.. ipython:: python - df df2 df + df2 @@ -936,7 +927,6 @@ Another useful feature is the ability to pass Series methods to carry out some Series operation on each column or row: .. ipython:: python - :suppress: tsdf = pd.DataFrame( np.random.randn(10, 3), @@ -944,9 +934,6 @@ Series operation on each column or row: index=pd.date_range("1/1/2000", periods=10), ) tsdf.iloc[3:7] = np.nan - -.. ipython:: python - tsdf tsdf.apply(pd.Series.interpolate) @@ -1171,12 +1158,7 @@ and analogously :meth:`~Series.map` on Series accept any Python function taking a single value and returning a single value. For example: .. ipython:: python - :suppress: - - df4 = df_orig.copy() - -.. ipython:: python - + df4 = df.copy() df4 def f(x): @@ -1280,14 +1262,9 @@ is a common enough operation that the :meth:`~DataFrame.reindex_like` method is available to make this simpler: .. ipython:: python - :suppress: df2 = df.reindex(["a", "b", "c"], columns=["one", "two"]) df3 = df2 - df2.mean() - - -.. ipython:: python - df2 df3 df.reindex_like(df2) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 7ddce18d8a259..cda81e4f89b05 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -271,7 +271,6 @@ the length of the ``groups`` dict, so it is largely just a convenience: ``GroupBy`` will tab complete column names (and other attributes): .. ipython:: python - :suppress: n = 10 weight = np.random.normal(166, 20, size=n) @@ -281,9 +280,6 @@ the length of the ``groups`` dict, so it is largely just a convenience: df = pd.DataFrame( {"height": height, "weight": weight, "gender": gender}, index=time ) - -.. ipython:: python - df gb = df.groupby("gender") @@ -334,19 +330,14 @@ number: Grouping with multiple levels is supported. .. ipython:: python - :suppress: arrays = [ ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["doo", "doo", "bee", "bee", "bop", "bop", "bop", "bop"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] - tuples = list(zip(*arrays)) - index = pd.MultiIndex.from_tuples(tuples, names=["first", "second", "third"]) + index = pd.MultiIndex.from_arrays(tuples, names=["first", "second", "third"]) s = pd.Series(np.random.randn(8), index=index) - -.. ipython:: python - s s.groupby(level=["first", "second"]).sum() @@ -963,7 +954,6 @@ match the shape of the input array. Another common data transform is to replace missing data with the group mean. .. ipython:: python - :suppress: cols = ["A", "B", "C"] values = np.random.randn(1000, 3) @@ -971,9 +961,6 @@ Another common data transform is to replace missing data with the group mean. values[np.random.randint(0, 1000, 50), 1] = np.nan values[np.random.randint(0, 1000, 200), 2] = np.nan data_df = pd.DataFrame(values, columns=cols) - -.. ipython:: python - data_df countries = np.array(["US", "UK", "GR", "JP"]) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 77eee8e58a5e8..318f353b66dbf 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1029,14 +1029,10 @@ input data shape. ``where`` is used under the hood as the implementation. The code below is equivalent to ``df.where(df < 0)``. .. ipython:: python - :suppress: dates = pd.date_range('1/1/2000', periods=8) df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) - -.. ipython:: python - df[df < 0] In addition, ``where`` takes an optional ``other`` argument for replacement of @@ -1431,8 +1427,6 @@ This plot was created using a ``DataFrame`` with 3 columns each containing floating point values generated using ``numpy.random.randn()``. .. ipython:: python - :suppress: - df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) df2 = df.copy() @@ -1694,15 +1688,11 @@ DataFrame has a :meth:`~DataFrame.set_index` method which takes a column name To create a new, re-indexed DataFrame: .. ipython:: python - :suppress: data = pd.DataFrame({'a': ['bar', 'bar', 'foo', 'foo'], 'b': ['one', 'two', 'one', 'two'], 'c': ['z', 'y', 'x', 'w'], 'd': [1., 2., 3, 4]}) - -.. ipython:: python - data indexed1 = data.set_index('c') indexed1 @@ -1812,11 +1802,6 @@ But it turns out that assigning to the product of chained indexing has inherently unpredictable results. To see this, think about how the Python interpreter executes this code: -.. ipython:: python - :suppress: - - value = None - .. code-block:: python dfmi.loc[:, ('one', 'second')] = value diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index ec0e7d0636b07..0077e5a9abd88 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -704,7 +704,6 @@ Comments Sometimes comments or meta data may be included in a file: .. ipython:: python - :suppress: data = ( "ID,level,category\n" @@ -712,12 +711,9 @@ Sometimes comments or meta data may be included in a file: "Patient2,23000,y # wouldn't take his medicine\n" "Patient3,1234018,z # awesome" ) - with open("tmp.csv", "w") as fh: fh.write(data) -.. ipython:: python - print(open("tmp.csv").read()) By default, the parser includes the comments in the output: diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 443fdd4f59e3f..ac7e383d6d7ff 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -142,14 +142,10 @@ Missing values propagate naturally through arithmetic operations between pandas objects. .. ipython:: python - :suppress: df = df2.loc[:, ["one", "two", "three"]] a = df2.loc[df2.index[:5], ["one", "two"]].ffill() b = df2.loc[df2.index[:5], ["one", "two", "three"]] - -.. ipython:: python - a b a + b @@ -247,12 +243,8 @@ If we only want consecutive gaps filled up to a certain number of data points, we can use the ``limit`` keyword: .. ipython:: python - :suppress: df.iloc[2:4, :] = np.nan - -.. ipython:: python - df df.ffill(limit=1) @@ -308,13 +300,9 @@ You may wish to simply exclude labels from a data set which refer to missing data. To do this, use :meth:`~DataFrame.dropna`: .. ipython:: python - :suppress: df["two"] = df["two"].fillna(0) df["three"] = df["three"].fillna(0) - -.. ipython:: python - df df.dropna(axis=0) df.dropna(axis=1) @@ -333,7 +321,6 @@ Both Series and DataFrame objects have :meth:`~DataFrame.interpolate` that, by default, performs linear interpolation at missing data points. .. ipython:: python - :suppress: np.random.seed(123456) idx = pd.date_range("1/1/2000", periods=100, freq="BM") @@ -343,8 +330,6 @@ that, by default, performs linear interpolation at missing data points. ts[60:80] = np.nan ts = ts.cumsum() -.. ipython:: python - ts ts.count() @savefig series_before_interpolate.png @@ -361,12 +346,8 @@ that, by default, performs linear interpolation at missing data points. Index aware interpolation is available via the ``method`` keyword: .. ipython:: python - :suppress: ts2 = ts.iloc[[0, 1, 30, 60, 99]] - -.. ipython:: python - ts2 ts2.interpolate() ts2.interpolate(method="time") @@ -374,13 +355,10 @@ Index aware interpolation is available via the ``method`` keyword: For a floating-point index, use ``method='values'``: .. ipython:: python - :suppress: idx = [0.0, 1.0, 10.0] ser = pd.Series([0.0, np.nan, 10.0], idx) -.. ipython:: python - ser ser.interpolate() ser.interpolate(method="values") diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index 67799edf96ce2..9081d13ef2cf1 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -42,12 +42,9 @@ The ``plot`` method on Series and DataFrame is just a simple wrapper around :meth:`plt.plot() `: .. ipython:: python - :suppress: np.random.seed(123456) -.. ipython:: python - ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000)) ts = ts.cumsum() @@ -1468,7 +1465,6 @@ otherwise you will see a warning. Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a particular axis: .. ipython:: python - :suppress: np.random.seed(123456) ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000)) @@ -1583,12 +1579,8 @@ Plotting tables Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. .. ipython:: python - :suppress: np.random.seed(123456) - -.. ipython:: python - fig, ax = plt.subplots(1, 1, figsize=(7, 6.5)) df = pd.DataFrame(np.random.rand(5, 3), columns=["a", "b", "c"]) ax.xaxis.tick_top() # Display x-axis ticks on top. @@ -1663,12 +1655,8 @@ colormaps will produce lines that are not easily visible. To use the cubehelix colormap, we can pass ``colormap='cubehelix'``. .. ipython:: python - :suppress: np.random.seed(123456) - -.. ipython:: python - df = pd.DataFrame(np.random.randn(1000, 10), index=ts.index) df = df.cumsum() @@ -1701,12 +1689,8 @@ Alternatively, we can pass the colormap itself: Colormaps can also be used other plot types, like bar charts: .. ipython:: python - :suppress: np.random.seed(123456) - -.. ipython:: python - dd = pd.DataFrame(np.random.randn(10, 10)).map(abs) dd = dd.cumsum() @@ -1764,12 +1748,8 @@ level of refinement you would get when plotting via pandas, it can be faster when plotting a large number of points. .. ipython:: python - :suppress: np.random.seed(123456) - -.. ipython:: python - price = pd.Series( np.random.randn(150).cumsum(), index=pd.date_range("2000-1-1", periods=150, freq="B"), From 30ef21cc88eb0d37072079d0d20c0c386b2f5959 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jul 2023 12:28:19 -0700 Subject: [PATCH 2/3] Fix error --- doc/source/user_guide/groupby.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index cda81e4f89b05..482e3fe91ca09 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -336,7 +336,7 @@ Grouping with multiple levels is supported. ["doo", "doo", "bee", "bee", "bop", "bop", "bop", "bop"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] - index = pd.MultiIndex.from_arrays(tuples, names=["first", "second", "third"]) + index = pd.MultiIndex.from_arrays(arrays, names=["first", "second", "third"]) s = pd.Series(np.random.randn(8), index=index) s s.groupby(level=["first", "second"]).sum() From eb50dda3da3dd50c8ff9cbe240f2fc0507e18702 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:15:15 -0700 Subject: [PATCH 3/3] Formatting --- doc/source/user_guide/basics.rst | 1 + doc/source/user_guide/indexing.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 7d60d763a89df..06e52d8713409 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -1158,6 +1158,7 @@ and analogously :meth:`~Series.map` on Series accept any Python function taking a single value and returning a single value. For example: .. ipython:: python + df4 = df.copy() df4 diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 318f353b66dbf..e785376ab10a4 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1427,6 +1427,7 @@ This plot was created using a ``DataFrame`` with 3 columns each containing floating point values generated using ``numpy.random.randn()``. .. ipython:: python + df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) df2 = df.copy()