Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Compile time #154

Merged
merged 42 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
89564c3
Add Numba CacheLocator for string-generated functions.
Jan 15, 2025
2514414
Add cache flag for njit and overloads.
Jan 15, 2025
48e0c39
Add caching options to overloads, njits, and convert string-generated…
Jan 15, 2025
f569f6f
[run CI]
Jan 15, 2025
414942c
Remove a print statement; I don't know how it got into the commit.
Jan 16, 2025
b67be9b
Hash whole function inside bodo_exec. Append hash to function name. …
Jan 16, 2025
c90bcd1
[run CI]
Jan 16, 2025
d936c8a
More conversion of exec to bodo_exec. Additional overload_method cac…
Jan 16, 2025
7dbec7d
[run CI]
Jan 16, 2025
3c4dce6
Remove from problematic caching.
Jan 16, 2025
9372a80
[run CI]
Jan 16, 2025
ebc5911
Rename impl.
Jan 16, 2025
60428eb
Convert exec to bodo_exec.
Jan 16, 2025
1e9e883
[run CI]
Jan 16, 2025
d7f94fc
Convert exec to bodo_exec.
Jan 17, 2025
9a31cc2
Rename impl to bodo_* because those functions go through _gen_init_df…
Jan 17, 2025
0efc535
[run CI]
Jan 17, 2025
0702e27
Fix bug in bodo_exec for nested functions. re-enable series_clip cac…
Jan 17, 2025
1ed0a11
More conversion of exec to bodo_exec. Additional overload_method cac…
Jan 16, 2025
cccc4ba
[run CI]
Jan 16, 2025
06aac30
Remove from problematic caching.
Jan 16, 2025
9fa09c2
[run CI]
Jan 16, 2025
21de8bd
[run CI]
Jan 16, 2025
e4bba46
[run CI]
Jan 17, 2025
af52be8
[run CI]
Jan 17, 2025
dd446cb
Add additional variables that control code generation for pq reader t…
Jan 17, 2025
98793ec
[run CI]
Jan 17, 2025
ef9c648
Fix bodo_exec call in pq reader.
Jan 17, 2025
fc7d0e1
[run CI]
Jan 17, 2025
0edcf0b
Update bodo/utils/utils.py
DrTodd13 Jan 21, 2025
9eca061
PR fixes.
Jan 21, 2025
b8b8483
[run CI]
Jan 21, 2025
3d21f67
Turn off caching for dataframe_astype due to weird pickling error abo…
Jan 21, 2025
193d9bf
[run CI]
Jan 21, 2025
2265f9c
Remove all __pycache__ from subdirs of bodo. After first test run, m…
Jan 22, 2025
b1ce6dd
Use bodo_exec for string-generated func. Rename some internal funcs …
Jan 22, 2025
ba82390
Add a function to test in a subprocess two times right after all the …
Jan 23, 2025
c80a50e
[run CI]
Jan 23, 2025
d68808f
Remove debugging code.
Jan 23, 2025
d1a9c96
Add comment for why we have a test in this file now.
Jan 23, 2025
12a3ff3
Add ^ to regex to make sure def that we replace is top-level function…
Jan 23, 2025
8f57c09
[run CI]
Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
281 changes: 207 additions & 74 deletions bodo/hiframes/dataframe_impl.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions bodo/hiframes/datetime_date_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ def fromordinal_impl(n): # pragma: no cover


# TODO: support general string formatting
@numba.njit
@numba.njit(cache=True)
def str_2d(a): # pragma: no cover
"""Takes in a number representing an date/time unit and formats it as a
2 character string, adding a leading zero if necessary."""
Expand Down Expand Up @@ -865,7 +865,7 @@ def lower_constant_datetime_date_arr(context, builder, typ, pyval):
return lir.Constant.literal_struct([data_const_arr, nulls_const_arr])


@numba.njit(no_cpython_wrapper=True)
@numba.njit(cache=True, no_cpython_wrapper=True)
def alloc_datetime_date_array(n): # pragma: no cover
data_arr = np.empty(n, dtype=np.int32)
# XXX: set all bits to not null since datetime.date array operations do not support
Expand Down
24 changes: 11 additions & 13 deletions bodo/hiframes/generic_pandas_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,15 @@ def generate_simple_series_impl(

# Create the function definition line
if arg_defaults is None:
func_text = "def impl(" + ", ".join(arg_names) + "):\n"
func_text = "def bodo_generate_simple_series(" + ", ".join(arg_names) + "):\n"
else:
arg_def_strings = [
name if name not in arg_defaults else f"{name}={arg_defaults.get(name)}"
for name in arg_names
]
func_text = "def impl(" + ", ".join(arg_def_strings) + "):\n"
func_text = (
"def bodo_generate_simple_series(" + ", ".join(arg_def_strings) + "):\n"
)

# Extract the underlying array of the series as a variable called "data"
if isinstance(series_arg, bodo.hiframes.pd_series_ext.SeriesType):
Expand Down Expand Up @@ -178,8 +180,7 @@ def generate_simple_series_impl(
raise_bodo_error(
f"generate_simple_series_impl: unsupported output type {out_type}"
)
loc_vars = {}
exec(
return bodo.utils.utils.bodo_exec(
func_text,
{
"bodo": bodo,
Expand All @@ -188,10 +189,9 @@ def generate_simple_series_impl(
"np": np,
"out_dtype": out_arr_type,
},
loc_vars,
{},
globals(),
)
impl = loc_vars["impl"]
return impl


def generate_series_to_df_impl(
Expand Down Expand Up @@ -252,7 +252,7 @@ def generate_series_to_df_impl(
name if default is None else f"{name}={default}"
for name, default in zip(arg_names, arg_defaults)
]
func_text = "def impl(" + ", ".join(arg_strings) + "):\n"
func_text = "def bodo_generate_series_to_df(" + ", ".join(arg_strings) + "):\n"

# Extract the underlying array of the series as a variable called "data"
if isinstance(series_arg, bodo.hiframes.pd_series_ext.SeriesType):
Expand Down Expand Up @@ -357,11 +357,9 @@ def generate_series_to_df_impl(
for i in range(n_out):
glbls[f"out_dtype{i}"] = out_types[i]

loc_vars = {}
exec(
return bodo.utils.utils.bodo_exec(
func_text,
glbls,
loc_vars,
{},
globals(),
)
impl = loc_vars["impl"]
return impl
48 changes: 27 additions & 21 deletions bodo/hiframes/pd_dataframe_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,10 @@
to_str_arr_if_dict_array,
unwrap_typeref,
)
from bodo.utils.utils import is_null_pointer
from bodo.utils.utils import (
bodo_exec,
is_null_pointer,
)

_json_write = types.ExternalFunction(
"json_write",
Expand Down Expand Up @@ -964,7 +967,7 @@

# workaround to support row["A"] case in df.apply()
# implements getitem for namedtuples if generated by Bodo
@overload(operator.getitem, no_unliteral=True)
@overload(operator.getitem, no_unliteral=True, jit_options={"cache": True})
def namedtuple_getitem_overload(tup, idx):
if isinstance(tup, types.BaseNamedTuple) and is_overload_constant_str(idx):
field_idx = get_overload_const_str(idx)
Expand Down Expand Up @@ -1193,7 +1196,7 @@
pass


@overload(pushdown_safe_init_df, inline="never")
@overload(pushdown_safe_init_df, inline="never", jit_options={"cache": True})
def overload_pushdown_safe_init_df(table, colNames):
"""
A wrapper for init_dataframe to coerce a table to a DataFrame while preventing filter pushdown
Expand All @@ -1208,11 +1211,11 @@
[DataFrame] the data from the table wrapped in a DataFrame.
"""

def impl(table, colNames):
def bodo_pushdown_safe_init_df(table, colNames):

Check warning on line 1214 in bodo/hiframes/pd_dataframe_ext.py

View check run for this annotation

Codecov / codecov/patch

bodo/hiframes/pd_dataframe_ext.py#L1214

Added line #L1214 was not covered by tests
index = bodo.hiframes.pd_index_ext.init_range_index(0, len(table), 1, None)
return bodo.hiframes.pd_dataframe_ext.init_dataframe((table,), index, colNames)

return impl
return bodo_pushdown_safe_init_df

Check warning on line 1218 in bodo/hiframes/pd_dataframe_ext.py

View check run for this annotation

Codecov / codecov/patch

bodo/hiframes/pd_dataframe_ext.py#L1218

Added line #L1218 was not covered by tests


@intrinsic
Expand Down Expand Up @@ -2267,7 +2270,7 @@
return data_tup


@overload(pd.DataFrame, inline="always", no_unliteral=True)
@overload(pd.DataFrame, inline="always", no_unliteral=True, jit_options={"cache": True})
def pd_dataframe_overload(data=None, index=None, columns=None, dtype=None, copy=False):
# TODO: support other input combinations
# TODO: error checking
Expand All @@ -2279,18 +2282,14 @@
col_args, data_args, index_arg = _get_df_args(data, index, columns, dtype, copy)
col_var = ColNamesMetaType(tuple(col_args))

func_text = (
"def _init_df(data=None, index=None, columns=None, dtype=None, copy=False):\n"
)
func_text = "def bodo_init_df(data=None, index=None, columns=None, dtype=None, copy=False):\n"
func_text += f" return bodo.hiframes.pd_dataframe_ext.init_dataframe({data_args}, {index_arg}, __col_name_meta_value_pd_overload)\n"
loc_vars = {}
exec(
return bodo_exec(
func_text,
{"bodo": bodo, "np": np, "__col_name_meta_value_pd_overload": col_var},
loc_vars,
{},
globals(),
)
_init_df = loc_vars["_init_df"]
return _init_df


@intrinsic
Expand Down Expand Up @@ -3136,7 +3135,13 @@

# TODO: jitoptions for overload_method and infer_global
# (no_cpython_wrapper to avoid error for iterator object)
@overload_method(DataFrameType, "itertuples", inline="always", no_unliteral=True)
@overload_method(
DataFrameType,
"itertuples",
inline="always",
no_unliteral=True,
jit_options={"cache": True},
)
def itertuples_overload(df, index=True, name="Pandas"):
check_runtime_cols_unsupported(df, "DataFrame.itertuples()")
unsupported_args = {"index": index, "name": name}
Expand Down Expand Up @@ -3761,7 +3766,9 @@
return pandas_metadata


@overload_method(DataFrameType, "to_parquet", no_unliteral=True)
@overload_method(
DataFrameType, "to_parquet", no_unliteral=True, jit_options={"cache": True}
)
def to_parquet_overload(
df,
path,
Expand Down Expand Up @@ -3945,7 +3952,7 @@
# wrap the name with quotation mark to indicate it is a string
pandas_metadata_str = pandas_metadata_str.replace('"%s"', "%s")

func_text = "def df_to_parquet(df, path, engine='auto', compression='snappy', index=None, partition_cols=None, storage_options=None, row_group_size=-1, _bodo_file_prefix='part-', _bodo_timestamp_tz=None, _is_parallel=False):\n"
func_text = "def bodo_df_to_parquet(df, path, engine='auto', compression='snappy', index=None, partition_cols=None, storage_options=None, row_group_size=-1, _bodo_file_prefix='part-', _bodo_timestamp_tz=None, _is_parallel=False):\n"

# Why we are calling drop_duplicates_local_dictionary on all dict encoded arrays?
# Arrow doesn't support writing DictionaryArrays with nulls in the dictionary.
Expand Down Expand Up @@ -4136,13 +4143,12 @@
"decode_if_dict_table": decode_if_dict_table,
}
glbls.update(extra_globals)
exec(
return bodo_exec(
func_text,
glbls,
loc_vars,
globals(),
)
df_to_parquet = loc_vars["df_to_parquet"]
return df_to_parquet


# -------------------------------------- to_sql ------------------------------------------
Expand Down Expand Up @@ -4260,7 +4266,7 @@
ev.finalize()


@numba.njit
@numba.njit(cache=True)
def to_sql_exception_guard_encaps(
df,
name,
Expand Down
Loading
Loading