diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c4ed0f91b..33137562e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
       - id: check-added-large-files
   # General formatting
   - repo: https://github.com/psf/black
-    rev: 23.9.1
+    rev: 23.12.1
     hooks:
       - id: black
       - id: black-jupyter
@@ -20,12 +20,12 @@ repos:
       - id: nbstripout
   # Unused imports
   - repo: https://github.com/hadialqattan/pycln
-    rev: "v2.3.0"
+    rev: "v2.4.0"
     hooks:
       - id: pycln
   # Sorted imports
   - repo: https://github.com/PyCQA/isort
-    rev: "5.12.0"
+    rev: "5.13.2"
     hooks:
       - id: isort
         additional_dependencies: [toml]
diff --git a/README.md b/README.md
index 417fc605c..c487ba357 100644
--- a/README.md
+++ b/README.md
@@ -286,7 +286,7 @@ You may load the model from the `pkl` file with:
 model = PySRRegressor.from_file("hall_of_fame.2022-08-10_100832.281.pkl")
 ```
 
-There are several other useful features such as denoising (e.g., `denoising=True`),
+There are several other useful features such as denoising (e.g., `denoise=True`) and
 feature selection (e.g., `select_k_features=3`).
 For examples of these and other features, see the [examples page](https://astroautomata.com/PySR/examples).
 For a detailed look at more options, see the [options page](https://astroautomata.com/PySR/options).
diff --git a/docs/operators.md b/docs/operators.md
index 1777d28d6..a469d21bb 100644
--- a/docs/operators.md
+++ b/docs/operators.md
@@ -2,57 +2,77 @@
 
 ## Pre-defined
 
-All Base julia operators that take 1 or 2 scalars as input,
-and output a scalar as output, are available. A selection
-of these and other valid operators are stated below.
+First, note that pretty much any valid Julia function which
+takes one or two scalars as input, and returns one scalar as output,
+is likely to be a valid operator[^1].
+A selection of these and other valid operators is listed below.
 
 **Binary**
 
-`+`, `-`, `*`, `/`, `^`, `greater`, `mod`, `logical_or`,
-`logical_and`
+- `+`
+- `-`
+- `*`
+- `/`
+- `^`
+- `max`
+- `min`
+- `mod`
+- `cond`
+    - Equal to `(x, y) -> x > 0 ? y : 0`
+- `greater`
+    - Equal to `(x, y) -> x > y ? 1 : 0`
+- `logical_or`
+    - Equal to `(x, y) -> (x > 0 || y > 0) ? 1 : 0`
+- `logical_and`
+    - Equal to `(x, y) -> (x > 0 && y > 0) ? 1 : 0`
 
 **Unary**
 
-`neg`,
-`square`,
-`cube`,
-`exp`,
-`abs`,
-`log`,
-`log10`,
-`log2`,
-`log1p`,
-`sqrt`,
-`sin`,
-`cos`,
-`tan`,
-`sinh`,
-`cosh`,
-`tanh`,
-`atan`,
-`asinh`,
-`acosh`,
-`atanh_clip` (=atanh((x+1)%2 - 1)),
-`erf`,
-`erfc`,
-`gamma`,
-`relu`,
-`round`,
-`floor`,
-`ceil`,
-`round`,
-`sign`.
+- `neg`
+- `square`
+- `cube`
+- `exp`
+- `abs`
+- `log`
+- `log10`
+- `log2`
+- `log1p`
+- `sqrt`
+- `sin`
+- `cos`
+- `tan`
+- `sinh`
+- `cosh`
+- `tanh`
+- `atan`
+- `asinh`
+- `acosh`
+- `atanh_clip`
+    - Equal to `atanh(mod(x + 1, 2) - 1)`
+- `erf`
+- `erfc`
+- `gamma`
+- `relu`
+- `round`
+- `floor`
+- `ceil`
+- `round`
+- `sign`
 
 ## Custom
 
 Instead of passing a predefined operator as a string,
-you can define with by passing it to the `pysr` function, with, e.g.,
+you can just define a custom function as Julia code. For example:
 
 ```python
     PySRRegressor(
         ...,
         unary_operators=["myfunction(x) = x^2"],
-        binary_operators=["myotherfunction(x, y) = x^2*y"]
+        binary_operators=["myotherfunction(x, y) = x^2*y"],
+        extra_sympy_mappings={
+            "myfunction": lambda x: x**2,
+            "myotherfunction": lambda x, y: x**2 * y,
+        },
     )
 ```
 
@@ -62,7 +82,7 @@ Make sure that it works with
 instead of `1.5e3`, if you write any constant numbers, or simply convert a result to `Float64(...)`.
 
 PySR expects that operators not throw an error for any input value over the entire real line from `-3.4e38` to `+3.4e38`.
-Thus, for "invalid" inputs, such as negative numbers to a `sqrt` function, you may simply return a `NaN` of the same type as the input. For example,
+Thus, for invalid inputs, such as negative numbers to a `sqrt` function, you may simply return a `NaN` of the same type as the input. For example,
 
 ```julia
 my_sqrt(x) = x >= 0 ? sqrt(x) : convert(typeof(x), NaN)
@@ -71,3 +91,9 @@ my_sqrt(x) = x >= 0 ? sqrt(x) : convert(typeof(x), NaN)
 would be a valid operator. The genetic algorithm
 will preferentially selection expressions which avoid
 any invalid values over the training dataset.
+
+
+<!-- Footnote for 1: -->
+<!-- (Will say "However, you will need to define a sympy equivalent in `extra_sympy_mappings`":) -->
+
+[^1]: However, you will need to define a sympy equivalent in `extra_sympy_mappings` if you want to use a function not in the above list.
diff --git a/pysr/export_sympy.py b/pysr/export_sympy.py
index 81142f481..f99a54a00 100644
--- a/pysr/export_sympy.py
+++ b/pysr/export_sympy.py
@@ -47,6 +47,13 @@
     "ceil": sympy.ceiling,
     "sign": sympy.sign,
     "gamma": sympy.gamma,
+    "round": lambda x: sympy.ceiling(x - 0.5),
+    "max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)),
+    "min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)),
+    "cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)),
+    "logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)),
+    "logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)),
+    "relu": lambda x: sympy.Piecewise((0.0, x < 0), (x, True)),
 }
 
 
diff --git a/pysr/julia_helpers.py b/pysr/julia_helpers.py
index 7918c5e6d..e2f76090c 100644
--- a/pysr/julia_helpers.py
+++ b/pysr/julia_helpers.py
@@ -94,7 +94,15 @@ def install(julia_project=None, quiet=False, precompile=None):  # pragma: no cov
             ],
         )
         # Try installing again:
-        julia.install(quiet=quiet)
+        try:
+            julia.install(quiet=quiet)
+        except julia.tools.PyCallInstallError:
+            warnings.warn(
+                "PyCall.jl failed to install on second attempt. "
+                + "Please consult the GitHub issue "
+                + "https://github.com/MilesCranmer/PySR/issues/257 "
+                + "for advice on fixing this."
+            )
 
     Main, init_log = init_julia(julia_project, quiet=quiet, return_aux=True)
     io_arg = _get_io_arg(quiet)
diff --git a/pysr/param_groupings.yml b/pysr/param_groupings.yml
index 08d6fa5d9..30e9d117d 100644
--- a/pysr/param_groupings.yml
+++ b/pysr/param_groupings.yml
@@ -67,6 +67,7 @@
   - procs
   - multithreading
   - cluster_manager
+  - heap_size_hint_in_bytes
   - batching
   - batch_size
   - precision
diff --git a/pysr/sr.py b/pysr/sr.py
index d824b52c2..77679f8f6 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -11,7 +11,7 @@
 from io import StringIO
 from multiprocessing import cpu_count
 from pathlib import Path
-from typing import List, Optional
+from typing import Callable, Dict, List, Literal, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -171,7 +171,7 @@ def _check_assertions(
 
 
 # Class validation constants
-VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"]
+VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
 
 
 class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
@@ -455,6 +455,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         "htc". If set to one of these, PySR will run in distributed
         mode, and use `procs` to figure out how many processes to launch.
         Default is `None`.
+    heap_size_hint_in_bytes : int
+        For multiprocessing, this sets the `--heap-size-hint` parameter
+        for new Julia processes. This can be configured when using
+        multi-node distributed compute, to give a hint to each process
+        about how much memory they can use before aggressive garbage
+        collection. Default is `None`.
     batching : bool
         Whether to compare population members on small batches during
         evolution. Still uses full dataset for comparing against hall
@@ -653,89 +659,92 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
     def __init__(
         self,
-        model_selection="best",
+        model_selection: Literal["best", "accuracy", "score"] = "best",
         *,
-        binary_operators=None,
-        unary_operators=None,
-        niterations=40,
-        populations=15,
-        population_size=33,
-        max_evals=None,
-        maxsize=20,
-        maxdepth=None,
-        warmup_maxsize_by=0.0,
-        timeout_in_seconds=None,
-        constraints=None,
-        nested_constraints=None,
-        loss=None,
-        full_objective=None,
-        complexity_of_operators=None,
-        complexity_of_constants=1,
-        complexity_of_variables=1,
-        parsimony=0.0032,
-        dimensional_constraint_penalty=None,
-        use_frequency=True,
-        use_frequency_in_tournament=True,
-        adaptive_parsimony_scaling=20.0,
-        alpha=0.1,
-        annealing=False,
-        early_stop_condition=None,
-        ncyclesperiteration=550,
-        fraction_replaced=0.000364,
-        fraction_replaced_hof=0.035,
-        weight_add_node=0.79,
-        weight_insert_node=5.1,
-        weight_delete_node=1.7,
-        weight_do_nothing=0.21,
-        weight_mutate_constant=0.048,
-        weight_mutate_operator=0.47,
-        weight_randomize=0.00023,
-        weight_simplify=0.0020,
-        weight_optimize=0.0,
-        crossover_probability=0.066,
-        skip_mutation_failures=True,
-        migration=True,
-        hof_migration=True,
-        topn=12,
-        should_simplify=None,
-        should_optimize_constants=True,
-        optimizer_algorithm="BFGS",
-        optimizer_nrestarts=2,
-        optimize_probability=0.14,
-        optimizer_iterations=8,
-        perturbation_factor=0.076,
-        tournament_selection_n=10,
-        tournament_selection_p=0.86,
-        procs=cpu_count(),
-        multithreading=None,
-        cluster_manager=None,
-        batching=False,
-        batch_size=50,
-        fast_cycle=False,
-        turbo=False,
-        precision=32,
-        enable_autodiff=False,
+        binary_operators: Optional[List[str]] = None,
+        unary_operators: Optional[List[str]] = None,
+        niterations: int = 40,
+        populations: int = 15,
+        population_size: int = 33,
+        max_evals: Optional[int] = None,
+        maxsize: int = 20,
+        maxdepth: Optional[int] = None,
+        warmup_maxsize_by: Optional[float] = None,
+        timeout_in_seconds: Optional[float] = None,
+        constraints: Optional[Dict[str, Union[int, Tuple[int, int]]]] = None,
+        nested_constraints: Optional[Dict[str, Dict[str, int]]] = None,
+        loss: Optional[str] = None,
+        full_objective: Optional[str] = None,
+        complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
+        complexity_of_constants: Union[int, float] = 1,
+        complexity_of_variables: Union[int, float] = 1,
+        parsimony: float = 0.0032,
+        dimensional_constraint_penalty: Optional[float] = None,
+        use_frequency: bool = True,
+        use_frequency_in_tournament: bool = True,
+        adaptive_parsimony_scaling: float = 20.0,
+        alpha: float = 0.1,
+        annealing: bool = False,
+        early_stop_condition: Optional[Union[float, str]] = None,
+        ncyclesperiteration: int = 550,
+        fraction_replaced: float = 0.000364,
+        fraction_replaced_hof: float = 0.035,
+        weight_add_node: float = 0.79,
+        weight_insert_node: float = 5.1,
+        weight_delete_node: float = 1.7,
+        weight_do_nothing: float = 0.21,
+        weight_mutate_constant: float = 0.048,
+        weight_mutate_operator: float = 0.47,
+        weight_randomize: float = 0.00023,
+        weight_simplify: float = 0.0020,
+        weight_optimize: float = 0.0,
+        crossover_probability: float = 0.066,
+        skip_mutation_failures: bool = True,
+        migration: bool = True,
+        hof_migration: bool = True,
+        topn: int = 12,
+        should_simplify: Optional[bool] = None,
+        should_optimize_constants: bool = True,
+        optimizer_algorithm: Literal["BFGS", "NelderMead"] = "BFGS",
+        optimizer_nrestarts: int = 2,
+        optimize_probability: float = 0.14,
+        optimizer_iterations: int = 8,
+        perturbation_factor: float = 0.076,
+        tournament_selection_n: int = 10,
+        tournament_selection_p: float = 0.86,
+        procs: int = cpu_count(),
+        multithreading: Optional[bool] = None,
+        cluster_manager: Optional[
+            Literal["slurm", "pbs", "lsf", "sge", "qrsh", "scyld", "htc"]
+        ] = None,
+        heap_size_hint_in_bytes: Optional[int] = None,
+        batching: bool = False,
+        batch_size: int = 50,
+        fast_cycle: bool = False,
+        turbo: bool = False,
+        precision: int = 32,
+        enable_autodiff: bool = False,
         random_state=None,
-        deterministic=False,
-        warm_start=False,
-        verbosity=1,
-        update_verbosity=None,
-        print_precision=5,
-        progress=True,
-        equation_file=None,
-        temp_equation_file=False,
-        tempdir=None,
-        delete_tempfiles=True,
-        julia_project=None,
-        update=False,
-        output_jax_format=False,
-        output_torch_format=False,
-        extra_sympy_mappings=None,
-        extra_torch_mappings=None,
-        extra_jax_mappings=None,
-        denoise=False,
-        select_k_features=None,
-        julia_kwargs=None,
+        deterministic: bool = False,
+        warm_start: bool = False,
+        verbosity: int = 1,
+        update_verbosity: Optional[int] = None,
+        print_precision: int = 5,
+        progress: bool = True,
+        equation_file: Optional[str] = None,
+        temp_equation_file: bool = False,
+        tempdir: Optional[str] = None,
+        delete_tempfiles: bool = True,
+        julia_project: Optional[str] = None,
+        update: bool = False,
+        output_jax_format: bool = False,
+        output_torch_format: bool = False,
+        extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
+        extra_torch_mappings: Optional[Dict[Callable, Callable]] = None,
+        extra_jax_mappings: Optional[Dict[Callable, str]] = None,
+        denoise: bool = False,
+        select_k_features: Optional[int] = None,
+        julia_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
         # Hyperparameters
@@ -800,10 +809,11 @@ def __init__(
         # -- Selection parameters
         self.tournament_selection_n = tournament_selection_n
         self.tournament_selection_p = tournament_selection_p
-        # Solver parameters
+        # -- Performance parameters
         self.procs = procs
         self.multithreading = multithreading
         self.cluster_manager = cluster_manager
+        self.heap_size_hint_in_bytes = heap_size_hint_in_bytes
         self.batching = batching
         self.batch_size = batch_size
         self.fast_cycle = fast_cycle
@@ -1637,7 +1647,9 @@ def _run(self, X, y, mutated_params, weights, seed):
             fraction_replaced_hof=self.fraction_replaced_hof,
             should_simplify=self.should_simplify,
             should_optimize_constants=self.should_optimize_constants,
-            warmup_maxsize_by=self.warmup_maxsize_by,
+            warmup_maxsize_by=0.0
+            if self.warmup_maxsize_by is None
+            else self.warmup_maxsize_by,
             use_frequency=self.use_frequency,
             use_frequency_in_tournament=self.use_frequency_in_tournament,
             adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
@@ -1720,6 +1732,7 @@ def _run(self, X, y, mutated_params, weights, seed):
             saved_state=self.raw_julia_state_,
             return_state=True,
             addprocs_function=cluster_manager,
+            heap_size_hint_in_bytes=self.heap_size_hint_in_bytes,
             progress=progress and self.verbosity > 0 and len(y.shape) == 1,
             verbosity=int(self.verbosity),
         )
diff --git a/pysr/version.py b/pysr/version.py
index 4260786a0..82a6f4dcb 100644
--- a/pysr/version.py
+++ b/pysr/version.py
@@ -1,2 +1,2 @@
-__version__ = "0.16.3"
-__symbolic_regression_jl_version__ = "0.22.4"
+__version__ = "0.16.6"
+__symbolic_regression_jl_version__ = "0.23.0"
diff --git a/setup.py b/setup.py
index d9f026f46..636c17ac9 100644
--- a/setup.py
+++ b/setup.py
@@ -26,5 +26,5 @@
         "Programming Language :: Python :: 3",
         "Operating System :: OS Independent",
     ],
-    python_requires=">=3.7",
+    python_requires=">=3.8",
 )