diff --git a/doc/source/_toc.yml b/doc/source/_toc.yml index 0be88ccda5d3..066a552b5f0e 100644 --- a/doc/source/_toc.yml +++ b/doc/source/_toc.yml @@ -33,7 +33,7 @@ parts: - file: ray-core/examples/batch_training - file: ray-core/examples/automl_for_time_series - file: ray-core/examples/web-crawler - - file: ray-core/api + - file: ray-core/api/index - file: cluster/getting-started title: "Ray Clusters" diff --git a/doc/source/cluster/running-applications/job-submission/jobs-package-ref.rst b/doc/source/cluster/running-applications/job-submission/jobs-package-ref.rst index 3e6afaec76ec..fa700c94bb0c 100644 --- a/doc/source/cluster/running-applications/job-submission/jobs-package-ref.rst +++ b/doc/source/cluster/running-applications/job-submission/jobs-package-ref.rst @@ -8,7 +8,7 @@ Python SDK API Reference For an overview with examples see :ref:`Ray Jobs `. For the CLI reference see :ref:`Ray Job Submission CLI Reference `. - + .. _job-submission-client-ref: JobSubmissionClient diff --git a/doc/source/ray-core/actors.rst b/doc/source/ray-core/actors.rst index 0b6d4c1c5f0c..08509634958e 100644 --- a/doc/source/ray-core/actors.rst +++ b/doc/source/ray-core/actors.rst @@ -337,7 +337,7 @@ By default, Ray actors won't be :ref:`restarted ` and actor tasks won't be retried when actors crash unexpectedly. You can change this behavior by setting ``max_restarts`` and ``max_task_retries`` options -in :ref:`ray.remote() ` and :ref:`.options() `. +in :func:`ray.remote() ` and :meth:`.options() `. See :ref:`Ray fault tolerance ` for more details. FAQ: Actors, Workers and Resources diff --git a/doc/source/ray-core/actors/actor-utils.rst b/doc/source/ray-core/actors/actor-utils.rst index 01e35107d9dc..6e5b3da20d4d 100644 --- a/doc/source/ray-core/actors/actor-utils.rst +++ b/doc/source/ray-core/actors/actor-utils.rst @@ -11,7 +11,7 @@ Actor Pool .. literalinclude:: ../doc_code/actor-pool.py - See the :ref:`package reference ` for more information. 
+ See the :class:`package reference ` for more information. .. tabbed:: Java @@ -25,7 +25,7 @@ Message passing using Ray Queue ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Sometimes just using one signal to synchronize is not enough. If you need to send data among many tasks or -actors, you can use :ref:`ray.util.queue.Queue `. +actors, you can use :class:`ray.util.queue.Queue `. .. literalinclude:: ../doc_code/actor-queue.py diff --git a/doc/source/ray-core/api.rst b/doc/source/ray-core/api.rst deleted file mode 100644 index 0d86189932f9..000000000000 --- a/doc/source/ray-core/api.rst +++ /dev/null @@ -1,11 +0,0 @@ -API References -============== - -.. toctree:: - :maxdepth: 2 - :caption: Ray Core API References - - package-ref.rst - cli.rst - ../ray-observability/state/cli.rst - ../ray-observability/state/ray-state-api-reference.rst diff --git a/doc/source/ray-core/cli.rst b/doc/source/ray-core/api/cli.rst similarity index 100% rename from doc/source/ray-core/cli.rst rename to doc/source/ray-core/api/cli.rst diff --git a/doc/source/ray-core/api/core.rst b/doc/source/ray-core/api/core.rst new file mode 100644 index 000000000000..c78e9afc418c --- /dev/null +++ b/doc/source/ray-core/api/core.rst @@ -0,0 +1,60 @@ +Core API +======== + +.. autosummary:: + :toctree: doc/ + + ray.init + ray.shutdown + ray.is_initialized + +Tasks +----- + +.. autosummary:: + :toctree: doc/ + + ray.remote + ray.remote_function.RemoteFunction.options + ray.cancel + +Actors +------ + +.. autosummary:: + :toctree: doc/ + + ray.remote + ray.actor.ActorClass.options + ray.method + ray.get_actor + ray.kill + +Objects +------- + +.. autosummary:: + :toctree: doc/ + + ray.get + ray.wait + ray.put + +.. _runtime-context-apis: + +Runtime Context +--------------- +.. autosummary:: + :toctree: doc/ + + ray.runtime_context.get_runtime_context + ray.runtime_context.RuntimeContext + ray.get_gpu_ids + +Cross Language +-------------- +.. 
autosummary:: + :toctree: doc/ + + ray.cross_language.java_function + ray.cross_language.java_actor_class diff --git a/doc/source/ray-core/api/exceptions.rst b/doc/source/ray-core/api/exceptions.rst new file mode 100644 index 000000000000..d041612573d4 --- /dev/null +++ b/doc/source/ray-core/api/exceptions.rst @@ -0,0 +1,32 @@ +.. _ray-core-exceptions: + +Exceptions +========== + +.. autosummary:: + :toctree: doc/ + + ray.exceptions.RayError + ray.exceptions.RayTaskError + ray.exceptions.RayActorError + ray.exceptions.TaskCancelledError + ray.exceptions.TaskUnschedulableError + ray.exceptions.ActorUnschedulableError + ray.exceptions.AsyncioActorExit + ray.exceptions.LocalRayletDiedError + ray.exceptions.WorkerCrashedError + ray.exceptions.TaskPlacementGroupRemoved + ray.exceptions.ActorPlacementGroupRemoved + ray.exceptions.ObjectStoreFullError + ray.exceptions.OutOfDiskError + ray.exceptions.ObjectLostError + ray.exceptions.ObjectFetchTimedOutError + ray.exceptions.GetTimeoutError + ray.exceptions.OwnerDiedError + ray.exceptions.PlasmaObjectNotAvailable + ray.exceptions.ObjectReconstructionFailedError + ray.exceptions.ObjectReconstructionFailedMaxAttemptsExceededError + ray.exceptions.ObjectReconstructionFailedLineageEvictedError + ray.exceptions.RuntimeEnvSetupError + ray.exceptions.CrossLanguageError + ray.exceptions.RaySystemError diff --git a/doc/source/ray-core/api/index.rst b/doc/source/ray-core/api/index.rst new file mode 100644 index 000000000000..eb5cdd9d0ef5 --- /dev/null +++ b/doc/source/ray-core/api/index.rst @@ -0,0 +1,14 @@ +Ray Core API +============ + +.. 
toctree:: + :maxdepth: 2 + + core.rst + scheduling.rst + runtime-env.rst + utility.rst + exceptions.rst + cli.rst + ../../ray-observability/api/state/cli.rst + ../../ray-observability/api/state/api.rst diff --git a/doc/source/ray-core/api/runtime-env.rst b/doc/source/ray-core/api/runtime-env.rst new file mode 100644 index 000000000000..607766a9535a --- /dev/null +++ b/doc/source/ray-core/api/runtime-env.rst @@ -0,0 +1,8 @@ +Runtime Env API +=============== + +.. autosummary:: + :toctree: doc/ + + ray.runtime_env.RuntimeEnvConfig + ray.runtime_env.RuntimeEnv diff --git a/doc/source/ray-core/api/scheduling.rst b/doc/source/ray-core/api/scheduling.rst new file mode 100644 index 000000000000..2971aae8e3c8 --- /dev/null +++ b/doc/source/ray-core/api/scheduling.rst @@ -0,0 +1,25 @@ +Scheduling API +============== + +Scheduling Strategy +------------------- + +.. autosummary:: + :toctree: doc/ + + ray.util.scheduling_strategies.PlacementGroupSchedulingStrategy + ray.util.scheduling_strategies.NodeAffinitySchedulingStrategy + +.. _ray-placement-group-ref: + +Placement Group +--------------- + +.. autosummary:: + :toctree: doc/ + + ray.util.placement_group.placement_group + ray.util.placement_group.PlacementGroup + ray.util.placement_group.placement_group_table + ray.util.placement_group.remove_placement_group + ray.util.placement_group.get_current_placement_group diff --git a/doc/source/ray-core/api/utility.rst b/doc/source/ray-core/api/utility.rst new file mode 100644 index 000000000000..36da67415d4f --- /dev/null +++ b/doc/source/ray-core/api/utility.rst @@ -0,0 +1,35 @@ +Utility +======= + +.. autosummary:: + :toctree: doc/ + + ray.util.ActorPool + ray.util.queue.Queue + ray.nodes + ray.cluster_resources + ray.available_resources + +.. _custom-metric-api-ref: + +Custom Metrics +-------------- + +.. autosummary:: + :toctree: doc/ + + ray.util.metrics.Counter + ray.util.metrics.Gauge + ray.util.metrics.Histogram + +.. 
_package-ref-debugging-apis: + +Debugging +--------- + +.. autosummary:: + :toctree: doc/ + + ray.util.pdb.set_trace + ray.util.inspect_serializability + ray.timeline diff --git a/doc/source/ray-core/configure.rst b/doc/source/ray-core/configure.rst index 23eafaf5c88d..6a2c5c78272d 100644 --- a/doc/source/ray-core/configure.rst +++ b/doc/source/ray-core/configure.rst @@ -57,12 +57,12 @@ If using the command line, connect to the Ray cluster as follow: .. _omp-num-thread-note: .. note:: - Ray sets the environment variable ``OMP_NUM_THREADS=`` if ``num_cpus`` is set on - the task/actor via :ref:`ray.remote() ` and :ref:`.options() `. - Ray sets ``OMP_NUM_THREADS=1`` if ``num_cpus`` is not specified; this + Ray sets the environment variable ``OMP_NUM_THREADS=`` if ``num_cpus`` is set on + the task/actor via :func:`ray.remote() ` and :meth:`task.options() `/:meth:`actor.options() `. + Ray sets ``OMP_NUM_THREADS=1`` if ``num_cpus`` is not specified; this is done to avoid performance degradation with many workers (issue #6998). You can - also override this by explicitly setting ``OMP_NUM_THREADS`` to override anything Ray sets by default. - ``OMP_NUM_THREADS`` is commonly used in numpy, PyTorch, and Tensorflow to perform multi-threaded + also override this by explicitly setting ``OMP_NUM_THREADS`` to override anything Ray sets by default. + ``OMP_NUM_THREADS`` is commonly used in numpy, PyTorch, and Tensorflow to perform multi-threaded linear algebra. In multi-worker setting, we want one thread per worker instead of many threads per worker to avoid contention. Some other libraries may have their own way to configure parallelism. 
For example, if you're using OpenCV, you should manually set the number of diff --git a/doc/source/ray-core/examples/monte_carlo_pi.rst b/doc/source/ray-core/examples/monte_carlo_pi.rst index faf1caa75139..8f3cd1fa23e5 100644 --- a/doc/source/ray-core/examples/monte_carlo_pi.rst +++ b/doc/source/ray-core/examples/monte_carlo_pi.rst @@ -19,7 +19,7 @@ To get started, install Ray via ``pip install -U ray``. See :ref:`Installing Ray Starting Ray ------------ -First, let's include all modules needed for this tutorial and start a local Ray cluster with :ref:`ray.init() `: +First, let's include all modules needed for this tutorial and start a local Ray cluster with :func:`ray.init() `: .. literalinclude:: ../doc_code/monte_carlo_pi.py :language: python @@ -41,7 +41,7 @@ Ray actors are essentially stateful services that anyone with an instance (a han :start-after: __defining_actor_start__ :end-before: __defining_actor_end__ -We define a Ray actor by decorating a normal Python class with :ref:`ray.remote `. +We define a Ray actor by decorating a normal Python class with :func:`ray.remote `. The progress actor has ``report_progress()`` method that will be called by sampling tasks to update their progress individually and ``get_progress()`` method to get the overall progress. @@ -55,7 +55,7 @@ Ray tasks are stateless functions. They execute asynchronously, and run in paral :start-after: __defining_task_start__ :end-before: __defining_task_end__ -To convert a normal Python function as a Ray task, we decorate the function with :ref:`ray.remote `. +To convert a normal Python function as a Ray task, we decorate the function with :func:`ray.remote `. The sampling task takes a progress actor handle as an input and reports progress to it. The above code shows an example of calling actor methods from tasks. @@ -97,7 +97,7 @@ While sampling tasks are running, we can periodically query the progress by call To call an actor method, use ``actor_handle.method.remote()``. 
This invocation immediately returns an ``ObjectRef`` as a future and then executes the method asynchronously on the remote actor process. -To fetch the actual returned value of ``ObjectRef``, we use the blocking :ref:`ray.get() `. +To fetch the actual returned value of ``ObjectRef``, we use the blocking :func:`ray.get() `. Calculating π ------------- @@ -108,7 +108,7 @@ Finally, we get number of samples inside the circle from the remote sampling tas :start-after: __calculating_pi_start__ :end-before: __calculating_pi_end__ -As we can see from the above code, besides a single ``ObjectRef``, :ref:`ray.get() ` can also take a list of ``ObjectRef`` and return a list of results. +As we can see from the above code, besides a single ``ObjectRef``, :func:`ray.get() ` can also take a list of ``ObjectRef`` and return a list of results. If you run this tutorial, you will see output like: diff --git a/doc/source/ray-core/fault_tolerance/actors.rst b/doc/source/ray-core/fault_tolerance/actors.rst index 9d01df1e9448..a254fa5fb562 100644 --- a/doc/source/ray-core/fault_tolerance/actors.rst +++ b/doc/source/ray-core/fault_tolerance/actors.rst @@ -24,7 +24,7 @@ After the specified number of restarts, subsequent actor methods will raise a ``RayActorError``. By default, actor tasks execute with at-most-once semantics -(``max_task_retries=0`` in the ``@ray.remote`` :ref:`decorator `). This means that if an +(``max_task_retries=0`` in the ``@ray.remote`` :func:`decorator `). This means that if an actor task is submitted to an actor that is unreachable, Ray will report the error with ``RayActorError``, a Python-level exception that is thrown when ``ray.get`` is called on the future returned by the task. 
Note that this diff --git a/doc/source/ray-core/fault_tolerance/tasks.rst b/doc/source/ray-core/fault_tolerance/tasks.rst index 69652b1852e8..f99d934e2463 100644 --- a/doc/source/ray-core/fault_tolerance/tasks.rst +++ b/doc/source/ray-core/fault_tolerance/tasks.rst @@ -71,7 +71,7 @@ If a task is hanging, you may want to cancel the task to continue to make progress. You can do this by calling ``ray.cancel`` on an ``ObjectRef`` returned by the task. By default, this will send a KeyboardInterrupt to the task's worker if it is mid-execution. Passing ``force=True`` to ``ray.cancel`` -will force-exit the worker. See :ref:`the API reference ` for +will force-exit the worker. See :func:`the API reference ` for ``ray.cancel`` for more details. Note that currently, Ray will not automatically retry tasks that have been diff --git a/doc/source/ray-core/objects.rst b/doc/source/ray-core/objects.rst index 7c7f0b69eb56..721924c12c2c 100644 --- a/doc/source/ray-core/objects.rst +++ b/doc/source/ray-core/objects.rst @@ -12,7 +12,7 @@ similar. Object refs can be created in two ways. 1. They are returned by remote function calls. - 2. They are returned by ``put`` (:ref:`docstring `). + 2. They are returned by :func:`ray.put() `. .. tabbed:: Python @@ -48,7 +48,7 @@ Object refs can be created in two ways. Fetching Object Data -------------------- -You can use the ``get`` method (:ref:`docstring `) to fetch the result of a remote object from an object ref. +You can use the :func:`ray.get() ` method to fetch the result of a remote object from an object ref. If the current node's object store does not contain the object, the object is downloaded. .. tabbed:: Python diff --git a/doc/source/ray-core/package-ref.rst b/doc/source/ray-core/package-ref.rst deleted file mode 100644 index 190d158b318d..000000000000 --- a/doc/source/ray-core/package-ref.rst +++ /dev/null @@ -1,298 +0,0 @@ -Ray Core API -============ - -Python API ----------- - -.. _ray-init-ref: - -ray.init -~~~~~~~~ - -.. 
autofunction:: ray.init - -.. _ray-is_initialized-ref: - -ray.is_initialized -~~~~~~~~~~~~~~~~~~ - -.. autofunction:: ray.is_initialized - -.. _ray-remote-ref: - -ray.remote -~~~~~~~~~~ - -.. autofunction:: ray.remote - -.. _ray-options-ref: - -.. autofunction:: ray.remote_function.RemoteFunction.options - -.. autofunction:: ray.actor.ActorClass.options - -.. _scheduling-strategy-ref: - -.. autofunction:: ray.util.scheduling_strategies.PlacementGroupSchedulingStrategy - -.. autofunction:: ray.util.scheduling_strategies.NodeAffinitySchedulingStrategy - -.. _ray-get-ref: - -ray.get -~~~~~~~ - -.. autofunction:: ray.get - -.. _ray-wait-ref: - -ray.wait -~~~~~~~~ - -.. autofunction:: ray.wait - -.. _ray-put-ref: - -ray.put -~~~~~~~ - -.. autofunction:: ray.put - -.. _ray-kill-ref: - -ray.kill -~~~~~~~~ - -.. autofunction:: ray.kill - -.. _ray-cancel-ref: - -ray.cancel -~~~~~~~~~~ - -.. autofunction:: ray.cancel - -.. _ray-get_actor-ref: - - -ray.get_actor -~~~~~~~~~~~~~~~ - -.. autofunction:: ray.get_actor - -.. _ray-get_gpu_ids-ref: - -ray.get_gpu_ids -~~~~~~~~~~~~~~~ - -.. autofunction:: ray.get_gpu_ids - -.. _ray-shutdown-ref: - -ray.shutdown -~~~~~~~~~~~~ - -.. autofunction:: ray.shutdown - -.. _ray-method-ref: - -ray.method -~~~~~~~~~~ - -.. autofunction:: ray.method - -.. _ray-actor-pool-ref: - -ray.util.ActorPool -~~~~~~~~~~~~~~~~~~ - -.. autoclass:: ray.util.ActorPool - :members: - -ray.util.queue.Queue -~~~~~~~~~~~~~~~~~~~~ - -.. _ray-queue-ref: - -.. autoclass:: ray.util.queue.Queue - :members: - -.. _ray-nodes-ref: - -ray.nodes -~~~~~~~~~ - -.. autofunction:: ray.nodes - -.. _ray-timeline-ref: - -ray.timeline -~~~~~~~~~~~~ - -.. autofunction:: ray.timeline - -.. _ray-cluster_resources-ref: - -ray.cluster_resources -~~~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: ray.cluster_resources - -.. _ray-available_resources-ref: - -ray.available_resources -~~~~~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: ray.available_resources - -ray.cross_language -~~~~~~~~~~~~~~~~~~ - -.. 
autofunction:: ray.cross_language.java_function - -.. autofunction:: ray.cross_language.java_actor_class - -.. _ray-placement-group-ref: - -Placement Group APIs --------------------- - -placement_group -~~~~~~~~~~~~~~~ - -.. autofunction:: ray.util.placement_group.placement_group - - -PlacementGroup (class) -~~~~~~~~~~~~~~~~~~~~~~ - -.. autoclass:: ray.util.placement_group.PlacementGroup - :members: - -placement_group_table -~~~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: ray.util.placement_group.placement_group_table - - -remove_placement_group -~~~~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: ray.util.placement_group.remove_placement_group - -get_current_placement_group -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: ray.util.placement_group.get_current_placement_group - -.. _custom-metric-api-ref: - -Custom Metrics APIs -------------------- - -Counter -~~~~~~~ - -.. autoclass:: ray.util.metrics.Counter - :members: - -Gauge -~~~~~ - -.. autoclass:: ray.util.metrics.Gauge - :members: - -Histogram -~~~~~~~~~ - -.. autoclass:: ray.util.metrics.Histogram - :members: - -.. _runtime-context-apis: - -Runtime Context APIs --------------------- - -.. autofunction:: ray.runtime_context.get_runtime_context - -.. autoclass:: ray.runtime_context.RuntimeContext - :members: - -.. _runtime-env-apis: - -Runtime Env APIs ----------------- - -.. autoclass:: ray.runtime_env.RuntimeEnvConfig - :members: - -.. autoclass:: ray.runtime_env.RuntimeEnv - :members: - -.. _package-ref-debugging-apis: - -Debugging APIs --------------- - -.. autofunction:: ray.util.pdb.set_trace - -.. autofunction:: ray.util.inspect_serializability - -.. _ray-core-exceptions: - -Exceptions ----------- - -.. autoclass:: ray.exceptions.RayError - :members: - -.. _ray-core-exceptions-ray-task-error: -.. autoclass:: ray.exceptions.RayTaskError - :members: -.. autoclass:: ray.exceptions.TaskCancelledError - :members: -.. autoclass:: ray.exceptions.GetTimeoutError - :members: -.. _ray-core-exceptions-ray-actor-error: -.. 
autoclass:: ray.exceptions.RayActorError - :members: -.. _ray-core-exceptions-runtime-env-setup-error: -.. autoclass:: ray.exceptions.RuntimeEnvSetupError - :members: -.. autoclass:: ray.exceptions.TaskUnschedulableError - :members: -.. autoclass:: ray.exceptions.ActorUnschedulableError - :members: -.. autoclass:: ray.exceptions.TaskPlacementGroupRemoved - :members: -.. autoclass:: ray.exceptions.ActorPlacementGroupRemoved - :members: -.. autoclass:: ray.exceptions.LocalRayletDiedError - :members: -.. autoclass:: ray.exceptions.WorkerCrashedError - :members: -.. autoclass:: ray.exceptions.RaySystemError - :members: -.. autoclass:: ray.exceptions.ObjectStoreFullError - :members: -.. autoclass:: ray.exceptions.OutOfDiskError - :members: -.. _ray-core-exceptions-object-lost-error: -.. autoclass:: ray.exceptions.ObjectLostError - :members: -.. autoclass:: ray.exceptions.ObjectFetchTimedOutError - :members: -.. autoclass:: ray.exceptions.OwnerDiedError - :members: -.. autoclass:: ray.exceptions.ObjectReconstructionFailedError - :members: -.. autoclass:: ray.exceptions.ObjectReconstructionFailedMaxAttemptsExceededError - :members: -.. autoclass:: ray.exceptions.ObjectReconstructionFailedLineageEvictedError - :members: -.. autoclass:: ray.exceptions.PlasmaObjectNotAvailable - :members: -.. autoclass:: ray.exceptions.AsyncioActorExit - :members: -.. autoclass:: ray.exceptions.CrossLanguageError - :members: diff --git a/doc/source/ray-core/patterns/closure-capture-large-objects.rst b/doc/source/ray-core/patterns/closure-capture-large-objects.rst index b5cace64173c..c51a24f7ecc7 100644 --- a/doc/source/ray-core/patterns/closure-capture-large-objects.rst +++ b/doc/source/ray-core/patterns/closure-capture-large-objects.rst @@ -3,13 +3,13 @@ Anti-pattern: Closure capturing large objects harms performance **TLDR:** Avoid closure capturing large objects in remote functions or classes, use object store instead. 
-When you define a :ref:`ray.remote <ray-remote-ref>` function or class, +When you define a :func:`ray.remote <ray.remote>` function or class, it is easy to accidentally capture large (more than a few MB) objects implicitly in the definition. This can lead to slow performance or even OOM since Ray is not designed to handle serialized functions or classes that are very large. For such large objects, there are two options to resolve this problem: -- Use :ref:`ray.put() <ray-put-ref>` to put the large objects in the Ray object store, and then pass object references as arguments to the remote functions or classes (*"better approach #1"* below) +- Use :func:`ray.put() <ray.put>` to put the large objects in the Ray object store, and then pass object references as arguments to the remote functions or classes (*"better approach #1"* below) - Create the large objects inside the remote functions or classes by passing a lambda method (*"better approach #2"*). This is also the only option for using unserializable objects. diff --git a/doc/source/ray-core/patterns/limit-pending-tasks.rst b/doc/source/ray-core/patterns/limit-pending-tasks.rst index fd5ccddbc42d..8a266990c4d4 100644 --- a/doc/source/ray-core/patterns/limit-pending-tasks.rst +++ b/doc/source/ray-core/patterns/limit-pending-tasks.rst @@ -3,7 +3,7 @@ Pattern: Using ray.wait to limit the number of pending tasks ============================================================ -In this pattern, we use :ref:`ray.wait() <ray-wait-ref>` to limit the number of pending tasks. +In this pattern, we use :func:`ray.wait() <ray.wait>` to limit the number of pending tasks. If we continuously submit tasks faster than their process time, we will accumulate tasks in the pending task queue, which can eventually cause OOM. With ``ray.wait()``, we can apply backpressure and limit the number of pending tasks so that the pending task queue won't grow indefinitely and cause OOM. 
diff --git a/doc/source/ray-core/patterns/nested-tasks.rst b/doc/source/ray-core/patterns/nested-tasks.rst index 170b8f63525d..070761ed7720 100644 --- a/doc/source/ray-core/patterns/nested-tasks.rst +++ b/doc/source/ray-core/patterns/nested-tasks.rst @@ -28,7 +28,7 @@ Code example :start-after: __pattern_start__ :end-before: __pattern_end__ -We call :ref:`ray.get() <ray-get-ref>` after both ``quick_sort_distributed`` function invocations take place. +We call :func:`ray.get() <ray.get>` after both ``quick_sort_distributed`` function invocations take place. This allows you to maximize parallelism in the workload. See :doc:`ray-get-loop` for more details. Notice in the execution times above that with smaller tasks, the non-distributed version is faster. However, as the task execution diff --git a/doc/source/ray-core/patterns/pass-large-arg-by-value.rst b/doc/source/ray-core/patterns/pass-large-arg-by-value.rst index 37a6816f2245..b95b981fdec9 100644 --- a/doc/source/ray-core/patterns/pass-large-arg-by-value.rst +++ b/doc/source/ray-core/patterns/pass-large-arg-by-value.rst @@ -3,7 +3,7 @@ Anti-pattern: Passing the same large argument by value repeatedly harms performance =================================================================================== -**TLDR:** Avoid passing the same large argument by value to multiple tasks, use :ref:`ray.put() <ray-put-ref>` and pass by reference instead. +**TLDR:** Avoid passing the same large argument by value to multiple tasks, use :func:`ray.put() <ray.put>` and pass by reference instead. When passing a large argument (>100KB) by value to a task, Ray will implicitly store the argument in the object store and the worker process will fetch the argument to the local object store from the caller's object store before running the task. 
diff --git a/doc/source/ray-core/patterns/pipelining.rst b/doc/source/ray-core/patterns/pipelining.rst index 42ad5e5320c2..cf9e63bdb2ce 100644 --- a/doc/source/ray-core/patterns/pipelining.rst +++ b/doc/source/ray-core/patterns/pipelining.rst @@ -23,5 +23,5 @@ Code example .. literalinclude:: ../doc_code/pattern_pipelining.py In the example above, a worker actor pulls work off of a queue and then does some computation on it. -Without pipelining, we call :ref:`ray.get() <ray-get-ref>` immediately after requesting a work item, so we block while that RPC is in flight, causing idle CPU time. +Without pipelining, we call :func:`ray.get() <ray.get>` immediately after requesting a work item, so we block while that RPC is in flight, causing idle CPU time. With pipelining, we instead preemptively request the next work item before processing the current one, so we can use the CPU while the RPC is in flight which increases the CPU utilization. diff --git a/doc/source/ray-core/patterns/ray-get-loop.rst b/doc/source/ray-core/patterns/ray-get-loop.rst index 5e75191a5e1f..4ecf9666d774 100644 --- a/doc/source/ray-core/patterns/ray-get-loop.rst +++ b/doc/source/ray-core/patterns/ray-get-loop.rst @@ -3,7 +3,7 @@ Anti-pattern: Calling ray.get in a loop harms parallelism ========================================================= -**TLDR:** Avoid calling :ref:`ray.get() <ray-get-ref>` in a loop since it's a blocking call; use ``ray.get()`` only for the final result. +**TLDR:** Avoid calling :func:`ray.get() <ray.get>` in a loop since it's a blocking call; use ``ray.get()`` only for the final result. A call to ``ray.get()`` fetches the results of remotely executed functions. However, it is a blocking call, which means that it always waits until the requested result is available. If you call ``ray.get()`` in a loop, the loop will not continue to run until the call to ``ray.get()`` is resolved. 
diff --git a/doc/source/ray-core/patterns/ray-get-submission-order.rst b/doc/source/ray-core/patterns/ray-get-submission-order.rst index 9a4d57b32537..f0141e2daae3 100644 --- a/doc/source/ray-core/patterns/ray-get-submission-order.rst +++ b/doc/source/ray-core/patterns/ray-get-submission-order.rst @@ -1,12 +1,12 @@ Anti-pattern: Processing results in submission order using ray.get increases runtime ==================================================================================== -**TLDR:** Avoid processing independent results in submission order using :ref:`ray.get() <ray-get-ref>` since results may be ready in a different order than the submission order. +**TLDR:** Avoid processing independent results in submission order using :func:`ray.get() <ray.get>` since results may be ready in a different order than the submission order. A batch of tasks is submitted, and we need to process their results individually once they’re done. If each task takes a different amount of time to finish and we process results in submission order, we may waste time waiting for all of the slower (straggler) tasks that were submitted earlier to finish while later faster tasks have already finished. -Instead, we want to process the tasks in the order that they finish using :ref:`ray.wait() <ray-wait-ref>` to speed up total time to completion. +Instead, we want to process the tasks in the order that they finish using :func:`ray.wait() <ray.wait>` to speed up total time to completion. .. 
figure:: ../images/ray-get-submission-order.svg diff --git a/doc/source/ray-core/patterns/ray-get-too-many-objects.rst b/doc/source/ray-core/patterns/ray-get-too-many-objects.rst index bd03de0962a3..d0f247eebc65 100644 --- a/doc/source/ray-core/patterns/ray-get-too-many-objects.rst +++ b/doc/source/ray-core/patterns/ray-get-too-many-objects.rst @@ -1,7 +1,7 @@ Anti-pattern: Fetching too many objects at once with ray.get causes failure =========================================================================== -**TLDR:** Avoid calling :ref:`ray.get() <ray-get-ref>` on too many objects since this will lead to heap out-of-memory or object store out-of-space. Instead fetch and process one batch at a time. +**TLDR:** Avoid calling :func:`ray.get() <ray.get>` on too many objects since this will lead to heap out-of-memory or object store out-of-space. Instead fetch and process one batch at a time. If you have a large number of tasks that you want to run in parallel, trying to do ``ray.get()`` on all of them at once could lead to failure with heap out-of-memory or object store out-of-space since Ray needs to fetch all the objects to the caller at the same time. Instead you should get and process the results one batch at a time. Once a batch is processed, Ray will evict objects in that batch to make space for future batches. diff --git a/doc/source/ray-core/patterns/redefine-task-actor-loop.rst b/doc/source/ray-core/patterns/redefine-task-actor-loop.rst index 939b16dc585f..c0b2191773b3 100644 --- a/doc/source/ray-core/patterns/redefine-task-actor-loop.rst +++ b/doc/source/ray-core/patterns/redefine-task-actor-loop.rst @@ -3,7 +3,7 @@ Anti-pattern: Redefining the same remote function or class harms performance **TLDR:** Avoid redefining the same remote function or class. -Decorating the same function or class multiple times using the :ref:`ray.remote <ray-remote-ref>` decorator leads to slow performance in Ray. 
+Decorating the same function or class multiple times using the :func:`ray.remote <ray.remote>` decorator leads to slow performance in Ray. For each Ray remote function or class, Ray will pickle it and upload to GCS. Later on, the worker that runs the task or actor will download and unpickle it. Each decoration of the same function or class generates a new remote function or class from Ray's perspective. diff --git a/doc/source/ray-core/patterns/return-ray-put.rst b/doc/source/ray-core/patterns/return-ray-put.rst index 8bd1ae3e527c..4afc6b90b4ff 100644 --- a/doc/source/ray-core/patterns/return-ray-put.rst +++ b/doc/source/ray-core/patterns/return-ray-put.rst @@ -1,7 +1,7 @@ Anti-pattern: Returning ray.put() ObjectRefs from a task harms performance and fault tolerance ============================================================================================== -**TLDR:** Avoid calling :ref:`ray.put() <ray-put-ref>` on task return values and returning the resulting ObjectRefs. +**TLDR:** Avoid calling :func:`ray.put() <ray.put>` on task return values and returning the resulting ObjectRefs. Instead, return these values directly if possible. Returning ray.put() ObjectRefs are considered anti-patterns for the following reasons: diff --git a/doc/source/ray-core/patterns/unnecessary-ray-get.rst b/doc/source/ray-core/patterns/unnecessary-ray-get.rst index 121cecabd297..c0322c22e190 100644 --- a/doc/source/ray-core/patterns/unnecessary-ray-get.rst +++ b/doc/source/ray-core/patterns/unnecessary-ray-get.rst @@ -3,7 +3,7 @@ Anti-pattern: Calling ray.get unnecessarily harms performance ============================================================= -**TLDR:** Avoid calling :ref:`ray.get() <ray-get-ref>` unnecessarily for intermediate steps. Work with object references directly, and only call ``ray.get()`` at the end to get the final result. +**TLDR:** Avoid calling :func:`ray.get() <ray.get>` unnecessarily for intermediate steps. Work with object references directly, and only call ``ray.get()`` at the end to get the final result. 
When ``ray.get()`` is called, objects must be transferred to the worker/node that calls ``ray.get()``. If you don't need to manipulate the object, you probably don't need to call ``ray.get()`` on it! diff --git a/doc/source/ray-core/scheduling/index.rst b/doc/source/ray-core/scheduling/index.rst index 5ec7f1c9bab6..16b28525e83b 100644 --- a/doc/source/ray-core/scheduling/index.rst +++ b/doc/source/ray-core/scheduling/index.rst @@ -31,7 +31,7 @@ If all nodes are infeasible, the task or actor cannot be scheduled until feasibl Scheduling Strategies --------------------- -Tasks or actors support a :ref:`scheduling_strategy ` option to specify the strategy used to decide the best node among feasible nodes. +Tasks or actors support a :func:`scheduling_strategy ` option to specify the strategy used to decide the best node among feasible nodes. Currently the supported strategies are the followings. "DEFAULT" diff --git a/doc/source/ray-core/scheduling/placement-group.rst b/doc/source/ray-core/scheduling/placement-group.rst index a5c613874d13..d6d01f05b899 100644 --- a/doc/source/ray-core/scheduling/placement-group.rst +++ b/doc/source/ray-core/scheduling/placement-group.rst @@ -360,7 +360,7 @@ Let's create a placement group. Recall that each bundle is a collection of resou Now let's define an actor that uses GPU. We'll also define a task that use ``extra_resources``. You can schedule actors/tasks on the placement group using -:ref:`options(scheduling_strategy=PlacementGroupSchedulingStrategy(...)) `. +:class:`options(scheduling_strategy=PlacementGroupSchedulingStrategy(...)) `. .. tabbed:: Python @@ -470,7 +470,7 @@ You can schedule actors/tasks on the placement group using }; RAY_REMOTE(&Counter::Ping, &Counter::GetValue, CreateCounter); - + // Create GPU actors on a gpu bundle. for (int index = 0; index < 2; index++) { ray::Actor(CreateCounter) @@ -660,9 +660,9 @@ Placement Group Lifetimes .. 
tabbed:: Python By default, the lifetimes of placement groups are not detached and will be destroyed - when the driver is terminated (but, if it is created from a detached actor, it is - killed when the detached actor is killed). If you'd like to keep the placement group - alive regardless of its job or detached actor, you should specify + when the driver is terminated (but, if it is created from a detached actor, it is + killed when the detached actor is killed). If you'd like to keep the placement group + alive regardless of its job or detached actor, you should specify `lifetime="detached"`. For example: .. code-block:: python @@ -671,8 +671,8 @@ Placement Group Lifetimes pg = placement_group([{"CPU": 2}, {"CPU": 2}], strategy="STRICT_SPREAD", lifetime="detached") ray.get(pg.ready()) - The placement group's lifetime will be independent of the driver now. This means it - is possible to retrieve the placement group from other drivers regardless of when + The placement group's lifetime will be independent of the driver now. This means it + is possible to retrieve the placement group from other drivers regardless of when the current driver exits. Let's see an example: .. code-block:: python diff --git a/doc/source/ray-core/scheduling/resources.rst b/doc/source/ray-core/scheduling/resources.rst index 04e7b811be74..e2455cc27032 100644 --- a/doc/source/ray-core/scheduling/resources.rst +++ b/doc/source/ray-core/scheduling/resources.rst @@ -78,7 +78,7 @@ There are several ways to do that depending on how you start the Ray cluster: .. tabbed:: ray.init() - If you are using :ref:`ray.init() ` to start a single node Ray cluster, you can do the following to manually specify node resources: + If you are using :func:`ray.init() ` to start a single node Ray cluster, you can do the following to manually specify node resources: .. 
literalinclude:: ../doc_code/resources.py :language: python @@ -136,7 +136,8 @@ The default resource requirements for actors was chosen for historical reasons. It's recommended to always explicitly set ``num_cpus`` for actors to avoid any surprises. If resources are specified explicitly, they are required for both scheduling and running.) -You can also explicitly specify a task's or actor's resource requirements (for example, one task may require a GPU) instead of using default ones via :ref:`ray.remote() ` and :ref:`.options() `. +You can also explicitly specify a task's or actor's resource requirements (for example, one task may require a GPU) instead of using default ones via :func:`ray.remote() ` +and :meth:`task.options() `/:meth:`actor.options() `. .. tabbed:: Python diff --git a/doc/source/ray-core/tasks.rst b/doc/source/ray-core/tasks.rst index f3e65113ca0c..429f7f715445 100644 --- a/doc/source/ray-core/tasks.rst +++ b/doc/source/ray-core/tasks.rst @@ -164,7 +164,7 @@ Waiting for Partial Results --------------------------- Calling **ray.get** on Ray task results will block until the task finished execution. After launching a number of tasks, you may want to know which ones have -finished executing without blocking on all of them. This could be achieved by (:ref:`ray-wait-ref`). The function +finished executing without blocking on all of them. This could be achieved by :func:`ray.wait() `. The function works as follows. .. tabbed:: Python @@ -215,7 +215,7 @@ For tasks that return multiple objects, Ray also supports remote generators that Cancelling tasks ---------------- -Ray tasks can be canceled by calling ``ray.cancel`` (:ref:`docstring `) on the returned Object ref. +Ray tasks can be canceled by calling :func:`ray.cancel() ` on the returned Object ref. .. tabbed:: Python @@ -242,7 +242,7 @@ By default, Ray will :ref:`retry ` failed tasks due to system failures and specified application-level failures. 
You can change this behavior by setting ``max_retries`` and ``retry_exceptions`` options -in :ref:`ray.remote() ` and :ref:`.options() `. +in :func:`ray.remote() ` and :meth:`.options() `. See :ref:`Ray fault tolerance ` for more details. diff --git a/doc/source/ray-core/tasks/using-ray-with-gpus.rst b/doc/source/ray-core/tasks/using-ray-with-gpus.rst index d6c74be1e113..c795f3a59913 100644 --- a/doc/source/ray-core/tasks/using-ray-with-gpus.rst +++ b/doc/source/ray-core/tasks/using-ray-with-gpus.rst @@ -40,7 +40,7 @@ and assign GPUs to the task or actor by setting the ``CUDA_VISIBLE_DEVICES`` env :start-after: __get_gpu_ids_start__ :end-before: __get_gpu_ids_end__ -Inside a task or actor, :ref:`ray.get_gpu_ids() ` will return a +Inside a task or actor, :func:`ray.get_gpu_ids() ` will return a list of GPU IDs that are available to the task or actor. Typically, it is not necessary to call ``ray.get_gpu_ids()`` because Ray will automatically set the ``CUDA_VISIBLE_DEVICES`` environment variable, @@ -99,7 +99,7 @@ task tries to use the same GPU. To address the problem, Ray disables the worker process reuse between GPU tasks by default, where the GPU resources is released after the task process exits. Since this adds overhead to GPU task scheduling, you can re-enable worker reuse by setting ``max_calls=0`` -in the :ref:`ray.remote ` decorator. +in the :func:`ray.remote ` decorator. .. literalinclude:: ../doc_code/gpus.py :language: python diff --git a/doc/source/ray-more-libs/joblib.rst b/doc/source/ray-more-libs/joblib.rst index d55efbf21f8d..d41ed8aa4dab 100644 --- a/doc/source/ray-more-libs/joblib.rst +++ b/doc/source/ray-more-libs/joblib.rst @@ -51,8 +51,8 @@ a multi-node Ray cluster instead. with joblib.parallel_backend('ray'): search.fit(digits.data, digits.target) -You can also set the ``ray_remote_args`` argument in ``parallel_backend`` to :ref:`configure -the Ray Actors ` making up the Pool. This can be used to eg. 
:ref:`assign resources +You can also set the ``ray_remote_args`` argument in ``parallel_backend`` to :func:`configure +the Ray Actors ` making up the Pool. This can be used to e.g. :ref:`assign resources to Actors, such as GPUs `. .. code-block:: python diff --git a/doc/source/ray-observability/api/state/api.rst b/doc/source/ray-observability/api/state/api.rst new file mode 100644 index 000000000000..bfd37f82c391 --- /dev/null +++ b/doc/source/ray-observability/api/state/api.rst @@ -0,0 +1,96 @@ +State API +========= + +.. _state-api-ref: + +.. note:: + + APIs are :ref:`alpha `. This feature requires a full installation of Ray using ``pip install "ray[default]"``. + +For an overview with examples see :ref:`Monitoring Ray States `. + +For the CLI reference see :ref:`Ray State CLI Reference ` or :ref:`Ray Log CLI Reference `. + +State Python SDK +----------------- + +State APIs are also exported as functions. + +Summary APIs +~~~~~~~~~~~~ + +.. autosummary:: + :toctree: doc/ + + ray.experimental.state.api.summarize_actors + ray.experimental.state.api.summarize_objects + ray.experimental.state.api.summarize_tasks + +List APIs +~~~~~~~~~~ + +.. autosummary:: + :toctree: doc/ + + ray.experimental.state.api.list_actors + ray.experimental.state.api.list_placement_groups + ray.experimental.state.api.list_nodes + ray.experimental.state.api.list_jobs + ray.experimental.state.api.list_workers + ray.experimental.state.api.list_tasks + ray.experimental.state.api.list_objects + ray.experimental.state.api.list_runtime_envs + +Get APIs +~~~~~~~~~ + +.. autosummary:: + :toctree: doc/ + + ray.experimental.state.api.get_actor + ray.experimental.state.api.get_placement_group + ray.experimental.state.api.get_node + ray.experimental.state.api.get_worker + ray.experimental.state.api.get_task + ray.experimental.state.api.get_objects + +Log APIs +~~~~~~~~ + +.. autosummary:: + :toctree: doc/ + + ray.experimental.state.api.list_logs + ray.experimental.state.api.get_log + +..
_state-api-schema: + +State APIs Schema +----------------- + +.. autosummary:: + :toctree: doc/ + + ray.experimental.state.common.ActorState + ray.experimental.state.common.TaskState + ray.experimental.state.common.NodeState + ray.experimental.state.common.PlacementGroupState + ray.experimental.state.common.WorkerState + ray.experimental.state.common.ObjectState + ray.experimental.state.common.RuntimeEnvState + ray.experimental.state.common.JobState + ray.experimental.state.common.StateSummary + ray.experimental.state.common.TaskSummaries + ray.experimental.state.common.TaskSummaryPerFuncOrClassName + ray.experimental.state.common.ActorSummaries + ray.experimental.state.common.ActorSummaryPerClass + ray.experimental.state.common.ObjectSummaries + ray.experimental.state.common.ObjectSummaryPerKey + +State APIs Exceptions +--------------------- + +.. autosummary:: + :toctree: doc/ + + ray.experimental.state.exception.RayStateApiException diff --git a/doc/source/ray-observability/state/cli.rst b/doc/source/ray-observability/api/state/cli.rst similarity index 95% rename from doc/source/ray-observability/state/cli.rst rename to doc/source/ray-observability/api/state/cli.rst index da765ac79c15..7b38592eb795 100644 --- a/doc/source/ray-observability/state/cli.rst +++ b/doc/source/ray-observability/api/state/cli.rst @@ -7,7 +7,7 @@ State ----- This section contains commands to access the :ref:`live state of Ray resources (actor, task, object, etc.) `. -.. note:: +.. note:: APIs are :ref:`alpha `. This feature requires a full installation of Ray using ``pip install "ray[default]"``. This feature also requires the dashboard component to be available. The dashboard component needs to be included when starting the ray cluster, which is the default behavior for ``ray start`` and ``ray.init()``. For more in-depth debugging, you could check the dashboard log at ``/dashboard.log``, which is usually ``/tmp/ray/session_latest/logs/dashboard.log``. 
@@ -34,11 +34,11 @@ Log --- This section contains commands to :ref:`access logs ` from Ray clusters. -.. note:: +.. note:: APIs are :ref:`alpha `. This feature requires a full installation of Ray using ``pip install "ray[default]"``. -Log CLI allows users to access the log from the cluster. +Log CLI allows users to access the log from the cluster. Note that only the logs from alive nodes are available through this API. .. click:: ray.experimental.state.state_cli:logs_state_cli_group diff --git a/doc/source/ray-observability/overview.rst b/doc/source/ray-observability/overview.rst index 3da5515c01c7..f45bbfbf69a5 100644 --- a/doc/source/ray-observability/overview.rst +++ b/doc/source/ray-observability/overview.rst @@ -26,9 +26,9 @@ Exceptions Creating a new task or submitting an actor task generates an object reference. When ``ray.get`` is called on the object reference, the API raises an exception if anything goes wrong with a related task, actor or object. For example, -- :ref:`RayTaskError ` is raised when there's an error from user code that throws an exception. -- :ref:`RayActorError ` is raised when an actor is dead (by a system failure such as node failure or user-level failure such as an exception from ``__init__`` method). -- :ref:`RuntimeEnvSetupError ` is raised when the actor or task couldn't be started because :ref:`a runtime environment ` failed to be created. +- :class:`RayTaskError ` is raised when there's an error from user code that throws an exception. +- :class:`RayActorError ` is raised when an actor is dead (by a system failure such as node failure or user-level failure such as an exception from ``__init__`` method). +- :class:`RuntimeEnvSetupError ` is raised when the actor or task couldn't be started because :ref:`a runtime environment ` failed to be created. See :ref:`Exceptions Reference ` for more details. @@ -134,16 +134,16 @@ Here's an example output. 
Metrics ------- -Ray collects and exposes the physical stats (e.g., CPU, memory, GRAM, disk, and network usage of each node), -internal stats (e.g., number of actors in the cluster, number of worker failures of the cluster), +Ray collects and exposes the physical stats (e.g., CPU, memory, GRAM, disk, and network usage of each node), +internal stats (e.g., number of actors in the cluster, number of worker failures of the cluster), and custom metrics (e.g., metrics defined by users). All stats can be exported as time series data (to Prometheus by default) and used -to monitor the cluster over time. +to monitor the cluster over time. See :ref:`Ray Metrics ` for more details. Profiling --------- -Ray is compatible with Python profiling tools such as ``CProfile``. It also supports its built-in profiling tool such as :ref:```ray timeline`` `. +Ray is compatible with Python profiling tools such as ``CProfile``. It also supports its built-in profiling tool such as :ref:```ray timeline`` `. See :ref:`Profiling ` for more details. diff --git a/doc/source/ray-observability/state/ray-state-api-reference.rst b/doc/source/ray-observability/state/ray-state-api-reference.rst deleted file mode 100644 index b5ae074368fb..000000000000 --- a/doc/source/ray-observability/state/ray-state-api-reference.rst +++ /dev/null @@ -1,180 +0,0 @@ -Ray State API -============= - -.. _state-api-ref: - -.. note:: - - APIs are :ref:`alpha `. This feature requires a full installation of Ray using ``pip install "ray[default]"``. - -For an overview with examples see :ref:`Monitoring Ray States `. - -For the CLI reference see :ref:`Ray State CLI Reference ` or :ref:`Ray Log CLI Reference `. - -State Python SDK ------------------ - -State APIs are also exported as functions. - -Summary APIs -~~~~~~~~~~~~ -.. autofunction:: ray.experimental.state.api.summarize_actors -.. autofunction:: ray.experimental.state.api.summarize_objects -.. 
autofunction:: ray.experimental.state.api.summarize_tasks - -List APIs -~~~~~~~~~~ - -.. autofunction:: ray.experimental.state.api.list_actors -.. autofunction:: ray.experimental.state.api.list_placement_groups -.. autofunction:: ray.experimental.state.api.list_nodes -.. autofunction:: ray.experimental.state.api.list_jobs -.. autofunction:: ray.experimental.state.api.list_workers -.. autofunction:: ray.experimental.state.api.list_tasks -.. autofunction:: ray.experimental.state.api.list_objects -.. autofunction:: ray.experimental.state.api.list_runtime_envs - -Get APIs -~~~~~~~~~ - -.. autofunction:: ray.experimental.state.api.get_actor -.. autofunction:: ray.experimental.state.api.get_placement_group -.. autofunction:: ray.experimental.state.api.get_node -.. autofunction:: ray.experimental.state.api.get_worker -.. autofunction:: ray.experimental.state.api.get_task -.. autofunction:: ray.experimental.state.api.get_objects - -Log APIs -~~~~~~~~ -.. autofunction:: ray.experimental.state.api.list_logs -.. autofunction:: ray.experimental.state.api.get_log - -.. _state-api-schema: - -State APIs Schema ------------------ - -.. _state-api-schema-actor: - -ActorState -~~~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.ActorState - :members: - -.. _state-api-schema-task: - -TaskState -~~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.TaskState - :members: - -.. _state-api-schema-node: - -NodeState -~~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.NodeState - :members: - -.. _state-api-schema-pg: - -PlacementGroupState -~~~~~~~~~~~~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.PlacementGroupState - :members: - -.. _state-api-schema-worker: - -WorkerState -~~~~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.WorkerState - :members: - -.. _state-api-schema-obj: - -ObjectState -~~~~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.ObjectState - :members: - -.. _state-api-schema-runtime-env: - -RuntimeEnvState -~~~~~~~~~~~~~~~ - -.. 
autoclass:: ray.experimental.state.common.RuntimeEnvState - :members: - -.. _state-api-schema-job: - -JobState -~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.JobState - :members: - -.. _state-api-schema-summary: - -StateSummary -~~~~~~~~~~~~ - -.. autoclass:: ray.experimental.state.common.StateSummary - :members: - -.. _state-api-schema-task-summary: - -TaskSummary -~~~~~~~~~~~ - -.. _state-api-schema-task-summaries: - -.. autoclass:: ray.experimental.state.common.TaskSummaries - :members: - -.. _state-api-schema-task-summary-per-key: - -.. autoclass:: ray.experimental.state.common.TaskSummaryPerFuncOrClassName - :members: - -.. _state-api-schema-actor-summary: - -ActorSummary -~~~~~~~~~~~~ - -.. _state-api-schema-actor-summaries: - -.. autoclass:: ray.experimental.state.common.ActorSummaries - :members: - -.. _state-api-schema-actor-summary-per-key: - -.. autoclass:: ray.experimental.state.common.ActorSummaryPerClass - :members: - -.. _state-api-schema-object-summary: - -ObjectSummary -~~~~~~~~~~~~~ - -.. _state-api-schema-object-summaries: - -.. autoclass:: ray.experimental.state.common.ObjectSummaries - :members: - -.. _state-api-schema-object-summary-per-key: - -.. autoclass:: ray.experimental.state.common.ObjectSummaryPerKey - :members: - -State APIs Exceptions ---------------------- - -.. _state-api-exceptions: - -.. autoclass:: ray.experimental.state.exception.RayStateApiException - :members: \ No newline at end of file diff --git a/doc/source/ray-observability/state/state-api.rst b/doc/source/ray-observability/state/state-api.rst index 04bd682e6d07..78778ef3b753 100644 --- a/doc/source/ray-observability/state/state-api.rst +++ b/doc/source/ray-observability/state/state-api.rst @@ -7,7 +7,7 @@ Monitoring Ray States Ray state APIs allow users to conveniently access the current state (snapshot) of Ray through CLI or Python SDK. -.. note:: +.. note:: APIs are :ref:`alpha `. 
This feature requires a full installation of Ray using ``pip install "ray[default]"``. This feature also requires the dashboard component to be available. The dashboard component needs to be included when starting the ray cluster, which is the default behavior for ``ray start`` and ``ray.init()``. For more in-depth debugging, you could check the dashboard log at ``/dashboard.log``, which is usually ``/tmp/ray/session_latest/logs/dashboard.log``. @@ -27,12 +27,12 @@ Run any workload. In this example, you will use the following script that runs 2 def task_running_300_seconds(): print("Start!") time.sleep(300) - + @ray.remote class Actor: def __init__(self): print("Actor created") - + # Create 2 tasks tasks = [task_running_300_seconds.remote() for _ in range(2)] @@ -84,7 +84,7 @@ Let's list all actors. .. code-block:: python - from ray.experimental.state.api import list_actors + from ray.experimental.state.api import list_actors print(list_actors()) .. code-block:: text @@ -100,15 +100,15 @@ Let's list all actors. 0 31405554844820381c2f0f8501000000 Actor 96956 ALIVE 1 f36758a9f8871a9ca993b1d201000000 Actor 96955 ALIVE -You can get the state of a single task using the get API. +You can get the state of a single actor using the get API. .. tabbed:: CLI .. code-block:: bash # In this case, 31405554844820381c2f0f8501000000 - ray get actors - + ray get actors + .. tabbed:: Python SDK .. code-block:: python @@ -139,7 +139,7 @@ You can also access logs through ``ray logs`` API. ray list actors # In this case, ACTOR_ID is 31405554844820381c2f0f8501000000 - ray logs actor --id + ray logs actor --id .. tabbed:: Python SDK @@ -164,13 +164,13 @@ Key Concepts Ray state APIs allow you to access **states** of **resources** through **summary**, **list**, and **get** APIs. It also supports **logs** API to access logs. - **states**: The state of the cluster of corresponding resources.
States consist of immutable metadata (e.g., actor's name) and mutable states (e.g., actor's scheduling state or pid). -- **resources**: Resources created by Ray. E.g., actors, tasks, objects, placement groups, and etc. +- **resources**: Resources created by Ray. E.g., actors, tasks, objects, placement groups, etc. - **summary**: API to return the summarized view of resources. - **list**: API to return every individual entity of resources. - **get**: API to return a single entity of resources in detail. - **logs**: API to access the log of actors, tasks, workers, or system log files. -Summary +Summary ------- Return the summarized information of the given Ray resource (objects, actors, tasks). It is recommended to start monitoring states through summary APIs first. When you find anomalies @@ -193,7 +193,7 @@ E.g., Summarize all actors from ray.experimental.state.api import summarize_actors print(summarize_actors()) -E.g., Summarize all tasks +E.g., Summarize all tasks ~~~~~~~~~~~~~~~~~~~~~~~~~ .. tabbed:: CLI @@ -209,7 +209,7 @@ E.g., Summarize all tasks from ray.experimental.state.api import summarize_tasks print(summarize_tasks()) -E.g., Summarize all objects +E.g., Summarize all objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -223,7 +223,7 @@ E.g., Summarize all objects .. code-block:: bash - ray summary objects + ray summary objects .. tabbed:: Python SDK @@ -235,34 +235,34 @@ E.g., Summarize all objects List ---- -Get a list of resources, possible resources include: +Get a list of resources, possible resources include: -- :ref:`Actors `, e.g., actor id, state, pid, death_cause. (:ref:`output schema `) -- :ref:`Tasks `, e.g., name, scheduling state, type, runtime env info (:ref:`output schema `) -- :ref:`Objects `, e.g., object id, callsites, reference types. (:ref:`output schema `) -- :ref:`Jobs `, e.g., start/end time, entrypoint, status. (:ref:`output schema `) -- :ref:`Placement Groups `, e.g., name, bundles, stats.
(:ref:`output schema `) -- Nodes (Ray worker nodes), e.g., node id, node ip, node state. (:ref:`output schema `) -- Workers (Ray worker processes), e.g., worker id, type, exit type and details. (:ref:`output schema `) -- :ref:`Runtime environments `, e.g., runtime envs, creation time, nodes (:ref:`output schema `) +- :ref:`Actors `, e.g., actor id, state, pid, death_cause. (:class:`output schema `) +- :ref:`Tasks `, e.g., name, scheduling state, type, runtime env info (:class:`output schema `) +- :ref:`Objects `, e.g., object id, callsites, reference types. (:class:`output schema `) +- :ref:`Jobs `, e.g., start/end time, entrypoint, status. (:class:`output schema `) +- :ref:`Placement Groups `, e.g., name, bundles, stats. (:class:`output schema `) +- Nodes (Ray worker nodes), e.g., node id, node ip, node state. (:class:`output schema `) +- Workers (Ray worker processes), e.g., worker id, type, exit type and details. (:class:`output schema `) +- :ref:`Runtime environments `, e.g., runtime envs, creation time, nodes (:class:`output schema `) -E.g., List all nodes +E.g., List all nodes ~~~~~~~~~~~~~~~~~~~~~ .. tabbed:: CLI .. code-block:: bash - ray list nodes + ray list nodes .. tabbed:: Python SDK .. code-block:: python - from ray.experimental.state.api import list_nodes() + from ray.experimental.state.api import list_nodes list_nodes() -E.g., List all placement groups +E.g., List all placement groups ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. tabbed:: CLI @@ -275,10 +275,10 @@ E.g., List all placement groups .. code-block:: python - from ray.experimental.state.api import list_placement_groups + from ray.experimental.state.api import list_placement_groups list_placement_groups() - + E.g., List local referenced objects created by a process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -294,7 +294,7 @@ E.g., List local referenced objects created by a process ..
code-block:: python - from ray.experimental.state.api import list_objects + from ray.experimental.state.api import list_objects list_objects(filters=[("pid", "=", ), ("reference_type", "=", "LOCAL_REFERENCE")]) E.g., List alive actors @@ -310,7 +310,7 @@ E.g., List alive actors .. code-block:: python - from ray.experimental.state.api import list_actors + from ray.experimental.state.api import list_actors list_actors(filters=[("state", "=", "ALIVE")]) E.g., List running tasks @@ -326,7 +326,7 @@ E.g., List running tasks .. code-block:: python - from ray.experimental.state.api import list_tasks + from ray.experimental.state.api import list_tasks list_tasks(filters=[("state", "=", "RUNNING")]) E.g., List non-running tasks @@ -342,7 +342,7 @@ E.g., List non-running tasks .. code-block:: python - from ray.experimental.state.api import list_tasks + from ray.experimental.state.api import list_tasks list_tasks(filters=[("state", "!=", "RUNNING")]) E.g., List running tasks that have a name func @@ -358,7 +358,7 @@ E.g., List running tasks that have a name func .. code-block:: python - from ray.experimental.state.api import list_tasks + from ray.experimental.state.api import list_tasks list_tasks(filters=[("state", "=", "RUNNING"), ("name", "=", "task_running_300_seconds()")]) E.g., List tasks with more details @@ -376,7 +376,7 @@ E.g., List tasks with more details .. code-block:: python - from ray.experimental.state.api import list_tasks + from ray.experimental.state.api import list_tasks list_tasks(detail=True) Get @@ -389,13 +389,13 @@ E.g., Get a task info .. code-block:: bash - ray get tasks + ray get tasks .. tabbed:: Python SDK .. code-block:: python - from ray.experimental.state.api import get_task + from ray.experimental.state.api import get_task get_task(id=) E.g., Get a node info @@ -405,13 +405,13 @@ E.g., Get a node info .. code-block:: bash - ray get nodes + ray get nodes .. tabbed:: Python SDK .. 
code-block:: python - from ray.experimental.state.api import get_node + from ray.experimental.state.api import get_node get_node(id=) Logs @@ -435,11 +435,11 @@ E.g., Get all retrievable log file names from a head node in a cluster .. code-block:: python - # You could get the node id / node ip from `ray list nodes` - from ray.experimental.state.api import list_logs - # `ray logs` by default print logs from a head node. - # So in order to list the same logs, you should provide the head node id. - # You could get the node id / node ip from `ray list nodes` + # You could get the node id / node ip from `ray list nodes` + from ray.experimental.state.api import list_logs + # `ray logs` by default prints logs from a head node. + # So in order to list the same logs, you should provide the head node id. + # You could get the node id / node ip from `ray list nodes` list_logs(node_id=) E.g., Get a particular log file from a node @@ -452,13 +452,13 @@ E.g., Get a particular log file from a node # You could get the node id / node ip from `ray list nodes` ray logs cluster gcs_server.out --node-id # `ray logs cluster` is alias to `ray logs` when querying with globs. - ray logs gcs_server.out --node-id + ray logs gcs_server.out --node-id .. tabbed:: Python SDK .. code-block:: python - from ray.experimental.state.api import get_log + from ray.experimental.state.api import get_log # Node IP could be retrieved from list_nodes() or ray.nodes() for line in get_log(filename="gcs_server.out", node_id=): @@ -471,7 +471,7 @@ E.g., Stream a log file from a node .. code-block:: bash - # You could get the node id / node ip from `ray list nodes` + # You could get the node id / node ip from `ray list nodes` ray logs raylet.out --node-ip --follow # Or, ray logs cluster raylet.out --node-ip --follow @@ -481,7 +481,7 @@ E.g., Stream a log file from a node ..
code-block:: python - from ray.experimental.state.api import get_log + from ray.experimental.state.api import get_log # Node IP could be retrieved from list_nodes() or ray.nodes() # The loop will block with `follow=True` @@ -508,7 +508,7 @@ E.g., Stream log from an actor with actor id for line in get_log(actor_id=, follow=True): print(line) -E.g., Stream log from a pid +E.g., Stream log from a pid ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. tabbed:: CLI @@ -521,7 +521,7 @@ E.g., Stream log from a pid .. code-block:: python - from ray.experimental.state.api import get_log + from ray.experimental.state.api import get_log # Node IP could be retrieved from list_nodes() or ray.nodes() # You could get the pid of the worker running the actor easily when output diff --git a/doc/source/ray-references/api.rst b/doc/source/ray-references/api.rst index 9700827cd15b..5f9e1e0199ea 100644 --- a/doc/source/ray-references/api.rst +++ b/doc/source/ray-references/api.rst @@ -13,4 +13,4 @@ API References ../rllib/package_ref/index.rst ../workflows/api/api.rst ../cluster/package-overview.rst - ../ray-core/package-ref.rst + ../ray-core/api/index.rst diff --git a/python/ray/data/block.py b/python/ray/data/block.py index 3de5992d8511..d3629ab6fd35 100644 --- a/python/ray/data/block.py +++ b/python/ray/data/block.py @@ -209,21 +209,18 @@ def build(self) -> "BlockExecStats": @DeveloperAPI @dataclass class BlockMetadata: - """Metadata about the block. - - Attributes: - num_rows: The number of rows contained in this block, or None. - size_bytes: The approximate size in bytes of this block, or None. - schema: The pyarrow schema or types of the block elements, or None. - input_files: The list of file paths used to generate this block, or - the empty list if indeterminate. - exec_stats: Execution stats for this block. - """ + """Metadata about the block.""" + #: The number of rows contained in this block, or None. num_rows: Optional[int] + #: The approximate size in bytes of this block, or None. 
size_bytes: Optional[int] + #: The pyarrow schema or types of the block elements, or None. schema: Optional[Union[type, "pyarrow.lib.Schema"]] + #: The list of file paths used to generate this block, or + #: the empty list if indeterminate. input_files: Optional[List[str]] + #: Execution stats for this block. exec_stats: Optional[BlockExecStats] def __post_init__(self): diff --git a/python/ray/data/datasource/partitioning.py b/python/ray/data/datasource/partitioning.py index b7255d0a7721..30462514c36a 100644 --- a/python/ray/data/datasource/partitioning.py +++ b/python/ray/data/datasource/partitioning.py @@ -42,25 +42,23 @@ class Partitioning: Path-based partition formats embed all partition keys and values directly in their dataset file paths. - - Attributes: - style: The partition style - may be either HIVE or DIRECTORY. - base_dir: "/"-delimited base directory that all partitioned paths should - exist under (exclusive). File paths either outside of, or at the first - level of, this directory will be considered unpartitioned. Specify - `None` or an empty string to search for partitions in all file path - directories. - field_names: The partition key field names (i.e. column names for tabular - datasets). When non-empty, the order and length of partition key - field names must match the order and length of partition values. - Required when parsing DIRECTORY partitioned paths or generating - HIVE partitioned paths. - filesystem: Filesystem that will be used for partition path file I/O. """ + #: The partition style - may be either HIVE or DIRECTORY. style: PartitionStyle + #: "/"-delimited base directory that all partitioned paths should + #: exist under (exclusive). File paths either outside of, or at the first + #: level of, this directory will be considered unpartitioned. Specify + #: `None` or an empty string to search for partitions in all file path + #: directories. base_dir: Optional[str] = None + #: The partition key field names (i.e. 
column names for tabular + #: datasets). When non-empty, the order and length of partition key + #: field names must match the order and length of partition values. + #: Required when parsing DIRECTORY partitioned paths or generating + #: HIVE partitioned paths. field_names: Optional[List[str]] = None + #: Filesystem that will be used for partition path file I/O. filesystem: Optional["pyarrow.fs.FileSystem"] = None def __post_init__(self): diff --git a/python/ray/experimental/state/api.py b/python/ray/experimental/state/api.py index daefed8b423b..883c43cacbe0 100644 --- a/python/ray/experimental/state/api.py +++ b/python/ray/experimental/state/api.py @@ -550,12 +550,12 @@ def get_actor( Returns: None if actor not found, or dictionarified - :ref:`ActorState `. + :class:`ActorState `. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).get( StateResource.ACTORS, id, GetApiOptions(timeout=timeout), _explain=_explain ) @@ -589,12 +589,12 @@ def get_placement_group( Returns: None if actor not found, or dictionarified - :ref:`PlacementGroupState `. + :class:`~ray.experimental.state.common.PlacementGroupState`. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).get( StateResource.PLACEMENT_GROUPS, id, @@ -621,12 +621,12 @@ def get_node( Returns: None if actor not found, or dictionarified - :ref:`NodeState `. + :class:`NodeState `. Raises: - Exceptions: :ref:`RayStateApiException ` + Exceptions: :class:`RayStateApiException ` if the CLI is failed to query the data. 
- """ + """ # noqa: E501 return StateApiClient(address=address).get( StateResource.NODES, id, @@ -653,12 +653,12 @@ def get_worker( Returns: None if actor not found, or dictionarified - :ref:`WorkerState `. + :class:`WorkerState `. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).get( StateResource.WORKERS, id, @@ -685,12 +685,13 @@ def get_task( Returns: None if task not found, or a list of dictionarified - :ref:`TaskState ` from the task attempts. + :class:`~ray.experimental.state.common.TaskState` + from the task attempts. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).get( StateResource.TASKS, id, @@ -719,12 +720,13 @@ def get_objects( failed query information. Returns: - List of dictionarified :ref:`ObjectState `. + List of dictionarified + :class:`~ray.experimental.state.common.ObjectState`. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).get( StateResource.OBJECTS, id, @@ -753,7 +755,7 @@ def list_actors( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `ActorState`) will be queried and returned. See - :ref:`ActorState `. + :class:`ActorState `. raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -761,12 +763,12 @@ def list_actors( Returns: List of dictionarified - :ref:`ActorState `. + :class:`ActorState `. 
Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.ACTORS, options=ListApiOptions( @@ -800,7 +802,7 @@ def list_placement_groups( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `PlacementGroupState`) will be queried and returned. See - :ref:`PlacementGroupState `. + :class:`~ray.experimental.state.common.PlacementGroupState`. raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -808,12 +810,12 @@ def list_placement_groups( Returns: List of dictionarified - :ref:`PlacementGroupState `. + :class:`~ray.experimental.state.common.PlacementGroupState`. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.PLACEMENT_GROUPS, options=ListApiOptions( @@ -844,7 +846,7 @@ def list_nodes( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `NodeState`) will be queried and returned. See - :ref:`NodeState `. + :class:`NodeState `. raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -852,12 +854,12 @@ def list_nodes( Returns: List of dictionarified - :ref:`NodeState `. + :class:`NodeState `. Raises: - Exceptions: :ref:`RayStateApiException ` + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. 
- """ + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.NODES, options=ListApiOptions( @@ -888,7 +890,7 @@ def list_jobs( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `JobState`) will be queried and returned. See - :ref:`JobState `. + :class:`JobState `. raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -896,12 +898,12 @@ def list_jobs( Returns: List of dictionarified - :ref:`JobState `. + :class:`JobState `. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.JOBS, options=ListApiOptions( @@ -932,7 +934,7 @@ def list_workers( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `WorkerState`) will be queried and returned. See - :ref:`WorkerState `. + :class:`WorkerState `. raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -940,12 +942,12 @@ def list_workers( Returns: List of dictionarified - :ref:`WorkerState `. + :class:`WorkerState `. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.WORKERS, options=ListApiOptions( @@ -976,7 +978,7 @@ def list_tasks( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `WorkerState`) will be queried and returned. See - :ref:`WorkerState `. + :class:`WorkerState `. 
raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -984,12 +986,12 @@ def list_tasks( Returns: List of dictionarified - :ref:`WorkerState `. + :class:`WorkerState `. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.TASKS, options=ListApiOptions( @@ -1020,7 +1022,7 @@ def list_objects( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `ObjectState`) will be queried and returned. See - :ref:`ObjectState `. + :class:`ObjectState `. raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -1028,12 +1030,12 @@ def list_objects( Returns: List of dictionarified - :ref:`ObjectState `. + :class:`ObjectState `. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.OBJECTS, options=ListApiOptions( @@ -1064,7 +1066,7 @@ def list_runtime_envs( timeout: Max timeout value for the state APIs requests made. detail: When True, more details info (specified in `RuntimeEnvState`) will be queried and returned. See - :ref:`RuntimeEnvState `. + :class:`RuntimeEnvState `. raise_on_missing_output: When True, exceptions will be raised if there is missing data due to truncation/data source unavailable. _explain: Print the API information such as API latency or @@ -1072,12 +1074,12 @@ def list_runtime_envs( Returns: List of dictionarified - :ref:`RuntimeEnvState `. + :class:`RuntimeEnvState `. 
Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI - failed to query the data. - """ + Exceptions: :class:`RayStateApiException ` + if the CLI failed to query the data. + """ # noqa: E501 return StateApiClient(address=address).list( StateResource.RUNTIME_ENVS, options=ListApiOptions( @@ -1161,9 +1163,9 @@ def get_log( A Generator of log line, None for SendType and ReturnType. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 api_server_url = ray_address_to_api_server_url(address) media_type = "stream" if follow else "file" @@ -1232,10 +1234,10 @@ def list_logs( values are list of log filenames. Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data, or ConnectionError if failed to resolve the ray address. - """ + """ # noqa: E501 assert ( node_ip is not None or node_id is not None ), "At least one of node ip and node id is required" @@ -1291,12 +1293,13 @@ def summarize_tasks( failed query information. Return: - Dictionarified :ref:`TaskSummaries ` + Dictionarified + :class:`~ray.experimental.state.common.TaskSummaries` Raises: - Exceptions: :ref:`RayStateApiException ` + Exceptions: :class:`RayStateApiException ` if the CLI is failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).summary( SummaryResource.TASKS, options=SummaryApiOptions(timeout=timeout), @@ -1323,12 +1326,13 @@ def summarize_actors( failed query information. Return: - Dictionarified :ref:`ActorSummaries ` + Dictionarified + :class:`~ray.experimental.state.common.ActorSummaries` Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. 
- """ + """ # noqa: E501 return StateApiClient(address=address).summary( SummaryResource.ACTORS, options=SummaryApiOptions(timeout=timeout), @@ -1355,12 +1359,12 @@ def summarize_objects( failed query information. Return: - Dictionarified :ref:`ObjectSummaries ` + Dictionarified :class:`~ray.experimental.state.common.ObjectSummaries` Raises: - Exceptions: :ref:`RayStateApiException ` if the CLI + Exceptions: :class:`RayStateApiException ` if the CLI failed to query the data. - """ + """ # noqa: E501 return StateApiClient(address=address).summary( SummaryResource.OBJECTS, options=SummaryApiOptions(timeout=timeout), diff --git a/python/ray/experimental/state/state_cli.py b/python/ray/experimental/state/state_cli.py index 1f297c9eaba0..55f94ad6bc31 100644 --- a/python/ray/experimental/state/state_cli.py +++ b/python/ray/experimental/state/state_cli.py @@ -353,7 +353,7 @@ def ray_get( The output schema is defined at :ref:`State API Schema section. ` For example, the output schema of `ray get tasks ` is - :ref:`ray.experimental.state.common.TaskState `. + :class:`~ray.experimental.state.common.TaskState`. Usage: @@ -378,9 +378,9 @@ def ray_get( id: The id of the resource. Raises: - :ref:`RayStateApiException ` + :class:`RayStateApiException ` if the CLI is failed to query the data. - """ + """ # noqa: E501 # All resource names use '_' rather than '-'. But users options have '-' resource = StateResource(resource.replace("-", "_")) @@ -466,7 +466,7 @@ def ray_list( The output schema is defined at :ref:`State API Schema section. ` For example, the output schema of `ray list tasks` is - :ref:`ray.experimental.state.common.TaskState `. + :class:`~ray.experimental.state.common.TaskState`. Usage: @@ -517,9 +517,9 @@ def ray_list( resource: The type of the resource to query. Raises: - :ref:`RayStateApiException ` + :class:`RayStateApiException ` if the CLI is failed to query the data. - """ + """ # noqa: E501 # All resource names use '_' rather than '-'. 
But users options have '-' resource = StateResource(resource.replace("-", "_")) format = AvailableFormat(format) @@ -581,12 +581,12 @@ def task_summary(ctx, timeout: float, address: str): task function names. The output schema is - :ref:`ray.experimental.state.common.TaskSummaries `. + :class:`~ray.experimental.state.common.TaskSummaries`. Raises: - :ref:`RayStateApiException ` + :class:`RayStateApiException ` if the CLI is failed to query the data. - """ + """ # noqa: E501 print( format_summary_output( summarize_tasks( @@ -612,13 +612,13 @@ def actor_summary(ctx, timeout: float, address: str): actor class names. The output schema is - :ref:`ray.experimental.state.common.ActorSummaries - `. + :class:`ray.experimental.state.common.ActorSummaries + `. Raises: - :ref:`RayStateApiException ` + :class:`RayStateApiException ` if the CLI is failed to query the data. - """ + """ # noqa: E501 print( format_summary_output( summarize_actors( @@ -663,13 +663,13 @@ def object_summary(ctx, timeout: float, address: str): ``` The output schema is - :ref:`ray.experimental.state.common.ObjectSummaries - `. + :class:`ray.experimental.state.common.ObjectSummaries + `. Raises: - :ref:`RayStateApiException ` + :class:`RayStateApiException ` if the CLI is failed to query the data. - """ + """ # noqa: E501 print( format_object_summary_output( summarize_objects( @@ -920,9 +920,9 @@ def log_cluster( ``` Raises: - :ref:`RayStateApiException ` if the CLI + :class:`RayStateApiException ` if the CLI is failed to query the data. - """ + """ # noqa: E501 if node_id is None and node_ip is None: node_ip = _get_head_node_ip(address) @@ -1029,10 +1029,10 @@ def log_actor( ``` Raises: - :ref:`RayStateApiException ` + :class:`RayStateApiException ` if the CLI is failed to query the data. MissingParameter if inputs are missing. 
- """ + """ # noqa: E501 if pid is None and id is None: raise click.MissingParameter( @@ -1102,10 +1102,10 @@ def log_worker( ``` Raises: - :ref:`RayStateApiException ` + :class:`RayStateApiException ` if the CLI is failed to query the data. MissingParameter if inputs are missing. - """ + """ # noqa: E501 _print_log( address=address, diff --git a/rllib/policy/sample_batch.py b/rllib/policy/sample_batch.py index 98fe4213b1dd..0f83b70fb0f9 100644 --- a/rllib/policy/sample_batch.py +++ b/rllib/policy/sample_batch.py @@ -804,7 +804,7 @@ def to_device(self, device, framework="torch"): def size_bytes(self) -> int: """Returns sum over number of bytes of all data buffers. - For numpy arrays, we use `.nbytes`. For all other value types, we use + For numpy arrays, we use ``.nbytes``. For all other value types, we use sys.getsizeof(...). Returns: