Add built-in cprofile tooling #278

Merged: 5 commits, Jan 31, 2024
71 changes: 65 additions & 6 deletions dedalus/core/solvers.py
@@ -6,14 +6,23 @@
import h5py
import pathlib
import scipy.linalg
import cProfile
import pstats
from math import prod
from collections import defaultdict
import pickle

from . import subsystems
from . import timesteppers
from .evaluator import Evaluator
from ..libraries.matsolvers import matsolvers
from ..tools.config import config
from ..tools.array import scipy_sparse_eigs
from ..tools.parallel import ProfileWrapper, parallel_mkdir

PROFILE_DEFAULT = config['profiling'].getboolean('PROFILE_DEFAULT')
PARALLEL_PROFILE_DEFAULT = config['profiling'].getboolean('PARALLEL_PROFILE_DEFAULT')
PROFILE_DIRECTORY = pathlib.Path(config['profiling'].get('PROFILE_DIRECTORY'))

import logging
logger = logging.getLogger(__name__.split('.')[-1])
@@ -485,6 +494,10 @@ class InitialValueSolver(SolverBase):
Iteration cadence for enforcing Hermitian symmetry on real variables (default: 100).
warmup_iterations : int, optional
Number of warmup iterations to disregard when computing runtime statistics (default: 10).
profile : bool, optional
Save accumulated profiles with cProfile (default: False).
parallel_profile : bool, optional
Save per-process and accumulated profiles with cProfile (default: False).
**kw :
Other options passed to ProblemBase.

@@ -510,15 +523,22 @@ class InitialValueSolver(SolverBase):
matsolver_default = 'MATRIX_FACTORIZER'
matrices = ['M', 'L']

def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, **kw):
def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, profile=PROFILE_DEFAULT, parallel_profile=PARALLEL_PROFILE_DEFAULT, **kw):
logger.debug('Beginning IVP instantiation')
super().__init__(problem, **kw)
if np.isrealobj(self.dtype.type()):
self.enforce_real_cadence = enforce_real_cadence
else:
self.enforce_real_cadence = None
# Setup timing and profiling
self.dist = problem.dist
self._bcast_array = np.zeros(1, dtype=float)
self.init_time = self.world_time
if profile or parallel_profile:
parallel_mkdir(PROFILE_DIRECTORY, comm=self.dist.comm)
self.profile = True
self.parallel_profile = parallel_profile
self.setup_profiler = cProfile.Profile()
self.warmup_profiler = cProfile.Profile()
self.run_profiler = cProfile.Profile()
self.setup_profiler.enable()
# Build subsystems and subproblems
super().__init__(problem, **kw)
# Build LHS matrices
self.build_matrices(self.subproblems, ['M', 'L'])
# Compute total modes
@@ -538,6 +558,10 @@ def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterat
self.sim_time = self.initial_sim_time = problem.time.allreduce_data_max(layout='g')
self.iteration = self.initial_iteration = 0
self.warmup_iterations = warmup_iterations
if np.isrealobj(self.dtype.type()):
self.enforce_real_cadence = enforce_real_cadence
else:
self.enforce_real_cadence = None
# Default integration parameters
self.stop_sim_time = np.inf
self.stop_wall_time = np.inf
@@ -648,8 +672,14 @@ def step(self, dt):
wall_time = self.wall_time
if self.iteration == self.initial_iteration:
self.start_time = wall_time
if self.profile:
self.dump_profiles(self.setup_profiler, "setup")
self.warmup_profiler.enable()
if self.iteration == self.initial_iteration + self.warmup_iterations:
self.warmup_time = wall_time
if self.profile:
self.dump_profiles(self.warmup_profiler, "warmup")
self.run_profiler.enable()
# Advance using timestepper
self.timestepper.step(dt, wall_time)
# Update iteration
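
The step() changes above hand off between three accumulated profilers: the setup profiler covers everything from solver construction to the first call to step(), the warmup profiler covers the first warmup_iterations steps, and the run profiler covers the remaining steps until log_stats(). A minimal sketch of the usual Dedalus run loop, annotated with when each profile file is written; it assumes a solver built with profile=True, the standard solver.proceed loop, the default PROFILE_DIRECTORY ("profiles"), and a placeholder timestep:

# Usual IVP run loop; comments mark when dump_profiles is triggered.
while solver.proceed:
    solver.step(timestep)
    # Start of the first step: setup profile written to profiles/setup.prof,
    # then the warmup profiler is enabled.
    # Start of the step after warmup_iterations: warmup profile written to
    # profiles/warmup.prof, then the run profiler is enabled.
solver.log_stats()  # run profile written to profiles/runtime.prof
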
@@ -704,6 +734,8 @@ def log_stats(self, format=".4g"):
logger.info(f"Final iteration: {self.iteration}")
logger.info(f"Final sim time: {self.sim_time}")
logger.info(f"Setup time (init - iter 0): {self.start_time:{format}} sec")
if self.profile:
self.dump_profiles(self.run_profiler, "runtime")
if self.iteration >= self.initial_iteration + self.warmup_iterations:
warmup_time = self.warmup_time - self.start_time
run_time = log_time - self.warmup_time
@@ -716,3 +748,30 @@
logger.info(f"Speed: {(modes*stages/cpus/run_time):{format}} mode-stages/cpu-sec")
else:
logger.info(f"Timings unavailable because warmup did not complete.")

def dump_profiles(self, profiler, name):
"Save profiling data to disk."
comm = self.dist.comm
# Disable and create stats on each process
profiler.create_stats()
p = pstats.Stats(profiler)
p.strip_dirs()
# Gather using wrapper class to avoid pickling issues
profiles = comm.gather(ProfileWrapper(p.stats), root=0)
# Sum stats on root process
if comm.rank == 0:
if self.parallel_profile:
stats = {'primcalls': defaultdict(list),
'totcalls': defaultdict(list),
'tottime': defaultdict(list),
'cumtime': defaultdict(list)}
for profile in profiles:
for func, (primcalls, totcalls, tottime, cumtime, callers) in profile.stats.items():
stats['primcalls'][func].append(primcalls)
stats['totcalls'][func].append(totcalls)
stats['tottime'][func].append(tottime)
stats['cumtime'][func].append(cumtime)
pickle.dump(stats, open(PROFILE_DIRECTORY / f"{name}_parallel.pickle", 'wb'))
# Creation of joint_stats destroys profiles, so do this second
joint_stats = pstats.Stats(*profiles)
joint_stats.dump_stats(PROFILE_DIRECTORY / f"{name}.prof")
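
The files written by dump_profiles can be inspected afterwards with the standard library: the joint .prof file loads directly into pstats, and the _parallel.pickle file (written only when parallel_profile is enabled) is a plain dict of per-process call counts and timings keyed by function. A minimal sketch, assuming a completed run with the default "profiles" directory:

import pickle
import pstats

# Accumulated profile across all processes: sort by cumulative time, show top 20.
stats = pstats.Stats("profiles/runtime.prof")
stats.sort_stats("cumulative").print_stats(20)

# Per-process data: {metric: {func: [value for each rank]}}.
with open("profiles/runtime_parallel.pickle", "rb") as f:
    parallel = pickle.load(f)
slowest = sorted(parallel["tottime"].items(), key=lambda item: -max(item[1]))[:5]
for func, times in slowest:
    print(func, "max tottime over ranks:", max(times))
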
12 changes: 12 additions & 0 deletions dedalus/dedalus.cfg
@@ -118,3 +118,15 @@
# This works around NFS caching issues
FILEHANDLER_TOUCH_TMPFILE = False

[profiling]

# Default profile setting for solvers
# This saves accumulated profiling data using cProfile
PROFILE_DEFAULT = False

# Default parallel profile setting for solvers
# This saves per-process and accumulated profiling data using cProfile
PARALLEL_PROFILE_DEFAULT = False

# Profile directory base (will be expanded to <PROFILE_DIRECTORY>/runtime.prof, etc)
PROFILE_DIRECTORY = profiles
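
These defaults are read at import time into PROFILE_DEFAULT, PARALLEL_PROFILE_DEFAULT, and PROFILE_DIRECTORY in solvers.py, so profiling can be switched on globally by editing the [profiling] section of a user-level dedalus.cfg, or per-solver via the new constructor keywords. A minimal per-solver sketch (problem construction not shown; the RK222 timestepper is only an example choice):

from dedalus.core import timesteppers
from dedalus.core.solvers import InitialValueSolver

# Override the config defaults for this solver only: save both the accumulated
# and the per-process profiles.
solver = InitialValueSolver(problem, timesteppers.RK222,
                            profile=True, parallel_profile=True)
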
21 changes: 21 additions & 0 deletions dedalus/tools/parallel.py
@@ -3,6 +3,7 @@

"""

import pathlib
from mpi4py import MPI


@@ -56,3 +57,23 @@ def __enter__(self):
def __exit__(self, type, value, traceback):
for i in range(self.size-self.rank):
self.comm.Barrier()


class ProfileWrapper:
"""Pickleable wrapper for cProfile.Profile for use with pstats.Stats"""

def __init__(self, stats):
self.stats = stats

def create_stats(self):
pass


def parallel_mkdir(path, comm=MPI.COMM_WORLD):
"""Create a directory from root process."""
path = pathlib.Path(path)
with Sync(comm=comm, enter=False, exit=True) as sync:
if sync.comm.rank == 0:
if not path.exists():
path.mkdir()
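
ProfileWrapper works because pstats.Stats accepts, in place of a filename, any object exposing a create_stats() method and a stats attribute; the wrapper carries only the plain stats dict across MPI (which pickles cleanly), whereas a live cProfile.Profile does not. A small standalone sketch of the same combining pattern, without MPI and with a hypothetical workload:

import cProfile
import pstats

from dedalus.tools.parallel import ProfileWrapper

def work(n):
    return sum(i * i for i in range(n))

# Profile two runs separately, then merge them the way dump_profiles does:
# wrap the raw stats dicts and hand them all to pstats.Stats.
wrappers = []
for n in (10_000, 20_000):
    profiler = cProfile.Profile()
    profiler.enable()
    work(n)
    profiler.disable()
    wrappers.append(ProfileWrapper(pstats.Stats(profiler).strip_dirs().stats))

joint = pstats.Stats(*wrappers)  # sums call counts and timings across wrappers
joint.sort_stats("cumulative").print_stats(5)
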