Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter PIDs to profile #761

Merged
merged 21 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f37e6f4
Add --pids flag that enables profiling only provided pids
mpozniak95 Mar 30, 2023
bb1673a
Enable filtering pids in pyperf, perf and phpspy
mpozniak95 Apr 4, 2023
d798298
Fix file formatting
mpozniak95 Apr 4, 2023
ecc81ca
Remove debug lines, for phpspy and pyperf filter output instead of pr…
mpozniak95 Apr 5, 2023
c9c3562
Check if any of provided PID is alive
mpozniak95 Apr 5, 2023
ae2ef5f
Add comments with links to issues for pyperf and phpspy
mpozniak95 Apr 6, 2023
74a64cc
Change extra_args variable to pid_args variable in perf profiler, rem…
mpozniak95 Apr 7, 2023
5b2f3d2
Use kwargs in ProfilerState class
mpozniak95 Apr 7, 2023
8473ad9
Add two more asserts in test_profiling_provided_pids
mpozniak95 Apr 7, 2023
7e63f40
Move checking for alive PID's to verify_preconditions, make GProfiler…
mpozniak95 Apr 7, 2023
08a19ae
Filter out only alive pids from pids_to_profile list
mpozniak95 Apr 7, 2023
8178673
Fix tests
mpozniak95 Apr 7, 2023
3b2bec9
Fix tests
mpozniak95 Apr 7, 2023
b42e05e
Fix checking if pids flag provided with profile-spawned
mpozniak95 Apr 11, 2023
406c378
Store list of processes to profile instead of pids, fix kwargs-only f…
mpozniak95 Apr 11, 2023
09fea34
Log PID's given by --pids
mpozniak95 Apr 12, 2023
c57bef1
Move logging pids given after initializing logger
mpozniak95 Apr 12, 2023
89dd8d8
Don't use flag -a with flag -p when using perf
mpozniak95 Apr 12, 2023
7b921d2
Reformat perf profiler file
mpozniak95 Apr 12, 2023
5b0a765
Move logging given pids after starting gprofiler log
mpozniak95 Apr 12, 2023
177459c
Update gprofiler/main.py
Jongy Apr 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion gprofiler/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pathlib import Path
from threading import Event
from types import FrameType, TracebackType
from typing import Iterable, Optional, Type, cast
from typing import Iterable, List, Optional, Type, cast

import configargparse
import humanfriendly
Expand Down Expand Up @@ -117,6 +117,7 @@ def __init__(
duration: int,
profile_api_version: str,
profiling_mode: str,
pids_to_profile: List[int],
profile_spawned_processes: bool = True,
remote_logs_handler: Optional[RemoteLogsHandler] = None,
controller_process: Optional[Process] = None,
Expand Down Expand Up @@ -153,6 +154,7 @@ def __init__(
profile_spawned_processes,
bool(user_args.get("insert_dso_name")),
profiling_mode,
pids_to_profile,
container_names_client,
)
self.system_profiler, self.process_profilers = get_profilers(user_args, profiler_state=self._profiler_state)
Expand Down Expand Up @@ -521,6 +523,14 @@ def parse_cmd_args() -> configargparse.Namespace:
parser.add_argument(
"--rotating-output", action="store_true", default=False, help="Keep only the last profile result"
)
parser.add_argument(
"--pids",
dest="pids_to_profile",
action="append",
default=list(),
type=str,
help="Coma separated list of processes that will be filtered to profile",
)

_add_profilers_arguments(parser)

Expand Down Expand Up @@ -791,6 +801,26 @@ def parse_cmd_args() -> configargparse.Namespace:
if args.nodejs_mode in ("perf", "attach-maps") and args.perf_mode not in ("fp", "smart"):
parser.error("--nodejs-mode perf or attach-maps requires --perf-mode 'fp' or 'smart'")

if args.profile_spawned_processes and len(args.pids_to_profile) > 0:
parser.error("--pids is not allowed when profiling spawned processes")

pids_to_profile = []
try:
if len(args.pids_to_profile) > 0:
for input in args.pids_to_profile:
pids = [int(pid) for pid in input.split(",")]
for pid in pids:
try:
Process(pid)
pids_to_profile.append(pid)
except NoSuchProcess:
continue
if len(pids_to_profile) == 0:
parser.error("There aren't any alive processes from provided PID list")
except ValueError:
parser.error("--pids should be integer or coma separated list of integers f.e. --pids 13,452,2388")
setattr(args, "pids_to_profile", pids_to_profile)

return args


Expand Down Expand Up @@ -1048,6 +1078,7 @@ def main() -> None:
args.duration,
args.profile_api_version,
args.profiling_mode,
args.pids_to_profile,
args.profile_spawned_processes,
remote_logs_handler,
controller_process,
Expand Down
11 changes: 10 additions & 1 deletion gprofiler/profiler_state.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from threading import Event
from typing import Optional
from typing import List, Optional

from gprofiler.containers_client import ContainerNamesClient
from gprofiler.utils import TemporaryDirectoryWithMode
Expand All @@ -15,6 +15,7 @@ def __init__(
profile_spawned_processes: bool,
insert_dso_name: bool,
profiling_mode: str,
pids_to_profile: List[int],
container_names_client: Optional[ContainerNamesClient],
) -> None:
self._stop_event = stop_event
Expand All @@ -24,6 +25,7 @@ def __init__(
self._temporary_dir = TemporaryDirectoryWithMode(dir=storage_dir, mode=0o755)
self._storage_dir = self._temporary_dir.name
self._container_names_client = container_names_client
self._pids_to_profile = pids_to_profile

@property
def stop_event(self) -> Event:
Expand All @@ -49,6 +51,13 @@ def profiling_mode(self) -> str:
def container_names_client(self) -> Optional[ContainerNamesClient]:
return self._container_names_client

@property
def pids_to_profile(self) -> Optional[List[int]]:
if len(self._pids_to_profile) > 0:
return self._pids_to_profile
else:
return None

def get_container_name(self, pid: int) -> str:
if self._container_names_client is not None:
return self._container_names_client.get_container_name(pid)
Expand Down
14 changes: 11 additions & 3 deletions gprofiler/profilers/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,10 @@ def merge_global_perfs(

total_fp_samples = sum([sum(stacks.values()) for stacks in fp_perf.values()])
total_dwarf_samples = sum([sum(stacks.values()) for stacks in dwarf_perf.values()])
fp_to_dwarf_sample_ratio = total_fp_samples / total_dwarf_samples
if total_dwarf_samples == 0:
fp_to_dwarf_sample_ratio = 0.0 # ratio can be 0 because if total_dwarf_samples is 0 then it will be never used
else:
fp_to_dwarf_sample_ratio = total_fp_samples / total_dwarf_samples

# The FP perf is used here as the "main" perf, to which the DWARF perf is scaled.
merged_pid_to_stacks_counters: ProcessToStackSampleCounters = defaultdict(Counter)
Expand Down Expand Up @@ -389,6 +392,10 @@ def __init__(
self._node_processes: List[Process] = []
self._node_processes_attached: List[Process] = []
self._perf_memory_restart = perf_memory_restart
extra_args: List[str] = []
if self._profiler_state.pids_to_profile is not None:
extra_args.append("--pid")
extra_args.append(",".join([str(pid) for pid in self._profiler_state.pids_to_profile]))

if perf_mode in ("fp", "smart"):
self._perf_fp: Optional[PerfProcess] = PerfProcess(
Expand All @@ -397,20 +404,21 @@ def __init__(
os.path.join(self._profiler_state.storage_dir, "perf.fp"),
False,
perf_inject,
[],
extra_args,
)
self._perfs.append(self._perf_fp)
else:
self._perf_fp = None

if perf_mode in ("dwarf", "smart"):
extra_args.extend(["--call-graph", f"dwarf,{perf_dwarf_stack_size}"])
self._perf_dwarf: Optional[PerfProcess] = PerfProcess(
self._frequency,
self._profiler_state.stop_event,
os.path.join(self._profiler_state.storage_dir, "perf.dwarf"),
True,
False, # no inject in dwarf mode, yet
["--call-graph", f"dwarf,{perf_dwarf_stack_size}"],
extra_args,
)
self._perfs.append(self._perf_dwarf)
else:
Expand Down
5 changes: 5 additions & 0 deletions gprofiler/profilers/php.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ def extract_metadata_section(re_expr: Pattern, metadata_line: str) -> str:

profiles: ProcessToProfileData = {}
for pid in results:
# Because of https://github.com/Granulate/gprofiler/issues/763,
# for now we only filter output of phpspy to return only profiles from chosen pids
if profiler_state.pids_to_profile is not None:
if pid not in profiler_state.pids_to_profile:
continue
# TODO: appid & app metadata for php!
appid = None
app_metadata = None
Expand Down
5 changes: 5 additions & 0 deletions gprofiler/profilers/profiler_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ def _profiling_error_stack(
def snapshot(self) -> ProcessToProfileData:
processes_to_profile = self._select_processes_to_profile()
logger.debug(f"{self.__class__.__name__}: selected {len(processes_to_profile)} processes to profile")
if self._profiler_state.pids_to_profile is not None:
processes_to_profile = [
process for process in processes_to_profile if process.pid in self._profiler_state.pids_to_profile
]
logger.debug(f"Processes left after filtering: {len(processes_to_profile)}")
self._notify_selected_processes(processes_to_profile)

if not processes_to_profile:
Expand Down
5 changes: 5 additions & 0 deletions gprofiler/profilers/python_ebpf.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,11 @@ def snapshot(self) -> ProcessToProfileData:
parsed = python._add_versions_to_stacks(parsed)
profiles = {}
for pid in parsed:
# Because of https://github.com/Granulate/gprofiler/issues/764,
# for now we only filter output of pyperf to return only profiles from chosen pids
if self._profiler_state.pids_to_profile is not None:
if pid not in self._profiler_state.pids_to_profile:
continue
try:
process = Process(pid)
appid = application_identifiers.get_python_app_id(process)
Expand Down
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ def java_args() -> Tuple[str]:

@fixture()
def profiler_state(tmp_path: Path, insert_dso_name: bool) -> ProfilerState:
return ProfilerState(Event(), str(tmp_path), False, insert_dso_name, CPU_PROFILING_MODE, ContainerNamesClient())
return ProfilerState(
Event(), str(tmp_path), False, insert_dso_name, CPU_PROFILING_MODE, list(), ContainerNamesClient()
)


def make_path_world_accessible(path: Path) -> None:
Expand Down
32 changes: 32 additions & 0 deletions tests/test_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,35 @@ def test_container_name_when_stopped(
collapsed_text = wait_for_gprofiler_container(container, output_collapsed)
assert "py-spy> Stopped sampling because process exited" in container.logs().decode()
assert f";{application_container_name};python" in collapsed_text


@pytest.mark.parametrize("in_container", [False])
@pytest.mark.parametrize("runtime", ["java"])
@pytest.mark.parametrize("profiler_type", ["ap"])
def test_profiling_provided_pids(
docker_client: DockerClient,
gprofiler_docker_image: Image,
output_directory: Path,
output_collapsed: Path,
runtime_specific_args: List[str],
profiler_flags: List[str],
application_factory: Callable[[], _GeneratorContextManager],
profiler_type: str,
) -> None:
"""
Tests that gprofiler will profile only processes provided via flag --pids
"""
with application_factory() as pid1:
with application_factory() as pid2:
profiler_flags.extend(["--pids", str(pid1)])
container = start_gprofiler_in_container_for_one_session(
docker_client,
gprofiler_docker_image,
output_directory,
output_collapsed,
runtime_specific_args,
profiler_flags,
)
wait_for_gprofiler_container(container, output_collapsed)
assert f"Profiling process {pid1} with async-profiler" in container.logs().decode()
assert f"Profiling process {pid2} with async-profiler" not in container.logs().decode()