Add files

jojje · Jun 5, 2024 · 36d390a · 36d390a
1 parent 22663ae
commit 36d390a
Show file tree

Hide file tree

Showing 12 changed files with 947 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,25 @@
+
+# Temporary and binary files
+__pycache__
+.*.swp
+
+# Project files
+.idea
+.vscode
+
+# Package files
+*.egg
+*.eggs/
+.installed.cfg
+*.egg-info
+
+# Build and docs folder/files
+build/*
+dist/*
+sdist/*
+
+# Per-project virtualenvs
+.venv*/
+
+# Poetry
+poetry.lock 
diff --git a/LICENSE b/LICENSE
diff --git a/Makefile b/Makefile
@@ -0,0 +1,25 @@
+# Convenience targets for development; everything is run through poetry.
+.PHONY: test lint build publish publish-test init-env clean
+
+# Run the test suite. This is the first target in the file, so a bare `make` runs it.
+test:
+	poetry run pytest
+
+# Static checks: flake8 with a 120 column limit and a few ignored rules, then mypy on the package
+lint:
+	poetry run flake8 --max-line-length 120 --ignore=E302,E305,E231,E226
+	poetry run mypy osmem
+
+# Build a wheel distribution
+build:
+	poetry build --format wheel
+
+# Store the PyPI token in the poetry configuration, then publish.
+# The leading @ keeps make from echoing the command line (and thereby the token).
+# NOTE(review): PYPI_TOKEN is presumably supplied via the environment or the make command line - confirm
+publish:
+	@poetry config pypi-token.pypi $(PYPI_TOKEN)
+	poetry publish
+
+# Only prints the command to use for publishing to the test index; nothing is uploaded
+publish-test:
+	@echo poetry publish -r test-pypi --username=__token__  --password=[api-token]
+
+# Install poetry and then the project's dependencies
+init-env:
+	python -m pip install poetry
+	poetry install
+
+# Remove the dist directory
+clean:
+	rm -rf dist
diff --git a/README.md b/README.md
@@ -0,0 +1,92 @@
+# osmem
+
+Shows memory usage information for process trees
+
+## Usage
+
+Find out the top-n types of processes consuming most memory.
+```
+> osmem top -n 3
+
+  PID  Aggregate  Process
+-----  ---------  -------
+ 1516   12582 MB  firefox.exe
+ 3408    9048 MB  Code.exe
+ 4432    3627 MB  cmdagent.exe
+```
+Aggregates all processes with the same name, which makes it a whole lot easier to understand just how 
+much memory all those tabs one forgets to close in firefox/chrome actually consume.
+
+Get a break-down of all the processes on the system, how much memory each process consumes, and how the
+memory usage aggregates up through the process hierarchy.
+```
+> osmem tree
+
+  PID  Aggregate   Memory  Process
+-----  ---------  -------  -------
+    0      12 MB     0 MB  System Idle Process
+    4      12 MB     0 MB    System
+  800                1 MB      smss.exe
+ 3216               10 MB      MemCompression
+  168                0 MB
+  276              119 MB  Registry
+  688      11 MB     4 MB  cmd.exe
+ 1500                6 MB    conhost.exe
+  920    5496 MB     1 MB  wininit.exe
+ 1088    5482 MB     7 MB    services.exe
+    8                5 MB      svchost.exe
+ 1144                5 MB      svchost.exe
+...
+```
+
+Since some processes are spawned several times with the same name, it may be beneficial to see the actual
+command line arguments for each process. For instance, "svchost" on Windows or "sshd" / "bash" /
+"docker-proxy" on Linux say very little about the _specific_ nature of such a process.
+
+```
+> osmem tree -c
+
+  PID  Aggregate   Memory  Process  Command
+-----  ---------  -------  -------  --------------------
+    0      12 MB     0 MB  System Idle Process
+    4      12 MB     0 MB    System
+  800                1 MB      smss.exe  \SystemRoot\System32\smss.exe
+ 3216               10 MB      MemCompression
+  168                0 MB
+  276              119 MB  Registry
+  688      11 MB     4 MB  cmd.exe  C:\Windows\System32\cmd.exe
+ 1500                6 MB    conhost.exe  \??\C:\Windows\system32\conhost.exe 0x4
+  920    5498 MB     1 MB  wininit.exe  wininit.exe
+ 1088    5484 MB     8 MB    services.exe  C:\Windows\system32\services.exe
+    8                5 MB      svchost.exe  C:\Windows\system32\svchost.exe -k LocalSystemNetworkRestricted -p
+ 1144                4 MB      svchost.exe  C:\Windows\system32\svchost.exe -k LocalService -p -s nsi
+...
+```
+
+```
+$ osmem tree -c
+
+  PID  Aggregate  Memory  Process  Command
+-----  ---------  ------  -------  --------------------
+    1     941 MB   11 MB  systemd  /sbin/init
+  369              46 MB    systemd-journald  /lib/systemd/systemd-journald
+  408               7 MB    systemd-udevd  /lib/systemd/systemd-udevd
+  501               4 MB    rpcbind  /sbin/rpcbind -f -w
+...
+```
+
+For *nix users, this is somewhat similar to `ps -wwef` but gets you the memory usage as well, and the memory aggregation.
+
+## Development
+
+To execute the main function of the program, either of the following options is viable:
+
+* `osmem` as a CLI
+* `python -m osmem` as a python package
+
+To simplify development, common actions are provided via [Makefile](Makefile) targets:
+
+* test - default target; runs pytest on the project
+* lint - performs flake8 and mypy linting
+* build - create a wheel package distribution, ready to be shared with someone else.
+* clean - removes temporary files generated as part of the package creation.
diff --git a/osmem/__init__.py b/osmem/__init__.py
@@ -0,0 +1,12 @@
+"""Package initialisation: exposes the installed version as ``__version__``."""
+import importlib.metadata as _metadata
+
+# Ask the installed distribution metadata for the version; any failure to do so
+# (for instance when the package is not installed) yields the placeholder 'unknown'.
+try:
+    __version__ = _metadata.version('osmem')
+except Exception:
+    __version__ = 'unknown'
+
+# Keep the helper import out of the package namespace
+del _metadata
+
+__all__ = ['__version__']
diff --git a/osmem/__main__.py b/osmem/__main__.py
@@ -0,0 +1,12 @@
+# __main__.py - runs the command line interface when this module is executed
+import os
+import sys
+try:
+    from .cli import main
+except ImportError:
+    # The relative import failed; retry as a plain top-level import.
+    # NOTE(review): presumably to support running this file directly as a script - confirm
+    from cli import main  # type: ignore [import, no-redef]
+
+# give the program a descriptive name if run from python -m
+# (argv[0] is replaced by the directory holding this file, i.e. the package directory)
+if sys.argv[0].endswith('__main__.py'):
+    sys.argv[0] = os.path.dirname(__file__)
+
+main()
diff --git a/osmem/cli.py b/osmem/cli.py
@@ -0,0 +1,64 @@
+# cli.py - main user interface
+# Copyright (C) 2024 Jonas Tingeborn
+
+import argparse
+import sys
+from functools import partial
+
+from .memory import (summarize_process_memory, show_process_tree, output_formatter,
+                     row_printer, size_formatter, Unit)
+
+
+def parse_args():
+    """Parse the command line and return the resulting argparse namespace.
+
+    Two sub-commands are defined, 'tree' and 'top'; the chosen one is stored in
+    the `cmd` attribute. When the program is started without any argument, the
+    help text is printed and the program exits, exactly as if '-h' had been given.
+    """
+    parser = argparse.ArgumentParser(
+        description='Shows memory usage information for process trees'
+    )
+
+    def common(p):
+        # Unit switches shared by every sub-command
+        p.add_argument('-b', action='store_true', help='memory size in bytes')
+        p.add_argument('-k', action='store_true', help='memory size in kilobytes')
+        p.add_argument('-g', action='store_true', help='memory size in gigabytes')
+
+    sp = parser.add_subparsers(dest='cmd', metavar='')
+    tree = sp.add_parser('tree', help="display memory use for the system's process tree")
+    tree.add_argument('-c', '--commands', action='store_true', help='show command line arguments for each process')
+    common(tree)
+
+    top = sp.add_parser('top', help='show process names consuming most memory (aggregates by process name)')
+    top.add_argument('-n', type=int, metavar='N', default=10,
+                     help='top N memory hogs (default: 10)')
+    common(top)
+
+    # No arguments at all: show the help instead of continuing without a sub-command.
+    # An explicit argument list is handed to argparse so that the process-wide
+    # sys.argv is left untouched.
+    args = sys.argv[1:] or ['-h']
+    return parser.parse_args(args)
+
+
+def main():
+    """Program entry point: parse the command line and run the selected sub-command."""
+    opts = parse_args()
+
+    # Unit precedence when several switches are given: -g, then -k, then -b. Megabytes otherwise.
+    if opts.g:
+        unit = Unit.GB
+    elif opts.k:
+        unit = Unit.KB
+    elif opts.b:
+        unit = Unit.B
+    else:
+        unit = Unit.MB
+    format_size = partial(size_formatter, unit=unit)
+
+    if opts.cmd != 'top':
+        # Process tree, optionally with a command line column
+        with_commands = opts.commands
+        layout = partial(output_formatter, show_proc=True, show_commands=with_commands)
+        printer = partial(row_printer, format_size=format_size, show_commands=with_commands)
+        show_process_tree(format_size=format_size, format=layout, print_row=printer,
+                          show_commands=with_commands)
+        return
+
+    # Largest consumers by process name, biggest first
+    layout = partial(output_formatter, show_proc=False, show_commands=False)
+    printer = partial(row_printer, format_size=format_size, show_commands=False)
+    summarize_process_memory(format_size=format_size, format=layout, print_row=printer,
+                             sortby_size=bool(opts.n), max_rows=opts.n, reverse=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/osmem/memory.py b/osmem/memory.py
@@ -0,0 +1,155 @@
+# memory.py - domain functions for the program
+# Copyright (C) 2024 Jonas Tingeborn
+
+from typing import Callable, Tuple, Dict, Optional
+from enum import Enum
+import sys
+
+from .process import get_processes
+
+# Column key ('pid', 'procname', 'procmem', 'totalmem') -> width in characters
+LenMap = Dict[str, int]
+# Prints one row. Positional arguments, in order:
+# fmt, pid, bytes, totbytes, name, cmd, only_total, indent
+PrintRow = Callable[[str, int, int, int, str, str, bool, int], None]
+# Renders a byte count as text
+FormatSize = Callable[[int], str]
+# Given the column widths, returns the row format string and the header text
+FormatOutput = Callable[[LenMap], Tuple[str, str]]
+
+class Unit(Enum):
+    """Units in which memory sizes can be shown."""
+    B = 1   # bytes
+    KB = 2  # kilobytes
+    MB = 3  # megabytes
+    GB = 4  # gigabytes
+
+
+def summarize_process_memory(format_size:FormatSize, format:FormatOutput, print_row:PrintRow,
+                             sortby_size:bool = False, max_rows:Optional[int] = None, reverse:bool = False):
+    """Print one row per process name, with the memory of all same-named processes added up.
+
+    The header is written to stderr; each row is emitted through `print_row`.
+    The PID shown for a name is that of the first process encountered with that name.
+
+    :param format_size: renders a byte count as text
+    :param format: given the column widths, returns the row format string and the header text
+    :param print_row: prints a single row
+    :param sortby_size: order rows by aggregated size, ascending, instead of by name
+    :param max_rows: upper limit on the number of rows; None means no limit and
+                     values below zero are treated as zero
+    :param reverse: reverse the row order (applied before the row limit)
+    """
+    processes = get_processes()
+
+    # Collapse memory use of all processes having the same name. The first process
+    # seen for a name is copied, so the per-process figures in `processes` stay intact
+    # and the column widths derived from them below are not inflated.
+    retained: Dict[str, Dict] = {}
+    for p in processes.values():
+        entry = retained.get(p['name'])
+        if entry is None:
+            retained[p['name']] = dict(p)
+        else:
+            entry['bytes'] += p['bytes']
+
+    # Order by name, or optionally by aggregated size, ascending
+    if sortby_size:
+        names = sorted(retained, key=lambda name: retained[name]['bytes'])
+    else:
+        names = sorted(retained)
+
+    # Optional reversing of result
+    if reverse:
+        names.reverse()
+
+    # Optional limit on number of rows to show. A negative limit must not turn
+    # into a slice counted from the end of the list.
+    if max_rows is not None:
+        names = names[:max(max_rows, 0)]
+
+    # Derive column formatting
+    lens = get_format_lengths(format_size, processes)
+    fmt, header = format(lens)
+
+    print(header, file=sys.stderr)
+    for name in names:
+        p = retained[name]
+        print_row(fmt, p['pid'], p['bytes'], -1, p['name'], '', True, 0)
+
+
+def get_format_lengths(format_size:FormatSize, processes: Dict[int, Dict]) -> Dict[str, int]:
+    """Work out how many characters the widest value of each column needs.
+
+    :param format_size: renders a byte count as text
+    :param processes: process records, each providing at least 'pid' and 'bytes'
+    :return: widths keyed by 'pid', 'procname', 'procmem' and 'totalmem'
+    """
+    records = list(processes.values())
+    pids = [0]
+    sizes = [0]
+    for record in records:
+        pids.append(record['pid'])
+        sizes.append(record['bytes'])
+
+    widths = {}
+    widths['pid'] = len(str(max(pids)))
+    # The widest process name is deliberately not reported: the output looks better
+    # when the column is just as wide as its header.
+    widths['procname'] = 0
+    widths['procmem'] = len(format_size(max(sizes)))
+    # The sum over all processes is the largest figure the aggregate column can hold
+    widths['totalmem'] = len(format_size(sum(sizes)))
+    return widths
+
+
+def show_process_tree(format_size:FormatSize, format:FormatOutput, print_row:PrintRow, show_commands:bool):
+    """Print every process, indented beneath its parent, with own and aggregated memory use.
+
+    The header is written to stderr; each row is emitted through `print_row`.
+
+    :param format_size: renders a byte count as text
+    :param format: given the column widths, returns the row format string and the header text
+    :param print_row: prints a single row
+    :param show_commands: also fetch the command line of each process
+    """
+    processes = get_processes(with_cmd=show_commands)
+
+    def walk(start):
+        # Pre-order traversal using an explicit stack; yields (pid, depth).
+        # A pid is reported only once per traversal, even if it is reachable repeatedly.
+        visited = set()
+        pending = [(start, 0)]
+        while pending:
+            pid, depth = pending.pop()
+            if pid in visited:
+                continue
+            visited.add(pid)
+            node = processes[pid]
+            yield pid, depth
+            # Pushed in reverse so that the first child is the next one popped
+            for child in reversed(list(node['children'])):
+                pending.append((child, depth + 1))
+
+    # Top level processes: those without a parent, or listed as their own parent
+    top_level = [p['pid'] for p in processes.values()
+                 if p['ppid'] is None or p['ppid'] == p['pid']]
+    top_level.sort()
+
+    # Derive column formatting
+    fmt, header = format(get_format_lengths(format_size, processes))
+
+    # Header first, then one tree per top level process, in PID order
+    print(header, file=sys.stderr)
+    for root_pid in top_level:
+        for pid, depth in walk(root_pid):
+            proc = processes[pid]
+            print_row(fmt, pid, proc['bytes'], cummulative_bytes(processes, pid),
+                      proc['name'], proc['cmd'], False, depth)
+
+
+def cummulative_bytes(processes, pid):
+    """Return the bytes used by process `pid` together with all of its descendants.
+
+    Every process is counted at most once. A process that shows up among its own
+    descendants (for instance one listed as its own child) therefore ends the walk
+    instead of recursing without bound.
+
+    :param processes: process records keyed by pid, each providing 'bytes' and 'children'
+    :param pid: the process to start from
+    :raises KeyError: if `pid` or any listed child pid is not present in `processes`
+    """
+    total = 0
+    seen = set()
+    pending = [pid]
+    while pending:
+        current = pending.pop()
+        if current in seen:
+            continue
+        seen.add(current)
+        process = processes[current]
+        total += process['bytes']
+        pending.extend(process['children'])
+    return total
+
+
+def row_printer(fmt:str, pid:int, bytes:int, totbytes:int, name:str, cmd:str, only_total:bool, indent:int,
+                format_size:FormatSize, show_commands:bool):
+    """Print a single output row to stdout.
+
+    :param fmt: %-style format string with one slot per printed column
+    :param pid: process id
+    :param bytes: memory used by the process itself
+    :param totbytes: memory aggregated over the process and its descendants; the
+                     aggregate cell is left blank when negative or equal to `bytes`
+    :param name: process name
+    :param cmd: command line, printed only when `show_commands` is set
+    :param only_total: print just pid, size and name (three columns)
+    :param indent: nesting level; the name is indented by two spaces per level
+    :param format_size: renders a byte count as text
+    :param show_commands: append the command column
+    """
+    hide_aggregate = totbytes < 0 or totbytes == bytes
+    aggregate = '' if hide_aggregate else format_size(totbytes)
+    label = ' ' * (indent * 2) + name
+
+    if only_total:
+        print(fmt % (str(pid), format_size(bytes), label))
+        return
+
+    cells = (str(pid), aggregate, format_size(bytes), label)
+    if show_commands:
+        cells += (cmd,)
+    print(fmt % cells)
+
+
+def output_formatter(len_map:Dict[str, int], show_proc:bool, show_commands:bool) -> Tuple[str, str]:
+    """Build the row format string and the two-line header (titles above dashes).
+
+    :param len_map: widths of the widest values, keyed by 'pid', 'totalmem', 'procmem' and 'procname'
+    :param show_proc: include the per-process 'Memory' column
+    :param show_commands: include the 'Command' column
+    :return: (row format string, header text)
+    """
+    # title, width key, alignment flag ('' right-aligns, '-' left-aligns)
+    sized = (
+        ('PID', 'pid', ''),
+        ('Aggregate', 'totalmem', ''),
+        ('Memory', 'procmem', ''),
+        ('Process', 'procname', '-'),
+    )
+
+    columns = []
+    for title, key, align in sized:
+        # A column is never narrower than its own title
+        width = max(len_map[key], len(title))
+        columns.append((title, f'%{align}{width}s', '-' * width))
+    # The command column is last and of free width; only its underline is fixed
+    columns.append(('Command', '%s', '-' * 20))
+
+    hidden = set()
+    if not show_proc:
+        hidden.add('Memory')
+    if not show_commands:
+        hidden.add('Command')
+
+    titles, specs, rules = zip(*(col for col in columns if col[0] not in hidden))
+    fmt = '  '.join(specs)
+    header = fmt % titles + '\n' + fmt % rules
+    return fmt, header
+
+def size_formatter(n:int, unit:Unit) -> str:
+    """Render the byte count `n` in the given unit.
+
+    Kilobyte, megabyte and gigabyte values are cut down to whole units (the
+    fraction is dropped) and carry the unit as a suffix. Any other unit gives
+    the plain byte count without a suffix.
+    """
+    # unit -> (power of two to divide by, output template)
+    scaled = {
+        Unit.MB: (20, '%.0d MB'),
+        Unit.GB: (30, '%.0d GB'),
+        Unit.KB: (10, '%.0d KB'),
+    }
+    entry = scaled.get(unit)
+    if entry is None:  # bytes
+        return str(n)
+    shift, template = entry
+    return template % (n/(1 << shift))