Single threadpool #24

Merged (5 commits) on Feb 6, 2022
70 changes: 70 additions & 0 deletions .github/workflows/codeql-analysis.yml
@@ -0,0 +1,70 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
push:
branches: [ master ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
schedule:
- cron: '29 8 * * 4'

jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write

strategy:
fail-fast: false
matrix:
language: [ 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://git.io/codeql-language-support

steps:
- name: Checkout repository
uses: actions/checkout@v2

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main

# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v1

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl

# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language

#- run: |
# make bootstrap
# make release

- name: Perform CodeQL Analysis
Collaborator commented:

I didn't know about CodeQL! Looks like SonarQube or Semgrep.

In this PR I don't see any config for the rules it would use (I don't know what those would look like). Is it using a default rule set?

Owner Author commented:

It's a bit weird and magical: I just hit the "Add CodeQL" button and it gave me some YAML for an action :D I don't get much chance to use GitHub these days, so I thought it couldn't hurt to stick this in to try. It's using whatever its default rules are, but I did have to squash a bunch of false positives in the action itself, so it's storing state somewhere too.

I can't say I'm super impressed so far, but SAST tools always feel a bit awkward. I dropped Bandit in this PR too, as it irritated me enough.

uses: github/codeql-action/analyze@v1
11 changes: 1 addition & 10 deletions .github/workflows/lint.yml
@@ -22,13 +22,4 @@ jobs:
run: poetry install -n -v

- name: Lint with flakeheaven
run: poetry run flakeheaven lint

- name: Lint with black
run: poetry run black . --check

- name: Lint with isort
run: poetry run isort . --check

- name: Lint with mypy
run: poetry run mypy .
run: poetry run pre-commit run --all-files
Collaborator commented:

Yes, I love pre-commit!

25 changes: 0 additions & 25 deletions .github/workflows/test.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .gitignore
@@ -1,4 +1,4 @@

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
53 changes: 53 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,53 @@
exclude: "^docs/gitbook/"
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.3.0 # Use the ref you want to point at
hooks:
- id: trailing-whitespace
- id: check-ast
- id: check-case-conflict
- id: debug-statements
- id: check-yaml

- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
name: isort (python)

- repo: https://github.com/ambv/black
rev: 21.12b0
hooks:
- id: black
language_version: python3.8

- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.7.0
hooks:
- id: python-use-type-annotations
- id: python-no-eval
- id: python-no-log-warn

- repo: local
hooks:
- id: pytest
name: pytest
entry: poetry run pytest tests
Collaborator commented:

Should a failing unit test prevent me from ever committing my code? (I think Pytest has an annotation to allow failures.)

Should I have to wait for the unit tests to complete every time I make an incremental commit? (I haven't checked yet to see how long they take now.)

I don't mind linters and other static analyzers here in pre-commit, but I would tend to avoid having such a heavyweight check in the pre-commit stage.

What do you think?

Owner Author commented:

I think this is a mildly controversial choice, but I like it for how it shapes thinking and keeps testing front of mind. The tests/ folder is fast and should always be kept fast; slowing it down would be a regression or a bad change in general. We can use a different test dir for slower/integration tests.

Pros: as a tool whose established API contract we don't really want to break, encouraging tests to be kept up to date per commit and in small units of change is a positive. Additional features shouldn't be too burdensome to add tests alongside at commit time, and failing commits can still be pushed with `--no-verify` for WIP changes on branches.

That said, it's super annoying for this kind of PR and any other big architectural changes. Hopefully this will be the last of those! If there's feedback that it's a net negative, I'm open to dropping it.

Collaborator commented:

Okay, let's try it!

language: system
pass_filenames: false
# alternatively you could `types: [python]` so it only runs when python files change
# though tests might be invalidated if you were to, say, change a data file
always_run: true

- id: flakeheaven
name: flakeheaven
entry: poetry run flakeheaven lint
language: system
pass_filenames: false

- id: mypy
name: mypy
entry: poetry run mypy .
language: system
pass_filenames: false

8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.5.0] - 2022-02-06
### Added
- thread_workers argument

### Fixed
- Memory leak when running in large organisations: botocove now allows
completed Session objects to be garbage collected

## [1.4.1] - 2022-01-15
### Added
- Support for Policy and PolicyArn restriction on assumed roles
Expand Down
56 changes: 43 additions & 13 deletions README.md
@@ -9,7 +9,7 @@ account.
- Easy
- Dolphin Themed 🐬

A simple decorator for functions to remove time and complexity burden. Uses
`ThreadPoolExecutor` to run boto3 sessions against one to all
of your AWS accounts at (nearly!) the same speed as running against one.

@@ -79,13 +79,13 @@ def get_iam_users(session):

def main():
# No session passed as the decorator injects it
all_results = get_iam_users()
# Now returns a Dict with keys Results, Exceptions and FailedAssumeRole

# A list of dictionaries for each account, with account details included.
# Each account's get_iam_users return is in a "Result" key.
print(all_results["Results"])

# A list of dictionaries for each account that raised an exception
print(all_results["Exceptions"])

@@ -96,7 +96,7 @@ def main():
## Arguments

### Cove
`@cove()`:

Uses boto3 credential chain to get every AWS account within the
organization, assume the `OrganizationAccountAccessRole` in it and run the
@@ -119,12 +119,12 @@ be ignored.

`rolename`: Optional[str]

An IAM role name that Cove will attempt to assume in all target accounts.
Defaults to the AWS Organization default, `OrganizationAccountAccessRole`.

`role_session_name`: Optional[str]

An IAM role session name that will be passed to each Cove session's `sts.assume_role()` call.
Defaults to the name of the role being used if unset.

`policy`: Optional[str]
@@ -154,12 +154,20 @@ It is vital to run interruptible, idempotent code with this argument as `True`.

Defaults to True. When True, will leverage the Boto3 Organizations API to list
all accounts in the organization, and enrich each `CoveSession` with information
available (`Id`, `Arn`, `Name`).
available (`Id`, `Arn`, `Name`, `Status`, `Email`). Disabling this and providing your
own full list of accounts may be a desirable optimisation if speed is an issue.

`org_master=False` means `target_ids` must be provided (as no list of accounts
can be created for you), as well as likely `rolename`. Only `Id` will be
available to `CoveSession`.

`thread_workers`: int

Defaults to 20. Cove uses a `ThreadPoolExecutor` under the hood, which can be tuned
with this argument. The number of thread workers directly correlates with memory usage: see
[Is botocove thread safe?](#is-botocove-thread-safe)


### CoveSession

Cove supplies an enriched Boto3 session to each function called. Account details
@@ -180,7 +188,7 @@ def do_nothing(session: CoveSession):
Wrapped functions return a dictionary. Each value contains List[Dict[str, Any]]:
```
{
"Results": results,
"Exceptions": exceptions,
"FailedAssumeRole": invalid_sessions,
}
@@ -195,13 +203,35 @@ An example of cove_output["Results"]:
'Status': 'ACTIVE',
'AssumeRoleSuccess': True,
'Result': wrapped_function_return_value # Result of wrapped func
}
]
```
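For illustration, the output shape above can be consumed by walking each bucket. This is a minimal sketch: the account data in `sample_output` is invented, and `summarise` is a hypothetical helper, not part of botocove:

```python
# Sketch: consuming a Cove-style output dict. The keys mirror the README
# above; the sample account values are made up for illustration.
sample_output = {
    "Results": [
        {"Id": "123456789012", "Status": "ACTIVE", "AssumeRoleSuccess": True, "Result": 3},
    ],
    "Exceptions": [
        {"Id": "210987654321", "AssumeRoleSuccess": True, "ExceptionDetails": "AccessDenied"},
    ],
    "FailedAssumeRole": [
        {"Id": "111111111111", "AssumeRoleSuccess": False},
    ],
}

def summarise(output):
    """Return (succeeded, errored, unreachable) account id lists."""
    ok = [r["Id"] for r in output["Results"]]
    errored = [e["Id"] for e in output["Exceptions"]]
    unreachable = [f["Id"] for f in output["FailedAssumeRole"]]
    return ok, errored, unreachable

ok, errored, unreachable = summarise(sample_output)
print(ok, errored, unreachable)
```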

### Is botocove thread safe?

botocove is thread safe, but the number of threaded executions will be bound by memory,
network IO and AWS API rate limiting. Defaulting to 20 thread workers is a reasonable
starting point, and can be further tuned for runtime with experimentation.

botocove has no constraint on, or understanding of, the function it wraps: it is
recommended to avoid shared state in botocove-wrapped functions, and to write simple
functions that are idempotent and independent.

[Boto3 Session objects are not natively thread safe and should not be shared across threads](https://boto3.amazonaws.com/v1/documentation/api/1.14.31/guide/session.html#multithreading-or-multiprocessing-with-sessions).
However, botocove is instantiating a new Session object per thread/account and running
decorated functions inside their own closure. A shared client is used from the host account
that botocove is run from (e.g. an organization master account):
[clients are threadsafe](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/clients.html#multithreading-or-multiprocessing-with-clients) and allow this.
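The per-thread session pattern described above can be sketched without boto3. `FakeSession` below is a hypothetical stand-in for `boto3.session.Session`; the point is that each worker constructs its own instance inside the thread rather than sharing one across threads:

```python
from concurrent.futures import ThreadPoolExecutor

# Sketch of the pattern botocove uses: one session object per thread/account,
# never shared between threads. FakeSession stands in for
# boto3.session.Session, which is not thread safe to share.
class FakeSession:
    def __init__(self, account_id):
        self.account_id = account_id

    def do_work(self):
        # In botocove this is where the wrapped function runs with its own
        # CoveSession; here we just echo the account id.
        return f"ran in {self.account_id}"

def run_in_account(account_id):
    session = FakeSession(account_id)  # fresh session per thread
    return session.do_work()

accounts = ["111111111111", "222222222222", "333333333333"]
with ThreadPoolExecutor(max_workers=20) as pool:  # cf. thread_workers=20
    results = list(pool.map(run_in_account, accounts))
print(results)
```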

boto3 sessions have a significant memory footprint.
Version 1.5.0 of botocove was rewritten to ensure that boto3 sessions are released
after completion, which resolved memory starvation issues. This was discussed in
https://github.com/connelldave/botocove/issues/20; a relevant boto3 issue is
https://github.com/boto/boto3/issues/1670.

### botocove?

It turns out that the Amazon's Boto dolphins are solitary or small-group animals,
unlike the large pods of dolphins in the oceans. This killed my "large group of
boto" idea, so the next best idea was where might they all shelter together... a
cove!
25 changes: 18 additions & 7 deletions botocove/cove_decorator.py
@@ -5,16 +5,16 @@

from boto3.session import Session

from botocove.cove_host_account import CoveHostAccount
from botocove.cove_runner import CoveRunner
from botocove.cove_sessions import CoveSessions
from botocove.cove_types import CoveOutput, CoveSessionInformation, R

logger = logging.getLogger(__name__)


def dataclass_converter(d: CoveSessionInformation) -> Dict[str, Any]:
"""Unpack dataclass into dict and remove None values"""
return {k: v for k, v in asdict(d).items() if v}
return {k: v for k, v in asdict(d).items() if v is not None}


def cove(
@@ -29,11 +29,13 @@ def cove(
assuming_session: Optional[Session] = None,
raise_exception: bool = False,
org_master: bool = True,
thread_workers: int = 20
) -> Callable:
def decorator(func: Callable[..., R]) -> Callable[..., CoveOutput]:
@functools.wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> CoveOutput:
valid_sessions, invalid_sessions = CoveSessions(

host_account = CoveHostAccount(
target_ids=target_ids,
ignore_ids=ignore_ids,
rolename=rolename,
@@ -42,23 +44,32 @@ def wrapper(*args: Any, **kwargs: Any) -> CoveOutput:
policy_arns=policy_arns,
org_master=org_master,
assuming_session=assuming_session,
).get_cove_sessions()
)

runner = CoveRunner(
valid_sessions=valid_sessions,
host_account=host_account,
func=func,
raise_exception=raise_exception,
func_args=args,
func_kwargs=kwargs,
thread_workers=thread_workers,
)

output = runner.run_cove_function()

# Rewrite dataclasses into untyped dicts to retain current functionality
return CoveOutput(
FailedAssumeRole=[dataclass_converter(f) for f in invalid_sessions],
Results=[dataclass_converter(r) for r in output["Results"]],
Exceptions=[dataclass_converter(e) for e in output["Exceptions"]],
Exceptions=[
dataclass_converter(e)
for e in output["Exceptions"]
if e.AssumeRoleSuccess
],
FailedAssumeRole=[
dataclass_converter(f)
for f in output["Exceptions"]
if not f.AssumeRoleSuccess
],
)

return wrapper
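The decorator diff above makes two behavioural changes: `dataclass_converter` now filters on `is not None` (so falsy-but-meaningful values such as `AssumeRoleSuccess=False` survive the conversion), and the runner's exceptions are partitioned by `AssumeRoleSuccess` into `Exceptions` and `FailedAssumeRole`. A minimal sketch of both, using a simplified stand-in dataclass rather than the real `CoveSessionInformation`:

```python
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Tuple

# Simplified stand-in for CoveSessionInformation; the real dataclass lives
# in botocove/cove_types.py and carries more fields.
@dataclass
class SessionInfo:
    Id: str
    AssumeRoleSuccess: bool
    Result: Optional[Any] = None

def dataclass_converter(d: SessionInfo) -> Dict[str, Any]:
    # Filtering on `is not None` (not truthiness) keeps falsy-but-real
    # values like AssumeRoleSuccess=False or Result=0 in the output.
    return {k: v for k, v in asdict(d).items() if v is not None}

def partition(exceptions: List[SessionInfo]) -> Tuple[List[Dict], List[Dict]]:
    # Mirrors the decorator's split: sessions that assumed the role but
    # raised go to Exceptions; failed role assumptions go to FailedAssumeRole.
    raised = [dataclass_converter(e) for e in exceptions if e.AssumeRoleSuccess]
    failed = [dataclass_converter(f) for f in exceptions if not f.AssumeRoleSuccess]
    return raised, failed

raised, failed = partition([
    SessionInfo("111111111111", True, Result=0),
    SessionInfo("222222222222", False),
])
print(raised, failed)
```

Note that under the old `if v` filter, both `AssumeRoleSuccess: False` and `Result: 0` would have been silently dropped from the converted dicts.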