diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..b2723ee --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,16 @@ +# Codecov configuration to make it a bit less noisy +coverage: + status: + patch: false + project: + default: + threshold: 50% +comment: + layout: "header" + require_changes: false + branches: null + behavior: default + flags: null + paths: null +ignore: + - "basicrta/_version.py" diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..683b072 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +basicrta/_version.py export-subst diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..0654834 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,34 @@ +--- +name: Bug report +about: Create a report to help us improve + +--- + +## Expected behavior ## + + + + +## Actual behavior ## + + + + +## Code to reproduce the behavior ## + + + +``` python +import basicrta + +... + +``` + +## Current environment ## + +- Which version are you using? (run `python -c "import basicrta; print(basicrta.__version__)"`) +- Which version of Python (`python -V`)? +- Which operating system? +- What is the output of `pip list`? +- If you use conda, what is the output of `conda list`? \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..0be4ea9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project + +--- + +## Is your feature request related to a problem? 
## + + + +## Describe the solution you'd like ## + + + +## Describe alternatives you've considered ## + + + +## Additional context ## + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..90d6c3f --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,17 @@ + + + +Fixes # + +Changes made in this Pull Request: + + - + - + + +PR Checklist +------------ + - [ ] Tests? + - [ ] Docs? + - [ ] CHANGELOG updated? + - [ ] Issue raised/referenced? diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml new file mode 100644 index 0000000..d0d6124 --- /dev/null +++ b/.github/workflows/gh-ci.yaml @@ -0,0 +1,169 @@ +name: GH Actions CI +on: + push: + branches: + - main + pull_request: + branches: + - main + schedule: + # Weekly tests at midnight on Sundays run on main by default: + # Scheduled workflows run on the latest commit on the default or base branch. + # (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule) + - cron: "0 0 * * 0" + +concurrency: + # Specific group naming so CI is only cancelled + # within same PR or on merge to main + group: ${{ github.ref }}-${{ github.head_ref }}-${{ github.workflow }} + cancel-in-progress: true + +defaults: + run: + shell: bash -l {0} + +jobs: + environment-config: + runs-on: ubuntu-latest + outputs: + stable-python-version: ${{ steps.get-compatible-python.outputs.stable-python }} + python-matrix: ${{ steps.get-compatible-python.outputs.python-versions }} + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - id: get-compatible-python + uses: MDAnalysis/mdanalysis-compatible-python@main + with: + release: "latest" + + main-tests: + if: "github.repository == 'rsexton2/basicrta'" + needs: environment-config + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macOS-latest, ubuntu-latest, windows-latest] + python-version: ${{ 
fromJSON(needs.environment-config.outputs.python-matrix) }} + mdanalysis-version: ["latest", "develop"] + + steps: + - uses: actions/checkout@v4 + + - name: Build information + run: | + uname -a + df -h + ulimit -a + + + # More info on options: https://github.com/conda-incubator/setup-miniconda + - name: Install conda dependencies + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + environment-file: devtools/conda-envs/test_env.yaml + add-pip-as-python-dependency: true + architecture: x64 + + miniforge-variant: Mambaforge + use-mamba: true + channels: conda-forge, defaults + + activate-environment: basicrta-test + auto-update-conda: true + auto-activate-base: false + show-channel-urls: true + + + - name: Install MDAnalysis version + uses: MDAnalysis/install-mdanalysis@main + with: + version: ${{ matrix.mdanalysis-version }} + install-tests: false + installer: mamba + shell: bash -l {0} + + - name: Install package + run: | + python --version + python -m pip install . 
--no-deps + + - name: Python information + run: | + which python + which pip + pip list + + conda info + conda list + + + - name: Run tests + run: | + pytest -n 2 -v --cov=basicrta --cov-report=xml --color=yes basicrta/tests/ + + - name: codecov + if: github.repository == 'rsexton2/basicrta' && github.event_name != 'schedule' + uses: codecov/codecov-action@v3 + with: + file: coverage.xml + name: codecov-${{ matrix.os }}-py${{ matrix.python-version }} + verbose: True + + + pylint_check: + if: "github.repository == 'rsexton2/basicrta'" + needs: environment-config + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ needs.environment-config.outputs.stable-python-version }} + + - name: Install Pylint + run: | + which pip + which python + pip install pylint mdanalysis + + - name: Run Pylint + env: + PYLINTRC: .pylintrc + run: | + pylint basicrta + + + pypi_check: + if: "github.repository == 'rsexton2/basicrta'" + needs: environment-config + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ needs.environment-config.outputs.stable-python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ needs.environment-config.outputs.stable-python-version }} + + - name: Install dependencies + run: | + pip install pipx twine + + - name: Build package + run: | + python -m pipx run build --sdist + + - name: Check package build + run: | + DISTRIBUTION=$(ls -t1 dist/basicrta-*.tar.gz | head -n 1) + test -n "${DISTRIBUTION}" || { echo "no distribution dist/basicrta-*.tar.gz found"; exit 1; } + echo "twine check $DISTRIBUTION" + twine check $DISTRIBUTION diff --git a/.gitignore b/.gitignore deleted file mode 100644 index b8ca166..0000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*~ -__pycache__ -*.egg-info diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 73f69e0..0000000 --- 
a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml -# Editor-based HTTP Client requests -/httpRequests/ diff --git a/.idea/basic-rta.iml b/.idea/basic-rta.iml deleted file mode 100644 index a027b29..0000000 --- a/.idea/basic-rta.iml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2d..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index f8283c3..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index ebe7184..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/other.xml b/.idea/other.xml deleted file mode 100644 index 640fd80..0000000 --- a/.idea/other.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..062c53f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,21 @@ + +# To install the git pre-commit hook run: +# pre-commit install +# To update the pre-commit hooks run: +# pre-commit install-hooks +exclude: '^(\.tox|ci/templates|\.bumpversion\.cfg)(/|$)' +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: master + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: debug-statements + - 
repo: https://github.com/timothycrosley/isort + rev: master + hooks: + - id: isort + - repo: https://gitlab.com/pycqa/flake8 + rev: master + hooks: + - id: flake8 diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..a889cba --- /dev/null +++ b/.pylintrc @@ -0,0 +1,612 @@ +# -*- Mode: conf; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore= + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=no + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. This option is deprecated +# and it will be removed in Pylint 2.0. 
+optimize-ast=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +disable=all + +enable=abstract-class-instantiated, + access-member-before-definition, + anomalous-unicode-escape-in-string, + apply-builtin, + assert-on-tuple, + assigning-non-slot, + assignment-from-no-return, + backtick, + bad-except-order, + bad-exception-context, + bad-format-character, + bad-format-string, + bad-format-string-key, + bad-open-mode, + bad-reversed-sequence, + bad-staticmethod-argument, + bad-str-strip-call, + binary-op-exception, + boolean-datetime + buffer-builtin, + catching-non-exception, + cmp-builtin, + cmp-method, + coerce-builtin, + coerce-method, + confusing-with-statement, + continue-in-finally, + delslice-method, + deprecated-method, + deprecated-module, + dict-iter-method, + dict-view-method, + duplicate-argument-name, + duplicate-bases, + duplicate-except, + duplicate-key, + eval-used, + exec-used, + execfile-builtin, + file-builtin, + format-combined-specification, + format-needs-mapping, + getslice-method, + global-at-module-level, + global-statement, + global-variable-not-assigned, + global-variable-undefined, + hex-method, + import-star-module-level, + inconsistent-mro, + inherit-non-class, + init-is-generator, + input-builtin, + intern-builtin, + invalid-all-object, + invalid-encoded-data, + invalid-format-index, + invalid-slots, + invalid-slots-object, + invalid-star-assignment-target, + logging-format-truncated, + logging-not-lazy, + logging-too-few-args, + logging-too-many-args, + logging-unsupported-format, + 
long-suffix, + lost-exception, + lowercase-l-suffix, + method-hidden, + misplaced-bare-raise, + misplaced-future, + missing-format-argument-key, + missing-format-attribute, + missing-format-string-key, + missing-kwoa, + missing-super-argument, + mixed-format-string, + mixed-indentation, + no-init, + no-method-argument, + non-iterator-returned, + non-parent-init-called, + nonexistent-operator, + nonlocal-and-global, + nonlocal-without-binding, + nonstandard-exception, + not-a-mapping, + not-an-iterable, + not-async-context-manager, + not-context-manager, + not-in-loop, + notimplemented-raised, + oct-method, + old-ne-operator, + old-octal-literal, + old-raise-syntax, + parameter-unpacking, + raising-non-exception, + raising-string, + raw_input-builtin, + redefine-in-handler, + redundant-keyword-arg, + redundant-unittest-assert, + reload-builtin, + repeated-keyword, + return-arg-in-generator, + return-in-init, + return-outside-function, + setslice-method, + signature-differs, + slots-on-old-class, + standarderror-builtin, + star-needs-assignment-target, + super-on-old-class, + too-few-format-args, + too-few-public-methods, + too-many-function-attributes, + too-many-star-expressions, + truncated-format-string, + unexpected-keyword-arg, + unichr-builtin, + unicode-builtin, + unnecessary-pass, + unpacking-in-except, + unreachable, + unsubscriptable-object, + unsupported-binary-operation, + unsupported-membership-test, + unused-format-string-argument, + unused-format-string-key, + useless-else-on-loop, + using-cmp-argument, + using-constant-test, + yield-inside-async-function, + yield-outside-function, + old-division, + relative-import, + no-absolute-import, + print-statement, + xrange-builtin, + zip-builtin-not-iterating, + map-builtin-not-iterating, + +# Things we'd like to try. +# Procedure: +# 1. Enable a bunch. +# 2. See if there's spurious ones; if so disable. +# 3. Record above. +# 4. Remove from this list. 
+ # abstract-method, + # anomalous-backslash-in-string, + # arguments-differ, + # assignment-from-none, + # attribute-defined-outside-init, + # bad-builtin, + # bad-indentation, + # bad-super-call, + # bare-except, + # basestring-builtin, + # broad-except, + # cell-var-from-loop, + # dangerous-default-value, + # deprecated-lambda, + # expression-not-assigned, + # filter-builtin-not-iterating, + # fixme, + # function-redefined, + # import-error, + # indexing-exception, + # invalid-name, + # invalid-sequence-index, + # invalid-slice-index, + # logging-format-interpolation, + # long-builtin, + # metaclass-assignment, + # missing-docstring, + # next-method-called, + # no-member, + # no-self-argument, + # no-value-for-parameter, + # nonzero-method, + # not-callable, + # pointless-statement, + # pointless-string-statement, + # property-on-old-class, + # protected-access, + # redefined-builtin, + # redefined-outer-name, + # reduce-builtin, + # reimported, + # round-builtin, + # super-init-not-called, + # too-many-arguments, + # too-many-format-args, + # too-many-function-args, + # too-many-locals, + # undefined-all-variable, + # undefined-loop-variable, + # unexpected-special-method-signature, + # unnecessary-lambda, + # unnecessary-semicolon, + # unpacking-non-sequence, + # unused-argument, + # unused-import, + # unused-variable, + # unused-wildcard-import, + # used-before-assignment, + # wildcard-import, + # wrong-import-order, + # invalid-unary-operand-type, + # raising-bad-type, + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=parseable + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". 
This option is deprecated +# and it will be removed in Pylint 2.0. +files-output=no + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_$|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules= + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). 
+ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=2000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 
+expected-line-ending-format= + + +[BASIC] + +# List of builtins function names that should not be used, separated by a comma +bad-functions=map,filter,input + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_,x + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names 
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. 
+valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=10 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=30 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=0 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Analyse import fallback blocks. 
This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..cefcd54 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,23 @@ +# Authors + +basicrta was created by Ricky Sexton in 2024. + + +All contributing authors are listed in this file below. +The repository history at https://github.com/rsexton2/basicrta +and the CHANGELOG show individual code contributions. + +## Chronological list of authors + + + +**2024** +- Ricky Sexton <@rsexton2> \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5041f92 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,37 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + + + +## [Unreleased] + +### Authors + + +### Added + + +### Fixed + + +### Changed + + +### Deprecated + + +### Removed + diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 7258a88..f6ddc52 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,49 +1,99 @@ -# Contributor Covenant Code of Conduct +# Code of conduct + +The basicrta *Code of Conduct* sets the rules for the behavior of +every member in the basicrta community so that everyone can +experience a welcoming, supportive, and productive environment that is +free from harassment. 
+ + +**Table of Contents** + +- [Code of conduct](#code-of-conduct) + - [Code of Conduct and Community Guidelines](#code-of-conduct-and-community-guidelines) + - [Scope](#scope) + - [Enforcement](#enforcement) + - [Acknowledgment](#acknowledgment) + + +## Code of Conduct and Community Guidelines + +basicrta is part of an engaged and respectful community made up of people from all +over the world. Your involvement helps us to further our mission and to create +an open platform that serves a broad range of communities, from research and +education to industry and beyond. This diversity is one of our biggest +strengths, but it can also lead to communication issues and conflicts. +Therefore, we have a few ground rules we ask that our community members adhere +to. + +Fundamentally, we are committed to providing a productive, +harassment-free environment for everyone. Rather than considering this +code an exhaustive list of things that you can’t do, take it in the +spirit it is intended - a guide to make it easier to enrich all of us +and the communities in which we participate. + +Importantly: as a member of our community, you are also a steward of these +values. Not all problems need to be resolved via formal processes, and often a +quick, friendly but clear word on an online forum or in person can help resolve +a misunderstanding and de-escalate things. + +By embracing the following principles, guidelines and actions to follow or +avoid, you will help us make basicrta a welcoming and productive community. + +1. **Be friendly and patient**. + +2. **Be welcoming**. We strive to be a community that welcomes and supports + people of all backgrounds and identities. 
This includes, but is not limited + to, members of any race, ethnicity, culture, national origin, color, + immigration status, social and economic class, educational level, sex, sexual + orientation, gender identity and expression, age, physical appearance, family + status, political belief, technological or professional choices, academic + discipline, religion, mental ability, and physical ability. + +3. **Be considerate**. Your work will be used by other people, and you in turn + will depend on the work of others. Any decision you take will affect users + and colleagues, and you should take those consequences into account when + making decisions. Remember that we're a world-wide community. You may be + communicating with someone with a different primary language or cultural + background. + +4. **Be respectful**. Not all of us will agree all the time, but disagreement is + no excuse for poor behavior or poor manners. We might all experience some + frustration now and then, but we cannot allow that frustration to turn into a + personal attack. It’s important to remember that a community where people + feel uncomfortable or threatened is not a productive one. + +5. **Be careful in the words that you choose**. Be kind to others. Do not insult + or put down other community members. Harassment and other exclusionary + behavior are not acceptable. This includes, but is not limited to: + * threats or violent language directed against another person + * discriminatory jokes and language + * posting sexually explicit or violent material + * posting (or threatening to post) other people's personally identifying + information ("doxing") + * personal insults, especially those using racist or sexist terms + * unwelcome sexual attention + * advocating for, or encouraging, any of the above behavior + * repeated harassment of others. In general, if someone asks you to stop, + then stop + +6. **Moderate your expectations**. Many in our community volunteer their time. 
+ They are probably not purposefully ignoring issues, refusing to engage in + discussion, avoiding features, etc. but often just unavailable. + +7. **When we disagree, try to understand why**. Disagreements, both social and + technical, happen all the time and basicrta is no exception. It is important + that we resolve disagreements and differing views constructively. Remember + that we’re different. The strength of basicrta comes from its varied community + that includes people from a wide range of backgrounds. Different people have + different perspectives on issues. Being unable to understand why someone + holds a viewpoint doesn’t mean they’re wrong. Don’t forget that it is human + to err and blaming each other doesn’t get us anywhere. Instead, focus on + helping to resolve issues and learning from mistakes. + +8. **A simple apology can go a long way**. It can often de-escalate a situation, + and telling someone that you are sorry is act of empathy that doesn’t + automatically imply an admission of guilt. -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, -body size, disability, ethnicity, gender identity and expression, level of -experience, nationality, personal appearance, race, religion, or sexual -identity and orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -Moreover, project maintainers will strive to offer feedback and advice to -ensure quality and consistency of contributions to the code. Contributions -from outside the group of project maintainers are strongly welcomed but the -final decision as to whether commits are merged into the codebase rests with -the team of project maintainers. ## Scope @@ -57,7 +107,7 @@ project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at 'rsexton2@asu.edu'. 
The project team will +reported by contacting the project team at 'risexto878@gmail.com'. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of @@ -67,11 +117,19 @@ Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 1.4, available at -[http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +## Acknowledgment + +Original text was adapted from +the +[*Speak Up!*](http://web.archive.org/web/20141109123859/http://speakup.io/coc.html), +[*Django*](https://www.djangoproject.com/conduct), +[*Contributor Covenant*](http://contributor-covenant.org/), +[*Jupyter*](https://github.com/jupyter/governance/blob/master/conduct/code_of_conduct.md), +[*MolSSI*](https://github.com/MolSSI/cookiecutter-cms/blob/master/%7B%7Bcookiecutter.repo_name%7D%7D/CODE_OF_CONDUCT.md), +and [*MDAnalysis*](https://github.com/MDAnalysis/mdanalysis/blob/develop/CODE_OF_CONDUCT.md) projects, +modified by basicrta. +We are grateful to those projects for contributing these +materials under open licensing terms for us to easily reuse. + +All content on this page is licensed under a [*Creative Commons +Attribution*](http://creativecommons.org/licenses/by/3.0/) license. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..35e4866 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,29 @@ +# How to contribute + +We welcome all contributions to basicrta! 
+ +Contributions can take many forms, such as: + +* sharing bug reports or feature requests through the [Issue Tracker](https://github.com/rsexton2/basicrta/issues) +* asking or answering questions, or otherwise joining in on discussions +* adding bug fixes, new features, or otherwise improving the code +* adding or improving documentation + +The second two options both involve making a [pull request](https://github.com/rsexton2/basicrta/pulls) . + +There are many existing guides on how to make a contribution to an open +source project on GitHub. In short, the steps are to: + + * Ensure that you have a [GitHub account](https://github.com/signup/free) + * [Fork](https://help.github.com/articles/fork-a-repo/) the repository into your own account + * On your local machine, [clone](https://help.github.com/articles/cloning-a-repository/) your fork + * Create a development environment from source, following the Installation from source instructions + * Create a new branch off the `main` branch with a meaningful name (e.g. ``git checkout -b fix-issue-39``) + * Add your modifications to the code or documentation + * Add tests if modifying the code + * Commit and push changes to GitHub, and open a pull request + +Guides such as the [MDAnalysis User Guide](https://userguide.mdanalysis.org/stable/contributing.html) +have been written in much more detail to go through these steps more thoroughly. +We strongly encourage you to check those for help, and we welcome any questions. +Thank you for your contribution! 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 108252c..8794964 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,6 @@ include LICENSE include MANIFEST.in -include CODE_OF_CONDUCT.md -include versioneer.py +include AUTHORS.md CHANGELOG.md CODE_OF_CONDUCT.md README.md graft basicrta -global-exclude *.py[cod] __pycache__ *.so \ No newline at end of file +global-exclude *.py[cod] __pycache__ *.so diff --git a/README.md b/README.md index 5a83f51..9a9e413 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,101 @@ -Bayesian Single-Cutoff Residence Time Analysis (basicrta) +basicrta ============================== [//]: # (Badges) -![workflow](https://github.com/Becksteinlab/basicrta/actions/workflows/python-package.yml/badge.svg) -[![codecov](https://codecov.io/gh/Becksteinlab/basicrta/graph/badge.svg?token=WoGPuQEqNI)](https://codecov.io/gh/Becksteinlab/basicrta) -A Bayesian analysis to determine the time-scales involved in protein-small molecule binding events -using molecular dynamics data. 
+| **Latest release** | [![Last release tag][badge_release]][url_latest_release] ![GitHub commits since latest release (by date) for a branch][badge_commits_since] [![Documentation Status][badge_docs]][url_docs]| +| :----------------- | :------- | +| **Status** | [![GH Actions Status][badge_actions]][url_actions] [![codecov][badge_codecov]][url_codecov] | +| **Community** | [![License: GPL v3][badge_license]][url_license] [![Powered by MDAnalysis][badge_mda]][url_mda]| +[badge_actions]: https://github.com/rsexton2/basicrta-kit/actions/workflows/gh-ci.yaml/badge.svg +[badge_codecov]: https://codecov.io/gh/rsexton2/basicrta-kit/branch/main/graph/badge.svg +[badge_commits_since]: https://img.shields.io/github/commits-since/rsexton2/basicrta-kit/latest +[badge_docs]: https://readthedocs.org/projects/basicrta-kit/badge/?version=latest +[badge_license]: https://img.shields.io/badge/License-GPLv2-blue.svg +[badge_mda]: https://img.shields.io/badge/powered%20by-MDAnalysis-orange.svg?logoWidth=16&logo=data:image/x-icon;base64,AAABAAEAEBAAAAEAIAAoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJD+XwCY/fEAkf3uAJf97wGT/a+HfHaoiIWE7n9/f+6Hh4fvgICAjwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACT/yYAlP//AJ///wCg//8JjvOchXly1oaGhv+Ghob/j4+P/39/f3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJH8aQCY/8wAkv2kfY+elJ6al/yVlZX7iIiI8H9/f7h/f38UAAAAAAAAAAAAAAAAAAAAAAAAAAB/f38egYF/noqAebF8gYaagnx3oFpUUtZpaWr/WFhY8zo6OmT///8BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgICAn46Ojv+Hh4b/jouJ/4iGhfcAAADnAAAA/wAAAP8AAADIAAAAAwCj/zIAnf2VAJD/PAAAAAAAAAAAAAAAAICAgNGHh4f/gICA/4SEhP+Xl5f/AwMD/wAAAP8AAAD/AAAA/wAAAB8Aov9/ALr//wCS/Z0AAAAAAAAAAAAAAACBgYGOjo6O/4mJif+Pj4//iYmJ/wAAAOAAAAD+AAAA/wAAAP8AAABhAP7+FgCi/38Axf4fAAAAAAAAAAAAAAAAiIiID4GBgYKCgoKogoB+fYSEgZhgYGDZXl5e/m9vb/9ISEjpEBAQxw8AAFQAAAAAAAAANQAAADcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjo6Mb5iYmP+cnJz/jY2N95CQkO4pKSn/AAAA7gAAAP0AAAD7AAAAhgAAAAEAAAAAAAAAAACL/gsAkv2uAJX/QQAAAAB9fX3egoKC/4CAgP+NjY3/c3Nz+wAAAP8AAAD/AAAA/wAAAPUAAAAcAAAAAAAAAAAAnP4NAJL9rgCR/0YAAAA
AfX19w4ODg/98fHz/i4uL/4qKivwAAAD/AAAA/wAAAP8AAAD1AAAAGwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALGxsVyqqqr/mpqa/6mpqf9KSUn/AAAA5QAAAPkAAAD5AAAAhQAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADkUFBSuZ2dn/3V1df8uLi7bAAAATgBGfyQAAAA2AAAAMwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB0AAADoAAAA/wAAAP8AAAD/AAAAWgC3/2AAnv3eAJ/+dgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA9AAAA/wAAAP8AAAD/AAAA/wAKDzEAnP3WAKn//wCS/OgAf/8MAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIQAAANwAAADtAAAA7QAAAMAAABUMAJn9gwCe/e0Aj/2LAP//AQAAAAAAAAAA +[badge_release]: https://img.shields.io/github/release-pre/rsexton2/basicrta-kit.svg +[url_actions]: https://github.com/rsexton2/basicrta-kit/actions?query=branch%3Amain+workflow%3Agh-ci +[url_codecov]: https://codecov.io/gh/rsexton2/basicrta-kit/branch/main +[url_docs]: https://basicrta-kit.readthedocs.io/en/latest/?badge=latest +[url_latest_release]: https://github.com/rsexton2/basicrta-kit/releases +[url_license]: https://www.gnu.org/licenses/gpl-2.0 +[url_mda]: https://www.mdanalysis.org +A package to extract binding kinetics from molecular dynamics simulations + +basicrta is bound by a [Code of Conduct](https://github.com/rsexton2/basicrta-kit/blob/main/CODE_OF_CONDUCT.md). + +### Installation + +To build basicrta from source, +we highly recommend using virtual environments. +If possible, we strongly recommend that you use +[Anaconda](https://docs.conda.io/en/latest/) as your package manager. +Below we provide instructions both for `conda` and +for `pip`. + +#### With conda + +Ensure that you have [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) installed. + +Create a virtual environment and activate it: + +``` +conda create --name basicrta +conda activate basicrta +``` + +Install the development and documentation dependencies: + +``` +conda env update --name basicrta --file devtools/conda-envs/test_env.yaml +conda env update --name basicrta --file docs/requirements.yaml +``` + +Build this package from source: + +``` +pip install -e . 
+```
+
+If you want to update your dependencies (which can be risky!), run:
+
+```
+conda update --all
+```
+
+And when you are finished, you can exit the virtual environment with:
+
+```
+conda deactivate
+```
+
+#### With pip
+
+To build the package from source, run:
+
+```
+pip install .
+```
+
+If you want to create a development environment, install
+the dependencies required for tests and docs with:
+
+```
+pip install ".[test,doc]"
+```
 
 ### Copyright
 
-Copyright (c) 2022, Ricky Sexton
+The basicrta source code is hosted at https://github.com/rsexton2/basicrta-kit
+and is available under the GNU General Public License, version 2 (see the file [LICENSE](https://github.com/rsexton2/basicrta-kit/blob/main/LICENSE)).
+
+Copyright (c) 2024, Ricky Sexton
+
 
 #### Acknowledgements
 
 Project based on the
-[Computational Molecular Science Python Cookiecutter](https://github.com/molssi/cookiecutter-cms) version 1.6.
+[MDAnalysis Cookiecutter](https://github.com/MDAnalysis/cookiecutter-mda) version 0.1.
+Please cite [MDAnalysis](https://github.com/MDAnalysis/mdanalysis#citation) when using basicrta in published work.
diff --git a/basicrta/__init__.py b/basicrta/__init__.py
index a29b7ea..472e0eb 100644
--- a/basicrta/__init__.py
+++ b/basicrta/__init__.py
@@ -1,3 +1,4 @@
+# -- legacy versioneer version information (superseded below) --
 """Bayesian single-cutoff residence time analysis"""
 
 # Add imports here
@@ -10,3 +11,14 @@ __version__ = versions['version']
 __git_revision__ = versions['full-revisionid']
 
 del get_versions, versions
+# -- importlib.metadata version; overrides the versioneer value above --
+"""
+basicrta
+A package to extract binding kinetics from molecular dynamics simulations
+"""
+
+# Add imports here
+from importlib.metadata import version
+
+__version__ = version("basicrta")
+
diff --git a/basicrta/contacts.py b/basicrta/contacts.py
new file mode 100755
index 0000000..8ca4f74
--- /dev/null
+++ b/basicrta/contacts.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+import os
+os.environ['MKL_NUM_THREADS'] = '1'
+from tqdm import tqdm
+import numpy as np
+import multiprocessing
+from MDAnalysis.lib import distances
+import collections
+from multiprocessing import Pool, Lock
+import MDAnalysis as mda
+import pickle
+import glob
+from basicrta import istarmap
+
+
+class MapContacts(object):
+    """
+    This class is used to create the map of contacts between two groups of
+    atoms. A single cutoff is used to define a contact between the two groups,
+    where if any atomic distance between the two groups is less than the cutoff,
+    a contact is considered formed.
+ """ + + def __init__(self, u, ag1, ag2, nproc=1, frames=None, cutoff=10.0, + nslices=100): + self.u, self.nproc = u, nproc + self.ag1, self.ag2 = ag1, ag2 + self.cutoff, self.frames, self.nslices = cutoff, frames, nslices + + + def run(self): + if self.frames: + sliced_frames = np.array_split(self.frames, self.nslices) + else: + sliced_frames = np.array_split(np.arange(len(self.u.trajectory)), + self.nslices) + + input_list = [[i, self.u.trajectory[aslice]] for + i, aslice in enumerate(sliced_frames)] + + lens = [] + with Pool(nproc, initializer=tqdm.set_lock, initargs=(Lock(),)) as p: + for alen in tqdm(p.istarmap(self._run_contacts, input_list), + total=self.nslices, position=0, + desc='overall progress'): + lens.append(alen) + lens = np.array(lens) + mapsize = sum(lens) + bounds = np.concatenate([[0], np.cumsum(lens)]) + dtype = np.dtype(np.float64, + metadata={'top': self.u.filename, + 'traj': self.u.trajectory.filename, + 'ag1': ag1, 'ag2': ag2}) + + contact_map = np.memmap('.tmpmap', mode='w+', + shape=(mapsize, 5), dtype=np.float64) + for i in range(self.nslices): + contact_map[bounds[i]:bounds[i+1]] = np.genfromtxt(f'.contacts_' + f'{i:04}', + delimiter=',') + contact_map.flush() + + contact_map.dump('contacts.pkl', protocol=5) + # os.remove('.tmpmap') + # cfiles = glob.glob('.contacts*') + # [os.remove(f) for f in cfiles] + print('\nSaved contacts as "contacts.pkl"') + + + def _run_contacts(self, i, sliced_traj): + from basicrta.util import get_dec + + try: + proc = int(multiprocessing.current_process().name.split('-')[-1]) + except ValueError: + proc = 1 + + # dec = get_dec(self.u.trajectory.ts.dt/1000) # convert to ns + # text = f'slice {i+1} of {self.nslices}' + # dset = [] + # for ts in tqdm(sliced_traj, desc=text, position=proc, + # total=len(sliced_traj), leave=False): + # b = distances.capped_distance(self.ag1.positions, + # self.ag2.positions, + # max_cutoff=self.cutoff) + # pairlist = [(self.ag1.resids[b[0][i, 0]], + # self.ag2.resids[b[0][i, 
1]]) for i in + # range(len(b[0]))] + # pairdir = collections.Counter(a for a in pairlist) + # lsum = 0 + # for j in pairdir: + # temp = pairdir[j] + # dset.append([ts.frame, j[0], j[1], + # min(b[1][lsum:lsum+temp]), + # np.round(ts.time, dec)/1000]) # convert to ns + # lsum += temp + # np.save(f'.contacts_{i:04}', np.array(dset)) + # return len(dset) + with open(f'.contacts_{i:04}', 'w+') as f: + dec = get_dec(self.u.trajectory.ts.dt/1000) # convert to ns + text = f'slice {i+1} of {self.nslices}' + data_len = 0 + for ts in tqdm(sliced_traj, desc=text, position=proc, + total=len(sliced_traj), leave=False): + dset = [] + b = distances.capped_distance(self.ag1.positions, + self.ag2.positions, + max_cutoff=self.cutoff) + pairlist = [(self.ag1.resids[b[0][i, 0]], + self.ag2.resids[b[0][i, 1]]) for i in + range(len(b[0]))] + pairdir = collections.Counter(a for a in pairlist) + lsum = 0 + for j in pairdir: + temp = pairdir[j] + dset.append([ts.frame, j[0], j[1], + min(b[1][lsum:lsum+temp]), + np.round(ts.time, dec)/1000]) # convert to ns + lsum += temp + [f.write(f"{line}".strip('[]') + "\n") for line in dset] + data_len += len(dset) + f.flush() + return data_len + + # def _run_contacts(self, i, sliced_traj): + # from basicrta.util import get_dec + # + # data_len = 0 + # oldmap = np.memmap(f'.tmpmap', mode='w+', shape=(data_len + 1, 5), + # dtype=np.float64) + # del oldmap + # + # dec = get_dec(self.u.trajectory.ts.dt/1000) # convert to ns + # text = f'process {i+1} of {self.nproc}' + # for ts in tqdm(sliced_traj, desc=text, position=i, + # total=len(sliced_traj), leave=False): + # oldmap = np.memmap(f'.tmpmap', mode='r', shape=(data_len+1, 5), + # dtype=np.float64) + # dset = [] + # b = distances.capped_distance(self.ag1.positions, + # self.ag2.positions, + # max_cutoff=self.cutoff) + # pairlist = [(self.ag1.resids[b[0][i, 0]], + # self.ag2.resids[b[0][i, 1]]) for i in + # range(len(b[0]))] + # pairdir = collections.Counter(a for a in pairlist) + # lsum = 0 + # for j 
in pairdir: + # temp = pairdir[j] + # dset.append([ts.frame, j[0], j[1], + # min(b[1][lsum:lsum+temp]), + # np.round(ts.time, dec)/1000]) # convert to ns + # lsum += temp + # new_len = data_len + len(dset) + 1 + # newmap = np.memmap(f'.contacts_{i:03}', mode='w+', + # shape=(new_len, 5), dtype=np.float64) + # newmap[:data_len] = oldmap[:data_len] + # newmap[data_len:new_len] = dset + # del oldmap + # oldmap = np.memmap(f'.tmpmap', mode='w+', + # shape=(new_len, 5), dtype=np.float64) + # oldmap[:] = newmap[:] + # data_len += new_len + # # map.dump() + # return map + +class ProcessContacts(object): + def __init__(self, cutoff, nproc, map_name='contacts.pkl'): + self.cutoff, self.nproc = cutoff, nproc + self.map_name = map_name + + + def run(self): + from basicrta.util import siground + + if os.path.exists(self.map_name): + with open(self.map_name, 'r+b') as f: + memmap = pickle.load(f) + # memmap = np.load(self.map_name, mmap_mode='r') + memmap = memmap[memmap[:, -2] <= self.cutoff] + dtype = memmap.dtype + else: + raise FileNotFoundError(f'{self.map_name} not found. Specify the ' + 'contacts file using the "map_name" ' + 'argument') + + self.ts = siground(np.unique(memmap[1:, 4]-memmap[:-1, 4])[1], 1) + lresids = np.unique(memmap[:, 2]) + params = [[res, memmap[memmap[:, 2] == res], i] for i, res in + enumerate(lresids)] + pool = Pool(self.nproc, initializer=tqdm.set_lock, initargs=(Lock(),)) + + try: + lens = pool.starmap(self._lipswap, params) + except KeyboardInterrupt: + pool.terminate() + pool.close() + + bounds = np.concatenate([[0], np.cumsum(lens)]) + mapsize = sum(lens) + contact_map = np.memmap(f'.tmpmap', mode='w+', + shape=(mapsize, 4), dtype=dtype) + + for i in range(self.nproc): + contact_map[bounds[i]:bounds[i+1]] = np.load(f'.contacts_{i:04}.' 
+ f'npy') + contact_map.flush() + + contact_map.dump(f'contacts_{self.cutoff}.pkl', protocol=5) + # os.remove('.tmpmap') + # cfiles = glob.glob('.contacts*') + # [os.remove(f) for f in cfiles] + print(f'\nSaved contacts to "contacts_{self.cutoff}.npy"') + + + def _lipswap(self, lip, memarr, i): + from basicrta.util import get_dec + try: + # proc = int(multiprocessing.current_process().name[-1]) + proc = int(multiprocessing.current_process().name.split('-')[-1]) + except ValueError: + proc = 1 + + presids = np.unique(memarr[:, 1]) + dset = [] + dec, ts = get_dec(self.ts), self.ts + for pres in tqdm(presids, desc=f'lipID {int(lip)}', position=proc, + leave=False): + stimes = np.round(memarr[:, -1][memarr[:, 1] == pres], dec) + if len(stimes) == 0: + continue + stimes = np.concatenate([np.array([-1]), stimes, + np.array([stimes[-1] + 1])]) + diff = np.round(stimes[1:] - stimes[:-1], dec) + singles = stimes[ + np.where((diff[1:] > ts) & (diff[:-1] > ts))[0] + 1] + diff[diff > ts] = 0 + inds = np.where(diff == 0)[0] + sums = [sum(diff[inds[i]:inds[i + 1]]) for i in + range(len(inds) - 1)] + clens = np.round(np.array(sums), dec) + minds = np.where(clens != 0)[0] + clens = clens[minds] + ts + strt_times = stimes[inds[minds] + 1] + + [dset.append([pres, lip, time, ts]) for time in singles] + [dset.append([pres, lip, time, clen]) for time, clen in + zip(strt_times, clens)] + np.save(f'.contacts_{i:04}', np.array(dset)) + return len(dset) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--top', type=str) + parser.add_argument('--traj', type=str, nargs='+') + parser.add_argument('--sel1', type=str) + parser.add_argument('--sel2', type=str) + parser.add_argument('--cutoff', type=float) + parser.add_argument('--nproc', type=int, default=1) + parser.add_argument('--nslices', type=int, default=100) + args = parser.parse_args() + + u = mda.Universe(args.top) + [u.load_new(traj) for traj in args.traj] + cutoff, nproc, 
nslices = args.cutoff, args.nproc, args.nslices + ag1 = u.select_atoms(args.sel1) + ag2 = u.select_atoms(args.sel2) + + MapContacts(u, ag1, ag2, nproc=nproc, nslices=nslices).run() + ProcessContacts(cutoff, nproc).run() \ No newline at end of file diff --git a/basicrta/data/README.md b/basicrta/data/README.md new file mode 100644 index 0000000..550587e --- /dev/null +++ b/basicrta/data/README.md @@ -0,0 +1,17 @@ +# Sample Package Data + +This directory contains sample additional data you may want to include with your package. +This is a place where non-code related additional information (such as data files, molecular structures, etc.) can +go that you want to ship alongside your code. + +Please note that it is not recommended to place large files in your git directory. If your project requires files larger +than a few megabytes in size it is recommended to host these files elsewhere. This is especially true for binary files +as the `git` structure is unable to correctly take updates to these files and will store a complete copy of every version +in your `git` history which can quickly add up. As a note most `git` hosting services like GitHub have a 1 GB per repository +cap. + +## Including package data + +Modify your package's `setup.py` file and the `setup()` command. Include the +[`package_data`](http://setuptools.readthedocs.io/en/latest/setuptools.html#basic-use) keyword and point it at the +correct files. 
\ No newline at end of file diff --git a/basicrta/data/__init__.py b/basicrta/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/basicrta/data/files.py b/basicrta/data/files.py new file mode 100644 index 0000000..6217432 --- /dev/null +++ b/basicrta/data/files.py @@ -0,0 +1,17 @@ +""" +Location of data files +====================== + +Use as :: + + from basicrta.data.files import * + +""" + +__all__ = [ + "MDANALYSIS_LOGO", # example file of MDAnalysis logo +] + +from pkg_resources import resource_filename + +MDANALYSIS_LOGO = resource_filename(__name__, "mda.txt") diff --git a/basicrta/data/mda.txt b/basicrta/data/mda.txt new file mode 100644 index 0000000..7893f85 --- /dev/null +++ b/basicrta/data/mda.txt @@ -0,0 +1,45 @@ + + ▄▄████████▄▄ + ,▓███████████████▄ ____ + ▄███████████████████ ╔D╠╠╠╠╠╠╬φ_ + ▐█████████████████████ ╔╠╠╠╠╠╠╠╠╠╠╠╠▒ + ██████████████████████▌ ╠╠╠╠╠╠╠╠╠╠╠╠╠╠H + ███████████████████████ ╠╠╠╠╠╠╠╠╠╠╠╠╠╠H + ╙██████████████████████ ╠╠╠╠╠╠╠╠╠╠╠╠╠╩ + ╙██████████████████████ ╙╠╠╠╠╠╠╠╠╠╝^ + '██████████████████████_ `'^╙╙^` + `▀███████████▀▀▀███████▄ + `` _╓╗φ@@@φ╗╖,╙▀████▄▄╓,,,▄▄▄▓█████▓▄_ + ,φ▓╬╬╬╬╬╬╬╬╬╬╬╬▒,╙█████████████████████▓_ + Æ╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▌ ▀██████████████████████ + _,,_ ╣╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▓ ╚██████████████████████ + ╔╠╠╠╠╠╬_ [╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬L ██████████████████████▌ + ╠╠╠╠╠╠╠H ╟╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▒ ███████████████████████ + `╚╠╠╠╠╙ ╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬L ██████████████████████▌ + [╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╩ ███████████████████████ + é╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╜ ██████████████████████▀ + _╗▓╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╝╙,▓████████████████████▀" + _╓╗╗╗╗╗╖,__ __╓φ▓╬╬╝╜"▄▄▄▄▄▄▄▄▄╠╙╙^""▄▄▓████▀" └╙╙▀▀▀"" + ╔Æ╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╜╓▓█████████████████████▀ + ╗╣╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╩ ██████████████████████▀ + ╣╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▌ ██████████████████████Ñ ╔@DD╠DK╓ + [╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬ ╟██████████████████████ ╒╠╠╠╠╠╠╠╠╠╠ + ╢╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬ ██████████████████████▌ ╚╠╠╠╠╠╠╠╠╠╠H + ╚╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬ 
╟█████████████████████` '╠╠╠╠╠╠╠╠╠╠ + ╫╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▒ ████████████████████▌ `╚╠╠╠╠╠╝╙ + ╚╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬W ██████████████████▀ + ╙╣╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▓╖╙▀█████████████▀ + "╙╨╝╝╝╨╙^`` `"╙╝╬╬▓W╓╙▀▀▀▀▀▀▀▀"_,,,__ + _╓╦φφφφ╦,_ '╝╬╬╬╬▓▒▒▒▒▓╬╬╬╬╬╬╬╬╬▓╗_ + ,φ╠╠╠╠╠╠╠╠╠╠╠D╦_ `╣╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▓, + ╔╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╦ ╟╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▄ + j╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠H ╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬L + ╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠ ╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▓ + ╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠ ║╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▌ + ²╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠ ╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬ + '╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╩ `╣╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬╬▓ + '╚╠╠╠╠╠╠╠╠╠╝╙ ╙╣╬╬╬╬╬╬╬╬╬╬╬╬╬╝^ + ````` `╙╨╝╝╝╝╝╨╙` + +--- +asciiart.club diff --git a/basicrta/gibbs.py b/basicrta/gibbs.py new file mode 100644 index 0000000..432573d --- /dev/null +++ b/basicrta/gibbs.py @@ -0,0 +1,361 @@ +"""Analysis functions +""" + +import os +import gc +import pickle +import numpy as np +import matplotlib as mpl +import matplotlib.pyplot as plt +from numpy.random import default_rng +from tqdm import tqdm +from MDAnalysis.analysis.base import Results +from basicrta.util import confidence_interval +from multiprocessing import Pool, Lock + +gc.enable() +mpl.rcParams['pdf.fonttype'] = 42 +rng = default_rng() + + +class ProcessProtein(object): + def __init__(self, niter): + self.residues, self.niter = {}, niter + + + def __getitem__(self, item): + return getattr(self, item) + + + def collect_results(self): + from glob import glob + if not os.getcwd().split('/')[-1][:8] == 'basicrta': + raise NotImplementedError('navigate to basicrta-{cutoff} directory' + 'and rerun') + dirs = np.array(glob('?[0-9]*')) + sorted_inds = np.array([int(adir[1:]) for adir in dirs]).argsort() + dirs = dirs[sorted_inds] + for adir in tqdm(dirs): + if os.path.exists(f'{adir}/gibbs_{self.niter}.pkl'): + with open(f'{adir}/gibbs_{self.niter}.pkl', 'rb') as r: + self.residues[adir] = pickle.load(r) + elif os.path.exists(f'{adir}/results_{self.niter}.pkl'): + try: + self.residues[adir] = 
Gibbs().load_results(f'{adir}/results' + f'_{self.niter}.pkl') + except ValueError: + print(f'{adir} does not contain a valid dataset') + continue + else: + print(f'results for {adir} do not exist') + # raise FileNotFoundError(f'results for {adir} do not exist') + + +class ParallelGibbs(object): + """ + A module to take a contact map and run Gibbs samplers for each residue + """ + + def __init__(self, contacts, nproc=1, ncomp=15, niter=50000): + with open(contacts, 'r+b') as f: + self.contacts = pickle.load(f) + self.cutoff = float(contacts.strip('.npy').split('/')[-1].split('_')[-1]) + self.niter, self.nproc, self.ncomp = niter, nproc, ncomp + + + def run(self, run_resids): + from basicrta.util import run_residue + + protids = np.unique(self.contacts[:, 0]) + times = [self.contacts[self.contacts[:, 0] == i][:, 3] for i in protids] + + lens = np.array([len(np.unique(time)) for time in times]) + validinds = np.where(lens > 50)[0] + protids, times = protids[validinds], times[validinds] + residues = self.contacts.dtype['ag1'].residues + resids = np.array([int(res[1:]) for res in residues]) + + if run_resids: + inds = np.array([np.where(resids == resid)[0] for resid in + run_resids]) + residues, times = residues[inds], times[inds] + + if not os.path.exists(f'basicrta-{self.cutoff}'): + os.mkdir(f'basicrta-{self.cutoff}') + os.chdir(f'basicrta-{self.cutoff}') + + input_list = np.array([[times[i], residues[i], i % self.nproc, + self.ncomp, self.niter] for i in + range(len(residues))], dtype=object) + + with (Pool(self.nproc, initializer=tqdm.set_lock, initargs=(Lock(),)) as + p): + for _ in tqdm(p.istarmap(run_residue, input_list), + total=len(residues), position=0, + desc='overall progress'): + pass + + +class Gibbs(object): + """Gibbs sampler to estimate parameters of an exponential mixture for a set + of data. Results are stored in gibbs.results, which uses /home/ricky + MDAnalysis.analysis.base.Results(). 
If 'results=None' the gibbs sampler has + not been executed, which requires calling '.run()' + """ + + def __init__(self, times=None, residue=None, loc=0, ncomp=15, niter=50000): + self.times, self.residue = times, residue + self.niter, self.loc, self.ncomp = niter, loc, ncomp + self.g, self.burnin = 100, 10000 + self.processed_results = Results() + + if times.size: + diff = (np.sort(times)[1:]-np.sort(times)[:-1]) + self.ts = diff[diff != 0][0] + else: + self.ts = None + + self.keys = {'times', 'residue', 'loc', 'ncomp', 'niter', 'g', 'burnin', + 'processed_results', 'ts'} + + + def __getitem__(self, item): + return getattr(self, item) + + + def _prepare(self): + from basicrta.util import get_s + self.t, self.s = get_s(self.times, self.ts) + + if not os.path.exists(f'{self.residue}'): + os.mkdir(f'{self.residue}') + + # initialize arrays + self.indicator = np.memmap(f'{self.residue}/.indicator_{self.niter}.' + f'npy', + shape=((self.niter + 1) // self.g, + self.times.shape[0]), + mode='w+', dtype=np.uint8) + self.mcweights = np.zeros(((self.niter + 1) // self.g, self.ncomp)) + self.mcrates = np.zeros(((self.niter + 1) // self.g, self.ncomp)) + + # guess hyperparameters + self.whypers = np.ones(self.ncomp) / [self.ncomp] + self.rhypers = np.ones((self.ncomp, 2)) * [1, 3] + + + def run(self): + # initialize weights and rates + self._prepare() + inrates = 0.5 * 10 ** np.arange(-self.ncomp + 2, 2, dtype=float) + tmpw = 9 * 10 ** (-np.arange(1, self.ncomp + 1, dtype=float)) + weights, rates = tmpw / tmpw.sum(), inrates[::-1] + + # gibbs sampler + for j in tqdm(range(1, self.niter+1), + desc=f'{self.residue}-K{self.ncomp}', + position=self.loc, leave=False): + + # compute probabilities + tmp = weights*rates*np.exp(np.outer(-rates, self.times)).T + z = (tmp.T/tmp.sum(axis=1)).T + + # sample indicator + s = np.argmax(rng.multinomial(1, z), axis=1) + + # get indicator for each data point + inds = [np.where(s == i)[0] for i in range(self.ncomp)] + + # compute total time and 
number of point for each component + Ns = np.array([len(inds[i]) for i in range(self.ncomp)]) + Ts = np.array([self.times[inds[i]].sum() for i in range(self.ncomp)]) + + # sample posteriors + weights = rng.dirichlet(self.whypers+Ns) + rates = rng.gamma(self.rhypers[:, 0]+Ns, 1/(self.rhypers[:, 1]+Ts)) + + # save every g steps + if j%self.g == 0: + ind = j//self.g-1 + self.mcweights[ind], self.mcrates[ind] = weights, rates + self.indicator[ind] = s + + # attributes to save + attrs = ["mcweights", "mcrates", "ncomp", "niter", "s", "t", "residue", + "times"] + values = [self.mcweights, self.mcrates, self.ncomp, self.niter, self.s, + self.t, self.residue, self.times] + + self._save_results(attrs, values) + self._process_gibbs() + + + def _process_gibbs(self, cutoff=1e-4): + from basicrta.util import mixture_and_plot + from scipy import stats + + burnin_ind = self.burnin // self.g + inds = np.where(self.mcweights[burnin_ind:] > cutoff) + indices = (np.arange(self.burnin, self.niter + 1, self.g)[inds[0]] // + self.g) + weights, rates = self.mcweights[burnin_ind:], self.mcrates[burnin_ind:] + fweights, frates = weights[inds], rates[inds] + + lens = [len(row[row > cutoff]) for row in self.mcweights[burnin_ind:]] + lmin, lmode, lmax = np.min(lens), stats.mode(lens).mode, np.max(lens) + train_param = lmode + + Indicator = np.zeros((self.times.shape[0], train_param)) + indicator = np.memmap(f'{self.residue}/.indicator_{self.niter}.npy', + shape=((self.niter + 1) // self.g, + self.times.shape[0]), + mode='r', dtype=np.uint8) + + gm, labels = mixture_and_plot(self, 'GaussianMixture', n_init=17, + n_components=lmode, + covariance_type='spherical') + indicator = indicator[burnin_ind:] + for j in np.unique(inds[0]): + mapinds = labels[inds[0] == j] + for i, indx in enumerate(inds[1][inds[0] == j]): + tmpind = np.where(indicator[j] == indx)[0] + Indicator[tmpind, mapinds[i]] += 1 + + Indicator = (Indicator.T / Indicator.sum(axis=1)).T + + attrs = ["weights", "rates", "ncomp", 
"residue", "indicator", "labels",
+                 "iteration", "niter"]
+        values = [fweights, frates, lmode, self.residue, Indicator,
+                  labels, indices, self.niter]
+        self._save_results(attrs, values, processed=True)
+        self._estimate_params()
+        self._pickle_self()
+
+
+    def _pickle_self(self):
+        with open(f'{self.residue}/gibbs_{self.niter}.pkl', 'w+b') as f:
+            pickle.dump(self, f)
+
+
+    def _save_results(self, attrs, values, processed=False):
+        if processed:
+            r = self.processed_results
+        else:
+            r = self
+
+        for attr, value in zip(attrs, values):
+            setattr(r, attr, value)
+
+        if processed:
+            with open(f'{r.residue}/processed_results_{r.niter}.pkl',
+                      'wb') as W:
+                pickle.dump(r, W)
+        else:
+            with open(f'{r.residue}/results_{r.niter}.pkl', 'wb') as W:
+                pickle.dump(r, W)
+
+
+    def load_results(self, results, processed=False):
+        if processed:
+            with open(results, 'r+b') as f:
+                r = pickle.load(f)
+
+            for attr in list(r.keys()):
+                setattr(self.processed_results, attr, r[f'{attr}'])
+        else:
+            with open(results, 'r+b') as f:
+                r = pickle.load(f)
+
+            for attr in list(r.keys()):
+                setattr(self, attr, r[f'{attr}'])
+
+            self._process_gibbs()
+        return self
+
+
+    def hist_results(self, scale=1.5, save=False):
+        cmap = mpl.colormaps['tab20']
+        rp = self.processed_results
+
+        fig, ax = plt.subplots(1, 2, figsize=(4*scale, 3*scale))
+        [ax[0].hist(rp.weights[rp.labels == i],
+                    bins=np.exp(np.linspace(np.log(rp.weights[rp.labels == i]
+                                                   .min()),
+                                            np.log(rp.weights[rp.labels == i]
+                                                   .max()), 50)),
+                    label=f'{i}', alpha=0.5, color=cmap(i))
+         for i in range(rp.ncomp)]
+        [ax[1].hist(rp.rates[rp.labels == i],
+                    bins=np.exp(np.linspace(np.log(rp.rates[rp.labels == i]
+                                                   .min()),
+                                            np.log(rp.rates[rp.labels == i]
+                                                   .max()), 50)),
+                    label=f'{i}', alpha=0.5, color=cmap(i))
+         for i in range(rp.ncomp)]
+        ax[0].set_xscale('log')
+        ax[1].set_xscale('log')
+        ax[0].legend(title='component')
+        ax[1].legend(title='component')
+        ax[0].set_xlabel(r'weight')
+        ax[1].set_xlabel(r'rate ($ns^{-1}$)')
+        
ax[0].set_ylabel('count') + ax[0].set_xlim(1e-4, 1) + ax[1].set_xlim(1e-3, 10) + plt.tight_layout() + if save: + plt.savefig('hist_results.png', bbox_inches='tight') + plt.savefig('hist_results.pdf', bbox_inches='tight') + plt.show() + + + def plot_results(self, scale=1.5, sparse=1, save=False): + cmap = mpl.colormaps['tab10'] + rp = self.processed_results + + fig, ax = plt.subplots(2, figsize=(4*scale, 3*scale), sharex=True) + [ax[0].plot(rp.iteration[rp.labels == i][::sparse], + rp.weights[rp.labels == i][::sparse], '.', + label=f'{i}', color=cmap(i)) + for i in np.unique(rp.labels)] + ax[0].set_yscale('log') + ax[0].set_ylabel(r'weight') + [ax[1].plot(rp.iteration[rp.labels == i][::sparse], + rp.rates[rp.labels == i][::sparse], '.', label=f'{i}', + color=cmap(i)) for i in np.unique(rp.labels)] + ax[1].set_yscale('log') + ax[1].set_ylabel(r'rate ($ns^{-1}$)') + ax[1].set_xlabel('sample') + ax[0].legend(title='component') + ax[1].legend(title='component') + plt.tight_layout() + if save: + plt.savefig('plot_results.png', bbox_inches='tight') + plt.savefig('plot_results.pdf', bbox_inches='tight') + plt.show() + + + def _estimate_params(self): + rp = self.processed_results + + ds = [rp.rates[rp.labels == i] for i in range(rp.ncomp)] + bounds = np.array([confidence_interval(d) for d in ds]) + params = np.array([np.mean(d) for d in ds]) + + setattr(rp, 'parameters', params) + setattr(rp, 'intervals', bounds) + + + def estimate_tau(self): + rp = self.processed_results + index = np.argmin(rp.parameters) + taus = 1 / rp.rates[rp.labels == index] + ci = confidence_interval(taus) + bins = np.exp(np.linspace(np.log(taus.min()), np.log(taus.max()), 100)) + H = np.histogram(taus, bins=bins) + indmax = np.where(H[0] == H[0].max())[0] + val = 0.5 * (H[1][:-1][indmax] + H[1][1:][indmax])[0] + return [ci[0], val, ci[1]] + + +if __name__ == '__main__': + print('do nothing') \ No newline at end of file diff --git a/basicrta/tests/conftest.py b/basicrta/tests/conftest.py new file 
mode 100644 index 0000000..1563450 --- /dev/null +++ b/basicrta/tests/conftest.py @@ -0,0 +1,20 @@ +""" +Global pytest fixtures +""" + +# Use this file if you need to share any fixtures +# across multiple modules +# More information at +# https://docs.pytest.org/en/stable/how-to/fixtures.html#scope-sharing-fixtures-across-classes-modules-packages-or-session + +import pytest + +from basicrta.data.files import MDANALYSIS_LOGO + + +@pytest.fixture +def mdanalysis_logo_text() -> str: + """Example fixture demonstrating how data files can be accessed""" + with open(MDANALYSIS_LOGO, "r", encoding="utf8") as f: + logo_text = f.read() + return logo_text diff --git a/basicrta/tests/test_basicrta.py b/basicrta/tests/test_basicrta.py new file mode 100644 index 0000000..b58c63d --- /dev/null +++ b/basicrta/tests/test_basicrta.py @@ -0,0 +1,19 @@ +""" +Unit and regression test for the basicrta package. +""" + +# Import package, test suite, and other packages as needed +import basicrta +import pytest +import sys + + +def test_basicrta_imported(): + """Sample test, will always pass so long as import statement worked""" + assert "basicrta" in sys.modules + + +def test_mdanalysis_logo_length(mdanalysis_logo_text): + """Example test using a fixture defined in conftest.py""" + logo_lines = mdanalysis_logo_text.split("\n") + assert len(logo_lines) == 46, "Logo file does not have 46 lines!" diff --git a/basicrta/tests/utils.py b/basicrta/tests/utils.py new file mode 100644 index 0000000..63a2e16 --- /dev/null +++ b/basicrta/tests/utils.py @@ -0,0 +1,77 @@ +from typing import Tuple + +import MDAnalysis as mda +from MDAnalysis.coordinates.memory import MemoryReader +import numpy as np + + +def make_Universe( + extras: Tuple[str] = tuple(), + size: Tuple[int, int, int] = (125, 25, 5), + n_frames: int = 0, + velocities: bool = False, + forces: bool = False +) -> mda.Universe: + """Make a dummy reference Universe + + Allows the construction of arbitrary-sized Universes. 
Suitable for + the generation of structures for output. + + Preferable for testing core components because: + * minimises dependencies within the package + * very fast compared to a "real" Universe + + Parameters + ---------- + extras : tuple of strings, optional + extra attributes to add to Universe: + u = make_Universe(('masses', 'charges')) + Creates a lightweight Universe with only masses and charges. + size : tuple of int, optional + number of elements of the Universe (n_atoms, n_residues, n_segments) + n_frames : int + If positive, create a fake Reader object attached to Universe + velocities : bool, optional + if the fake Reader provides velocities + force : bool, optional + if the fake Reader provides forces + + Returns + ------- + MDAnalysis.core.universe.Universe object + + """ + + n_atoms, n_residues, n_segments = size + trajectory = n_frames > 0 + u = mda.Universe.empty( + # topology things + n_atoms=n_atoms, + n_residues=n_residues, + n_segments=n_segments, + atom_resindex=np.repeat( + np.arange(n_residues), n_atoms // n_residues), + residue_segindex=np.repeat( + np.arange(n_segments), n_residues // n_segments), + # trajectory things + trajectory=trajectory, + velocities=velocities, + forces=forces, + ) + if extras is None: + extras = [] + for ex in extras: + u.add_TopologyAttr(ex) + + if trajectory: + pos = np.arange(3 * n_atoms * n_frames).reshape(n_frames, n_atoms, 3) + vel = pos + 100 if velocities else None + fcs = pos + 10000 if forces else None + reader = MemoryReader( + pos, + velocities=vel, + forces=fcs, + ) + u.trajectory = reader + + return u diff --git a/basicrta/util.py b/basicrta/util.py new file mode 100644 index 0000000..c9b846e --- /dev/null +++ b/basicrta/util.py @@ -0,0 +1,780 @@ +"""Functions used by other modules.""" + +from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, + AutoMinorLocator) +from matplotlib.patches import Rectangle +from matplotlib.collections import PatchCollection +import ast, multiprocessing, os 
+import numpy as np
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import pickle, bz2
+from glob import glob
+import seaborn as sns
+from tqdm import tqdm
+import MDAnalysis as mda
+from scipy.optimize import linear_sum_assignment as lsa
+
+
+mpl.rcParams['pdf.fonttype'] = 42
+
+
+def siground(x, dec):
+    return float(f'%.{dec - 1}e' % x)
+
+
+def slice_trajectory(u, nslices):
+    if type(u) == mda.coordinates.base.FrameIteratorSliced:
+        frames = np.arange(u.start, u.stop, u.step)
+    elif type(u) == mda.coordinates.base.FrameIteratorIndices:
+        frames = u.frames
+    else:
+        frames = np.arange(len(u.trajectory))
+
+    sliced_frames = np.array_split(frames, nslices)
+    return sliced_frames
+
+
+def KL_resort(r):
+    mcweights, mcrates = r.mcweights.copy(), r.mcrates.copy()
+    indicator[:] = indicator_bak
+    Ls, niter = [L], 0
+    for j in tqdm(range(r.niter)):
+        sorts = mcweights[j].argsort()[::-1]
+        mcweights[j] = mcweights[j][sorts]
+        mcrates[j] = mcrates[j][sorts]
+
+    while niter<10:
+        Z = np.zeros_like(z)
+        for j in tqdm(range(2000, 3000), desc='recomputing Q'):
+            tmp = mcweights[j]*mcrates[j]*np.exp(np.outer(-mcrates[j],x)).T
+            z = (tmp.T/tmp.sum(axis=1)).T
+            Z += z
+        Z = Z/1000
+
+        for j in tqdm(range(2000, 3000), desc='resorting'):
+            tmp = mcweights[j]*mcrates[j]*np.exp(np.outer(-mcrates[j],x)).T
+            z = (tmp.T/tmp.sum(axis=1)).T
+
+            tmpsum = np.ones((ncomp,ncomp), dtype=np.float64)
+            for k in range(ncomp):
+                tmpsum[k] = np.sum(z[:,k]*np.log(z[:,k]/Z.T), axis=1)
+
+            tmpsum[tmpsum!=tmpsum] = 1e20
+            sorts = lsa(tmpsum)[1]
+            mcweights[j] = mcweights[j][sorts]
+            mcrates[j] = mcrates[j][sorts]
+        niter += 1
+
+
+def tm(Prot,i):
+    dif = Prot['tm{0}'.format(i)][1]-Prot['tm{0}'.format(i)][0]
+    return [Prot['tm{0}'.format(i)],dif]
+
+
+def confidence_interval(data, percentage=95):
+    ds = np.sort(data)
+    perc = np.arange(1, len(ds)+1)/(len(ds))
+    lower, upper = (100-percentage)/200, (percentage+(100-percentage)/2)/100
+
+    try:
+        l = 
ds[np.where(perc<=lower)[0][-1]]
+    except IndexError:
+        l = ds[0]
+
+    try:
+        u = ds[np.where(perc>=upper)[0][0]]
+    except IndexError:
+        u = ds[-1]
+
+    return [l, u]
+
+
+def get_bars(tau):
+    maxs = tau[:,1]
+    lb, ub = tau[:,0], tau[:,2]
+    return np.array([maxs-lb, ub-maxs])
+
+
+def unique_rates(ncomp, mcrates):
+    mclen = len(mcrates)*9//10
+    means = mcrates[mclen:].mean(axis=0)
+    stds = mcrates[mclen:].std(axis=0)
+    lb, ub = means-stds, means+stds
+    bools = np.empty([ncomp, ncomp])
+    for j, mean in enumerate(means):
+        for i in range(ncomp):
+            bools[j, i] = ((mean < ub[i]) & (mean > lb[i]))
+    sums = bools.sum(axis=0)
+    deg_rts = sums[np.where(sums != 1)]
+    return ncomp-len(deg_rts)
+
+
+def get_s(x, ts):
+    Bins = get_bins(x, ts)
+    Hist = np.histogram(x, bins=Bins)
+    t, s = make_surv(Hist)
+    return t, s
+
+
+def plot_r_vs_w(r, rrange=None, wrange=None):
+    plt.close()
+    plt.figure(figsize=(4,3))
+    for k in range(r.ncomp):
+        plt.plot(r.mcrates[:, k], r.mcweights[:, k], label=f'{k}')
+    plt.yscale('log')
+    plt.xscale('log')
+    if rrange:
+        plt.xlim(*rrange)
+    if wrange:
+        plt.ylim(*wrange)
+    plt.ylabel('weight')
+    plt.xlabel('rate')
+    plt.legend(loc='upper left')
+    plt.savefig(f'{r.name}/figs/k{r.ncomp}_r_vs_w.png')
+    plt.savefig(f'{r.name}/figs/k{r.ncomp}_r_vs_w.pdf')
+
+
+def plot_clustered_r_vs_w(weights, rates, labels, rrange=None, wrange=None):
+    plt.close()
+    plt.figure(figsize=(4, 3))
+    ncomp = len(np.unique(labels))
+    for k in range(ncomp):
+        inds = np.where(labels == k)[0]
+        plt.plot(rates[inds], weights[inds], '.', label=f'{k}')
+    plt.yscale('log')
+    plt.xscale('log')
+    if rrange:
+        plt.xlim(*rrange)
+    if wrange:
+        plt.ylim(*wrange)
+    plt.ylabel('weight')
+    plt.xlabel('rate')
+    plt.legend(loc='upper left')
+    plt.tight_layout()
+    plt.show()
+
+
+def get_color(i):
+    if i<0:
+        color = i
+    else:
+        color = i % 20
+    return color
+
+
+def plot_results(results, cond='ml', save=False, show=False):
+    outdir = results.name
+    sortinds = np.argsort([line.mean() for line in 
results.rates]) + + weight_posts = np.array(getattr(results, 'weights'), dtype=object)[sortinds] + rate_posts = np.array(getattr(results, 'rates'), dtype=object)[sortinds] + w_hists = [plt.hist(post, density=True) for post in weight_posts] + r_hists = [plt.hist(post, density=True) for post in rate_posts] + plt.close('all') + if cond == 'mean': + weights = np.array([w.mean() for w in results.weights]) + weights = weights/weights.sum() + rates = np.array([r.mean() for r in results.rates]) + elif cond == 'ml': + mlw, mlr = [], [] + for i in range(results.ncomp): + mlw.append(w_hists[i][1][w_hists[i][0].argmax()]) + mlr.append(r_hists[i][1][r_hists[i][0].argmax()]) + mlw = np.array(mlw) + weights = mlw/mlw.sum() + rates = np.array(mlr) + else: + raise ValueError('Only implemented for most likely (ml) and mean') + + fig, axs = plt.subplots(figsize=(4,3)) + plt.scatter(results.t, results.s, s=15, label='data') + plt.plot(results.t, np.inner(weights, np.exp(np.outer(results.t, -rates))),\ + label='fit', color='y', ls='dashed', lw=3) + for i in range(results.ncomp): + plt.plot(results.t, weights[i] * np.exp(results.t * -rates[i]), \ + label=f'Comp.{i}', color=f'C{i}') + plt.plot([], [], ' ', label=rf'$\tau$={np.round(1/rates.min(), 1)} ns') + plt.yscale('log') + plt.ylim(0.8*results.s[-2], 2) + plt.xlim(-0.05*results.t[-2], 1.1*results.t[-2]) + plt.legend() + plt.ylabel('s').set_rotation(0) + plt.xlabel('time (ns)') + plt.tight_layout() + sns.despine(offset=3, ax=axs) + if save: + plt.savefig(f'{outdir}/figs/k{results.ncomp}-{cond}_results.png') + plt.savefig(f'{outdir}/figs/k{results.ncomp}-{cond}_results.pdf') + if show: + plt.show() + plt.close('all') + + +def all_post_hist(results, save=False, show=False, wlims=None, rlims=None): + outdir = results.name + for attr, unit in [['rates', ' (ns$^{-1}$)'], ['weights', '']]: + Attr = getattr(results, attr) + plt.figure(figsize=(4,3)) + for i in range(results.ncomp): + plt.hist(Attr[i], density=True, bins=15, label=f'comp. 
{i}', \ + alpha=0.5) + plt.legend() + plt.xlabel(f'{attr}{unit}'), plt.ylabel('p').set_rotation(0) + plt.yscale('log'), plt.xscale('log') + if attr=='rates' and rlims: + plt.xlim(rlims[0]) + plt.ylim(rlims[1]) + if attr=='weights' and wlims: + plt.xlim(wlims[0]) + plt.ylim(wlims[1]) + if save: + name = f'{outdir}/figs/k{results.ncomp}-posterior_{attr}_comp-all' + plt.savefig(f'{name}.png', bbox_inches='tight') + plt.savefig(f'{name}.pdf', bbox_inches='tight') + if show: + plt.show() + plt.close('all') + + +def plot_post(results, attr, comp=None, save=False, show=False): + outdir = results.name + Attr = getattr(results, attr) + if attr == 'rates': + unit=' (ns$^{-1}$)' + else: + unit='' + + if comp: + [plt.hist(Attr[i], density=True, bins=50, label=f'comp. {i}') for i in comp] + plt.legend() + if save: + plt.savefig(f'{outdir}/figs/k{results.ncomp}-posterior_{attr}_\ + comps-{"-".join([str(i) for i in comp])}.png') + plt.savefig(f'{outdir}/figs/k{results.ncomp}-posterior_{attr}_\ + comps-{"-".join([str(i) for i in comp])}.pdf') + if show: + plt.show() + plt.close('all') + else: + for i in range(results.ncomp): + plt.close() + fig, ax = plt.subplots(figsize=(4,3)) + plt.hist(Attr[i], density=True, bins=15, label=f'comp. {i}') + #plt.legend() + plt.ylabel('p').set_rotation(0) + plt.xlabel(rf'{attr[:-1]} {unit}') + ax.xaxis.major.formatter._useMathText = True + if save: + plt.savefig(f'{outdir}/figs/k{results.ncomp}-posterior_{attr}_' + 'comp-{i}.png', bbox_inches='tight') + plt.savefig(f'{outdir}/figs/k{results.ncomp}-posterior_{attr}_' + 'comp-{i}.pdf', bbox_inches='tight') + if show: + plt.show() + + +def plot_trace(results, attr, comp=None, xrange=None, yrange=None, save=False, + show=False): + outdir = results.name + if attr=='weights': + tmp = getattr(results, 'mcweights') + elif attr=='rates': + tmp = getattr(results, 'mcrates') + if not comp: + plt.figure(figsize=(4,3)) + for j in range(results.ncomp): + plt.plot(range(tmp.shape[0]), tmp[:, j], label=f'Comp. 
{j}') + plt.xlabel('iteration') + plt.ylabel(f'{attr}') + plt.legend() + if xrange!=None: + plt.xlim(xrange[0], xrange[1]) + if yrange!=None: + plt.ylim(yrange[0], yrange[1]) + if save: + plt.savefig(f'{outdir}/figs/k{results.ncomp}-trace_{attr}.png') + plt.savefig(f'{outdir}/figs/k{results.ncomp}-trace_{attr}.pdf') + if comp: + plt.figure(figsize=(4,3)) + for i in comp: + plt.plot(range(tmp.shape[0]), tmp[:, i], label=f'Comp. {i}') + plt.xlabel('iteration') + plt.ylabel(f'{attr}') + plt.legend() + if xrange!=None: + plt.xlim(xrange[0], xrange[1]) + if yrange!=None: + plt.ylim(yrange[0], yrange[1]) + if save: + plt.savefig(f'{outdir}/figs/k{results.ncomp}-trace_{attr}_comps-\ + {"-".join([str(i) for i in comp])}.png') + plt.savefig(f'{outdir}/figs/k{results.ncomp}-trace_{attr}_comps-\ + {"-".join([str(i) for i in comp])}.pdf') + if show: + plt.show() + plt.close('all') + + +def collect_results(ncomp=None): + """returns (residues, tslow, stds) + """ + dirs = np.array(glob('?[0-9]*')) + sorted_inds = np.array([int(adir[1:]) for adir in dirs]).argsort() + dirs = dirs[sorted_inds] + t_slow = np.zeros(len(dirs)) + sd = np.zeros((len(dirs),2)) + residues = np.empty((len(dirs)), dtype=object) + indicators = [] + for i, adir in enumerate(tqdm(dirs, desc='Collecting results')): + residues[i] = adir + try: + tmp_res = pickle.load(bz2.BZ2File(f'{adir}/results_20000.pkl.bz2', 'rb')) + tmp_res, rpinds = process_gibbs(tmp_res) + # with open(f'{adir}/processed_results_10000.pkl', 'rb') as f: + # tmp_res = pickle.load(f) + # results = glob(f'{adir}/*results.pkl') + # results.sort() + # if ncomp and ncomp-1<=len(results): + # max_comp_res = results[ncomp-2] + # else: + # max_comp_res = results[-1] + except FileNotFoundError: + t_slow[i]=0 + continue + #with open(max_comp_res, 'rb') as W: + # tmp_res = pickle.load(W) + + + means = np.array([(1/post).mean() for post in tmp_res.rates.T]) + if len(means) == 0: + continue + ind = np.where(means == means.max())[0][0] + t_slow[i] = 
means[ind] + sd[i] = get_bars(1/tmp_res.rates.T[ind]) + indicators.append((tmp_res.indicator.T/tmp_res.indicator.sum(axis=1)).T) + return residues, t_slow, sd.T, indicators + + +def collect_n_plot(resids, comps): + dirs = np.array(glob('?[0-9]*')) + tmpresids = np.array([int(adir[1:]) for adir in dirs]) + sorted_inds = tmpresids.argsort() + tmpresids.sort() + dirs = dirs[sorted_inds] + idinds = np.array([np.where(tmpresids == resid)[0][0] for resid in resids]) + dirs = dirs[idinds] + + for i, adir in enumerate(tqdm(dirs, desc='Collecting results')): + results = glob(f'{adir}/*results.pkl') + results.sort() + #max_comp_res = results[-1] + for res in results: + with open(res, 'rb') as W: + tmp_res = pickle.load(W) + + make_residue_plots(tmp_res, comps) + all_post_hist(tmp_res, save=True, rlims=[[1e-3,10],[1e-2, 1e3]], \ + wlims=[[1e-4, 1.1],[1e-1, 1e4]]) + plot_r_vs_w(tmp_res, rrange=[1e-3, 10], wrange=[1e-4, 5]) + + +def make_residue_plots(results, comps=None, show=False): + r = results + + if not os.path.exists(f'{r.name}/figs'): + os.mkdir(f'{r.name}/figs/') + + plot_results(r, cond='mean', save=True, show=show) + plot_results(r, cond='ml', save=True, show=show) + plot_post(r, 'weights', comp=comps, save=True, show=show) + plot_post(r, 'rates', comp=comps, save=True, show=show) + plot_trace(r, 'weights', comp=comps, save=True, show=show, yrange=[-0.1,1.1]) + plot_trace(r, 'rates', comp=comps, save=True, show=show, yrange=[-0.1,6]) + + +def plot_protein(residues, t_slow, bars, prot): + with open('../../../../tm_dict.txt', 'r') as f: + contents = f.read() + prots = ast.literal_eval(contents) + + if not os.path.exists('figs'): + os.mkdir('figs') + + height, width = 3, 4 + fig, axs = plt.subplots(2,1,figsize=(width, height),sharex=True) + p =[Rectangle((tm(prots[prot]['helices'],i+1)[0][0],0),\ + tm(prots[prot]['helices'],i+1)[1],1,fill=True) for i in range(7)] + patches = PatchCollection(p) + patches.set_color('C0') + resids = np.array([int(res[1:]) for res in 
residues]) + max_inds = np.where(t_slow > 3 * t_slow.mean()) + axs[0].plot(resids, t_slow, '.', color='C0') + axs[0].errorbar(resids, t_slow, yerr=bars, fmt='none', color='C0') + [axs[0].text(resids[ind], t_slow[ind], residues[ind]) for ind in max_inds[0]] + axs[1].add_collection(patches) + #if (prot=='cck1r') or (prot=='cck2r'): + # axs[0].set_ylim(0, 1300) + #else: + # axs[0].set_ylim(0, 500) + axs[0].set_ylabel(r'$\tau_{slow}$ ' + '\n (ns) ',rotation=0) + axs[1].set_xlabel(r'residue') + axs[0].get_xaxis().set_visible(False) + axs[1].get_yaxis().set_visible(False) + axs[1].xaxis.set_major_locator(MultipleLocator(50)) + axs[1].xaxis.set_minor_locator(MultipleLocator(10)) + axs[1].set_aspect(7) + axs[0].margins(x=0) + plt.subplots_adjust(hspace=-0.45,top=0.92) + sns.despine(offset=10,ax=axs[0],bottom=True) + sns.despine(ax=axs[1],top=True,bottom=False,left=True) + plt.savefig('figs/t_slow.png', bbox_inches='tight') + plt.savefig('figs/t_slow.pdf', bbox_inches='tight') + + +# def plot_frame_comp(indicators, trajtimes): +# if not os.path.exists('figs'): +# os.mkdir('figs') +# +# plt.scatter(np.concatenate([*trajtimes]), indicators, s=2) +# plt.ylabel('Component') +# plt.xlabel('Frame') +# sns.despine(offset=10) +# plt.tight_layout() +# plt.savefig('figs/frame_comp.png') +# plt.savefig('figs/frame_comp.pdf') +# ## plt.show() + + +# def run(gib): +# gib.run() + + +def run_residue(residue, time, proc, ncomp, niter): + x = np.array(time) + # if len(x) != 0: + # try: + # proc = int(multiprocessing.current_process().name.split('-')[-1]) + # except ValueError: + # proc = 1 + # + gib = gibbs(x, residue, proc, ncomp=ncomp, niter=niter) + gib.run() + + +def check_results(residues, times, ts): + if not os.path.exists('result_check'): + os.mkdir('result_check') + for time, residue in zip(times, residues): + if os.path.exists(residue): + kmax = glob(f'{residue}/K*_results.pkl')[-1].split('/')[-1].\ + split('/')[-1].split('_')[0][1:] + os.popen(f'cp 
{residue}/figs/k{kmax}-mean_results.png result_check/\ + {residue}-k{kmax}-results.png') + else: + t, s = get_s(np.array(time), ts) + plt.scatter(t, s, label='data') + plt.ylabel('s') + plt.xlabel('t (ns)') + plt.legend() + plt.title('Results unavailable') + plt.savefig(f'result_check/{residue}-s-vs-t.png') + plt.close('all') + + +def get_dec(ts): + if len(str(float(ts)).split('.')[1].rstrip('0')) == 0: + dec = -len(str(ts)) + 1 + else: + dec = len(str(float(ts)).split('.')[1].rstrip('0')) + return dec + + +def get_start_stop_frames(simtime, timelen, ts): + dec = get_dec(ts) + framec = (np.round(timelen, dec)/ts).astype(int) + frame = (np.round(simtime, dec)/ts).astype(int) + return frame, frame+framec + + +def get_write_frames(u, time, trajtime, lipind, comp): + dt, comp = u.trajectory.ts.dt/1000, comp-2 #nanoseconds + bframes, eframes = get_start_stop_frames(trajtime, time, dt) + sortinds = bframes.argsort() + bframes.sort() + eframes, lind = eframes[sortinds], lipind[sortinds] + tmp = [np.arange(b, e) for b, e in zip(bframes, eframes)] + tmpL = [np.ones_like(np.arange(b, e))*l for b, e, l in zip(bframes, eframes, lind)] + write_frames, write_Linds = np.concatenate([*tmp]), np.concatenate([*tmpL]).astype(int) + return write_frames, write_Linds + + +def write_trajs(u, time, trajtime, indicator, residue, lipind, step): + try: + proc = int(multiprocessing.current_process().name[-1]) + except ValueError: + proc = 1 + + prot, chol = u.select_atoms('protein'), u.select_atoms('resname CHOL') + # dt = u.trajectory.ts.dt/1000 #nanoseconds + inds = np.array([np.where(indicator.argmax(axis=0) == i)[0] for i in range(8)], dtype=object) + lens = np.array([len(ind) for ind in inds]) + for comp in np.where(lens != 0)[0]: + write_frames, write_Linds = get_write_frames(u, time, trajtime, lipind, comp+2) + if len(write_frames) > step: + write_frames = write_frames[::step] + write_Linds = write_Linds[::step] + with mda.Writer(f"{residue}/comp{comp}_traj.xtc", \ + 
len((prot+chol.residues[0].atoms).atoms)) as W: + for i, ts in tqdm(enumerate(u.trajectory[write_frames]), \ + desc=f"{residue}-comp{comp}", position=proc, \ + leave=False, total=len(write_frames)): + ag = prot+chol.residues[write_Linds[i]].atoms + W.write(ag) + + +def plot_hists(timelens, indicators, residues): + for timelen, indicator, residue in tqdm(zip(timelens, indicators, residues), + total=len(timelens), + desc='ploting hists'): + ncomps = indicator[:,0].shape[0] + + plt.close() + for i in range(ncomps): + h, edges = np.histogram(timelen, density=True, bins=50, \ + weights=indicator[i]) + m = 0.5*(edges[1:]+edges[:-1]) + plt.plot(m, h, '.', label=i, alpha=0.5) + plt.ylabel('p') + plt.xlabel('time (ns)') + plt.xscale('log') + plt.yscale('log') + plt.ylim(1e-6, 1) + sns.despine(offset=5) + plt.legend() + plt.savefig(f'result_check/{residue}_hists_{ncomps}.png') + plt.savefig(f'result_check/{residue}_hists_{ncomps}.pdf') + + +def get_remaining_residue_inds(residues, invert=True): + dirs = np.array(glob('?[0-9]*')) + resids = np.array([int(res[1:]) for res in residues]) + rem_residues = np.setdiff1d(residues, dirs) + rem_resids = np.array([int(res[1:]) for res in rem_residues]) + rem_inds = np.in1d(resids, rem_resids, invert=invert) + return rem_inds + + +def simulate_hn(n, weights, rates): + n = int(n) + x = np.zeros(n) + p = np.random.rand(n) + + tmpw = np.concatenate(([0], np.cumsum(weights))) + for i in range(len(weights)): + x[(p > tmpw[i]) & (p <= tmpw[i+1])] = \ + -np.log(np.random.rand(len(p[(p > tmpw[i]) & (p <= tmpw[i+1])])))/\ + rates[i] + x.sort() + return x + + +def make_surv(ahist): + y = ahist[0][ahist[0] != 0] + tmpbin = ahist[1][:-1] + t = tmpbin[ahist[0] != 0] + t = np.insert(t, 0, 0) + y = np.cumsum(y) + y = np.insert(y, 0, 0) + y = y/y[-1] + s = 1-y + return t, s + + +def expand_times(contacts): + a = contacts + prots = np.unique(a[:, 0]) + lips = np.unique(a[:, 1]) + + restimes = [] + Ns = [] + for i in tqdm(prots, desc='expanding times'): 
+ liptimes = [] + lipNs = [] + for j in lips: + tmp = a[(a[:, 0] == i) & (a[:, 1] == j)] + liptimes.append(np.round(tmp[:, 2], 1)) + lipNs.append(tmp[:, 3]) + restimes.append(liptimes) + Ns.append(lipNs) + times = np.asarray(restimes) + Ns = np.asarray(Ns) + + alltimes = [] + for res in tqdm(range(times.shape[0])): + restimes = [] + for lip in range(times.shape[1]): + for i in range(times[res, lip].shape[0]): + [restimes.append(j) for j in [times[res, lip][i]]*\ + Ns[res, lip][i].astype(int)] + alltimes.append(restimes) + return np.asarray(alltimes) + + +def get_bins(x, ts): + if isinstance(x, list): + x = np.asarray(x) + elif isinstance(x, np.ndarray): + pass + else: + raise TypeError('Input should be a list or array') + return np.arange(1, int(x.max()//ts)+3)*ts + + +def mixture_and_plot(gibbs, method, **kwargs): + from sklearn import mixture + from scipy import stats + + clu = getattr(mixture, method) + keyvalpairs = [f'{key}_{val}' for key,val in zip(kwargs.keys(), + kwargs.values())] + kwarg_str = '_'.join(keyvalpairs) + + burnin_ind = gibbs.burnin // gibbs.g + weights, rates = gibbs.mcweights[burnin_ind:], gibbs.mcrates[burnin_ind:] + lens = np.array([len(row[row > 1e-4]) for row in weights]) + lmin, lmode, lmax = lens.min(), stats.mode(lens).mode, lens.max() + train_param = lmode + + train_inds = np.where(lens == train_param)[0] + train_weights = (weights[train_inds][weights[train_inds]>1e-4]. + reshape(-1, train_param)) + train_rates = (rates[train_inds][weights[train_inds]>1e-4]. 
+ reshape(-1, train_param)) + + inds = np.where(weights > 1e-4) + aweights, arates = weights[inds], rates[inds] + data = np.stack((aweights, arates), axis=1) + + tweights, trates = train_weights.flatten(), train_rates.flatten() + train_data = np.stack((tweights, trates), axis=1) + + tmpw, tmpr = np.delete(weights, train_inds), np.delete(rates, train_inds) + pweights, prates = tmpw[tmpw > 1e-4], tmpr[tmpw > 1e-4] + predict_data = np.stack((pweights, prates), axis=1) + + r = clu(**kwargs) + labels = r.fit_predict(np.log(train_data)) + uniq_labels = np.unique(labels) + leg_labels = np.array([f'{i}' for i in uniq_labels]) + predict_labels = r.predict(np.log(predict_data)) + + # sorts = r.precisions_.argsort()[::-1] + # tinds = np.array([np.where(labels == i)[0] for i in uniq_labels], + # dtype=object) + # pinds = np.array([np.where(predict_labels == i)[0] for i in uniq_labels], + # dtype=object) + # for i in uniq_labels: + # labels[tinds[i]] = sorts[i] + # predict_labels[pinds[i]] = sorts[i] + tinds = [np.where(labels == i)[0] for i in uniq_labels] + pinds = [np.where(predict_labels == i)[0] for i in uniq_labels] + + train_data_inds = np.array([np.where(data == col)[0][0] for col in + train_data]) + predict_data_inds = np.array([np.where(data == col)[0][0] for col in + predict_data]) + all_labels = r.predict(np.log(data)) + + cmap = mpl.colormaps['tab10'] + cmap.set_under() + scale, sparse = 3, 1 + + fig, ax = plt.subplots(2, 2, figsize=(4*scale, 3*scale)) + for i in uniq_labels: + bins = np.exp(np.linspace(np.log(trates[tinds[i]].min()), + np.log(trates[tinds[i]].max()), 50)) + ax[0, 0].hist(prates[pinds[i]], bins=bins, label=leg_labels[i], + color=cmap(get_color(i)), zorder=1) + ax[0, 0].hist(trates[tinds[i]], bins=bins, label=leg_labels[i], + color=cmap(get_color(i)), zorder=2, alpha=0.5, + edgecolor='k') + + ax[0, 0].set_xscale('log') + ax[0, 0].set_xlabel(r'rate ($ns^{-1}$)') + ax[0, 0].set_ylabel('count') + ax[0, 0].set_xlim(1e-3, 10) + + row, col = 
gibbs.mcweights[burnin_ind:].shape + iter_arr = np.mgrid[:row, :col][0] + iters = iter_arr[inds] + titer, piter = iters[train_data_inds], iters[predict_data_inds] + for i in uniq_labels: + ax[0, 1].plot(piter[pinds[i]], pweights[pinds[i]][::sparse], '.', + label=leg_labels[i], color=cmap(get_color(i)), zorder=1) + ax[1, 1].plot(piter[pinds[i]], prates[pinds[i]][::sparse], '.', + label=leg_labels[i], color=cmap(get_color(i)), zorder=1) + ax[0, 1].plot(titer[tinds[i]], tweights[tinds[i]][::sparse], '.', + label=leg_labels[i], color=cmap(get_color(i)), zorder=2, + alpha=0.5, mec='k', mew=1) + ax[1, 1].plot(titer[tinds[i]], trates[tinds[i]][::sparse], '.', + label=leg_labels[i], color=cmap(get_color(i)), zorder=2, + alpha=0.5, mec='k', mew=1) + + ax[0, 1].set_yscale('log') + ax[0, 1].set_ylabel(r'weight') + ax[1, 1].set_yscale('log') + ax[1, 1].set_ylabel(r'rate ($ns^{-1}$)') + ax[1, 1].set_xlabel('sample') + ax[0, 1].set_xlabel('sample') + ax[0, 1].set_ylim(1e-4, 1) + ax[1, 1].set_xlabel('sample') + ax[1, 1].set_ylim(1e-3, 10) + + for i in uniq_labels: + ax[1, 0].plot(prates[pinds[i]], pweights[pinds[i]], '.', + label=leg_labels[i], + color=cmap(get_color(i)), zorder=1) + ax[1, 0].plot(trates[tinds[i]], tweights[tinds[i]], '.', + label=leg_labels[i], + color=cmap(get_color(i)), zorder=2, alpha=0.5, + mec='k', mew=1) + + ax[1, 0].set_yscale('log') + ax[1, 0].set_xscale('log') + ax[1, 0].set_ylabel('weight') + ax[1, 0].set_xlabel(r'rate ($ns^{-1}$)') + ax[1, 0].set_xlim(1e-3, 10) + ax[1, 0].set_ylim(1e-4, 1) + + handles, plot_labels = ax[0, 0].get_legend_handles_labels() + # sorts = np.argsort([int(i) for i in plot_labels]) + # handles = np.array(handles)[sorts] + # plot_labels = np.array(plot_labels)[sorts] + [handle.set_color(cmap(get_color(int(i)))) for i, handle in + zip(plot_labels, handles)] + [handle.set_edgecolor('k') for i, handle in zip(plot_labels, handles)] + fig.legend(handles, plot_labels, loc='lower center', + ncols=len(plot_labels)/2, title='cluster') + 
fig.suptitle(f'{method} '+' '.join(keyvalpairs), fontsize=16)
+    plt.tight_layout(rect=(0, 0.05, 1, 1))
+    plt.savefig(f"{gibbs.residue}/results_{method}_{kwarg_str}.png",
+                bbox_inches='tight')
+    plt.show()
+    # tparams, pparams = [], []
+    # for i in uniq_labels:
+    #     tinds = np.where(labels == i)[0]
+    #     pinds = np.where(predict_labels == i)[0]
+    #     tparams.append(trates[tinds].mean())
+    #     pparams.append(prates[pinds].mean())
+    # tindex = np.where(tparams == np.min(tparams))[0]
+    # pindex = np.where(pparams == np.min(pparams))[0]
+    # clu_rates = np.concatenate([trates[labels == tindex],
+    #                             prates[predict_labels == pindex]])
+    # all_results = [(1/clu_rates).mean(), confidence_interval(1/clu_rates)]
+    # train_results = [(1/trates[labels == tindex]).mean(),
+    #                  confidence_interval(1/trates[labels == tindex])]
+    # predict_results = [(1/prates[predict_labels == pindex]).mean(),
+    #                    confidence_interval(1/prates[predict_labels == pindex])]
+    return r, all_labels
+
diff --git a/basicrta/wdensity.py b/basicrta/wdensity.py
index c74c8fb..6d84ecd 100644
--- a/basicrta/wdensity.py
+++ b/basicrta/wdensity.py
@@ -10,7 +10,7 @@ class WDensityAnalysis(AnalysisBase):
-    r"""Volumetric density analysis.
+    r"""Weighted volumetric density analysis.
 
     The trajectory is read, frame by frame, and the atoms in `atomgroup` are
     histogrammed on a 3D grid with spacing `delta`.
 
     
Parameters diff --git a/basicrta/weighted_density.py b/basicrta/weighted_density.py new file mode 100644 index 0000000..1876ae2 --- /dev/null +++ b/basicrta/weighted_density.py @@ -0,0 +1,103 @@ +from basicrta.wdensity import WDensityAnalysis +import numpy as np +import MDAnalysis as mda +import os +from tqdm import tqdm +from basicrta.util import get_start_stop_frames + + +class WeightedDensity(object): + def __init__(self, gibbs, u, contacts, step=1, N=1000): + self.gibbs, self.u, self.N = gibbs, u, N + self.contacts, self.step = contacts, step + self.cutoff = float(contacts.split('/')[-1].strip('.npy'). + split('_')[-1]) + self.write_sel = None + self.dataname = (f'basicrta-{self.cutoff}/{self.gibbs.residue}/' + f'den_write_data_step{self.step}.npy') + self.trajname = (f'basicrta-{self.cutoff}/{self.gibbs.residue}/' + f'chol_traj_step{self.step}.xtc') + + + def _create_data(self): + contacts = np.load(self.contacts, mmap_mode='r') + resid = int(self.gibbs.residue[1:]) + ncomp = self.gibbs.processed_results.ncomp + + times = np.array(contacts[contacts[:, 0] == resid][:, 3]) + trajtimes = np.array(contacts[contacts[:, 0] == resid][:, 2]) + lipinds = np.array(contacts[contacts[:, 0] == resid][:, 1]) + dt = self.u.trajectory.ts.dt/1000 #nanoseconds + + indicators = self.gibbs.processed_results.indicator + + bframes, eframes = get_start_stop_frames(trajtimes, times, dt) + tmp = [np.arange(b, e) for b, e in zip(bframes, eframes)] + tmpL = [np.ones_like(np.arange(b, e)) * l for b, e, l in + zip(bframes, eframes, lipinds)] + tmpI = [indic * np.ones((len(np.arange(b, e)), ncomp)) + for b, e, indic in zip(bframes, eframes, indicators.T)] + + write_frames = np.concatenate([*tmp]).astype(int) + write_linds = np.concatenate([*tmpL]).astype(int) + write_indics = np.concatenate([*tmpI]) + + darray = np.zeros((len(write_frames), ncomp + 2)) + darray[:, 0], darray[:, 1], darray[:, 2:] = (write_frames, write_linds, + write_indics) + np.save(self.dataname, darray) + + + def 
_create_traj(self): + protein = self.u.select_atoms('protein') + chol = self.u.select_atoms('resname CHOL') + write_sel = protein + chol.residues[0].atoms + + if not os.path.exists(self.dataname): + self._create_data() + + tmp = np.load(self.dataname) + wf, wl, wi = tmp[:, 0], tmp[:, 1], tmp[:, 2:] + + if not os.path.exists(self.trajname): + with mda.Writer(self.trajname, len(write_sel.atoms)) as W: + for i, ts in tqdm(enumerate(self.u.trajectory[wf[::self.step]]), + total=len(wf)//(self.step+1), + desc='writing trajectory'): + W.write(protein+chol.residues[wl[::self.step][i]].atoms) + + + def run(self): + if not os.path.exists(self.trajname): + self._create_traj() + + resid = int(self.gibbs.residue[1:]) + tmp = np.load(self.dataname) + wi = tmp[:, 2:] + + # filter_inds = np.where(wi > filterP) + # wi = wi[filter_inds[0]][::self.step] + # comp_inds = [np.where(filter_inds[1] == i)[0] for i in + # range(self.gibbs.processed_results.ncomp)] + + u_red = mda.Universe('prot_chol.gro', self.trajname) + chol_red = u_red.select_atoms('resname CHOL') + + sortinds = [wi[:, i].argsort()[::-1] for i in + range(self.gibbs.processed_results.ncomp)] + for i in range(self.gibbs.processed_results.ncomp): + D = WDensityAnalysis(chol_red, wi[sortinds[i], i], + gridcenter=u_red.select_atoms(f'protein and ' + f'resid {resid}') + .center_of_geometry(), xdim=40, ydim=40, + zdim=40) + D.run(verbose=True, frames=sortinds[i]) + D.results.density.export(f'basicrta-{self.cutoff}/' + f'{self.gibbs.processed_results.residue}/' + f'wcomp{i}_top{self.N}.dx') + + + +if __name__ == "__main__": + print('not implemented') + diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index c79a37b..0a6546e 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -1,3 +1,4 @@ +<<<<<<< HEAD name: test channels: @@ -15,6 +16,27 @@ dependencies: - codecov # Pip-only installs +======= +name: basicrta-test +channels: + - conda-forge + - defaults 
+dependencies: + # Base depends + - python + - pip + + # MDAnalysis + - MDAnalysis + + # Testing + - pytest + - pytest-cov + - pytest-xdist + - codecov + + # Pip-only installs +>>>>>>> basicrta-kit/master #- pip: # - codecov diff --git a/docs/Makefile b/docs/Makefile index 1092318..bf8c6ef 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -5,7 +5,11 @@ SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = basicrta +<<<<<<< HEAD SOURCEDIR = . +======= +SOURCEDIR = source +>>>>>>> basicrta-kit/master BUILDDIR = _build # Put it first so that "make" without argument is like "make help". diff --git a/docs/requirements.yaml b/docs/requirements.yaml index 939a290..554e18f 100644 --- a/docs/requirements.yaml +++ b/docs/requirements.yaml @@ -1,12 +1,26 @@ +<<<<<<< HEAD name: docs channels: +======= +name: basicrta-docs +channels: + + - conda-forge + +>>>>>>> basicrta-kit/master dependencies: # Base depends - python - pip +<<<<<<< HEAD # Pip-only installs #- pip: +======= + - mdanalysis-sphinx-theme >=1.0.1 + # Pip-only installs + #- pip: +>>>>>>> basicrta-kit/master diff --git a/docs/source/_static/README.md b/docs/source/_static/README.md new file mode 100644 index 0000000..8d30b2b --- /dev/null +++ b/docs/source/_static/README.md @@ -0,0 +1,16 @@ +# Static Doc Directory + +Add any paths that contain custom static files (such as style sheets) here, +relative to the `conf.py` file's directory. +They are copied after the builtin static files, +so a file named "default.css" will overwrite the builtin "default.css". 
+ +The path to this folder is set in the Sphinx `conf.py` file in the line: +```python +html_static_path = ['_static'] +``` + +## Examples of file to add to this directory +* Custom Cascading Style Sheets +* Custom JavaScript code +* Static logo images diff --git a/docs/source/_static/logo/placeholder_favicon.svg b/docs/source/_static/logo/placeholder_favicon.svg new file mode 100644 index 0000000..cf62228 --- /dev/null +++ b/docs/source/_static/logo/placeholder_favicon.svg @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + diff --git a/docs/source/_static/logo/placeholder_logo.png b/docs/source/_static/logo/placeholder_logo.png new file mode 100644 index 0000000..77e9056 Binary files /dev/null and b/docs/source/_static/logo/placeholder_logo.png differ diff --git a/docs/source/_templates/README.md b/docs/source/_templates/README.md new file mode 100644 index 0000000..e8e210a --- /dev/null +++ b/docs/source/_templates/README.md @@ -0,0 +1,14 @@ +# Templates Doc Directory + +Add any paths that contain templates here, relative to +the `conf.py` file's directory. +They are copied after the builtin template files, +so a file named "page.html" will overwrite the builtin "page.html". + +The path to this folder is set in the Sphinx `conf.py` file in the line: +```python +templates_path = ['_templates'] +``` + +## Examples of file to add to this directory +* HTML extensions of stock pages like `page.html` or `layout.html` diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..78b8953 --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,8 @@ +API Documentation +================= + +.. autosummary:: + :toctree: autosummary + :recursive: + + basicrta diff --git a/docs/source/autosummary/basicrta.data.files.rst b/docs/source/autosummary/basicrta.data.files.rst new file mode 100644 index 0000000..3975e2a --- /dev/null +++ b/docs/source/autosummary/basicrta.data.files.rst @@ -0,0 +1,23 @@ +basicrta.data.files +=================== + +.. 
automodule:: basicrta.data.files + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/autosummary/basicrta.data.rst b/docs/source/autosummary/basicrta.data.rst new file mode 100644 index 0000000..2f744d6 --- /dev/null +++ b/docs/source/autosummary/basicrta.data.rst @@ -0,0 +1,31 @@ +basicrta.data +============= + +.. automodule:: basicrta.data + + + + + + + + + + + + + + + + + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + basicrta.data.files + diff --git a/docs/source/autosummary/basicrta.gibbs.rst b/docs/source/autosummary/basicrta.gibbs.rst new file mode 100644 index 0000000..979f90e --- /dev/null +++ b/docs/source/autosummary/basicrta.gibbs.rst @@ -0,0 +1,29 @@ +basicrta.gibbs +============== + +.. automodule:: basicrta.gibbs + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + + gibbs + + + + + + + + + diff --git a/docs/source/autosummary/basicrta.rst b/docs/source/autosummary/basicrta.rst new file mode 100644 index 0000000..856210f --- /dev/null +++ b/docs/source/autosummary/basicrta.rst @@ -0,0 +1,32 @@ +basicrta +======== + +.. automodule:: basicrta + + + + + + + + + + + + + + + + + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + basicrta.data + basicrta.gibbs + diff --git a/docs/source/conf.py b/docs/source/conf.py index 66748df..4e15247 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,16 +12,26 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
+<<<<<<< HEAD # Incase the project was not installed import os import sys sys.path.insert(0, os.path.abspath('..')) import basicrta +======= +# In case the project was not installed +import os +import sys +sys.path.insert(0, os.path.abspath("../..")) +import basicrta # noqa + +>>>>>>> basicrta-kit/master # -- Project information ----------------------------------------------------- +<<<<<<< HEAD project = 'basicrta' copyright = ("2022, Ricky Sexton. Project structure based on the " "Computational Molecular Science Python Cookiecutter version 1.6") @@ -31,12 +41,27 @@ version = '' # The full version, including alpha/beta/rc tags release = '' +======= +project = "basicrta" +copyright = ( + "2024, Ricky Sexton. " + "Project structure based on the " + "MDAnalysis Cookiecutter version 0.1" +) +author = "Ricky Sexton" + +# The short X.Y version +version = "" +# The full version, including alpha/beta/rc tags +release = "" +>>>>>>> basicrta-kit/master # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # +<<<<<<< HEAD # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be @@ -53,36 +78,84 @@ ] autosummary_generate = True +======= +needs_sphinx = "6.2.1" + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named "sphinx.ext.*") or your custom +# ones. 
+extensions = [ + "sphinx.ext.autosummary", + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx.ext.extlinks", + "mdanalysis_sphinx_theme", +] + +autosummary_generate = True +# This skips generating an autodoc of the test module +# when using the autosummary directive that is included +# by default in api.rst +autodoc_mock_imports = [ + 'basicrta.tests' +] +>>>>>>> basicrta-kit/master napoleon_google_docstring = False napoleon_use_param = False napoleon_use_ivar = True # Add any paths that contain templates here, relative to this directory. +<<<<<<< HEAD templates_path = ['_templates'] +======= +templates_path = ["_templates"] +>>>>>>> basicrta-kit/master # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # +<<<<<<< HEAD # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' +======= +# source_suffix = [".rst", ".md"] +source_suffix = ".rst" + +# The master toctree document. +master_doc = "index" +>>>>>>> basicrta-kit/master # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. +<<<<<<< HEAD language = None +======= +language = "en" +>>>>>>> basicrta-kit/master # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . +<<<<<<< HEAD exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'default' +======= +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = "default" +>>>>>>> basicrta-kit/master # -- Options for HTML output ------------------------------------------------- @@ -90,26 +163,51 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # +<<<<<<< HEAD html_theme = 'sphinx_rtd_theme' +======= +html_theme = "mdanalysis_sphinx_theme" +>>>>>>> basicrta-kit/master # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # +<<<<<<< HEAD # html_theme_options = {} +======= +html_theme_options = { + + "mda_official": False, + +} +# Set your logo and favicon here -- replace the placeholders! +html_logo = "_static/logo/placeholder_logo.png" +html_favicon = "_static/logo/placeholder_favicon.svg" + +>>>>>>> basicrta-kit/master # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". +<<<<<<< HEAD html_static_path = ['_static'] +======= +html_static_path = ["_static"] +>>>>>>> basicrta-kit/master # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by +<<<<<<< HEAD # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. +======= +# default: ``["localtoc.html", "relations.html", "sourcelink.html", +# "searchbox.html"]``. +>>>>>>> basicrta-kit/master # # html_sidebars = {} @@ -117,12 +215,17 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. 
+<<<<<<< HEAD htmlhelp_basename = 'basicrtadoc' +======= +htmlhelp_basename = "basicrtadoc" +>>>>>>> basicrta-kit/master # -- Options for LaTeX output ------------------------------------------------ latex_elements = { +<<<<<<< HEAD # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', @@ -138,14 +241,36 @@ # Latex figure (float) alignment # # 'figure_align': 'htbp', +======= + # The paper size ("letterpaper" or "a4paper"). + # + # "papersize": "letterpaper", + + # The font size ("10pt", "11pt" or "12pt"). + # + # "pointsize": "10pt", + + # Additional stuff for the LaTeX preamble. + # + # "preamble": "", + + # Latex figure (float) alignment + # + # "figure_align": "htbp", +>>>>>>> basicrta-kit/master } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ +<<<<<<< HEAD (master_doc, 'basicrta.tex', 'basicrta Documentation', 'basicrta', 'manual'), +======= + (master_doc, "basicrta.tex", "basicrta Documentation", + "basicrta", "manual"), +>>>>>>> basicrta-kit/master ] @@ -154,7 +279,11 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ +<<<<<<< HEAD (master_doc, 'basicrta', 'basicrta Documentation', +======= + (master_doc, "basicrta", "basicrta Documentation", +>>>>>>> basicrta-kit/master [author], 1) ] @@ -165,10 +294,23 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ +<<<<<<< HEAD (master_doc, 'basicrta', 'basicrta Documentation', author, 'basicrta', 'Bayesian single-cutoff residence time analysis', 'Miscellaneous'), +======= + (master_doc, "basicrta", "basicrta Documentation", + author, "basicrta", "A package to extract binding kinetics from molecular dynamics simulations", + "Miscellaneous"), +>>>>>>> basicrta-kit/master ] # -- Extension configuration ------------------------------------------------- +<<<<<<< HEAD +======= +intersphinx_mapping = { + "python": ("https://docs.python.org/3/", None), + "mdanalysis": ("https://docs.mdanalysis.org/stable/", None), +} +>>>>>>> basicrta-kit/master diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst new file mode 100644 index 0000000..3318efe --- /dev/null +++ b/docs/source/getting_started.rst @@ -0,0 +1,10 @@ +Getting Started +=============== + +To use ``basicrta``, install from source. + +.. code-block:: bash + + git clone git@github.com:rsexton2/basicrta.git + cd basicrta + pip install . diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..f1c3b57 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,28 @@ +.. basicrta documentation master file, created by + sphinx-quickstart on Thu Mar 15 13:55:56 2018. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to basicrta's documentation! +========================================================= +``basicrtakit`` is a package to analyze binding kinetics in molecular dynamics +simulations. 
The analysis uses an exponential mixture model and Bayesian +nonparametric inference to determine time-scales of the underlying binding +processes. The output is processed to give frames belonging to a given process +with an associated time-scale, which can be further analyzed. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + getting_started + api + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1d56761 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,92 @@ +[build-system] +requires = [ + "setuptools >=61.2", + "versioningit", +] +build-backend = "setuptools.build_meta" + +[project] +name = "basicrta" +description = "A package to extract binding kinetics from molecular dynamics simulations" +license = {file = "LICENSE" } +authors = [ + {name = "Ricky Sexton", email = "risexto878@gmail.com"}, +] +maintainers = [ + {name = "Ricky Sexton", email = "risexto878@gmail.com"}, +] +readme = "README.md" +requires-python = ">=3.9" +dependencies = [ + "MDAnalysis>=2.0.0", +] +keywords = [ + "molecular simulations", +] +dynamic = [ + "version", +] + +[project.optional-dependencies] +test = [ + "pytest>=6.0", + "pytest-xdist>=2.5", + "pytest-cov>=3.0", +] +doc = [ + "sphinx", + "sphinx_rtd_theme", +] + +# [project.urls] +# source = "https://github.com/rsexton2/basicrta" +# documentation = "https://basicrta.readthedocs.io" + +[tool.setuptools] +py-modules = [] + +[tool.pytest.ini_options] +minversion = "6.0" +testpaths = [ + "basicrta/tests", +] + +[tool.black] +line-length = 80 + +[tool.versioningit] +default-version = "1+unknown" + +[tool.versioningit.vcs] +method = "git" +# the below line expects tags to look like '1.0.2'. +# if prefixing with a v, e.g. 
'v1.0.2', change it to ["v*"] +match = ["*"] + +[tool.versioningit.format] +distance = "{base_version}+{distance}.{vcs}{rev}" +dirty = "{base_version}+{distance}.{vcs}{rev}.dirty" +distance-dirty = "{base_version}+{distance}.{vcs}{rev}.dirty" + +[tool.coverage.run] +omit = [ + # Omit the tests + "*/tests/*", +] + +[tool.coverage.report] +exclude_also = [ + "if TYPE_CHECKING:", +] + +[tool.isort] +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +line_length = 80 + +[tool.yapf] +COLUMN_LIMIT = 80 +INDENT_WIDTH = 4 +USE_TABS = false diff --git a/readthedocs.yaml b/readthedocs.yaml new file mode 100644 index 0000000..cab6477 --- /dev/null +++ b/readthedocs.yaml @@ -0,0 +1,16 @@ +# readthedocs.yaml + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "mambaforge-4.10" + +python: + install: + - method: pip + path: . + +conda: + environment: docs/requirements.yaml