diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 000000000..c965828c6
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,22 @@
+# Note: You can use any Debian/Ubuntu based image you want.
+FROM mcr.microsoft.com/devcontainers/python:3.7-bullseye
+
+# Install system tools and the Python development/test toolchain in one layer.
+# - apt-get (not apt) is the stable CLI for scripts; the package index is removed
+#   in the same layer so it is not baked into the image.
+# - datajoint is installed and then uninstalled, presumably to keep only its
+#   dependencies; devcontainer.json installs the working copy with
+#   `pip install -e .` when the container is created.
+RUN \
+    apt-get update && \
+    apt-get install -y bash-completion default-mysql-client graphviz && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --no-cache-dir flake8 black faker ipykernel nose nose-cov datajoint && \
+    pip uninstall datajoint -y
+
+# Default DataJoint connection settings for the development services.
+# NOTE(review): DJ_PASS is baked into the image, so it should only ever be a
+# throwaway development credential.
+ENV DJ_HOST=fakeservices.datajoint.io \
+    DJ_USER=root \
+    DJ_PASS=simple
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 000000000..3727855ae
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,27 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+{
+    "name": "Development",
+    "dockerComposeFile": "docker-compose.yaml",
+    "service": "app",
+    "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
+    // Use this environment variable if you need to bind mount your local source code into a new container.
+ "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + // https://containers.dev/features + "features": { + "ghcr.io/devcontainers/features/docker-in-docker:2": {}, + "ghcr.io/devcontainers/features/git:1": {}, + "ghcr.io/eitsupi/devcontainer-features/jq-likes:1": {}, + "ghcr.io/guiyomh/features/vim:0": {} + }, + "onCreateCommand": "pip install -e .", + "postStartCommand": "MYSQL_VER=5.7 MINIO_VER=RELEASE.2022-08-11T04-37-28Z docker compose -f local-docker-compose.yml up --build -d", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml new file mode 100644 index 000000000..a456ed151 --- /dev/null +++ b/.devcontainer/docker-compose.yaml @@ -0,0 +1,10 @@ +version: "3" +services: + app: + build: . + extra_hosts: + - fakeservices.datajoint.io:127.0.0.1 + volumes: + - ../..:/workspaces:cached + entrypoint: /usr/local/share/docker-init.sh + command: tail -f /dev/null diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index cc0e4491a..acf523812 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -2,23 +2,23 @@ name: Development on: push: branches: - - '**' # every branch - - '!gh-pages' # exclude gh-pages branch - - '!stage*' # exclude branches beginning with stage + - "**" # every branch + - "!gh-pages" # exclude gh-pages branch + - "!stage*" # exclude branches beginning with stage tags: - '\d+\.\d+\.\d+' # only semver tags pull_request: branches: - - '**' # every branch - - '!gh-pages' # exclude gh-pages branch - - '!stage*' # exclude branches beginning with stage + - "**" # every branch + - "!gh-pages" # exclude gh-pages branch + - "!stage*" # exclude branches beginning with stage jobs: build: runs-on: ubuntu-latest strategy: matrix: include: - - py_ver: '3.9' + - py_ver: "3.9" distro: debian image: djbase env: @@ -77,6 +77,7 @@ 
jobs: - name: Run primary tests env: PY_VER: ${{matrix.py_ver}} + DJ_PASS: simple MYSQL_VER: ${{matrix.mysql_ver}} DISTRO: alpine MINIO_VER: RELEASE.2021-09-03T03-56-13Z @@ -119,7 +120,7 @@ jobs: strategy: matrix: include: - - py_ver: '3.9' + - py_ver: "3.9" distro: debian image: djbase env: diff --git a/.gitignore b/.gitignore index 1c60cd8f7..eac4f5671 100644 --- a/.gitignore +++ b/.gitignore @@ -21,9 +21,12 @@ build/ *.env docker-compose.yml notebook -.vscode __main__.py jupyter_custom.js .eggs *.code-workspace docs/site + + +!.vscode/settings.json +!.devcontainer/devcontainer.json \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100755 index 000000000..b9bd71a69 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "editor.formatOnPaste": false, + "editor.formatOnSave": true, + "editor.rulers": [ + 94 + ], + "python.formatting.provider": "black", + "[python]": { + "editor.defaultFormatter": null + } +} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 77a84589c..8bdc38f3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,290 +1,292 @@ ## Release notes ### 0.14.0 -- TBA -* Bugfix - Activating a schema requires all tables to exist even if `create_tables=False` PR [#1058](https://github.com/datajoint/datajoint-python/pull/1058) -* Update - Populate call with `reserve_jobs=True` to exclude `error` and `ignore` keys - PR [#1062](https://github.com/datajoint/datajoint-python/pull/1062) -* Add - Support for inserting data with CSV files - PR [#1067](https://github.com/datajoint/datajoint-python/pull/1067) -* Update - Switch testing image from `pydev` to `djtest` PR [#1012](https://github.com/datajoint/datajoint-python/pull/1012) +- Fixed - Activating a schema requires all tables to exist even if `create_tables=False` PR [#1058](https://github.com/datajoint/datajoint-python/pull/1058) +- Changed - Populate call with `reserve_jobs=True` to exclude `error` and `ignore` keys - PR 
[#1062](https://github.com/datajoint/datajoint-python/pull/1062) +- Added - Support for inserting data with CSV files - PR [#1067](https://github.com/datajoint/datajoint-python/pull/1067) +- Changed - Switch testing image from `pydev` to `djtest` PR [#1012](https://github.com/datajoint/datajoint-python/pull/1012) +- Added - DevContainer development environment compatible with GH Codespaces PR [#1071](https://github.com/datajoint/datajoint-python/pull/1071) ### 0.13.8 -- Sep 21, 2022 -* Add - New documentation structure based on markdown PR [#1052](https://github.com/datajoint/datajoint-python/pull/1052) -* Bugfix - Fix queries with backslashes ([#999](https://github.com/datajoint/datajoint-python/issues/999)) PR [#1052](https://github.com/datajoint/datajoint-python/pull/1052) +- Added - New documentation structure based on markdown PR [#1052](https://github.com/datajoint/datajoint-python/pull/1052) +- Fixed - Fix queries with backslashes ([#999](https://github.com/datajoint/datajoint-python/issues/999)) PR [#1052](https://github.com/datajoint/datajoint-python/pull/1052) ### 0.13.7 -- Jul 13, 2022 -* Bugfix - Fix networkx incompatable change by version pinning to 2.6.3 (#1035) PR #1036 -* Add - Support for serializing numpy datetime64 types (#1022) PR #1036 -* Update - Add traceback to default logging PR #1036 +- Fixed - Fix networkx incompatible change by version pinning to 2.6.3 (#1035) PR #1036 +- Added - Support for serializing numpy datetime64 types (#1022) PR #1036 +- Changed - Add traceback to default logging PR #1036 ### 0.13.6 -- Jun 13, 2022 -* Add - Config option to set threshold for when to stop using checksums for filepath stores. PR #1025 -* Add - Unified package level logger for package (#667) PR #1031 -* Update - Swap various datajoint messages, warnings, etc. to use the new logger. 
(#667) PR #1031 -* Bugfix - Fix query caching deleting non-datajoint files PR #1027 -* Update - Minimum Python version for Datajoint-Python is now 3.7 PR #1027 +- Added - Config option to set threshold for when to stop using checksums for filepath stores. PR #1025 +- Added - Unified package level logger for package (#667) PR #1031 +- Changed - Swap various datajoint messages, warnings, etc. to use the new logger. (#667) PR #1031 +- Fixed - Fix query caching deleting non-datajoint files PR #1027 +- Changed - Minimum Python version for Datajoint-Python is now 3.7 PR #1027 ### 0.13.5 -- May 19, 2022 -* Update - Import ABC from collections.abc for Python 3.10 compatibility -* Bugfix - Fix multiprocessing value error (#1013) PR #1026 +- Changed - Import ABC from collections.abc for Python 3.10 compatibility +- Fixed - Fix multiprocessing value error (#1013) PR #1026 ### 0.13.4 -- Mar, 28 2022 -* Add - Allow reading blobs produced by legacy 32-bit compiled mYm library for matlab. PR #995 -* Bugfix - Add missing `jobs` argument for multiprocessing PR #997 -* Add - Test for multiprocessing PR #1008 -* Bugfix - Fix external store key name doesn't allow '-' (#1005) PR #1006 -* Add - Adopted black formatting into code base PR #998 +- Added - Allow reading blobs produced by legacy 32-bit compiled mYm library for matlab. PR #995 +- Fixed - Add missing `jobs` argument for multiprocessing PR #997 +- Added - Test for multiprocessing PR #1008 +- Fixed - Fix external store key name doesn't allow '-' (#1005) PR #1006 +- Added - Adopted black formatting into code base PR #998 ### 0.13.3 -- Feb 9, 2022 -* Bugfix - Fix error in listing ancestors, descendants with part tables. 
-* Bugfix - Fix Python 3.10 compatibility (#983) PR #972 -* Bugfix - Allow renaming non-conforming attributes in proj (#982) PR #972 -* Add - Expose proxy feature for S3 external stores (#961) PR #962 -* Add - implement multiprocessing in populate (#695) PR #704, #969 -* Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 -* Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 -* Bugfix - Deletes and drops must include the master of each part. (#151, #374) PR #957 -* Bugfix - `ExternalTable.delete` should not remove row on error (#953) PR #956 -* Bugfix - Fix error handling of remove_object function in `s3.py` (#952) PR #955 -* Bugfix - Fix regression issue with `DISTINCT` clause and `GROUP_BY` (#914) PR #963 -* Bugfix - Fix sql code generation to comply with sql mode `ONLY_FULL_GROUP_BY` (#916) PR #965 -* Bugfix - Fix count for left-joined `QueryExpressions` (#951) PR #966 -* Bugfix - Fix assertion error when performing a union into a join (#930) PR #967 -* Update `~jobs.error_stack` from blob to mediumblob to allow error stacks >64kB in jobs (#984) PR #986 -* Bugfix - Fix error when performing a union on multiple tables (#926) PR #964 -* Add - Allow optional keyword arguments for `make()` in `populate()` PR #971 +- Fixed - Fix error in listing ancestors, descendants with part tables. +- Fixed - Fix Python 3.10 compatibility (#983) PR #972 +- Fixed - Allow renaming non-conforming attributes in proj (#982) PR #972 +- Added - Expose proxy feature for S3 external stores (#961) PR #962 +- Added - implement multiprocessing in populate (#695) PR #704, #969 +- Fixed - Dependencies not properly loaded on populate. (#902) PR #919 +- Fixed - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 +- Fixed - Deletes and drops must include the master of each part. 
(#151, #374) PR #957 +- Fixed - `ExternalTable.delete` should not remove row on error (#953) PR #956 +- Fixed - Fix error handling of remove_object function in `s3.py` (#952) PR #955 +- Fixed - Fix regression issue with `DISTINCT` clause and `GROUP_BY` (#914) PR #963 +- Fixed - Fix sql code generation to comply with sql mode `ONLY_FULL_GROUP_BY` (#916) PR #965 +- Fixed - Fix count for left-joined `QueryExpressions` (#951) PR #966 +- Fixed - Fix assertion error when performing a union into a join (#930) PR #967 +- Changed `~jobs.error_stack` from blob to mediumblob to allow error stacks >64kB in jobs (#984) PR #986 +- Fixed - Fix error when performing a union on multiple tables (#926) PR #964 +- Added - Allow optional keyword arguments for `make()` in `populate()` PR #971 ### 0.13.2 -- May 7, 2021 -* Update `setuptools_certificate` dependency to new name `otumat` -* Bugfix - Explicit calls to `dj.Connection` throw error due to missing `host_input` (#895) PR #907 -* Bugfix - Correct count of deleted items. (#897) PR #912 +- Changed `setuptools_certificate` dependency to new name `otumat` +- Fixed - Explicit calls to `dj.Connection` throw error due to missing `host_input` (#895) PR #907 +- Fixed - Correct count of deleted items. 
(#897) PR #912 ### 0.13.1 -- Apr 16, 2021 -* Add `None` as an alias for `IS NULL` comparison in `dict` restrictions (#824) PR #893 -* Drop support for MySQL 5.6 since it has reached EOL PR #893 -* Bugfix - `schema.list_tables()` is not topologically sorted (#838) PR #893 -* Bugfix - Diagram part tables do not show proper class name (#882) PR #893 -* Bugfix - Error in complex restrictions (#892) PR #893 -* Bugfix - WHERE and GROUP BY clases are dropped on joins with aggregation (#898, #899) PR #893 +- Added `None` as an alias for `IS NULL` comparison in `dict` restrictions (#824) PR #893 +- Changed - Drop support for MySQL 5.6 since it has reached EOL PR #893 +- Fixed - `schema.list_tables()` is not topologically sorted (#838) PR #893 +- Fixed - Diagram part tables do not show proper class name (#882) PR #893 +- Fixed - Error in complex restrictions (#892) PR #893 +- Fixed - WHERE and GROUP BY clases are dropped on joins with aggregation (#898, #899) PR #893 ### 0.13.0 -- Mar 24, 2021 -* Re-implement query transpilation into SQL, fixing issues (#386, #449, #450, #484, #558). PR #754 -* Re-implement cascading deletes for better performance. PR #839 -* Add support for deferred schema activation to allow for greater modularity. (#834) PR #839 -* Add query caching mechanism for offline development (#550) PR #839 -* Add table method `.update1` to update a row in the table with new values (#867) PR #763, #889 -* Python datatypes are now enabled by default in blobs (#761). 
PR #859 -* Added permissive join and restriction operators `@` and `^` (#785) PR #754 -* Support DataJoint datatype and connection plugins (#715, #729) PR 730, #735 -* Add `dj.key_hash` alias to `dj.hash.key_hash` (#804) PR #862 -* Default enable_python_native_blobs to True -* Bugfix - Regression error on joins with same attribute name (#857) PR #878 -* Bugfix - Error when `fetch1('KEY')` when `dj.config['fetch_format']='frame'` set (#876) PR #880, #878 -* Bugfix - Error when cascading deletes in tables with many, complex keys (#883, #886) PR #839 -* Add deprecation warning for `_update`. PR #889 -* Add `purge_query_cache` utility. PR #889 -* Add tests for query caching and permissive join and restriction. PR #889 -* Drop support for Python 3.5 (#829) PR #861 +- Re-implement query transpilation into SQL, fixing issues (#386, #449, #450, #484, #558). PR #754 +- Re-implement cascading deletes for better performance. PR #839 +- Add support for deferred schema activation to allow for greater modularity. (#834) PR #839 +- Add query caching mechanism for offline development (#550) PR #839 +- Add table method `.update1` to update a row in the table with new values (#867) PR #763, #889 +- Python datatypes are now enabled by default in blobs (#761). PR #859 +- Added permissive join and restriction operators `@` and `^` (#785) PR #754 +- Support DataJoint datatype and connection plugins (#715, #729) PR 730, #735 +- Add `dj.key_hash` alias to `dj.hash.key_hash` (#804) PR #862 +- Default enable_python_native_blobs to True +- Bugfix - Regression error on joins with same attribute name (#857) PR #878 +- Bugfix - Error when `fetch1('KEY')` when `dj.config['fetch_format']='frame'` set (#876) PR #880, #878 +- Bugfix - Error when cascading deletes in tables with many, complex keys (#883, #886) PR #839 +- Add deprecation warning for `_update`. PR #889 +- Add `purge_query_cache` utility. PR #889 +- Add tests for query caching and permissive join and restriction. 
PR #889 +- Drop support for Python 3.5 (#829) PR #861 ### 0.12.9 -- Mar 12, 2021 -* Fix bug with fetch1 with `dj.config['fetch_format']="frame"`. (#876) PR #880 +- Fix bug with fetch1 with `dj.config['fetch_format']="frame"`. (#876) PR #880 ### 0.12.8 -- Jan 12, 2021 -* table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 -* Load dependencies before querying dependencies. (#179) PR #833 -* Fix display of part tables in `schema.save`. (#821) PR #833 -* Add `schema.list_tables`. (#838) PR #844 -* Fix minio new version regression. PR #847 -* Add more S3 logging for debugging. (#831) PR #832 -* Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 +- table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 +- Load dependencies before querying dependencies. (#179) PR #833 +- Fix display of part tables in `schema.save`. (#821) PR #833 +- Add `schema.list_tables`. (#838) PR #844 +- Fix minio new version regression. PR #847 +- Add more S3 logging for debugging. (#831) PR #832 +- Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 ### 0.12.7 -- Oct 27, 2020 -* Fix case sensitivity issues to adapt to MySQL 8+. PR #819 -* Fix pymysql regression bug (#814) PR #816 -* Adapted attribute types now have dtype=object in all recarray results. PR #811 +- Fix case sensitivity issues to adapt to MySQL 8+. PR #819 +- Fix pymysql regression bug (#814) PR #816 +- Adapted attribute types now have dtype=object in all recarray results. 
PR #811 ### 0.12.6 -- May 15, 2020 -* Add `order_by` to `dj.kill` (#668, #779) PR #775, #783 -* Add explicit S3 bucket and file storage location existence checks (#748) PR #781 -* Modify `_update` to allow nullable updates for strings/date (#664) PR #760 -* Avoid logging events on auxiliary tables (#737) PR #753 -* Add `kill_quick` and expand display to include host (#740) PR #741 -* Bugfix - pandas insert fails due to additional `index` field (#666) PR #776 -* Bugfix - `delete_external_files=True` does not remove from S3 (#686) PR #781 -* Bugfix - pandas fetch throws error when `fetch_format='frame'` PR #774 +- Add `order_by` to `dj.kill` (#668, #779) PR #775, #783 +- Add explicit S3 bucket and file storage location existence checks (#748) PR #781 +- Modify `_update` to allow nullable updates for strings/date (#664) PR #760 +- Avoid logging events on auxiliary tables (#737) PR #753 +- Add `kill_quick` and expand display to include host (#740) PR #741 +- Bugfix - pandas insert fails due to additional `index` field (#666) PR #776 +- Bugfix - `delete_external_files=True` does not remove from S3 (#686) PR #781 +- Bugfix - pandas fetch throws error when `fetch_format='frame'` PR #774 ### 0.12.5 -- Feb 24, 2020 -* Rename module `dj.schema` into `dj.schemas`. `dj.schema` remains an alias for class `dj.Schema`. (#731) PR #732 -* `dj.create_virtual_module` is now called `dj.VirtualModule` (#731) PR #732 -* Bugfix - SSL `KeyError` on failed connection (#716) PR #725 -* Bugfix - Unable to run unit tests using nosetests (#723) PR #724 -* Bugfix - `suppress_errors` does not suppress loss of connection error (#720) PR #721 +- Rename module `dj.schema` into `dj.schemas`. `dj.schema` remains an alias for class `dj.Schema`. 
(#731) PR #732 +- `dj.create_virtual_module` is now called `dj.VirtualModule` (#731) PR #732 +- Bugfix - SSL `KeyError` on failed connection (#716) PR #725 +- Bugfix - Unable to run unit tests using nosetests (#723) PR #724 +- Bugfix - `suppress_errors` does not suppress loss of connection error (#720) PR #721 ### 0.12.4 -- Jan 14, 2020 -* Support for simple scalar datatypes in blobs (#690) PR #709 -* Add support for the `serial` data type in declarations: alias for `bigint unsigned auto_increment` PR #713 -* Improve the log table to avoid primary key collisions PR #713 -* Improve documentation in README PR #713 +- Support for simple scalar datatypes in blobs (#690) PR #709 +- Add support for the `serial` data type in declarations: alias for `bigint unsigned auto_increment` PR #713 +- Improve the log table to avoid primary key collisions PR #713 +- Improve documentation in README PR #713 ### 0.12.3 -- Nov 22, 2019 -* Bugfix - networkx 2.4 causes error in diagrams (#675) PR #705 -* Bugfix - include table definition in doc string and help (#698, #699) PR #706 -* Bugfix - job reservation fails when native python datatype support is disabled (#701) PR #702 +- Bugfix - networkx 2.4 causes error in diagrams (#675) PR #705 +- Bugfix - include table definition in doc string and help (#698, #699) PR #706 +- Bugfix - job reservation fails when native python datatype support is disabled (#701) PR #702 ### 0.12.2 -- Nov 11, 2019 -* Bugfix - Convoluted error thrown if there is a reference to a non-existent table attribute (#691) PR #696 -* Bugfix - Insert into external does not trim leading slash if defined in `dj.config['stores']['']['location']` (#692) PR #693 +- Bugfix - Convoluted error thrown if there is a reference to a non-existent table attribute (#691) PR #696 +- Bugfix - Insert into external does not trim leading slash if defined in `dj.config['stores']['']['location']` (#692) PR #693 ### 0.12.1 -- Nov 2, 2019 -* Bugfix - AttributeAdapter converts into a string (#684) 
PR #688 +- Bugfix - AttributeAdapter converts into a string (#684) PR #688 ### 0.12.0 -- Oct 31, 2019 -* Dropped support for Python 3.4 -* Support secure connections with TLS (aka SSL) PR #620 -* Convert numpy array from python object to appropriate data type if all elements are of the same type (#587) PR #608 -* Remove expression requirement to have additional attributes (#604) PR #604 -* Support for filepath datatype (#481) PR #603, #659 -* Support file attachment datatype (#480, #592, #637) PR #659 -* Fetch return a dict array when specifying `as_dict=True` for specified attributes. (#595) PR #593 -* Support of ellipsis in `proj`: `query_expression.proj(.., '-movie')` (#499) PR #578 -* Expand support of blob serialization (#572, #520, #427, #392, #244, #594) PR #577 -* Support for alter (#110) PR #573 -* Support for `conda install datajoint` via `conda-forge` channel (#293) -* `dj.conn()` accepts a `port` keyword argument (#563) PR #571 -* Support for UUID datatype (#562) PR #567 -* `query_expr.fetch("KEY", as_dict=False)` returns results as `np.recarray`(#414) PR #574 -* `dj.ERD` is now called `dj.Diagram` (#255, #546) PR #565 -* `dj.Diagram` underlines "distinguished" classes (#378) PR #557 -* Accept alias for supported MySQL datatypes (#544) PR #545 -* Support for pandas in `fetch` (#459, #537) PR #534 -* Support for ordering by "KEY" in `fetch` (#541) PR #534 -* Add config to enable python native blobs PR #672, #676 -* Add secure option for external storage (#663) PR #674, #676 -* Add blob migration utility from DJ011 to DJ012 PR #673 -* Improved external storage - a migration script needed from version 0.11 (#467, #475, #480, #497) PR #532 -* Increase default display rows (#523) PR #526 -* Bugfixes (#521, #205, #279, #477, #570, #581, #597, #596, #618, #633, #643, #644, #647, #648, #650, #656) -* Minor improvements (#538) +- Dropped support for Python 3.4 +- Support secure connections with TLS (aka SSL) PR #620 +- Convert numpy array from python object to 
appropriate data type if all elements are of the same type (#587) PR #608 +- Remove expression requirement to have additional attributes (#604) PR #604 +- Support for filepath datatype (#481) PR #603, #659 +- Support file attachment datatype (#480, #592, #637) PR #659 +- Fetch return a dict array when specifying `as_dict=True` for specified attributes. (#595) PR #593 +- Support of ellipsis in `proj`: `query_expression.proj(.., '-movie')` (#499) PR #578 +- Expand support of blob serialization (#572, #520, #427, #392, #244, #594) PR #577 +- Support for alter (#110) PR #573 +- Support for `conda install datajoint` via `conda-forge` channel (#293) +- `dj.conn()` accepts a `port` keyword argument (#563) PR #571 +- Support for UUID datatype (#562) PR #567 +- `query_expr.fetch("KEY", as_dict=False)` returns results as `np.recarray`(#414) PR #574 +- `dj.ERD` is now called `dj.Diagram` (#255, #546) PR #565 +- `dj.Diagram` underlines "distinguished" classes (#378) PR #557 +- Accept alias for supported MySQL datatypes (#544) PR #545 +- Support for pandas in `fetch` (#459, #537) PR #534 +- Support for ordering by "KEY" in `fetch` (#541) PR #534 +- Add config to enable python native blobs PR #672, #676 +- Add secure option for external storage (#663) PR #674, #676 +- Add blob migration utility from DJ011 to DJ012 PR #673 +- Improved external storage - a migration script needed from version 0.11 (#467, #475, #480, #497) PR #532 +- Increase default display rows (#523) PR #526 +- Bugfixes (#521, #205, #279, #477, #570, #581, #597, #596, #618, #633, #643, #644, #647, #648, #650, #656) +- Minor improvements (#538) ### 0.11.3 -- Jul 26, 2019 -* Fix incompatibility with pyparsing 2.4.1 (#629) PR #631 +- Fix incompatibility with pyparsing 2.4.1 (#629) PR #631 ### 0.11.2 -- Jul 25, 2019 -* Fix #628 - incompatibility with pyparsing 2.4.1 +- Fix #628 - incompatibility with pyparsing 2.4.1 ### 0.11.1 -- Nov 15, 2018 -* Fix ordering of attributes in proj (#483, #516) -* Prohibit direct 
insert into auto-populated tables (#511) +- Fix ordering of attributes in proj (#483, #516) +- Prohibit direct insert into auto-populated tables (#511) ### 0.11.0 -- Oct 25, 2018 -* Full support of dependencies with renamed attributes using projection syntax (#300, #345, #436, #506, #507) -* Rename internal class and module names to comply with terminology in documentation (#494, #500) -* Full support of secondary indexes (#498, 500) -* ERD no longer shows numbers in nodes corresponding to derived dependencies (#478, #500) -* Full support of unique and nullable dependencies (#254, #301, #493, #495, #500) -* Improve memory management in `populate` (#461, #486) -* Fix query errors and redundancies (#456, #463, #482) - -### 0.10.1 -- Aug 28, 2018 -* Fix ERD Tooltip message (#431) -* Networkx 2.0 support (#443) -* Fix insert from query with skip_duplicates=True (#451) -* Sped up queries (#458) -* Bugfix in restriction of the form (A & B) * B (#463) -* Improved error messages (#466) +- Full support of dependencies with renamed attributes using projection syntax (#300, #345, #436, #506, #507) +- Rename internal class and module names to comply with terminology in documentation (#494, #500) +- Full support of secondary indexes (#498, 500) +- ERD no longer shows numbers in nodes corresponding to derived dependencies (#478, #500) +- Full support of unique and nullable dependencies (#254, #301, #493, #495, #500) +- Improve memory management in `populate` (#461, #486) +- Fix query errors and redundancies (#456, #463, #482) + +### 0.10.1 -- Aug 28, 2018 +- Fix ERD Tooltip message (#431) +- Networkx 2.0 support (#443) +- Fix insert from query with skip_duplicates=True (#451) +- Sped up queries (#458) +- Bugfix in restriction of the form (A & B) \* B (#463) +- Improved error messages (#466) ### 0.10.0 -- Jan 10, 2018 -* Deletes are more efficient (#424) -* ERD shows table definition on tooltip hover in Jupyter (#422) -* S3 external storage -* Garbage collection for external 
sorage -* Most operators and methods of tables can be invoked as class methods rather than instance methods (#407) -* The schema decorator object no longer requires locals() to specify the context -* Compatibility with pymysql 0.8.0+ -* More efficient loading of dependencies (#403) +- Deletes are more efficient (#424) +- ERD shows table definition on tooltip hover in Jupyter (#422) +- S3 external storage +- Garbage collection for external sorage +- Most operators and methods of tables can be invoked as class methods rather than instance methods (#407) +- The schema decorator object no longer requires locals() to specify the context +- Compatibility with pymysql 0.8.0+ +- More efficient loading of dependencies (#403) ### 0.9.0 -- Nov 17, 2017 -* Made graphviz installation optional -* Implement file-based external storage -* Implement union operator + -* Implement file-based external storage +- Made graphviz installation optional +- Implement file-based external storage +- Implement union operator + +- Implement file-based external storage ### 0.8.0 -- Jul 26, 2017 Documentation and tutorials available at https://docs.datajoint.io and https://tutorials.datajoint.io -* improved the ERD graphics and features using the graphviz libraries (#207, #333) -* improved password handling logic (#322, #321) -* the use of the `contents` property to populate tables now only works in `dj.Lookup` classes (#310). 
-* allow suppressing the display of size of query results through the `show_tuple_count` configuration option (#309) -* implemented renamed foreign keys to spec (#333) -* added the `limit` keyword argument to populate (#329) -* reduced the number of displayed messages (#308) -* added `size_on_disk` property for dj.Schema() objects (#323) -* job keys are entered in the jobs table (#316, #243) -* simplified the `fetch` and `fetch1` syntax, deprecating the `fetch[...]` syntax (#319) -* the jobs tables now store the connection ids to allow identifying abandoned jobs (#288, #317) + +- improved the ERD graphics and features using the graphviz libraries (#207, #333) +- improved password handling logic (#322, #321) +- the use of the `contents` property to populate tables now only works in `dj.Lookup` classes (#310). +- allow suppressing the display of size of query results through the `show_tuple_count` configuration option (#309) +- implemented renamed foreign keys to spec (#333) +- added the `limit` keyword argument to populate (#329) +- reduced the number of displayed messages (#308) +- added `size_on_disk` property for dj.Schema() objects (#323) +- job keys are entered in the jobs table (#316, #243) +- simplified the `fetch` and `fetch1` syntax, deprecating the `fetch[...]` syntax (#319) +- the jobs tables now store the connection ids to allow identifying abandoned jobs (#288, #317) ### 0.5.0 (#298) -- Mar 8, 2017 -* All fetched integers are now 64-bit long and all fetched floats are double precision. -* Added `dj.create_virtual_module` +- All fetched integers are now 64-bit long and all fetched floats are double precision. 
+- Added `dj.create_virtual_module` ### 0.4.10 (#286) -- Feb 6, 2017 -* Removed Vagrant and Readthedocs support -* Explicit saving of configuration (issue #284) +- Removed Vagrant and Readthedocs support +- Explicit saving of configuration (issue #284) ### 0.4.9 (#285) -- Feb 2, 2017 -* Fixed setup.py for pip install +- Fixed setup.py for pip install ### 0.4.7 (#281) -- Jan 24, 2017 -* Fixed issues related to order of attributes in projection. +- Fixed issues related to order of attributes in projection. ### 0.4.6 (#277) -- Dec 22, 2016 -* Proper handling of interruptions during populate +- Proper handling of interruptions during populate ### 0.4.5 (#274) -- Dec 20, 2016 -* Populate reports how many keys remain to be populated at the start. +- Populate reports how many keys remain to be populated at the start. -### 0.4.3 (#271) -- Dec 6, 2016 -* Fixed aggregation issues (#270) -* datajoint no longer attempts to connect to server at import time -* dropped support of view (reversed #257) -* more elegant handling of insufficient privileges (#268) +### 0.4.3 (#271) -- Dec 6, 2016 +- Fixed aggregation issues (#270) +- datajoint no longer attempts to connect to server at import time +- dropped support of view (reversed #257) +- more elegant handling of insufficient privileges (#268) -### 0.4.2 (#267) -- Dec 6, 2016 -* improved table appearance in Jupyter +### 0.4.2 (#267) -- Dec 6, 2016 +- improved table appearance in Jupyter ### 0.4.1 (#266) -- Oct 28, 2016 -* bugfix for very long error messages +- bugfix for very long error messages ### 0.3.9 -- Sep 27, 2016 -* Added support for datatype `YEAR` -* Fixed issues with `dj.U` and the `aggr` operator (#246, #247) +- Added support for datatype `YEAR` +- Fixed issues with `dj.U` and the `aggr` operator (#246, #247) -### 0.3.8 -- Aug 2, 2016 -* added the `_update` method in `base_relation`. It allows updating values in existing tuples. -* bugfix in reading values of type double. Previously it was cast as float32. 
+### 0.3.8 -- Aug 2, 2016 +- added the `_update` method in `base_relation`. It allows updating values in existing tuples. +- bugfix in reading values of type double. Previously it was cast as float32. -### 0.3.7 -- Jul 31, 2016 -* added parameter `ignore_extra_fields` in `insert` -* `insert(..., skip_duplicates=True)` now relies on `SELECT IGNORE`. Previously it explicitly checked if tuple already exists. -* table previews now include blob attributes displaying the string +### 0.3.7 -- Jul 31, 2016 +- added parameter `ignore_extra_fields` in `insert` +- `insert(..., skip_duplicates=True)` now relies on `SELECT IGNORE`. Previously it explicitly checked if tuple already exists. +- table previews now include blob attributes displaying the string -### 0.3.6 -- Jul 30, 2016 -* bugfix in `schema.spawn_missing_classes`. Previously, spawned part classes would not show in ERDs. -* dj.key now causes fetch to return as a list of dicts. Previously it was a recarray. +### 0.3.6 -- Jul 30, 2016 +- bugfix in `schema.spawn_missing_classes`. Previously, spawned part classes would not show in ERDs. +- dj.key now causes fetch to return as a list of dicts. Previously it was a recarray. ### 0.3.5 -* `dj.set_password()` now asks for user confirmation before changing the password. -* fixed issue #228 +- `dj.set_password()` now asks for user confirmation before changing the password. +- fixed issue #228 ### 0.3.4 -* Added method the `ERD.add_parts` method, which adds the part tables of all tables currently in the ERD. -* `ERD() + arg` and `ERD() - arg` can now accept table classes as arg. +- Added method the `ERD.add_parts` method, which adds the part tables of all tables currently in the ERD. +- `ERD() + arg` and `ERD() - arg` can now accept table classes as arg. ### 0.3.3 -* Suppressed warnings (redirected them to logging). Previoiusly, scipy would throw warnings in ERD, for example. 
-* Added ERD.from_sequence as a shortcut to combining the ERDs of multiple sources -* ERD() no longer text the context argument. -* ERD.draw() now takes an optional context argument. By default uses the caller's locals. - -### 0.3.2. -* Fixed issue #223: `insert` can insert relations without fetching. -* ERD() now takes the `context` argument, which specifies in which context to look for classes. The default is taken from the argument (schema or table). -* ERD.draw() no longer has the `prefix` argument: class names are shown as found in the context. +- Suppressed warnings (redirected them to logging). Previously, scipy would throw warnings in ERD, for example. +- Added ERD.from_sequence as a shortcut to combining the ERDs of multiple sources +- ERD() no longer takes the context argument. +- ERD.draw() now takes an optional context argument. By default uses the caller's locals. + +### 0.3.2 +- Fixed issue #223: `insert` can insert relations without fetching. +- ERD() now takes the `context` argument, which specifies in which context to look for classes. The default is taken from the argument (schema or table). +- ERD.draw() no longer has the `prefix` argument: class names are shown as found in the context. 
diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index bb8736f11..a78206a6c 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -1,14 +1,14 @@ -# docker compose -f LNX-docker-compose.yml --env-file LNX.env up --exit-code-from app --build -version: '2.4' +# PY_VER=3.8 MYSQL_VER=5.7 DISTRO=alpine MINIO_VER=RELEASE.2022-08-11T04-37-28Z HOST_UID=$(id -u) docker compose -f LNX-docker-compose.yml up --exit-code-from app --build +version: "2.4" x-net: &net networks: - - main + - main services: db: <<: *net image: datajoint/mysql:${MYSQL_VER} environment: - - MYSQL_ROOT_PASSWORD=simple + - MYSQL_ROOT_PASSWORD=${DJ_PASS} # ports: # - "3306:3306" # volumes: @@ -34,12 +34,12 @@ services: <<: *net image: datajoint/nginx:v0.2.4 environment: - - ADD_db_TYPE=DATABASE - - ADD_db_ENDPOINT=db:3306 - - ADD_minio_TYPE=MINIO - - ADD_minio_ENDPOINT=minio:9000 - - ADD_minio_PORT=80 # allow unencrypted connections - - ADD_minio_PREFIX=/datajoint + - ADD_db_TYPE=DATABASE + - ADD_db_ENDPOINT=db:3306 + - ADD_minio_TYPE=MINIO + - ADD_minio_ENDPOINT=minio:9000 + - ADD_minio_PORT=80 # allow unencrypted connections + - ADD_minio_PREFIX=/datajoint # ports: # - "80:80" # - "443:443" @@ -58,7 +58,7 @@ services: environment: - DJ_HOST=fakeservices.datajoint.io - DJ_USER=root - - DJ_PASS=simple + - DJ_PASS - DJ_TEST_HOST=fakeservices.datajoint.io - DJ_TEST_USER=datajoint - DJ_TEST_PASSWORD=datajoint diff --git a/README.md b/README.md index 8934d5b53..fc290fe53 100644 --- a/README.md +++ b/README.md @@ -6,161 +6,28 @@ [![Slack](https://img.shields.io/badge/slack-chat-green.svg)](https://datajoint.slack.com/) # Welcome to DataJoint for Python! + DataJoint for Python is a framework for scientific workflow management based on relational principles. DataJoint is built on the foundation of the relational data model and prescribes a consistent method for organizing, populating, computing, and querying data. 
DataJoint was initially developed in 2009 by Dimitri Yatsenko in Andreas Tolias' Lab at Baylor College of Medicine for the distributed processing and management of large volumes of data streaming from regular experiments. Starting in 2011, DataJoint has been available as an open-source project adopted by other labs and improved through contributions from several developers. -Presently, the primary developer of DataJoint open-source software is the company DataJoint (https://datajoint.com). Related resources are listed at https://datajoint.org. - -## Installation -``` -pip3 install datajoint -``` +Presently, the primary developer of DataJoint open-source software is the company DataJoint (https://datajoint.com). -If you already have an older version of DataJoint installed using `pip`, upgrade with -```bash -pip3 install --upgrade datajoint -``` +- [Getting Started](https://datajoint.com/docs/core/datajoint-python/latest/getting-started/) +- [DataJoint Elements](https://datajoint.com/docs/elements/) - Catalog of example pipelines +- [DataJoint CodeBook](https://codebook.datajoint.io) - Interactive online tutorials +- Contribute -## Documentation and Tutorials + - [Development Environment](https://datajoint.com/docs/core/datajoint-python/latest/develop/) + - [Guidelines](https://datajoint.com/docs/community/contribute/) -* https://datajoint.org -- start page -* https://docs.datajoint.org -- up-to-date documentation -* https://tutorials.datajoint.io -- step-by-step tutorials -* https://elements.datajoint.org -- catalog of example pipelines -* https://codebook.datajoint.io -- interactive online tutorials +- Legacy Resources (To be replaced by above) + - [Documentation](https://docs.datajoint.org) + - [Tutorials](https://tutorials.datajoint.org) ## Citation -+ If your work uses DataJoint for Python, please cite the following Research Resource Identifier (RRID) and manuscript. 
- -+ DataJoint ([RRID:SCR_014543](https://scicrunch.org/resolver/SCR_014543)) - DataJoint for Python (version ``) - -+ Yatsenko D, Reimer J, Ecker AS, Walker EY, Sinz F, Berens P, Hoenselaar A, Cotton RJ, Siapas AS, Tolias AS. DataJoint: managing big scientific data using MATLAB or Python. bioRxiv. 2015 Jan 1:031658. doi: https://doi.org/10.1101/031658 - -## Python Native Blobs -
-Click to expand details - -DataJoint 0.12 adds full support for all native python data types in blobs: tuples, lists, sets, dicts, strings, bytes, `None`, and all their recursive combinations. -The new blobs are a superset of the old functionality and are fully backward compatible. -In previous versions, only MATLAB-style numerical arrays were fully supported. -Some Python datatypes such as dicts were coerced into numpy recarrays and then fetched as such. - -However, since some Python types were coerced into MATLAB types, old blobs and new blobs may now be fetched as different types of objects even if they were inserted the same way. -For example, new `dict` objects will be returned as `dict` while the same types of objects inserted with `datajoint 0.11` will be recarrays. - -Since this is a big change, we chose to temporarily disable this feature by default in DataJoint for Python 0.12.x, allowing users to adjust their code if necessary. -From 13.x, the flag will default to True (on), and will ultimately be removed when corresponding decode support for the new format is added to datajoint-matlab (see: datajoint-matlab #222, datajoint-python #765). - -The flag is configured by setting the `enable_python_native_blobs` flag in `dj.config`. - -```python -import datajoint as dj -dj.config["enable_python_native_blobs"] = True -``` - -You can safely enable this setting if both of the following are true: - - * The only kinds of blobs your pipeline have inserted previously were numerical arrays. - * You do not need to share blob data between Python and MATLAB. - -Otherwise, read the following explanation. - -DataJoint v0.12 expands DataJoint's blob serialization mechanism with -improved support for complex native python datatypes, such as dictionaries -and lists of strings. - -Prior to DataJoint v0.12, certain python native datatypes such as -dictionaries were 'squashed' into numpy structured arrays when saved into -blob attributes. 
This facilitated easier data sharing between MATLAB -and Python for certain record types. However, this created a discrepancy -between insert and fetch datatypes which could cause problems in other -portions of users pipelines. - -DataJoint v0.12, removes the squashing behavior, instead encoding native python datatypes in blobs directly. -However, this change creates a compatibility problem for pipelines -which previously relied on the type squashing behavior since records -saved via the old squashing format will continue to fetch -as structured arrays, whereas new record inserted in DataJoint 0.12 with -`enable_python_native_blobs` would result in records returned as the -appropriate native python type (dict, etc). -Furthermore, DataJoint for MATLAB does not yet support unpacking native Python datatypes. - -With `dj.config["enable_python_native_blobs"]` set to `False`, -any attempt to insert any datatype other than a numpy array will result in an exception. -This is meant to get users to read this message in order to allow proper testing -and migration of pre-0.12 pipelines to 0.12 in a safe manner. - -The exact process to update a specific pipeline will vary depending on -the situation, but generally the following strategies may apply: - - * Altering code to directly store numpy structured arrays or plain - multidimensional arrays. This strategy is likely best one for those - tables requiring compatibility with MATLAB. - * Adjust code to deal with both structured array and native fetched data - for those tables that are populated with `dict`s in blobs in pre-0.12 version. - In this case, insert logic is not adjusted, but downstream consumers - are adjusted to handle records saved under the old and new schemes. - * Migrate data into a fresh schema, fetching the old data, converting blobs to - a uniform data type and re-inserting. - * Drop/Recompute imported/computed tables to ensure they are in the new - format. 
- -As always, be sure that your data is safely backed up before modifying any -important DataJoint schema or records. - -
- -### API docs - -The API documentation can be built with mkdocs using the docker compose file in -`docs/` with the following command: - -``` bash -MODE="LIVE" PACKAGE=datajoint UPSTREAM_REPO=https://github.com/datajoint/datajoint-python.git HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build -``` - -The site will then be available at `http://localhost/`. When finished, be sure to run -the same command as above, but replace `up --build` with `down`. - -## Running Tests Locally -
-Click to expand details - -* Create an `.env` with desired development environment values e.g. -``` sh -PY_VER=3.9 -MYSQL_VER=5.7 -DISTRO=alpine -MINIO_VER=RELEASE.2022-01-03T18-22-58Z -HOST_UID=1000 -``` -* `cp local-docker-compose.yml docker-compose.yml` -* `docker-compose up -d` (Note configured `JUPYTER_PASSWORD`) -* Select a means of running Tests e.g. Docker Terminal, or Local Terminal (see bottom) -* Add entry in `/etc/hosts` for `127.0.0.1 fakeservices.datajoint.io` -* Run desired tests. Some examples are as follows: - -| Use Case | Shell Code | -| ---------------------------- | ------------------------------------------------------------------------------ | -| Run all tests | `nosetests -vsw tests --with-coverage --cover-package=datajoint` | -| Run one specific class test | `nosetests -vs --tests=tests.test_fetch:TestFetch.test_getattribute_for_fetch1` | -| Run one specific basic test | `nosetests -vs --tests=tests.test_external_class:test_insert_and_fetch` | - - -### Launch Docker Terminal -* Shell into `datajoint-python_app_1` i.e. `docker exec -it datajoint-python_app_1 sh` - - -### Launch Local Terminal -* See `datajoint-python_app` environment variables in `local-docker-compose.yml` -* Launch local terminal -* `export` environment variables in shell -* Add entry in `/etc/hosts` for `127.0.0.1 fakeservices.datajoint.io` +- If your work uses DataJoint for Python, please cite the following Research Resource Identifier (RRID) and manuscript. -### Launch Jupyter Notebook for Interactive Use -* Navigate to `localhost:8888` -* Input Jupyter password -* Launch a notebook i.e. `New > Python 3` +- DataJoint ([RRID:SCR_014543](https://scicrunch.org/resolver/SCR_014543)) - DataJoint for Python (version ``) -
\ No newline at end of file +- Yatsenko D, Reimer J, Ecker AS, Walker EY, Sinz F, Berens P, Hoenselaar A, Cotton RJ, Siapas AS, Tolias AS. DataJoint: managing big scientific data using MATLAB or Python. bioRxiv. 2015 Jan 1:031658. doi: https://doi.org/10.1101/031658 diff --git a/docs/docker-compose.yaml b/docs/docker-compose.yaml index 1d0e395b6..b599ccd49 100644 --- a/docs/docker-compose.yaml +++ b/docs/docker-compose.yaml @@ -1,6 +1,4 @@ # MODE="LIVE|QA|BUILD" PACKAGE=datajoint UPSTREAM_REPO=https://github.com/datajoint/datajoint-python.git HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build -# -# navigate to http://localhost/ version: "2.4" services: docs: @@ -18,7 +16,7 @@ services: - ..:/main user: ${HOST_UID}:anaconda ports: - - 80:80 + - 8080:80 command: - sh - -c diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml index 2fcc36b2b..6a5f6883d 100644 --- a/docs/mkdocs.yaml +++ b/docs/mkdocs.yaml @@ -8,14 +8,15 @@ nav: - Getting Started: getting-started/index.md - Existing Pipelines: concepts/existing-pipelines.md - Query Language: - - Common Commands: query-lang/common-commands.md - - Operators: query-lang/operators.md - - Iteration: query-lang/iteration.md - - Query Caching: query-lang/query-caching.md + - Common Commands: query-lang/common-commands.md + - Operators: query-lang/operators.md + - Iteration: query-lang/iteration.md + - Query Caching: query-lang/query-caching.md - Reproducibility: - - Table Tiers: reproduce/table-tiers.md - - Make Method: reproduce/make-method.md + - Table Tiers: reproduce/table-tiers.md + - Make Method: reproduce/make-method.md - Tutorials: tutorials.md + - Develop: develop.md - Changelog: about/changelog.md - API: api/ # defer to gen-files + literate-nav @@ -59,12 +60,12 @@ plugins: filters: - "!^_" docstring_style: sphinx # Replaces google default pending docstring updates - members_order: source + members_order: source group_by_category: false line_length: 88 - gen-files: scripts: - - ./src/api/make_pages.py 
+ - ./src/api/make_pages.py - literate-nav: nav_file: navigation.md - exclude-search: diff --git a/docs/src/develop.md b/docs/src/develop.md new file mode 100644 index 000000000..4627547f4 --- /dev/null +++ b/docs/src/develop.md @@ -0,0 +1,115 @@ +# Develop + +Included with the codebase is the recommended development environment configured using [DevContainer](https://containers.dev/). + +## Launch Development Environment + +Here are some options that provide a great developer experience: + +- **Cloud-based IDE**: (*recommended*) Launch using the [GitHub Codespaces](https://github.com/features/codespaces) named `Development`. +- **Local IDE**: + - Ensure you have [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) + - Ensure you have [Docker](https://docs.docker.com/get-docker/) + - Ensure you have [VSCode](https://code.visualstudio.com/) + - Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) + - `git clone` the codebase repository and open it in VSCode + - Use the `Dev Containers extension` to `Reopen in Container` (More info in the `Getting started` included with the extension) + - Your environment will finish loading once the file tree is populated and the terminal becomes active + +## Features + +Once you've successfully launched the development environment, you'll be able to take advantage of our developer tooling to help improve productivity. + +### Syntax Tests + +The following will verify that there are no syntax errors. + +``` +flake8 datajoint --count --select=E9,F63,F7,F82 --show-source --statistics +``` + +### Integration Tests + +The following will verify there are no regression errors by running our test suite of unit and integration tests. 
+ +- Entire test suite: + ``` + nosetests -vw tests + ``` +- A single functional test: + ``` + nosetests -vs --tests=tests.test_external_class:test_insert_and_fetch + ``` +- A single class test: + ``` + nosetests -vs --tests=tests.test_fetch:TestFetch.test_getattribute_for_fetch1 + ``` + +### Style Tests + +The following will verify that there are no code styling errors. + +``` +flake8 --ignore=E203,E722,W503 datajoint --count --max-complexity=62 --max-line-length=127 --statistics +``` + +The following will ensure the codebase has been formatted with [black](https://black.readthedocs.io/en/stable/). + +``` +black datajoint --check -v +``` + +The following will ensure the test suite has been formatted with [black](https://black.readthedocs.io/en/stable/). + +``` +black tests --check -v +``` + +### Jupyter + +Jupyter notebooks are supported in this environment. This means that when you `import datajoint`, it will use the current state of the source. + +Be sure to see the reference documentation if you are new to [running Jupyter notebooks w/ VSCode](https://code.visualstudio.com/docs/datascience/jupyter-notebooks#_create-or-open-a-jupyter-notebook). + +### Debugger + +[VSCode Debugger](https://code.visualstudio.com/docs/editor/debugging) is a powerful tool that can really accelerate fixes. + +Try it as follows: + +- Create a python script of your choice +- `import datajoint` (This will use the current state of the source) +- Add breakpoints by adding red dots next to line numbers +- Select the `Run and Debug` tab +- Start by clicking the button `Run and Debug` + +### MySQL CLI + +It is often useful in development to connect to DataJoint's relational database backend directly using the MySQL CLI. + +Connect as follows to the database running within your developer environment: + +``` +mysql -hfakeservices.datajoint.io -uroot -psimple +``` + +### Documentation + +Our documentation is built using [MkDocs Material](https://squidfunk.github.io/mkdocs-material/). 
The easiest way to improve the documentation is by using the `docs/docker-compose.yaml` environment. The source can be modified in `docs/src` using markdown. + +The docs environment can be run using 3 modes: + +- **LIVE**: (*recommended*) This serves the docs locally. It supports live reloading on saves to `docs/src` files but does not support the docs version dropdown. Useful to see changes live. + ``` + MODE="LIVE" PACKAGE=datajoint UPSTREAM_REPO=https://github.com/datajoint/datajoint-python.git HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build + ``` +- **QA**: This serves the docs locally. It supports the docs version dropdown but does not support live reloading. Useful as a final check. + ``` + MODE="QA" PACKAGE=datajoint UPSTREAM_REPO=https://github.com/datajoint/datajoint-python.git HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build + ``` +- **BUILD**: This compiles the docs. Most useful for the docs deployment automation. Other modes are more useful to new contributors. + ``` + MODE="BUILD" PACKAGE=datajoint UPSTREAM_REPO=https://github.com/datajoint/datajoint-python.git HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build + ``` + +When the docs are served locally, use the VSCode `PORTS` tab (next to `TERMINAL`) to manage access to the forwarded ports. Docs are served on port `8080`. 
diff --git a/local-docker-compose.yml b/local-docker-compose.yml index 760f7ce37..470e5cb94 100644 --- a/local-docker-compose.yml +++ b/local-docker-compose.yml @@ -1,14 +1,14 @@ -# docker compose -f local-docker-compose.yml --env-file LNX.env up --build -version: '2.4' +# MYSQL_VER=5.7 MINIO_VER=RELEASE.2022-08-11T04-37-28Z docker compose -f local-docker-compose.yml up --build +version: "2.4" x-net: &net networks: - - main + - main services: db: <<: *net image: datajoint/mysql:${MYSQL_VER} environment: - - MYSQL_ROOT_PASSWORD=simple + - MYSQL_ROOT_PASSWORD=${DJ_PASS} # ports: # - "3306:3306" # To persist MySQL data @@ -36,15 +36,15 @@ services: <<: *net image: datajoint/nginx:v0.2.4 environment: - - ADD_db_TYPE=DATABASE - - ADD_db_ENDPOINT=db:3306 - - ADD_minio_TYPE=MINIO - - ADD_minio_ENDPOINT=minio:9000 - - ADD_minio_PORT=80 # allow unencrypted connections - - ADD_minio_PREFIX=/datajoint - - ADD_browser_TYPE=MINIOADMIN - - ADD_browser_ENDPOINT=minio:9000 - - ADD_browser_PORT=80 # allow unencrypted connections + - ADD_db_TYPE=DATABASE + - ADD_db_ENDPOINT=db:3306 + - ADD_minio_TYPE=MINIO + - ADD_minio_ENDPOINT=minio:9000 + - ADD_minio_PORT=80 # allow unencrypted connections + - ADD_minio_PREFIX=/datajoint + - ADD_browser_TYPE=MINIOADMIN + - ADD_browser_ENDPOINT=minio:9000 + - ADD_browser_PORT=80 # allow unencrypted connections ports: - "80:80" - "443:443" @@ -55,59 +55,5 @@ services: condition: service_healthy minio: condition: service_healthy - app: - <<: *net - image: datajoint/djtest:py${PY_VER}-${DISTRO} - depends_on: - fakeservices.datajoint.io: - condition: service_healthy - environment: - - DJ_HOST=fakeservices.datajoint.io - - DJ_USER=root - - DJ_PASS=simple - - DJ_TEST_HOST=fakeservices.datajoint.io - - DJ_TEST_USER=datajoint - - DJ_TEST_PASSWORD=datajoint - # If running tests locally, make sure to add entry in /etc/hosts for 127.0.0.1 fakeservices.datajoint.io - - S3_ENDPOINT=fakeservices.datajoint.io - - S3_ACCESS_KEY=datajoint - - 
S3_SECRET_KEY=datajoint - - S3_BUCKET=datajoint.test - - PYTHON_USER=dja - - JUPYTER_PASSWORD=datajoint - - DISPLAY - working_dir: /src - command: - - sh - - -c - - | - set -e - pip install --user nose nose-cov - pip install -e . - pip list --format=freeze | grep datajoint - ## You may run the below tests once sh'ed into container i.e. docker exec -it datajoint-python_app_1 sh - # nosetests -vsw tests; #run all tests - # nosetests -vs --tests=tests.test_external_class:test_insert_and_fetch; #run specific basic test - # nosetests -vs --tests=tests.test_fetch:TestFetch.test_getattribute_for_fetch1; #run specific Class test - # flake8 datajoint --count --select=E9,F63,F7,F82 --show-source --statistics - # flake8 --ignore=E203,E722,W503 datajoint --count --max-complexity=62 --max-line-length=127 --statistics - # black datajoint --check -v - ## Remote debugger - set +e - while true - do - python -m ptvsd --host 0.0.0.0 --port 5678 --wait . - sleep 2 - done - ports: - - "8888:8888" - - "5678:5678" - user: ${HOST_UID}:anaconda - volumes: - - .:/src - - /tmp/.X11-unix:/tmp/.X11-unix:rw - # Additional mounted notebooks may go here - # - ./notebook:/home/dja/notebooks - # - ../dj-python-101/ch1:/home/dja/tutorials networks: main: