From 8c10ef97bc54881bdd1098d6cb765d2d5842116f Mon Sep 17 00:00:00 2001 From: Mariella CC Date: Thu, 23 Nov 2023 16:18:04 +0100 Subject: [PATCH] initial commit --- .cruft.json | 18 + .envrc.disabled | 3 + .git_archival.txt | 1 + .gitattributes | 1 + .github/ISSUE_TEMPLATE/bug_report.md | 38 ++ .github/ISSUE_TEMPLATE/suggest-a-module.md | 20 + .github/workflows/build-darwin.yaml | 26 ++ .github/workflows/build-linux.yaml | 190 +++++++++ .github/workflows/build-windows.yaml | 21 + .gitignore | 67 ++++ .pre-commit-config.yaml | 58 +++ AUTHORS.md | 3 + CHANGELOG.md | 7 + LICENSE | 373 ++++++++++++++++++ MANIFEST.in | 15 + Makefile | 83 ++++ README.md | 107 +++++ ci/conda/conda-pkg-patch.yaml | 18 + commitlint.config.js | 1 + docs/SUMMARY.md | 5 + docs/development.md | 64 +++ docs/index.md | 22 ++ docs/stylesheets/extra.css | 5 + docs/usage.md | 4 + examples/data/Readme.md | 1 + examples/data/journals/JournalEdges1902.csv | 322 +++++++++++++++ examples/data/journals/JournalNodes1902.csv | 277 +++++++++++++ examples/data/journals/Readme.md | 1 + examples/jobs/Readme.md | 1 + .../jobs/example_job_topic_modelling.yaml | 3 + examples/pipelines/Readme.md | 1 + .../example_pipeline_topic_modelling.yaml | 25 ++ mkdocs.yml | 72 ++++ pixi.toml | 18 + pyproject.toml | 243 ++++++++++++ scripts/documentation/gen_api_doc_pages.py | 32 ++ scripts/documentation/gen_info_pages.py | 20 + scripts/documentation/gen_module_doc.py | 65 +++ src/kiara_plugin/topic_modelling/__init__.py | 80 ++++ .../topic_modelling/data_types.py | 4 + src/kiara_plugin/topic_modelling/models.py | 10 + .../topic_modelling/modules/__init__.py | 72 ++++ .../topic_modelling/pipelines/.gitkeep | 0 .../topic_modelling/pipelines/__init__.py | 4 + src/kiara_plugin/topic_modelling/py.typed | 0 .../topic_modelling/resources/.gitkeep | 0 tests/conftest.py | 74 ++++ .../example_job_topic_modelling/outputs.py | 22 ++ .../example_job_topic_modelling/outputs.yaml | 2 + tests/resources/.gitkeep | 0 tests/test_job_descs.py | 41 
++ tests/test_kiara_modules_default.py | 12 + 52 files changed, 2552 insertions(+) create mode 100644 .cruft.json create mode 100644 .envrc.disabled create mode 100644 .git_archival.txt create mode 100644 .gitattributes create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/suggest-a-module.md create mode 100644 .github/workflows/build-darwin.yaml create mode 100644 .github/workflows/build-linux.yaml create mode 100644 .github/workflows/build-windows.yaml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 AUTHORS.md create mode 100644 CHANGELOG.md create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 ci/conda/conda-pkg-patch.yaml create mode 100644 commitlint.config.js create mode 100644 docs/SUMMARY.md create mode 100644 docs/development.md create mode 100644 docs/index.md create mode 100644 docs/stylesheets/extra.css create mode 100644 docs/usage.md create mode 100644 examples/data/Readme.md create mode 100644 examples/data/journals/JournalEdges1902.csv create mode 100644 examples/data/journals/JournalNodes1902.csv create mode 100644 examples/data/journals/Readme.md create mode 100644 examples/jobs/Readme.md create mode 100644 examples/jobs/example_job_topic_modelling.yaml create mode 100644 examples/pipelines/Readme.md create mode 100644 examples/pipelines/example_pipeline_topic_modelling.yaml create mode 100644 mkdocs.yml create mode 100644 pixi.toml create mode 100644 pyproject.toml create mode 100644 scripts/documentation/gen_api_doc_pages.py create mode 100644 scripts/documentation/gen_info_pages.py create mode 100644 scripts/documentation/gen_module_doc.py create mode 100644 src/kiara_plugin/topic_modelling/__init__.py create mode 100644 src/kiara_plugin/topic_modelling/data_types.py create mode 100644 src/kiara_plugin/topic_modelling/models.py create mode 100644 
src/kiara_plugin/topic_modelling/modules/__init__.py create mode 100644 src/kiara_plugin/topic_modelling/pipelines/.gitkeep create mode 100644 src/kiara_plugin/topic_modelling/pipelines/__init__.py create mode 100644 src/kiara_plugin/topic_modelling/py.typed create mode 100644 src/kiara_plugin/topic_modelling/resources/.gitkeep create mode 100644 tests/conftest.py create mode 100644 tests/job_tests/example_job_topic_modelling/outputs.py create mode 100644 tests/job_tests/example_job_topic_modelling/outputs.yaml create mode 100644 tests/resources/.gitkeep create mode 100644 tests/test_job_descs.py create mode 100755 tests/test_kiara_modules_default.py diff --git a/.cruft.json b/.cruft.json new file mode 100644 index 0000000..e743d96 --- /dev/null +++ b/.cruft.json @@ -0,0 +1,18 @@ +{ + "template": "https://github.com/DHARPA-Project/kiara_plugin.develop.git", + "commit": "e2d43d57283f53eaf5144915075567eff38d738a", + "checkout": null, + "context": { + "cookiecutter": { + "full_name": "Mariella De Crouy Chanel", + "email": "mariella.decrouychanel@uni.lu", + "project_name": "topic_modelling", + "project_slug": "topic_modelling", + "project_short_description": "A Kiara plugin to create a Topic Modelling workflow.", + "github_user": "DHARPA-Project", + "anaconda_user": "dharpa", + "_template": "https://github.com/DHARPA-Project/kiara_plugin.develop.git" + } + }, + "directory": null +} diff --git a/.envrc.disabled b/.envrc.disabled new file mode 100644 index 0000000..1bb8d7c --- /dev/null +++ b/.envrc.disabled @@ -0,0 +1,3 @@ +layout python3 +# or, uncomment for specific 'base'-version of python, e.g.: +# layout python "$HOME/.asdf/installs/python/3.9.2/bin/python" diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 0000000..95cb3ee --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1 @@ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..00a7b00 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ 
+.git_archival.txt export-subst diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..dd84ea7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/suggest-a-module.md b/.github/ISSUE_TEMPLATE/suggest-a-module.md new file mode 100644 index 0000000..58ee6cd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/suggest-a-module.md @@ -0,0 +1,20 @@ +--- +name: Suggest a module +about: Suggest a new module for kiara +title: '' +labels: '' +assignees: '' + +--- + +**Module description** +A description of the module, this will be copied into the actual module later on, so make sure you explain the module purpose with kiara users in mind. Use a short one-sentence overview as the first paragraph, then use as many paragraphs as you need to explain the module purpose and what it does. + +**Inputs** +List all the inputs this module would need, along with their types, a short description, and whether they are required or should have default values. 
It can be assumed that a graph will always be an input, so you can skip that. + +**Outputs** +List all the outputs this module would produce, along with their types and a short description of what they are. + +**Example code** +If you have example code how to do what you are proposing to do, copy and paste it here. diff --git a/.github/workflows/build-darwin.yaml b/.github/workflows/build-darwin.yaml new file mode 100644 index 0000000..e89f567 --- /dev/null +++ b/.github/workflows/build-darwin.yaml @@ -0,0 +1,26 @@ +name: "darwin tests for 'kiara_plugin.topic_modelling'" +# This workflow is triggered on pushes to the repository. +on: [push] +env: + DEVELOPER_DIR: /Applications/Xcode_12.4.app/Contents/Developer + MACOSX_DEPLOYMENT_TARGET: 10.15 + +jobs: + test-darwin: + name: pytest on darwin + runs-on: macos-11 + strategy: + matrix: + python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v4 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v3 + - name: install kiara_plugin.topic_modelling + run: pip install -U --extra-index-url https://pypi.fury.io/dharpa/ .[all,dev_testing] + - name: display installed kiara and module package versions + run: pip list | grep kiara + - name: Test with pytest + run: make test diff --git a/.github/workflows/build-linux.yaml b/.github/workflows/build-linux.yaml new file mode 100644 index 0000000..f31bc7f --- /dev/null +++ b/.github/workflows/build-linux.yaml @@ -0,0 +1,190 @@ +name: "linux tests and documentation builds for 'kiara_plugin.topic_modelling'" +# This workflow is triggered on pushes to the repository. 
+on: [push] + +jobs: + +# commitlint: +# name: lint commit message +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v2 +# with: +# fetch-depth: 0 +# - uses: wagoid/commitlint-github-action@v4 + + test-linux: + name: pytest on linux + runs-on: ubuntu-latest + strategy: + matrix: + python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v4 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v3 + - name: install kiara_plugin.topic_modelling + run: pip install -U .[all,dev_testing] + - name: display installed kiara and module package versions + run: pip list | grep kiara + - name: Test with pytest + run: make test + +# Uncomment this if you have coveralls.io setup with this repo +# coverage: +# name: create and publish test coverage +# runs-on: ubuntu-latest +# steps: +# - name: "Set up Python 3.9" +# uses: actions/setup-python@v4 +# with: +# python-version: "3.9" +# - uses: actions/checkout@v3 +# - name: install kiara +# run: pip install -U .[all,dev_testing] +# - name: display installed kiara and module package versions +# run: pip list | grep kiara +# - name: Run coverage +# run: coverage run -m pytest tests +# - name: coveralls +# uses: coverallsapp/github-action@v2 + +# Uncomment this if you want to run mypy +# mypy-linux: +# name: mypy check on linux +# runs-on: ubuntu-latest +# strategy: +# matrix: +# python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] +# steps: +# - name: "Set up Python ${{ matrix.python_version }}" +# uses: actions/setup-python@v4 +# with: +# python-version: "${{ matrix.python_version }}" +# - uses: actions/checkout@v3 +# - name: install kiara_plugin.topic_modelling +# run: pip install -U .[all,dev_testing] +# - name: Test with mypy +# run: make mypy + + linting-linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Python + uses: actions/setup-python@v4 + with: + 
python-version: "3.11" + - name: pip cache + id: pip-cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.*') }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -U ruff + # Include `--output-format=github` to enable automatic inline annotations. + - name: Run Ruff + run: ruff --output-format=github src/ + + build_python_package: + name: build python package + runs-on: ubuntu-latest + needs: + - test-linux +# - mypy-linux +# - linting-linux + steps: + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: install pip + run: pip install -U pip setuptools setuptools_scm build + - name: create packages + run: python -m build + - name: upload artifacts + uses: actions/upload-artifact@v3 + with: + name: build-dists + path: dist/ + + release_python_package: + name: publish python package to pypi + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + runs-on: ubuntu-latest + needs: + - build_python_package + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + steps: + - name: Retrieve build distributions + uses: actions/download-artifact@v3 + with: + name: build-dists + path: dist/ + - name: publish to PyPI # make sure you have pypi trusted publishing configured for this repo + uses: pypa/gh-action-pypi-publish@release/v1 + + build_and_release_conda_package: + name: conda package build (and upload if release) + runs-on: ubuntu-latest + needs: + - test-linux + # - mypy-linux # uncomment if this step is enabled + # - linting-linux # uncomment if this step is enabled + steps: + - name: "Set up Python 3.11" + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: pip cache + id: pip-cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.*') }} 
+ - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: install kiara + run: pip install kiara + - name: install required plugin packages + run: pip install git+https://github.com/DHARPA-Project/kiara_plugin.develop.git@develop + - name: build conda package + if: ${{ ( github.ref == 'refs/heads/develop') }} + run: kiara conda build-package --patch-data ci/conda/conda-pkg-patch.yaml . + - name: extract tag name + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: build & publish conda package + if: ${{ startsWith(github.ref, 'refs/tags/') }} + run: kiara conda build-package --publish --user dharpa --token ${{ secrets.ANACONDA_PUSH_TOKEN }} --patch-data ci/conda/conda-pkg-patch.yaml . + + merge_tag_to_main: + name: merge current tag to main branch + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + needs: + - release_python_package + - build_and_release_conda_package + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - run: git config --global user.email "markus@frkl.io" + - run: git config --global user.name "Markus Binsteiner" + - name: extract tag name + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: checkout main branch + run: git checkout main + - name: merge tag + run: git merge "${RELEASE_VERSION}" + - name: push updated main branch + run: git push https://${{ secrets.GITHUB_TOKEN }}@github.com/DHARPA-Project/kiara_plugin.topic_modelling.git diff --git a/.github/workflows/build-windows.yaml b/.github/workflows/build-windows.yaml new file mode 100644 index 0000000..0859397 --- /dev/null +++ b/.github/workflows/build-windows.yaml @@ -0,0 +1,21 @@ +name: "windows tests for 'kiara_plugin.topic_modelling'" +# This workflow is triggered on pushes to the repository. 
+on: [push] + +jobs: + test-windows: + name: pytest on windows + runs-on: windows-latest + strategy: + matrix: + python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v4 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v3 + - name: install kiara_plugin.topic_modelling + run: pip install -U --extra-index-url https://pypi.fury.io/dharpa/ .[all,dev_testing] + - name: Test with pytest + run: make test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..709aadb --- /dev/null +++ b/.gitignore @@ -0,0 +1,67 @@ +# Temporary and binary files +*~ +*.py[cod] +*.so +*.cfg +!.isort.cfg +!setup.cfg +*.orig +*.log +*.pot +__pycache__/* +.cache/* +.*.swp +*/.ipynb_checkpoints/* + +# Project files +.ropeproject +.project +.pydevproject +.settings +.idea +tags + +# Package files +*.egg +*.eggs/ +.installed.cfg +*.egg-info + +# Unittest and coverage +htmlcov/* +.coverage +.coverage.* +.tox +junit.xml +coverage.xml +.pytest_cache/ + +# Build and docs folder/files +/build/* +/dist/* +sdist/* +cover/* +MANIFEST + +# Per-project virtualenvs +.venv*/ +pip-wheel-metadata/ +.python-version +src/kiara_plugin/topic_modelling/version.txt +.direnv +public +site +.dephell_report +.mypy_cache +.env +docs/api-documentation.md +.frkl +.envrc +build.sh +onefile.spec +*_complete.zsh.zwc +ci/conda/**/build +ci/conda/kiara_plugin.topic_modelling/meta.yaml +.pixi +pixi.lock +dev.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..0e13d1b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,58 @@ +default_language_version: + python: python3 + +repos: + +- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook + rev: 'v9.3.0' + hooks: + - id: commitlint + stages: [commit-msg] + additional_dependencies: ['@commitlint/config-conventional'] + +- repo: https://github.com/psf/black + rev: 22.12.0 + 
hooks: + - id: black + +- repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v1.6.1' # Use the sha / tag you want to point at + hooks: + - id: mypy + files: "^src/" + pass_filenames: true + args: ["--config-file", "pyproject.toml", "--ignore-missing-imports"] + additional_dependencies: [pydantic>=2.0.0, rich>=10.0.0, ruamel.yaml, anyio>=3.0.0, pyzmq>=22.0.3, bidict, sqlalchemy-stubs, types-python-slugify, types-setuptools, types-python-dateutil, dag_cbor, multiformats, textual, regex, types-pytz, types-orjson] + +- repo: https://github.com/charliermarsh/ruff-pre-commit + # Ruff version. + rev: 'v0.1.4' + hooks: + - id: ruff + +- repo: https://github.com/Kludex/no-optional + rev: 0.4.0 + hooks: + - id: no_optional + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: 'v4.3.0' + hooks: + - id: trailing-whitespace + exclude: 'setup.cfg' + - id: check-added-large-files + - id: check-ast + - id: check-json + - id: check-merge-conflict + - id: check-xml + - id: check-yaml + exclude: 'tests/\*' + - id: debug-statements + - id: end-of-file-fixer + exclude: '.*.json' + - id: requirements-txt-fixer + - id: fix-encoding-pragma + - id: mixed-line-ending + args: ['--fix=no'] + #- id: no-commit-to-branch + # args: [--branch, master] diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..3aa5309 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,3 @@ +# Contributors + +* Mariella De Crouy Chanel diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4fbf4ed --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +========= +Changelog +========= + +## Version 0.0.1 (Upcoming) + +- first release of *kiara_plugin.topic_modelling* diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a612ad9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. 
"Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. 
"Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. 
Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. 
Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. 
Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. 
However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. 
* +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. 
+Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. 
If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..df3c234 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,15 @@ +include AUTHORS.rst +include CONTRIBUTING.rst +include HISTORY.rst +include LICENSE +include README.md + +include src/kiara_plugin/topic_modelling/py.typed + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.md *.jpg *.png *.gif +recursive-include src/kiara_plugin/topic_modelling/resources * +recursive-include src/kiara_plugin/topic_modelling/pipelines * diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7bdcec2 --- /dev/null +++ b/Makefile @@ -0,0 +1,83 @@ +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +docs: ## build documentation + mkdocs build + +serve-docs: ## serve and watch documentation + mkdocs serve -a 0.0.0.0:8000 + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . 
-name '*~' -exec rm -f {} + + find . -name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + rm -fr .mypy_cache + +init: clean ## initialize a development environment (to be run in virtualenv) + git init + git checkout -b develop || true + pip install -U pip + pip install --extra-index-url https://pypi.fury.io/dharpa/ -U -e '.[dev_utils]' + pre-commit install + pre-commit install --hook-type commit-msg + setup-cfg-fmt setup.cfg || true + git add "*" ".*" + pre-commit run --all-files || true + git add "*" ".*" + +update-dependencies: ## update all development dependencies + pip install -U pip + pip install --extra-index-url https://pypi.fury.io/dharpa/ -U -e '.[all_dev]' + + +setup-cfg-fmt: # format setup.cfg + setup-cfg-fmt setup.cfg || true + +black: ## run black + black --config pyproject.toml setup.py src/kiara_plugin/topic_modelling tests + +flake: ## check style with flake8 + flake8 src/kiara_plugin/topic_modelling tests + +mypy: ## run mypy + mypy --namespace-packages --explicit-package-base src/kiara_plugin/topic_modelling + +test: ## run tests quickly with the default Python + py.test + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run -m pytest tests + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +check: black flake mypy test ## run dev-related checks + +pre-commit: ## run pre-commit on all files + pre-commit run --all-files + +dist: clean ## build source and wheel packages + python setup.py sdist + python setup.py bdist_wheel + ls -l dist diff --git a/README.md b/README.md new file mode 100644 index 0000000..f5b17b2 --- /dev/null +++ b/README.md @@ -0,0 +1,107 @@ +[![PyPI status](https://img.shields.io/pypi/status/kiara_plugin.topic_modelling.svg)](https://pypi.python.org/pypi/kiara_plugin.topic_modelling/) +[![PyPI 
version](https://img.shields.io/pypi/v/kiara_plugin.topic_modelling.svg)](https://pypi.python.org/pypi/kiara_plugin.topic_modelling/) +[![PyPI pyversions](https://img.shields.io/pypi/pyversions/kiara_plugin.topic_modelling.svg)](https://pypi.python.org/pypi/kiara_plugin.topic_modelling/) +[![Build Status](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2FDHARPA-Project%2Fkiara%2Fbadge%3Fref%3Ddevelop&style=flat)](https://actions-badge.atrox.dev/DHARPA-Project/kiara_plugin.topic_modelling/goto?ref=develop) +[![Coverage Status](https://coveralls.io/repos/github/DHARPA-Project/kiara_plugin.topic_modelling/badge.svg?branch=develop)](https://coveralls.io/github/DHARPA-Project/kiara_plugin.topic_modelling?branch=develop) +[![Code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black) + +# [**kiara**](https://dharpa.org/kiara.documentation) plugin: (topic_modelling) + +A Kiara plugin to create a Topic Modelling workflow. + + - Documentation: [https://DHARPA-Project.github.io/kiara_plugin.topic_modelling](https://DHARPA-Project.github.io/kiara_plugin.topic_modelling) + - Code: [https://github.com/DHARPA-Project/kiara_plugin.topic_modelling](https://github.com/DHARPA-Project/kiara_plugin.topic_modelling) + - `kiara`: [https://dharpa.org/kiara.documentation](https://dharpa.org/kiara.documentation) + +## Description + +TODO + +## Development + +### Requirements + +- Python (version >= 3.8) +- pip, virtualenv +- git +- make (on Linux / Mac OS X -- optional) + + +### Prepare development environment + +If you only want to work on the modules, and not the core *Kiara* codebase, follow the instructions below. Otherwise, please +check the notes on how to set up a *Kiara* development environment under (TODO).
+ +#### Linux & Mac OS X (using make) + +For *NIX-like operating systems, setting up a development environment is relatively easy: + +```console +git clone https://github.com/DHARPA-Project/kiara_plugin.topic_modelling.git +cd kiara_plugin.topic_modelling +python3 -m venv .venv +source .venv/bin/activate +make init +``` + +#### Windows (or manual pip install) + +It's impossible to lay out all the ways Python can be installed on a machine, and virtual- (or conda-)envs can be created, so I'll assume you know how to do this. +One simple way is to install the [Anaconda (individual edition)](https://docs.anaconda.com/anaconda/install/index.html), then use the Anaconda navigator to create a new environment, install the 'git' package in it (if your system does not already have it), and use the 'Open Terminal' option of that environment to start up a terminal that has that virtual-/conda-environment activated. + +Once that is done, `cd` into a directory where you want this project folder to live, and do: + +```console +# make sure your virtual env is activated!!! +git clone https://github.com/DHARPA-Project/kiara_plugin.topic_modelling.git +cd kiara_plugin.topic_modelling +pip install --extra-index-url https://pypi.fury.io/dharpa/ -U -e .[all_dev] +``` + +#### Try it out + +After this is done, you should be able to run the included example module via: + +```console +kiara run topic_modelling_example text_1="xxx" text_2="yyy" +... +... +``` + +### Re-activate the development environment + +The 'prepare' step from above only has to be done once.
After that, to re-enable your virtual environment, +you'll need to navigate to the directory again (wherever that is, in your case), and run the ``source`` command from before again: + +```console +cd path/to/kiara_plugin.topic_modelling +source .venv/bin/activate # if it isn't activated already, for example by the Anaconda navigator +kiara --help # or whatever, point is, kiara should be available for you now, +``` + +### ``make`` targets (Linux & Mac OS X) + +- ``init``: init development project (install project & dev dependencies into virtualenv, as well as pre-commit git hook) +- ``update-dependencies``: update development dependencies (mainly the core ``kiara`` package from git) +- ``flake``: run *flake8* tests +- ``mypy``: run mypy tests +- ``test``: run unit tests +- ``docs``: create static documentation pages (under ``build/site``) +- ``serve-docs``: serve documentation pages (incl. auto-reload) for getting direct feedback when working on documentation +- ``clean``: clean build directories + +For details (and other, minor targets), check the ``Makefile``. + + +### Running tests + +``` console +> make test +# or +> make coverage +``` + + +## Copyright & license + +This project is MPL v2.0 licensed, for the license text please check the [LICENSE](/LICENSE) file in this repository. 
diff --git a/ci/conda/conda-pkg-patch.yaml b/ci/conda/conda-pkg-patch.yaml new file mode 100644 index 0000000..f361296 --- /dev/null +++ b/ci/conda/conda-pkg-patch.yaml @@ -0,0 +1,18 @@ +channels: + - conda-forge + - dharpa + +host_requirements: + - pip + - python + - setuptools<=63 + - setuptools_scm + +test: + imports: + - kiara_plugin.topic_modelling + source_files: + - tests + - examples + commands: + - kiara module list diff --git a/commitlint.config.js b/commitlint.config.js new file mode 100644 index 0000000..28fe5c5 --- /dev/null +++ b/commitlint.config.js @@ -0,0 +1 @@ +module.exports = {extends: ['@commitlint/config-conventional']} diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md new file mode 100644 index 0000000..ceb1fa6 --- /dev/null +++ b/docs/SUMMARY.md @@ -0,0 +1,5 @@ +* [Home](index.md) +* [Package contents](info/) +* [Usage](usage.md) +* [Development](development.md) +* [API reference](reference/) diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 0000000..a0dca9a --- /dev/null +++ b/docs/development.md @@ -0,0 +1,64 @@ +# Development + + +## Prepare development environment + +### Using conda (recommended) + +``` +conda create -n topic_modelling python=3.9 +conda activate topic_modelling +conda install -c conda-forge mamba # this is optional, but makes everything install related much faster, if you don't use it, replace 'mamba' with 'conda' below +mamba install -c conda-forge -c dharpa kiara +mamba install -c conda-forge -c dharpa kiara_plugin.core_types kiara_plugin.tabular # optional, adjust which plugin packages you depend on, those two are quite common +``` + +### Using Python venv + +Later, alligator. + + +## Check out the source code + +First, fork the [kiara_plugin.topic_modelling](https://github.com/DHARPA-Project/kiara_plugin.topic_modelling) repository into your personal Github account. 
+ +Then, use the resulting url (in my case: https://github.com/makkus/kiara_plugin.topic_modelling.git) to clone the repository locally: + +``` +git clone https://github.com/<YOUR_GITHUB_USERNAME>/kiara_plugin.topic_modelling.git +``` + +## Install the kiara plugin package into it + +``` +cd kiara_plugin.topic_modelling +pip install -e '.[all_dev]' +``` + +Here we use the `-e` option for the `pip install` command. This installs the local folder as a package in development mode into the current environment. Development mode makes it so that if you change any of the files in this folder, the Python environment will pick it up automatically, and whenever you run anything in this environment the latest version of your code/files is used. + +We also install a few additional requirements (the `[all_dev]` part in the command above) that are not strictly necessary for `kiara` itself, or this package, but help with various development-related tasks. + +## Install some pre-commit check tooling (optional) + +This step is optional, but helps with keeping the code clean and CI from failing. By installing [pre-commit](https://pre-commit.com/) hooks as shown below, +whenever you do a `git commit` in this repo, a series of checks and cleanup tasks are run, until everything is in a state +that will hopefully make Github Actions not complain when you push your changes. + +``` +pre-commit install +pre-commit install --hook-type commit-msg +``` + +In addition to some Python-specific checks and cleanup tasks, this will also check your commit message so it's in line with the suggested format: +https://www.conventionalcommits.org/en/v1.0.0/ + +## Run kiara + +To check if everything works as expected and you can start adding/changing code in this repository, run any `kiara` command: + +``` +kiara operation list -t topic_modelling +``` + +If everything is set up correctly, the output of this command should contain a few operations that are implemented in this repository.
diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..c78ef9e --- /dev/null +++ b/docs/index.md @@ -0,0 +1,22 @@ +# [**kiara**](https://dharpa.org/kiara.documentation) plugin: topic_modelling + +This package contains a set of commonly used/useful modules, pipelines, types and metadata schemas for [*Kiara*](https://github.com/DHARPA-project/kiara). + +## Description + +A Kiara plugin to create a Topic Modelling workflow. + +## Package content + +{% for item_type, item_group in get_context_info().get_all_info().items() %} + +### {{ item_type }} +{% for item, details in item_group.item_infos.items() %} +- [`{{ item }}`][kiara_info.{{ item_type }}.{{ item }}]: {{ details.documentation.description }} +{% endfor %} +{% endfor %} + +## Links + + - Documentation: [https://DHARPA-Project.github.io/kiara_plugin.topic_modelling](https://DHARPA-Project.github.io/kiara_plugin.topic_modelling) + - Code: [https://github.com/DHARPA-Project/kiara_plugin.topic_modelling](https://github.com/DHARPA-Project/kiara_plugin.topic_modelling) diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..10031fe --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,5 @@ +div.doc-contents:not(.first) { + padding-left: 25px; + border-left: .05rem solid var(--md-default-fg-color--lightest); + margin-bottom: 80px; +} diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..186c672 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,4 @@ +# Usage + + +TO BE DONE diff --git a/examples/data/Readme.md b/examples/data/Readme.md new file mode 100644 index 0000000..6923952 --- /dev/null +++ b/examples/data/Readme.md @@ -0,0 +1 @@ +A folder to place example data that is relevant for this plugin. It can be used subsequently for unit tests, and in documentation generation.
diff --git a/examples/data/journals/JournalEdges1902.csv b/examples/data/journals/JournalEdges1902.csv new file mode 100644 index 0000000..6439db7 --- /dev/null +++ b/examples/data/journals/JournalEdges1902.csv @@ -0,0 +1,322 @@ +Source,Target,weight +1,1,11 +1,5,1 +1,7,6 +1,8,15 +1,10,24 +1,13,1 +1,14,2 +1,15,8 +1,18,7 +1,20,48 +1,21,7 +1,22,4 +1,23,75 +1,24,1 +1,26,8 +1,29,1 +1,30,14 +1,35,16 +1,36,23 +1,37,4 +1,38,5 +1,39,4 +1,40,10 +1,41,2 +1,42,4 +1,43,2 +1,44,1 +1,45,5 +1,46,7 +1,47,2 +1,56,1 +1,58,34 +1,61,9 +1,63,12 +1,71,3 +1,72,9 +1,73,3 +1,74,5 +1,75,6 +1,76,2 +1,78,1 +1,79,8 +1,81,1 +1,83,1 +1,84,1 +1,85,1 +1,86,1 +1,87,1 +1,88,1 +1,89,1 +1,90,2 +1,91,1 +1,92,1 +1,258,1 +3,20,1 +3,35,1 +20,1,9 +20,3,1 +20,7,8 +20,8,7 +20,9,9 +20,21,6 +20,23,2 +20,24,1 +20,26,4 +20,35,1 +20,36,1 +20,40,1 +20,41,6 +20,43,2 +20,46,4 +20,47,1 +20,58,1 +20,64,5 +20,72,1 +20,78,2 +20,79,12 +20,82,3 +20,83,2 +20,88,1 +20,90,5 +20,100,4 +20,104,1 +20,105,1 +20,109,1 +20,118,6 +20,120,4 +20,131,1 +20,167,2 +20,208,1 +20,210,4 +20,273,1 +20,280,2 +20,281,1 +20,282,2 +20,283,1 +20,284,1 +20,285,1 +20,287,4 +20,288,2 +20,289,1 +20,290,1 +20,291,1 +20,292,1 +20,293,1 +23,1,6 +23,5,2 +23,7,6 +23,8,1 +23,9,1 +23,10,2 +23,13,1 +23,14,1 +23,18,8 +23,20,3 +23,22,3 +23,26,1 +23,33,1 +23,39,2 +23,40,9 +23,42,2 +23,56,1 +23,63,3 +23,64,1 +23,72,1 +23,74,1 +23,75,1 +23,87,1 +23,90,1 +23,92,1 +23,100,2 +23,101,1 +23,115,1 +23,118,3 +23,147,1 +23,161,1 +23,274,3 +23,275,2 +23,276,1 +23,277,1 +23,279,2 +23,294,1 +23,295,1 +23,296,1 +23,297,1 +23,298,2 +23,299,3 +23,300,1 +23,301,1 +23,302,2 +26,1,25 +26,3,7 +26,7,37 +26,8,15 +26,9,11 +26,15,45 +26,18,9 +26,19,15 +26,20,17 +26,21,22 +26,23,30 +26,36,26 +26,40,28 +26,46,29 +26,47,8 +26,58,18 +26,62,31 +26,63,15 +26,64,22 +26,79,9 +26,90,14 +26,104,11 +26,106,11 +26,115,19 +26,131,12 +26,208,5 +26,293,8 +26,305,10 +26,306,29 +27,1,14 +27,2,1 +27,3,1 +27,4,1 +27,5,1 +27,6,4 +27,7,1 +27,8,2 +27,9,2 +27,10,4 +27,13,1 +27,14,4 +27,15,5 +27,16,3 
+27,17,4 +27,18,3 +27,19,1 +27,20,2 +27,21,1 +27,22,2 +27,23,2 +27,24,2 +27,25,2 +27,26,1 +27,28,1 +27,29,1 +27,30,1 +27,31,1 +27,33,1 +27,34,1 +27,58,2 +27,80,1 +27,82,2 +27,92,1 +27,109,1 +27,125,6 +27,127,7 +27,129,2 +27,130,9 +27,132,8 +27,134,4 +27,135,3 +27,139,3 +27,145,4 +27,146,4 +27,156,5 +27,157,1 +27,182,3 +27,186,1 +27,215,2 +27,217,3 +27,219,1 +27,220,1 +27,226,1 +27,227,2 +27,228,2 +27,230,5 +27,235,5 +27,236,7 +27,237,1 +27,241,12 +27,254,1 +27,259,1 +27,262,7 +27,263,1 +27,264,1 +27,265,1 +27,266,1 +27,267,1 +27,268,1 +27,269,2 +27,270,1 +27,271,3 +27,272,1 +47,26,1 +51,1,17 +51,4,1 +51,6,1 +51,7,13 +51,8,4 +51,10,9 +51,14,6 +51,15,11 +51,16,1 +51,18,5 +51,19,5 +51,20,5 +51,21,7 +51,22,4 +51,23,14 +51,26,4 +51,29,1 +51,32,1 +51,35,4 +51,38,1 +51,44,5 +51,45,1 +51,47,4 +51,52,1 +51,56,14 +51,58,1 +51,60,2 +51,61,1 +51,62,20 +51,63,11 +51,64,3 +51,67,1 +51,73,3 +51,79,1 +51,84,1 +51,90,1 +51,93,1 +51,94,3 +51,95,2 +51,96,3 +51,97,3 +51,98,12 +51,99,1 +51,100,6 +51,101,4 +51,102,3 +51,103,2 +51,104,2 +51,105,3 +51,106,3 +51,107,1 +51,108,1 +51,109,5 +51,110,1 +51,111,1 +51,112,1 +51,113,1 +51,114,2 +51,115,2 +51,116,1 +51,118,3 +51,119,2 +51,120,1 +51,121,1 +63,102,1 +147,27,11 +147,241,1 diff --git a/examples/data/journals/JournalNodes1902.csv b/examples/data/journals/JournalNodes1902.csv new file mode 100644 index 0000000..cf6ce92 --- /dev/null +++ b/examples/data/journals/JournalNodes1902.csv @@ -0,0 +1,277 @@ +Id,Label,JournalType,City,CountryNetworkTime,PresentDayCountry,Latitude,Longitude,Language +75,Psychiatrische en neurologische bladen,specialized: psychiatry and neurology,Amsterdam,Netherlands,Netherlands,52.366667,4.9,Dutch +36,The American Journal of Insanity,specialized: psychiatry and neurology,Baltimore,United States,United States,39.289444,-76.615278,English +208,The American Journal of Psychology,specialized: psychology,Baltimore,United States,United States,39.289444,-76.615278,English +295,Die Krankenpflege,specialized: 
therapy,Berlin,German Empire,Germany,52.52,13.405,German +296,Die deutsche Klinik am Eingange des zwanzigsten Jahrhunderts,general medicine,Berlin,German Empire,Germany,52.52,13.405,German +300,Therapeutische Monatshefte,specialized: therapy,Berlin,German Empire,Germany,52.52,13.405,German +1,Allgemeine Zeitschrift für Psychiatrie,specialized: psychiatry and neurology,Berlin,German Empire,Germany,52.52,13.405,German +7,Archiv für Psychiatrie und Nervenkrankheiten,specialized: psychiatry and neurology,Berlin,German Empire,Germany,52.52,13.405,German +10,Berliner klinische Wochenschrift,general medicine,Berlin,German Empire,Germany,52.52,13.405,German +13,Charité Annalen,general medicine,Berlin,German Empire,Germany,52.52,13.405,German +21,Monatsschrift für Psychiatrie und Neurologie,specialized: psychiatry and neurology,Berlin,German Empire,Germany,52.52,13.405,German +29,Virchows Archiv,"specialized: anatomy, physiology and pathology",Berlin,German Empire,Germany,52.52,13.405,German +31,Zeitschrift für pädagogische Psychologie und Pathologie,specialized: psychology and pedagogy,Berlin,German Empire,Germany,52.52,13.405,German +42,Vierteljahrsschrift für gerichtliche Medizin und öffentliches Sanitätswesen,"specialized: anthropology, criminology and forensics",Berlin,German Empire,Germany,52.52,13.405,German +47,Centralblatt für Nervenheilkunde und Psychiatrie,specialized: psychiatry and neurology,Berlin,German Empire,Germany,52.52,13.405,German +50,Russische medicinische Rundschau,general medicine,Berlin,German Empire,Germany,52.52,13.405,German +76,Deutsche Aerzte-Zeitung,general medicine,Berlin,German Empire,Germany,52.52,13.405,German +87,Monatsschrift für Geburtshülfe und Gynäkologie,specialized: gynecology,Berlin,German Empire,Germany,52.52,13.405,German +108,Archiv für klinische Chirurgie,specialized: surgery,Berlin,German Empire,Germany,52.52,13.405,German +113,Zeitschrift für klinische Medicin,general medicine,Berlin,German Empire,Germany,52.52,13.405,German 
+159,Deutsche militärärztliche Zeitschrift,specialized: military medicine,Berlin,German Empire,Germany,52.52,13.405,German +162,Jahresbericht über die Leistungen und Fortschritte auf dem Gebiete der Neurologie und Psychiatrie,specialized: psychiatry and neurology,Berlin,German Empire,Germany,52.52,13.405,German +192,Ärztliche Sachverständigen-Zeitung,general medicine,Berlin,German Empire,Germany,52.52,13.405,German +198,Zeitschrift für die Behandlung Schwachsinniger und Epileptischer,specialized: psychiatry and neurology,Berlin,German Empire,Germany,52.52,13.405,German +258,Der Pfarrbote,news media,Berlin,German Empire,Germany,52.52,13.405,German +71,Correspondenz-Blatt für Schweizer Aerzte,general medicine,Bern,Switzerland,Switzerland,46.948056,7.4475,German +6,Archiv für mikroskopische Anatomie,"specialized: anatomy, physiology and pathology",Bonn,German Empire,Germany,50.733333,7.1,German +203,The Journal of Abnormal Psychology,specialized: psychology,Boston,United States,United States,42.358056,-71.063611,English +273,"Correspondenz-Blatt der Deutschen Gesellschaft für Anthropologie, Ethnologie und Urgeschichte","specialized: anthropology, criminology and forensics",Braunschweig,German Empire,Germany,52.266667,10.516667,German +303,Policlinique de Bruxelles,general medicine,Brussels,Belgium,Belgium,50.85,4.35,French +306,Annales de la Société Belge de Neurologie,specialized: psychiatry and neurology,Brussels,Belgium,Belgium,50.85,4.35,French +19,Journal de neurologie,specialized: psychiatry and neurology,Brussels,Belgium,Belgium,50.85,4.35,French +25,"Revue internationale d'électrothérapie, de physiologie, de médecine, de chirurgie, d'obstétrique, de thérapeutique, de chimie et de pharmacie",general medicine,Brussels,Belgium,Belgium,50.85,4.35,French +35,Bulletin de la Société de Médecine Mentale de Belgique,specialized: psychiatry and neurology,Brussels,Belgium,Belgium,50.85,4.35,French +299,Gyógyászat,general medicine,Budapest,Austro-Hungarian 
Empire,Hungary,47.4925,19.051389,Hungarian +80,Orvosi hetilap,general medicine,Budapest,Austro-Hungarian Empire,Hungary,47.4925,19.051389,Hungarian +204,Elme és idegkórtan,specialized: psychiatry and neurology,Budapest,Austro-Hungarian Empire,Hungary,47.4925,19.051389,Hungarian +275,Pester medizinisch-chirurgische Presse,general medicine,Budapest,Austro-Hungarian Empire,Hungary,47.4925,19.051389,German +129,千葉醫學專門學校校友雜誌,general medicine,Chiba,Japanese Empire,Japan,35.607278,140.106361,Japanese +110,The Journal of the American Medical Association,general medicine,Chicago,United States,United States,41.881944,-87.627778,English +174,Medical Century,general medicine,Chicago,United States,United States,41.881944,-87.627778,English +34,Dresdner Nachrichten,news media,Dresden,German Empire,Germany,51.033333,13.733333,German +167,The Scottish Medical and Surgical Journal,specialized: surgery,Edinburgh,British Empire,United Kingdom,55.953,-3.189,English +161,Giornale di psichiatria clinica e tecnica manicomiale,specialized: psychiatry and neurology,Ferrara,Italy,Italy,44.833333,11.616667,Italian +64,Rivista di patología nervosa e mentale,specialized: psychiatry and neurology,Florence,Italy,Italy,43.771389,11.254167,Italian +263,福井県医学会雑誌,general medicine,Fukui,Japanese Empire,Japan,36.064056,136.219583,Japanese +247,福岡醫科大學雜誌,general medicine,Fukuoka,Japanese Empire,Japan,33.583333,130.4,Japanese +262,杏林之栞,general medicine,Fukuoka,Japanese Empire,Japan,33.583333,130.4,Japanese +39,Archives de Psychologie de la Suisse Romande,specialized: psychology,Geneva,Switzerland,Switzerland,46.2,6.15,French +70,Annales de la Société de médecine de Gand,general medicine,Ghent,Belgium,Belgium,51.053611,3.725278,French +289,Glasgow medical journal,general medicine,Glasgow,British Empire,United Kingdom,55.860916,-4.251433,English +40,Psychiatrische Wochenschrift,specialized: psychiatry and neurology,Halle (Saale),German Empire,Germany,51.482778,11.969722,German +117,Klinik für psychische 
und nervöse Krankheiten,specialized: psychiatry and neurology,Halle (Saale),German Empire,Germany,51.482778,11.969722,German +46,Zeitschrift für Psychologie und Physiologie der Sinnesorgane,specialized: psychology,Hamburg,German Empire,Germany,53.565278,10.001389,German +282,The Quarterly Journal of Inebriety,specialized: alcoholism,Hartford (Connecticut),United States,United States,41.7625,-72.674167,English +74,Der Irrenfreund,specialized: psychiatry and neurology,Heilbronn,German Empire,Germany,49.15,9.216667,German +186,廣島衛生醫事月報,general medicine,Hiroshima,Japanese Empire,Japan,34.383333,132.45,Japanese +2,Anatomischer Anzeiger,"specialized: anatomy, physiology and pathology",Jena,German Empire,Germany,50.927222,11.586111,German +12,Centralblatt für allgemeine Pathologie und pathologische Anatomie,"specialized: anatomy, physiology and pathology",Jena,German Empire,Germany,50.927222,11.586111,German +188,Zeitschrift für Allgemeine Physiologie,"specialized: anatomy, physiology and pathology",Jena,German Empire,Germany,50.927222,11.586111,German +187,金澤醫學會會報,general medicine,Kanazawa,Japanese Empire,Japan,36.561056,136.656417,Japanese +189,十全会雜誌,general medicine,Kanazawa,Japanese Empire,Japan,36.561056,136.656417,Japanese +73,Неврологический вестник,specialized: psychiatry and neurology,Kazan,Russian Empire,Russia,55.796389,49.108889,Russian +121,Казанский медицинский журнал,general medicine,Kazan,Russian Empire,Russia,55.796389,49.108889,Russian +49,Русский журнал кожных и венерических болезней,specialized: veneral diseases,Kharkiv,Russian Empire,Ukraine,50.004444,36.231389,Russian +217,兵庫県醫學會雜誌,general medicine,Kobe,Japanese Empire,Japan,34.69,135.195556,Japanese +59,Сибирские врачебные ведомости,general medicine,Krasnoyarsk,Russian Empire,Russia,56.016667,92.866667,Russian +213,鎮西醫報,general medicine,Kumamoto,Japanese Empire,Japan,32.803056,130.707778,Japanese +267,鎭西醫報,general medicine,Kumamoto,Japanese Empire,Japan,32.803056,130.707778,Japanese +122,Вопросы 
нервно-психической медицыны,specialized: psychiatry and neurology,Kyiv,Russian Empire,Ukraine,50.45,30.523333,Russian +226,京都醫事衛生誌,general medicine,Kyoto,Japanese Empire,Japan,35.011667,135.768333,Japanese +227,京都醫學専門学校校友会雜誌,general medicine,Kyoto,Japanese Empire,Japan,35.011667,135.768333,Japanese +228,京都醫學会雜誌,general medicine,Kyoto,Japanese Empire,Japan,35.011667,135.768333,Japanese +248,耳鼻咽喉科京都臨床,specialized: oto-rhino-laryngology,Kyoto,Japanese Empire,Japan,35.011667,135.768333,Japanese +17,Die Kinderfehler,specialized: pedagogy and pediatrics,Langensalza,German Empire,Germany,51.108056,10.646667,German +276,Revue médicale de la Suisse romande,general medicine,Lausanne,Switzerland,Switzerland,46.519833,6.6335,French +4,Archiv für Anatomie und Physiologie. Anatomische Abteilung,"specialized: anatomy, physiology and pathology",Leipzig,German Empire,Germany,51.333333,12.383333,German +5,Archiv für Kriminal-Anthropologie und Kriminalistik,"specialized: anthropology, criminology and forensics",Leipzig,German Empire,Germany,51.333333,12.383333,German +11,Biologisches Centralblatt,specialized: biology,Leipzig,German Empire,Germany,51.333333,12.383333,German +15,Deutsche Zeitschrift für Nervenheilkunde,specialized: psychiatry and neurology,Leipzig,German Empire,Germany,51.333333,12.383333,German +16,Deutsches Archiv für klinische Medicin,general medicine,Leipzig,German Empire,Germany,51.333333,12.383333,German +23,Neurologisches Centralblatt,specialized: psychiatry and neurology,Leipzig,German Empire,Germany,51.333333,12.383333,German +32,Archiv für Anatomie und Physiologie. 
Physiologische Abteilung,"specialized: anatomy, physiology and pathology",Leipzig,German Empire,Germany,51.333333,12.383333,German +82,Psychologische Arbeiten,specialized: psychology,Leipzig,German Empire,Germany,51.333333,12.383333,German +85,Philosophische Studien,specialized: psychology,Leipzig,German Empire,Germany,51.333333,12.383333,German +96,Monatsschrift für Unfallheilkunde,specialized: surgery,Leipzig,German Empire,Germany,51.333333,12.383333,German +99,Centralblatt für innere Medicin,general medicine,Leipzig,German Empire,Germany,51.333333,12.383333,German +111,Monatshefte für praktische Dermatolgie,specialized: dermatology,Leipzig,German Empire,Germany,51.333333,12.383333,German +124,Journal für Psychologie und Neurologie,specialized: psychology and neurology,Leipzig,German Empire,Germany,51.333333,12.383333,German +171,Schmidt's Jahrbücher der in- und ausländischen gesammten Medicin,general medicine,Leipzig,German Empire,Germany,51.333333,12.383333,German +190,Archiv für die gesamte Psychologie,specialized: psychology,Leipzig,German Empire,Germany,51.333333,12.383333,German +200,Zeitschrift für Bahn- und Bahnkassenäzte,general medicine,Leipzig,German Empire,Germany,51.333333,12.383333,German +211,Vierteljahrsschrift für wissenschaftliche Philosophie,specialized: psychology,Leipzig,German Empire,Germany,51.333333,12.383333,German +260,Zeitschrift für Schulgesundheitspflege,specialized: pedagogy and pediatrics,Leipzig,German Empire,Germany,51.333333,12.383333,German +106,Le Névraxe,specialized: psychiatry and neurology,Leuven,Belgium,Belgium,50.883333,4.7,French +297,Proceedings of the Society for Psychical Research,specialized: psychology,London,British Empire,United Kingdom,51.507222,-0.1275,English +20,The journal of mental science,specialized: psychiatry and neurology,London,British Empire,United Kingdom,51.507222,-0.1275,English +28,The Journal of Physiology,"specialized: anatomy, physiology and pathology",London,British Empire,United 
Kingdom,51.507222,-0.1275,English +83,British Medical Journal,general medicine,London,British Empire,United Kingdom,51.507222,-0.1275,English +105,The Lancet,general medicine,London,British Empire,United Kingdom,51.507222,-0.1275,English +115,Brain. A journal of neurology,specialized: psychiatry and neurology,London,British Empire,United Kingdom,51.507222,-0.1275,English +283,"Philosophical transactions of the Royal Society of London. Series B, Biological sciences",specialized: biology,London,British Empire,United Kingdom,51.507222,-0.1275,English +3,Archives d'anthropologie criminelle,"specialized: anthropology, criminology and forensics",Lyon,France,France,45.76,4.84,French +304,Revista de medicina y cirugía prácticas,specialized: surgery,Madrid,Spain,Spain,40.416667,-3.716667,Spanish +244,茨城県医学会会誌,general medicine,Mito,Japanese Empire,Japan,36.365833,140.47125,Japanese +51,Журнал невропатологии и психиатрии имени С. С. Корсакова,specialized: psychiatry and neurology,Moscow,Russian Empire,Russia,55.755833,37.617222,Russian +69,Современная психиатрия,specialized: psychiatry and neurology,Moscow,Russian Empire,Russia,55.755833,37.617222,Russian +102,Медицинское обозрение,general medicine,Moscow,Russian Empire,Russia,55.755833,37.617222,Russian +196,Вопросы философии и психологии,specialized: psychology,Moscow,Russian Empire,Russia,55.755833,37.617222,Russian +22,Münchener medizinische Wochenschrift,general medicine,Munich,German Empire,Germany,48.133333,11.566667,German +33,Zeitschrift für praktische Ärzte,general medicine,Munich,German Empire,Germany,48.133333,11.566667,German +128,研瑶會雜誌,general medicine,Nagasaki,Japanese Empire,Japan,32.744722,129.873611,Japanese +156,中央醫學會雜誌,general medicine,Nagoya,Japanese Empire,Japan,35.183333,136.9,Japanese +212,愛知県立醫學専門學校同窓會雜誌,general medicine,Nagoya,Japanese Empire,Japan,35.183333,136.9,Japanese +225,好生館醫事研究会雜誌,general medicine,Nagoya,Japanese Empire,Japan,35.183333,136.9,Japanese +224,九州沖縄醫學會雜誌,general 
medicine,Naha,Japanese Empire,Japan,26.212222,127.679167,Japanese +290,Bollettino della societa ginecol. Di napoli,specialized: gynecology,Naples,Italy,Italy,40.845,14.258333,Italian +41,"Rivista mensile di psichiatria forense, antropologia criminale, e scienze affini","specialized: anthropology, criminology and forensics",Naples,Italy,Italy,40.845,14.258333,Italian +89,La Riforma medica,general medicine,Naples,Italy,Italy,40.845,14.258333,Italian +90,Il Manicomio,specialized: psychiatry and neurology,Naples,Italy,Italy,40.845,14.258333,Italian +284,Studies from the Yale Psychological Laboratory,specialized: psychology,New Haven (Connecticut),United States,United States,41.31,-72.923611,English +302,Archives of pediatrics : a monthly devoted to the diseases of infants and children,specialized: pedagogy and pediatrics,New York,United States,United States,40.71274,-74.005974,English +305,The Journal of Mental Pathology,specialized: psychiatry and neurology,New York,United States,United States,40.71274,-74.005974,English +24,New-York Medical Journal,general medicine,New York,United States,United States,40.71274,-74.005974,English +58,The Journal of Nervous and Mental Disease,specialized: psychiatry and neurology,New York,United States,United States,40.71274,-74.005974,English +91,The Medical Record,general medicine,New York,United States,United States,40.71274,-74.005974,English +131,The Psychological Review,specialized: psychology,New York,United States,United States,40.71274,-74.005974,English +259,The journal of comparative neurology,specialized: psychiatry and neurology,New York,United States,United States,40.71274,-74.005974,English +135,北越醫會會報,general medicine,Niigata,Japanese Empire,Japan,37.916111,139.036389,Japanese +37,Friedreich's Blätter für gerichtliche Medicin und Sanitätspolizei,"specialized: anthropology, criminology and forensics",Nuremberg,German Empire,Germany,49.45,11.083333,German +143,岡山醫學會雜誌,general medicine,Okayama,Japanese 
Empire,Japan,34.65,133.916667,Japanese +235,大阪醫學會雜誌,general medicine,Osaka,Japanese Empire,Japan,34.693889,135.502222,Japanese +266,緒方病院医事會報,general medicine,Osaka,Japanese Empire,Japan,34.693889,135.502222,Japanese +268,通俗衛生,specialized: hygiene,Osaka,Japanese Empire,Japan,34.693889,135.502222,Japanese +279,Norsk magasin för laegevidenskaben,general medicine,Oslo,Norway,Norway,59.913889,10.752222,Norwegian +298,Revue mensuelle des maladies de l'enfance,specialized: pedagogy and pediatrics,Paris,France,France,48.856613,2.352222,French +8,Archives de neurologie,specialized: psychiatry and neurology,Paris,France,France,48.856613,2.352222,French +38,Annales d’hygiène publique et de médecine légale,"specialized: anthropology, criminology and forensics",Paris,France,France,48.856613,2.352222,French +43,Revue Scientifique,natural sciences,Paris,France,France,48.856613,2.352222,French +62,Revue neurologique,specialized: psychiatry and neurology,Paris,France,France,48.856613,2.352222,French +63,Annales médico-psychologiques,specialized: psychology,Paris,France,France,48.856613,2.352222,French +78,Congrès international de médecine. 
Comptes rendus,general medicine,Paris,France,France,48.856613,2.352222,French +79,Revue de Psychiatrie,specialized: psychiatry and neurology,Paris,France,France,48.856613,2.352222,French +84,Archives générales de médecine,general medicine,Paris,France,France,48.856613,2.352222,French +94,La Presse médicale,general medicine,Paris,France,France,48.856613,2.352222,French +97,Gazette hebdomadaire de médecine et de chirurgie,specialized: surgery,Paris,France,France,48.856613,2.352222,French +98,Bulletins et mémoires de la Société médicale des hôpitaux de Paris.,general medicine,Paris,France,France,48.856613,2.352222,French +100,Gazette des hôpitaux,general medicine,Paris,France,France,48.856613,2.352222,French +101,Revue de médecine,general medicine,Paris,France,France,48.856613,2.352222,French +107,Bulletin de l'Académie de médecine,general medicine,Paris,France,France,48.856613,2.352222,French +109,Nouvelle iconographie de la Salpêtrière,specialized: psychiatry and neurology,Paris,France,France,48.856613,2.352222,French +112,Archives de médecine expérimentale et d'anatomie pathologique,"specialized: anatomy, physiology and pathology",Paris,France,France,48.856613,2.352222,French +118,Le progrès médical,general medicine,Paris,France,France,48.856613,2.352222,French +119,"Bulletin général de thérapeutique médicale, chirurgicale, obstétricale et pharmaceutique",general medicine,Paris,France,France,48.856613,2.352222,French +120,Revue de l'hypnotisme et de psychologie physiologique,specialized: psychology,Paris,France,France,48.856613,2.352222,French +170,La Semaine médicale,general medicine,Paris,France,France,48.856613,2.352222,French +194,L' Encéphale,specialized: psychiatry and neurology,Paris,France,France,48.856613,2.352222,French +210,Revue Philosophique de la France et de l'Etranger,philosophy,Paris,France,France,48.856613,2.352222,French +274,Revue de psychologie clinique & thérapeutique,specialized: psychology,Paris,France,France,48.856613,2.352222,French 
+281,Comptes rendus des séances et mémoires de la Société de Biologie,specialized: biology,Paris,France,France,48.856613,2.352222,French +286,The Medical news: a weekly medical journal,general medicine,Philadelphia,United States,United States,39.952778,-75.163611,English +287,The American journal of the medical sciences,general medicine,Philadelphia,United States,United States,39.952778,-75.163611,English +92,Philadelphia Medical Journal,general medicine,Philadelphia,United States,United States,39.952778,-75.163611,English +294,Archives italiennes de biologie,specialized: biology,Pisa,Italy,Italy,43.716667,10.4,Italian +285,La Clinica moderna,specialized: surgery,Pisa,Italy,Italy,43.716667,10.4,Italian +278,Prager medizinische Wochenschrift,general medicine,Prague,Austro-Hungarian Empire,Czech Republic,50.083333,14.416667,German +26,Rivista Sperimentale di Freniatria,specialized: psychiatry and neurology,Reggio Emilia,Italy,Italy,44.7,10.633333,Italian +291,Atti della Società Romana di Antropologia,"specialized: anthropology, criminology and forensics",Rome,Italy,Italy,41.883333,12.5,Italian +88,Rivista mensile di neuropatologia e psichiatria,specialized: psychiatry and neurology,Rome,Italy,Italy,41.883333,12.5,Italian +52,Русский врач,general medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +54,Военное время,news media,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +55,Военно-медицинский журнал,specialized: military medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +56,Обозрение психиатрии,specialized: psychiatry and neurology,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +57,Практическая медицина,general medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +60,"Русский архив патологии, клинической медицины и бактериологии",specialized: pathology and bacteriology,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +67,Врачебная газета,general 
medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +95,Русский медицинский вестник,general medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +103,Врач,general medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +116,"Медицынские прибавления к ""Морскому сборнику""",specialized: military medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +123,Психиатрическая газета,specialized: psychiatry and neurology,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +178,Новое время,news media,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +193,Практический врач,general medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +197,Новые идеи в философии,philosophy,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,Russian +201,Petersburger medizinische Wochenschrift,general medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,German +209,Revue der russischen medizinischen Zeitschriften,general medicine,Saint Petersburg,Russian Empire,Russia,59.9375,30.308611,German +240,東北醫學會會報,general medicine,Sendai,Japanese Empire,Japan,38.268222,140.869417,Japanese +140,静岡県醫學會會報,general medicine,Shizuoka,Japanese Empire,Japan,34.975556,138.382778,Japanese +292,American journal of dermatology and genito-urinary diseases,specialized: dermatology,St. Louis,United States,United States,38.627222,-90.197778,English +61,The Alienist and Neurologist,specialized: psychiatry and neurology,St. 
Louis,United States,United States,38.627222,-90.197778,English +14,Deutsche medizinische Wochenschrift,general medicine,Stuttgart,German Empire,Germany,48.782,9.184,German +114,Beiträge zur pathologischen Anatomie und zur allgemeinen Pathologie,"specialized: anatomy, physiology and pathology",Stuttgart,German Empire,Germany,48.782,9.184,German +280,Zeitschrift für Morphologie und Anthropologie,"specialized: anthropology, criminology and forensics",Stuttgart,German Empire,Germany,48.782,9.184,German +238,台湾醫學會雜誌,general medicine,Taipei,Japanese Empire,Taiwan,25.066667,121.516667,Japanese +265,台湾医事雑誌,general medicine,Taipei,Japanese Empire,Taiwan,25.066667,121.516667,Japanese +27,神經學雜誌,specialized: psychiatry and neurology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +125,國家醫學會雜誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +127,醫事新聞,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +130,中外醫事新報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +132,東京醫事新誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +133,産科婦人科學會雜誌,specialized: gynecology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +134,順天堂醫事研究會雜誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +136,済生學舎醫事新報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +137,治療新報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +138,中央醫事新報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +139,軍醫學會雜誌,specialized: military medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +141,日本醫學,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +142,人性,sociology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +144,日本私立衛生会雜誌,specialized: hygiene,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +145,成醫會月報,general medicine,Tokyo,Japanese 
Empire,Japan,35.689722,139.692222,Japanese +146,大日本耳鼻咽喉科会会報,specialized: oto-rhino-laryngology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +147,Neurologia,specialized: psychiatry and neurology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,German +148,醫學中央雜誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +152,明治三十七八年戦役陸軍衛生史,specialized: military medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +157,大日本私立衛生会雜誌,specialized: hygiene,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +158,第二回日本聯合醫學會會誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +179,東洋學藝雜誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +180,日新醫學,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +181,第三回日本醫學會誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +182,婦人衛生雜誌,specialized: gynecology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +183,學校衛生,specialized: hygiene,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +184,藝備醫事,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +214,眼科臨床醫報,specialized: ophtamology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +215,皮膚科櫃泌尿器科雜誌,specialized: dermatology and urology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +216,北海醫報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +218,醫海時報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +219,児童研究,specialized: pedagogy and pediatrics,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +220,児科雜誌,specialized: pedagogy and pediatrics,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +221,鹿児島県醫學會雜誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +222,海軍醫事報告撮要,specialized: military medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +223,官報,general medicine,Tokyo,Japanese 
Empire,Japan,35.689722,139.692222,Japanese +229,日本婦人科学会雜誌,specialized: gynecology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +230,日本眼科学会雜誌,specialized: ophtamology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +231,日本外科学会雜誌,specialized: surgery,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +232,日本醫事週報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +233,日本内科學會會誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +234,日本消化機病雜誌,specialized: gastroenterology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +236,細菌學雜誌,specialized: bacteriology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +237,歯科学報,specialized: dentistry,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +239,哲學雜誌,philosophy,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +241,東京醫學會雜誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +242,薬學雑誌,specialized: pharmacology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +243,衛生細菌學時報,specialized: bacteriology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +245,日本小児科學會東京地方會,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +249,監獄協會雑誌,specialized: penology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +250,教育學術界,specialized: pedagogy and pediatrics,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +251,教育時論,specialized: pedagogy and pediatrics,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +252,教育の実際,specialized: pedagogy and pediatrics,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +253,荘内醫學會會報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +254,東京人類學雜誌,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +255,東洋醫事新報,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +256,早稲田文學,literature,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese 
+264,井上眼科同窓会会報,specialized: ophtamology,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +269,教育公報,specialized: pedagogy and pediatrics,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +270,助産之栞,specialized: hygiene,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +271,統計集誌,statistics,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +272,公衆醫事,general medicine,Tokyo,Japanese Empire,Japan,35.689722,139.692222,Japanese +293,Annali di freniatria e scienze affini,specialized: psychiatry and neurology,Turin,Italy,Italy,45.079167,7.676111,Italian +301,Archivio per le scienze mediche,general medicine,Turin,Italy,Italy,45.079167,7.676111,Italian +9,Archivio di psichiatria,specialized: psychiatry and neurology,Turin,Italy,Italy,45.079167,7.676111,Italian +104,Annali di nevrologia,specialized: psychiatry and neurology,Turin,Italy,Italy,45.079167,7.676111,Italian +277,L'arte medica,general medicine,Turin,Italy,Italy,45.079167,7.676111,Italian +288,Allgemeine österreichische Gerichts-Zeitung,"specialized: anthropology, criminology and forensics",Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +18,Jahrbücher für Psychiatrie,specialized: psychiatry and neurology,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +30,Wiener klinische Rundschau,general medicine,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +44,Wiener klinische Wochenschrift,general medicine,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +45,Wiener medizinische Wochenschrift,general medicine,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +72,Wiener medizinische Presse,general medicine,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +81,Monatsschrift für Gesundheitspflege,general medicine,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +93,Klinisch-therapeutische Wochenschrift,general medicine,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +151,Medicinisch-chirurgisches 
Centralblatt,specialized: surgery,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +199,Der Militärazt,specialized: military medicine,Vienna,Austro-Hungarian Empire,Austria,48.2,16.366667,German +261,Медицинская беседа,general medicine,Voronezh,Russian Empire,Russia,51.671667,39.210556,Russian +77,Medycyna,general medicine,Warsaw,Russian Empire,Poland,52.233333,21.016667,Polish +150,Kronika Lekarska,general medicine,Warsaw,Russian Empire,Poland,52.233333,21.016667,Polish +86,Grenzfragen des Nerven- und Seelenlebens,specialized: psychiatry and neurology,Wiesbaden,German Empire,Germany,50.0825,8.24,German +206,Ergebnisse der Allgemeinen Pathologie und Pathologischen Anatomie,"specialized: anatomy, physiology and pathology",Wiesbaden,German Empire,Germany,50.0825,8.24,German diff --git a/examples/data/journals/Readme.md b/examples/data/journals/Readme.md new file mode 100644 index 0000000..965874b --- /dev/null +++ b/examples/data/journals/Readme.md @@ -0,0 +1 @@ +Data created by [Lena Jaskov](https://github.com/yaslena) diff --git a/examples/jobs/Readme.md b/examples/jobs/Readme.md new file mode 100644 index 0000000..6916876 --- /dev/null +++ b/examples/jobs/Readme.md @@ -0,0 +1 @@ +A folder to place example pipelines that are relevant for this plugin. It can be used subsequently for unit tests, and in documentation generation. diff --git a/examples/jobs/example_job_topic_modelling.yaml b/examples/jobs/example_job_topic_modelling.yaml new file mode 100644 index 0000000..f87d8f2 --- /dev/null +++ b/examples/jobs/example_job_topic_modelling.yaml @@ -0,0 +1,3 @@ +operation: "${this_dir}/../pipelines/example_pipeline_topic_modelling.yaml" +inputs: + name: "beautiful world" diff --git a/examples/pipelines/Readme.md b/examples/pipelines/Readme.md new file mode 100644 index 0000000..6916876 --- /dev/null +++ b/examples/pipelines/Readme.md @@ -0,0 +1 @@ +A folder to place example pipelines that are relevant for this plugin. 
It can be used subsequently for unit tests, and in documentation generation. diff --git a/examples/pipelines/example_pipeline_topic_modelling.yaml b/examples/pipelines/example_pipeline_topic_modelling.yaml new file mode 100644 index 0000000..a3533fe --- /dev/null +++ b/examples/pipelines/example_pipeline_topic_modelling.yaml @@ -0,0 +1,25 @@ +pipeline_name: example_pipeline_topic_modelling +doc: Example pipeline for the topic_modelling plugin. +steps: + - step_id: add_hello_string + module_type: example_proj.example + module_config: + separator: " " + constants: + text_1: "Hello" + defaults: + text_2: "World" + - step_id: add_exclamation_mark + module_type: example_proj.example + module_config: + separator: "" + constants: + text_2: "!" + input_links: + text_1: add_hello_string.text + +input_aliases: + add_hello_string.text_2: name + +output_aliases: + add_exclamation_mark.text: greeting diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..8f07eb7 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,72 @@ +site_name: kiara_plugin.topic_modelling +repo_url: https://github.com/DHARPA-Project/kiara_plugin.topic_modelling +site_author: Mariella De Crouy Chanel +docs_dir: docs +site_dir: build/site + +theme: + name: material + features: + - navigation.instant + - navigation.tracking + +extra_css: + - stylesheets/extra.css + +markdown_extensions: +- attr_list +- admonition +- codehilite: + guess_lang: false +- toc: + permalink: true +- pymdownx.snippets: + base_path: docs +- pymdownx.highlight +- pymdownx.superfences + +extra: + version: + provider: mike + +plugins: +- search +- autorefs +- mkdocstrings: + default_handler: python + handlers: + python: + path: [src] + options: + heading_level: 2 + show_category_heading: true + members_order: source + show_submodules: false + docstring_style: google + show_if_no_docstring: true + show_signature_annotations: true + separate_signature: false + filters: + - "!^_" # exclude all members starting with _ + - 
"^_config_cls" + import: + - https://dharpa.org/kiara/latest/objects.inv + - https://dharpa.org/kiara_plugin.core_types/latest/objects.inv + - https://dharpa.org/kiara_plugin.tabular/latest/objects.inv + watch: + - "src" + enable_inventory: true + +- macros: + modules: + - kiara.doc.mkdocs_macros_cli + - kiara.doc.mkdocs_macros_kiara + +- gen-files: + scripts: + - scripts/documentation/gen_info_pages.py + - scripts/documentation/gen_api_doc_pages.py + +- literate-nav: + nav_file: SUMMARY.md +- section-index diff --git a/pixi.toml b/pixi.toml new file mode 100644 index 0000000..c8cb9e5 --- /dev/null +++ b/pixi.toml @@ -0,0 +1,18 @@ +[project] +name = "kiara-plugin-topic_modelling" +version = "0.1.0" +description = "A Kiara plugin toho create a Topic Modelling workflow." +authors = ["Mariella De Crouy Chanel "] +channels = ["conda-forge", "dharpa"] +platforms = ["linux-64", "win-64", "osx-64", "osx-arm64"] + +[tasks] +install-dev-env = "pip install -e '.[dev_utils]'" +pre-commit-check = ".pixi/env/bin/pre-commit run --all-files" +kiara = ".pixi/env/bin/kiara" +show-versions = ".pixi/env/bin/kiara --version" +delete-context = ".pixi/env/bin/kiara context delete" + +[dependencies] +python = "3.11.*" +pip = "23.3.*" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b989b4c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,243 @@ +[build-system] +requires = [ + "setuptools>=64", + "setuptools_scm[toml]>=8", + "wheel" +] +build-backend = "setuptools.build_meta" + +[options] +packages = { find = { where = "src", exclude = ["tests"] } } +include_package_data = true +package_dir = { "" = "src" } +zip_safe = false + +[tool.aliases] +build = "bdist_wheel" +release = "build upload" + +[tool.bdist_wheel] +universal = 0 + +[tool.devpi.upload] +no-vcs = 1 +formats = ["sdist", "bdist_wheel"] + +[project] +name = "kiara_plugin.topic_modelling" +description = "A Kiara plugin toho create a Topic Modelling workflow." 
+authors = [ + {name = "Mariella De Crouy Chanel", email = "mariella.decrouychanel@uni.lu"} +] +readme = "README.md" +requires-python = ">=3.8" +keywords = ["kiara"] +license = {text = "MPL-2.0"} +classifiers = [ + "Programming Language :: Python :: 3", + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12" +] +dependencies = [ + "kiara>=0.5.1,<0.6.0", + "kiara_plugin.core_types>=0.5.0,<0.6.0", +] +dynamic = ["version"] + +[project.optional-dependencies] +dev_documentation = [ + "kiara[dev_documentation]" +] +dev_testing = [ + "kiara[dev_testing]", +] +dev_utils = [ + "kiara[dev_utils]", +] +dev_all = [ + "kiara[dev_all]" +] +streamlit = [ + "kiara_plugin.streamlit" +] + +[project.entry-points."kiara.plugin"] +topic_modelling = "kiara_plugin.topic_modelling" + +[project.entry-points."kiara.data_types"] +topic_modelling = "kiara_plugin.topic_modelling:find_data_types" + +[project.entry-points."kiara.model_classes"] +topic_modelling = "kiara_plugin.topic_modelling:find_model_classes" + +[project.entry-points."kiara.modules"] +topic_modelling = "kiara_plugin.topic_modelling:find_modules" + +[project.entry-points."kiara.pipelines"] +topic_modelling = "kiara_plugin.topic_modelling:find_pipelines" + + +[project.urls] +homepage = "https://github.com/DHARPA-Project/kiara_plugin.topic_modelling" +documentation = "https://DHARPA-Project.github.io/kiara_plugin.topic_modelling" +repository = "https://github.com/DHARPA-Project/kiara_plugin.topic_modelling" + +[tool.black] +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + 
| build + | dist + | external +)/ +''' + +[tool.setuptools_scm] +write_to = "src/kiara_plugin/topic_modelling/version.txt" + +[tool.pytest.ini_options] +pythonpath = [ + "src" +] +addopts = "--verbose" +norecursedirs = [ + "dist", + "build", + ".tox" +] +testpaths = ["tests"] + +[tool.coverage.run] +branch = true +source = ["kiara_plugin.topic_modelling"] +relative_files = true + +[tool.coverage.paths] +source = [ + "src/", + "*/site-packages/" +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "if self\\.debug", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if typing.TYPE_CHECKING:", +] + +[tool.ruff] +line-length = 88 + +src = ["src", "tests"] + +select = [ + "E", + "F", + "RUF", + "I001", + "YTT", + "S", + "C4", + "T10", + "ISC", + "ICN", + "T20", + "Q", + "TID", + "PD", + "PLC", + "PLE", + "PLR", + "PLW", + "PIE", +] +ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PLR0912", "PLR0915", "PIE810"] + +fix = true +fixable = ["E", "F", "RUF100", "I001", "Q"] + +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", +] +per-file-ignores = { } + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +# Assume Python 3.10. +target-version = "py310" + +[tool.ruff.mccabe] +# Unlike Flake8, default to a complexity level of 10. 
+max-complexity = 10 + +[tool.ruff.isort] +known-first-party = ["kiara", "kiara_plugin"] + +[tool.ruff.pydocstyle] +convention = "google" + +[tool.pyright] +include = ["src"] +exclude = ["**/__pycache__"] +ignore = [] + +reportMissingImports = true +reportMissingTypeStubs = false + +executionEnvironments = [ +] + +[tool.mypy] +python_version = "3.9" +warn_return_any = true +warn_unused_configs = true +mypy_path = "src" +namespace_packages = false +plugins = [ + "pydantic.mypy" +] + + +# mypy per-module options: +[[tool.mypy.overrides]] +module = ["placholder.dummy.*"] +ignore_missing_imports = true diff --git a/scripts/documentation/gen_api_doc_pages.py b/scripts/documentation/gen_api_doc_pages.py new file mode 100644 index 0000000..c94e5ed --- /dev/null +++ b/scripts/documentation/gen_api_doc_pages.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +"""Generate the code reference pages and navigation.""" + +from pathlib import Path + +import mkdocs_gen_files + +nav = mkdocs_gen_files.Nav() + +for path in sorted(Path("src").rglob("*.py")): + module_path = path.relative_to("src").with_suffix("") + doc_path = path.relative_to("src").with_suffix(".md") + full_doc_path = Path("reference", doc_path) + + parts = list(module_path.parts) + + if parts[-1] == "__init__": + parts = parts[:-1] + elif parts[-1] == "__main__": + continue + + nav[parts] = doc_path # + + with mkdocs_gen_files.open(full_doc_path, "w") as fd: + ident = ".".join(parts) + print("::: " + ident, file=fd) + + mkdocs_gen_files.set_edit_path(full_doc_path, path) + +with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file: # + nav_file.writelines(nav.build_literate_nav()) # diff --git a/scripts/documentation/gen_info_pages.py b/scripts/documentation/gen_info_pages.py new file mode 100644 index 0000000..d09bc02 --- /dev/null +++ b/scripts/documentation/gen_info_pages.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2022-2022, Markus Binsteiner +# +# Mozilla Public License, version 2.0 
(see LICENSE or https://www.mozilla.org/en-US/MPL/2.0/) + +import builtins + +from kiara.context import Kiara, KiaraContextInfo +from kiara.doc.gen_info_pages import generate_detail_pages + +pkg_name = "kiara_plugin.topic_modelling" + +kiara: Kiara = Kiara.instance() +context_info = KiaraContextInfo.create_from_kiara_instance( + kiara=kiara, package_filter=pkg_name +) + +generate_detail_pages(context_info=context_info) + +builtins.plugin_package_context_info = context_info diff --git a/scripts/documentation/gen_module_doc.py b/scripts/documentation/gen_module_doc.py new file mode 100644 index 0000000..8e4b9ea --- /dev/null +++ b/scripts/documentation/gen_module_doc.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +import os + +import mkdocs_gen_files +from kiara.context import Kiara + +kiara = Kiara.instance() + +modules_file_path = os.path.join("modules_list.md") +modules_page_content = """# Available module types + +This page contains a list of all available *Kiara* module types, and their details. + +!!! note +The formatting here will be improved later on, for now this should be enough to get the important details of each module type. + +""" + +BASE_PACKAGE = "kiara_plugin.topic_modelling" + + + +for module_type in kiara.module_mgmt.find_modules_for_package(BASE_PACKAGE, include_pipelines=False).keys(): + + if module_type == "pipeline": + continue + + modules_page_content = modules_page_content + f"## ``{module_type}``\n\n" + modules_page_content = ( + modules_page_content + + "```\n{{ get_module_info('" + + module_type + + "') }}\n```\n\n" + ) + +with mkdocs_gen_files.open(modules_file_path, "w") as f: + f.write(modules_page_content) + +pipelines_file_path = os.path.join("pipelines_list.md") +pipelines_page_content = """# Available pipeline module types + +This page contains a list of all available *Kiara* pipeline module types, and their details. + +!!! 
note +The formatting here will be improved later on, for now this should be enough to get the important details of each module type. + +""" + +for module_type in kiara.module_mgmt.find_modules_for_package(BASE_PACKAGE, include_core_modules=False): + + if module_type == "pipeline": + continue + + pipelines_page_content = pipelines_page_content + f"## ``{module_type}``\n\n" + pipelines_page_content = ( + pipelines_page_content + + "```\n{{ get_module_info('" + + module_type + + "') }}\n```\n\n" + ) + +with mkdocs_gen_files.open(pipelines_file_path, "w") as f: + f.write(pipelines_page_content) + + diff --git a/src/kiara_plugin/topic_modelling/__init__.py b/src/kiara_plugin/topic_modelling/__init__.py new file mode 100644 index 0000000..5d6c55e --- /dev/null +++ b/src/kiara_plugin/topic_modelling/__init__.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for kiara_plugin.topic_modelling.""" + + +import os + +from kiara.utils.class_loading import ( + KiaraEntryPointItem, + find_data_types_under, + find_kiara_model_classes_under, + find_kiara_modules_under, + find_pipeline_base_path_for_module, +) + +__author__ = """Mariella De Crouy Chanel""" +__email__ = "mariella.decrouychanel@uni.lu" + + +KIARA_METADATA = { + "authors": [{"name": __author__, "email": __email__}], + "description": "Kiara modules for: topic_modelling", + "references": { + "source_repo": { + "desc": "The module package git repository.", + "url": "https://github.com/DHARPA-Project/kiara_plugin.topic_modelling", + }, + "documentation": { + "desc": "The url for the module package documentation.", + "url": "https://DHARPA-Project.github.io/kiara_plugin.topic_modelling/", + }, + }, + "tags": ["topic_modelling"], + "labels": {"package": "kiara_plugin.topic_modelling"}, +} + +find_modules: KiaraEntryPointItem = ( + find_kiara_modules_under, + "kiara_plugin.topic_modelling.modules", +) +find_model_classes: KiaraEntryPointItem = ( + find_kiara_model_classes_under, + 
"kiara_plugin.topic_modelling.models", +) +find_data_types: KiaraEntryPointItem = ( + find_data_types_under, + "kiara_plugin.topic_modelling.data_types", +) +find_pipelines: KiaraEntryPointItem = ( + find_pipeline_base_path_for_module, + "kiara_plugin.topic_modelling.pipelines", + KIARA_METADATA, +) + + +def get_version(): + from importlib.metadata import PackageNotFoundError, version + + try: + # Change here if project is renamed and does not equal the package name + dist_name = __name__ + __version__ = version(dist_name) + except PackageNotFoundError: + + try: + version_file = os.path.join(os.path.dirname(__file__), "version.txt") + + if os.path.exists(version_file): + with open(version_file, encoding="utf-8") as vf: + __version__ = vf.read() + else: + __version__ = "unknown" + + except (Exception): + pass + + if __version__ is None: + __version__ = "unknown" + + return __version__ diff --git a/src/kiara_plugin/topic_modelling/data_types.py b/src/kiara_plugin/topic_modelling/data_types.py new file mode 100644 index 0000000..957c43b --- /dev/null +++ b/src/kiara_plugin/topic_modelling/data_types.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +"""This module contains the value type classes that are used in the ``kiara_plugin.topic_modelling`` package. +""" diff --git a/src/kiara_plugin/topic_modelling/models.py b/src/kiara_plugin/topic_modelling/models.py new file mode 100644 index 0000000..6ab841b --- /dev/null +++ b/src/kiara_plugin/topic_modelling/models.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- + +"""This module contains the metadata (and other) models that are used in the ``kiara_plugin.topic_modelling`` package. + +Those models are convenience wrappers that make it easier for *kiara* to find, create, manage and version metadata -- but also +other type of models -- that is attached to data, as well as *kiara* modules. + +Metadata models must be a sub-class of [kiara.metadata.MetadataModel][kiara.metadata.MetadataModel]. 
class ExampleModuleConfig(KiaraModuleConfig):
    """Configuration for ``ExampleModule``."""

    # Text placed between the two input strings when they are joined.
    separator: str = Field(
        description="The separator between the two strings.", default=" - "
    )


class ExampleModule(KiaraModule):
    """A very simple example module; concatenate two strings.

    The purpose of this module is to show the main elements of a [`KiaraModule`][kiara.modules.KiaraModule]:

    - ***the (optional) configuration class***: must inherit from [`KiaraModuleConfig`][kiara.modules.KiaraModuleConfig], and the config class must be set as the `_config_cls` attribute
      on the `KiaraModule` class. Configuration values can be retrieved via the [`self.get_config_value(key)`][kiara.modules.KiaraModule.get_config_value] method
    - ***the inputs description***: must return a dictionary, containing the input name(s) as keys, and another dictionary containing type_name information
      and documentation about the input data as value
    - ***the outputs description***: must return a dictionary, containing the output name(s) as keys, and another dictionary containing type_name information
      and documentation about the output data as value
    - ***the ``process`` method***: this is where the actual work gets done. Input data can be accessed via ``inputs.get_value_data(key)``, results
      can be set with the ``outputs.set_value(key, value)`` method

    Example:

        This example module can be tested on the commandline with the ``kiara run`` command:

        ```
        kiara run topic_modelling.example text_1="xxx" text_2="yyy"
        ```
    """

    _config_cls = ExampleModuleConfig
    _module_type_name = "topic_modelling.example"

    def create_inputs_schema(
        self,
    ) -> ValueMapSchema:
        """Describe the two string inputs, ``text_1`` and ``text_2``."""

        inputs = {
            "text_1": {"type": "string", "doc": "The first text."},
            "text_2": {"type": "string", "doc": "The second text."},
        }

        return inputs

    def create_outputs_schema(
        self,
    ) -> ValueMapSchema:
        """Describe the single string output, ``text``."""

        outputs = {
            "text": {
                "type": "string",
                "doc": "The concatenated text.",
            }
        }
        return outputs

    def process(self, inputs: ValueMap, outputs: ValueMap) -> None:
        """Join ``text_1`` and ``text_2`` with the configured separator and store the result as ``text``."""

        separator = self.get_config_value("separator")

        text_1 = inputs.get_value_data("text_1")
        text_2 = inputs.get_value_data("text_2")

        result = text_1 + separator + text_2
        outputs.set_value("text", result)
+""" diff --git a/src/kiara_plugin/topic_modelling/py.typed b/src/kiara_plugin/topic_modelling/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/kiara_plugin/topic_modelling/resources/.gitkeep b/src/kiara_plugin/topic_modelling/resources/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..949ca3d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + Dummy conftest.py for kiara_plugin.language_processing. + + If you don't know what this is for, just leave it empty. + Read more about conftest.py under: + https://pytest.org/latest/plugins.html +""" +# import pytest + + +import os +import tempfile +import uuid +from pathlib import Path + +import pytest + +from kiara.context import KiaraConfig +from kiara.interfaces.python_api import KiaraAPI +from kiara.interfaces.python_api.models.job import JobTest +from kiara.utils.testing import get_tests_for_job, list_job_descs + +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +JOBS_FOLDER = Path(os.path.join(ROOT_DIR, "examples", "jobs")) + + +def create_temp_dir(): + session_id = str(uuid.uuid4()) + TEMP_DIR = Path(os.path.join(tempfile.gettempdir(), "kiara_tests")) + + instance_path = os.path.join( + TEMP_DIR.resolve().absolute(), f"instance_{session_id}" + ) + return instance_path + + +@pytest.fixture +def kiara_api() -> KiaraAPI: + + instance_path = create_temp_dir() + kc = KiaraConfig.create_in_folder(instance_path) + api = KiaraAPI(kc) + return api + + +@pytest.fixture(params=list_job_descs(JOBS_FOLDER)) +def example_job_test(request, kiara_api) -> JobTest: + + job_tests_folder = Path(os.path.join(ROOT_DIR, "tests", "job_tests")) + + job_desc = request.param + tests = get_tests_for_job( + job_alias=job_desc.job_alias, job_tests_folder=job_tests_folder + ) + + job_test = JobTest(kiara_api=kiara_api, job_desc=job_desc, tests=tests) + 
def check_results(outputs: ValueMap):
    """Check the job result as a whole.

    Naming the argument ``outputs`` makes the complete result set available
    to the check.
    """
    produced_fields = list(outputs.field_names)
    assert produced_fields == ["greeting"]


def check_greeting(greeting: Value):
    """Check a single output value.

    Naming the argument after an output field (here ``greeting``) makes just
    that value available to the check.
    """
    assert greeting.data_type_name == "string"
    assert greeting.data == "Hello beautiful world!"

    class_metadata = greeting.get_property_data("metadata.python_class")
    assert class_metadata.python_class.python_class_name == "str"
+greeting::properties::metadata.python_class::python_class::python_class_name: "str" diff --git a/tests/resources/.gitkeep b/tests/resources/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_job_descs.py b/tests/test_job_descs.py new file mode 100644 index 0000000..2f39954 --- /dev/null +++ b/tests/test_job_descs.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +from kiara.interfaces.python_api.models.job import JobTest + +"""Auto-generated tests that use job descriptions in the 'examples/jobs' folder and run them. + +To test against the outputs of those jobs, add files into subfolders that are called the same as the job (minus the file extension), under the `tests/job_tests` folder. + +To test values directly, add a file called `outputs.json` or `outputs.yaml` into that folder, containing a 'dict' +data structure with the value attribute to test as key and the expected value as value. + +Most likely you will want to test against a value property, which would be done like so (in yaml): + +```yaml +network_data::properties::metadata.graph_properties::number_of_self_loops: 1 +``` + +The format is: +```yaml +<field_name>::properties::<property_name>::[<key>]::[<sub_key>]: <expected_value> +``` + +In case of scalars, you can also test against the value directly: +``` +output_string::data: "some string" +``` + +If the results are too complex to test against in this way, you can also write Python code. Add a file with a random +name and a `.py` extension (outputs.py is a good choice if you only have one). In that file, each function will be run +against the job results. You can control which arguments will be passed to the function by naming the arguments: + +- `kiara_api`: a kiara api instance will be passed in +- `outputs`: the whole result of the job will be passed in (of type `ValueMap`) +- the field name of the value you are interested in (e.g. `table`, `network_data`, depends on the job) + +Specifying any other argument name will throw an error.
+""" + + +def test_job_desc(example_job_test: JobTest): + + example_job_test.run_tests() diff --git a/tests/test_kiara_modules_default.py b/tests/test_kiara_modules_default.py new file mode 100755 index 0000000..9bdeb1b --- /dev/null +++ b/tests/test_kiara_modules_default.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Tests for `kiara_plugin.topic_modelling` package.""" + +import kiara_plugin.topic_modelling +import pytest # noqa + + +def test_assert(): + + assert kiara_plugin.topic_modelling.get_version() is not None