diff --git a/.all-contributorsrc b/.all-contributorsrc index 9a80f0d73..03ce183d5 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -14,7 +14,8 @@ "code", "question", "design", - "bug" + "bug", + "review" ] }, { @@ -152,7 +153,9 @@ "avatar_url": "https://avatars0.githubusercontent.com/u/13706448?v=4", "profile": "https://github.com/eurunuela", "contributions": [ - "code" + "code", + "review", + "ideas" ] }, { @@ -164,11 +167,30 @@ "doc", "code" ] + }, + { + "login": "Islast", + "name": "Isla", + "avatar_url": "https://avatars2.githubusercontent.com/u/23707851?v=4", + "profile": "http://isla.st", + "contributions": [ + "review" + ] + }, + { + "login": "mjversluis", + "name": "mjversluis", + "avatar_url": "https://avatars0.githubusercontent.com/u/32125111?v=4", + "profile": "https://github.com/mjversluis", + "contributions": [ + "doc" + ] } ], "contributorsPerLine": 5, "projectName": "tedana", "projectOwner": "ME-ICA", "repoType": "github", - "repoHost": "https://github.com" + "repoHost": "https://github.com", + "skipCi": true } diff --git a/.circleci/config.yml b/.circleci/config.yml index d4bcd86a5..d3a2a2fb9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,4 @@ -# Python CircleCI 2.0 configuration file +# Python CircleCI 2.1 configuration file # # Check https://circleci.com/docs/2.0/language-python/ for more details # @@ -14,7 +14,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - run: name: Generate environment command: | @@ -24,7 +24,7 @@ jobs: pip install -r dev_requirements.txt fi - save_cache: - key: conda-py37-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} paths: - /opt/conda/envs/tedana_py37 @@ -35,7 +35,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py35-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py35-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - run: name: Generate environment command: | @@ -53,7 +53,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py35 - save_cache: - key: conda-py35-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py35-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} paths: - /opt/conda/envs/tedana_py35 - persist_to_workspace: @@ -68,7 +68,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py36-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py36-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - run: name: Generate environment command: | @@ -86,7 +86,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py36 - save_cache: - key: conda-py36-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py36-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} paths: - /opt/conda/envs/tedana_py36 - persist_to_workspace: @@ -101,7 +101,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - run: name: Running unit tests command: | @@ -122,7 +122,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - 
run: name: Style check command: | @@ -137,7 +137,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - run: name: Run integration tests no_output_timeout: 40m @@ -153,6 +153,31 @@ jobs: root: /tmp paths: - src/coverage/.coverage.three-echo + + four-echo: + docker: + - image: continuumio/miniconda3 + working_directory: /tmp/src/tedana + steps: + - checkout + - restore_cache: + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} + - run: + name: Run integration tests + no_output_timeout: 40m + command: | + apt-get install -yqq make + source activate tedana_py37 # depends on makeenv_37 + make four-echo + mkdir /tmp/src/coverage + mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.four-echo + - store_artifacts: + path: /tmp/data + - persist_to_workspace: + root: /tmp + paths: + - src/coverage/.coverage.four-echo + five-echo: docker: - image: continuumio/miniconda3 @@ -160,7 +185,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - run: name: Run integration tests no_output_timeout: 40m @@ -177,6 +202,30 @@ jobs: paths: - src/coverage/.coverage.five-echo + t2smap: + docker: + - image: continuumio/miniconda3 + working_directory: /tmp/src/tedana + steps: + - checkout + - restore_cache: + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} + - run: + name: Run integration tests + no_output_timeout: 40m + command: | + apt-get install -yqq make + source activate tedana_py37 # depends on makeenv_37 + make t2smap + mkdir /tmp/src/coverage + mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.t2smap + - store_artifacts: + path: /tmp/data + - persist_to_workspace: + root: /tmp + paths: + - src/coverage/.coverage.t2smap + merge_coverage: working_directory: /tmp/src/tedana docker: @@ -186,7 +235,7 @@ jobs: at: /tmp - checkout - restore_cache: - key: conda-py37-v1-{{ checksum "dev_requirements.txt" }} + key: conda-py37-v1-{{ checksum "dev_requirements.txt" }}-{{ checksum "requirements.txt" }} - run: name: Merge coverage files command: | @@ -216,13 +265,21 @@ workflows: - three-echo: requires: - makeenv_37 + - four-echo: + requires: + - makeenv_37 - five-echo: requires: - makeenv_37 + - t2smap: + requires: + - makeenv_37 - merge_coverage: requires: - unittest_35 - unittest_36 - unittest_37 - three-echo + - four-echo - five-echo + - t2smap diff --git a/.github/stale.yml b/.github/stale.yml index 1807c9bee..bfc0594ec 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,16 +1,19 @@ # Number of days of inactivity before an issue becomes stale daysUntilStale: 90 # Number of days of inactivity before a stale issue is closed -daysUntilClose: 7 +daysUntilClose: 600 # Issues with these labels will never be considered stale exemptLabels: - community + - bug + - paused + - good first issue # Label to use when marking an issue as stale staleLabel: stale # Comment to post when marking an issue as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had - recent activity. It will be closed if no further activity occurs. Thank you - for your contributions to tedana:tada: ! + any activity in 90 days. It will be closed in 600 days if no further activity + occurs. 
Thank you for your contributions to tedana:tada: ! # Comment to post when closing a stale issue. Set to `false` to disable -closeComment: false \ No newline at end of file +closeComment: false diff --git a/.gitignore b/.gitignore index 41e4de67c..51be73d77 100644 --- a/.gitignore +++ b/.gitignore @@ -104,4 +104,8 @@ ENV/ .mypy_cache/ # vscode -.vscode \ No newline at end of file +.vscode + +# jupyter notebooks +.ipynb_checkpoints/ +*.ipynb \ No newline at end of file diff --git a/.zenodo.json b/.zenodo.json index e8746aa82..d273c53c7 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -35,6 +35,6 @@ "neuroimaging", "fMRI" ], - "license": "lgpl-2.1", + "license": "LGPL-2.1", "upload_type": "software" } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 314bd9b46..3116ab1ac 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,13 +11,16 @@ Here are some [instructions][link_signupinstructions]. Already know what you're looking for in this guide? Jump to the following sections: * [Joining the conversation](#joining-the-conversation) - * [Monthly developer calls](#monthly-developer-calls) * [Contributing small documentation changes](#contributing-small-documentation-changes) * [Contributing through Github](#contributing-through-github) * [Understanding issues, milestones, and project boards](#understanding-issues-milestones-and-project-boards) +* [Installing in editable mode](#3-Run-the-developer-setup) * [Making a change](#making-a-change) +* [Testing your change](#5-Test-your-changes) +* [Viewing Documentation Locally](#Changes-to-documentation) * [Structuring contributions](#style-guide) -* [Recognizing contributors](#recognizing-contributions) +* [Recognizing contributors](#Recognizing-contributors) +* [Monthly calls and testing guidelines][link_developing_rtd] Don't know where to get started? Read [Joining the conversation](#joining-the-conversation) and pop into @@ -33,15 +36,6 @@ We also maintain a [gitter chat room][link_gitter] for more informal conversatio There is significant cross-talk between these two spaces, and we look forward to hearing from you in either venue! As a reminder, we expect all contributions to `tedana` to adhere to our [code of conduct][link_coc]. -### Monthly developer calls - -We run monthly developer calls via Zoom. -You can see the schedule via the `tedana` [google calendar](https://calendar.google.com/calendar/embed?src=pl6vb4t9fck3k6mdo2mok53iss%40group.calendar.google.com). -An agenda will be circulated in the gitter channel in advance of the meeting. - -Everyone is welcome. -We look forward to meeting you there :hibiscus: - ## Contributing small documentation changes If you are new to GitHub and just have a small documentation change recommendation, please submit it to [our e-mail address](mailto:tedana.devs@gmail.com) @@ -92,11 +86,6 @@ towards ``tedana``'s shared vision. We might have just missed it, or we might not (yet) see how it aligns with the overall project structure. These conversations are important to have, and we are excited to hear your perspective! -* The **project board** is an automated [Kanban board][link_kanban] to keep track of what is currently underway -(in progress), what has been completed (done), and what remains to be done for a specific release. -The ``tedana`` maintainers use this board to keep an eye on how tasks are progressing week by week. 
- - ### Issue labels The current list of labels are [here][link_labels] and include: @@ -151,30 +140,66 @@ Once you've run this, your repository should be set for most changes (i.e., you ### 4. Make the changes you've discussed -Try to keep the changes focused to the issue. We've found that working on a [new branch][link_branches] for each issue makes it easier to keep your changes targeted. +Try to keep the changes focused to the issue. +We've found that working on a [new branch][link_branches] for each issue makes it easier to keep your changes targeted. +Using a new branch allows you to follow the standard GitHub workflow when making changes. +[This guide][link_gitworkflow] provides a useful overview for this workflow. +Before making a new branch, make sure your master is up to date with the following commands: -Using a new branch allows you to follow the standard "fork/branch/commit/pull-request/merge" GitHub workflow when making changes. [This guide][link_gitworkflow] provides a useful overview for this workflow. +``` +git checkout master +git fetch upstream master +git merge upstream/master +``` + +Then, make your new branch. + +``` +git checkout -b MYBRANCH +``` + +Please make sure to review the `tedana` [style conventions](#style-guide) and test your changes. + +If you are new to ``git`` and would like to work in a graphical user interface (GUI), there are several GUI git clients that you may find helpful, such as +- [GitKraken][link_git_kraken] +- [GitHub Desktop][link_github_desktop] +- [SourceTree][link_source_tree] -Before creating your pull request, please make sure to review the `tedana` [style conventions](#style-guide). ### 5. Test your changes -#### Changes to code +You can run style checks by running the following: +``` +flake8 $TEDANADIR/tedana +``` -For changes to the codebase, we suggest using our development Docker container which will run all the necessary checks and tests to ensure your code is ready to be merged into `tedana`! -(This does require that you have a local install of [Docker](https://www.docker.com/products/docker-desktop).) -You can run all the checks with: +and unit/integration tests by running `pytest` (more details below). +If you know a file will test your change, you can run only that test (see "One test file only" below). +Alternatively, running all unit tests is relatively quick and should be fairly comprehensive. +Running all `pytest` tests will be useful for pre-pushing checks. +Regardless, when you open a Pull Request, we use CircleCI to run all unit and integration tests. +All tests; final checks before pushing +``` +pytest $TEDANADIR/tedana/tests +``` +Unit tests and linting only ``` -docker run --tty --rm -v ${PWD}:/tedana tedana/tedana-dev:latest run_all_tests +pytest --skipintegration $TEDANADIR/tedana/tests +``` +One test file only +``` +pytest $TEDANADIR/tedana/tests/test_file.py +``` +Test one function in a file +``` +pytest -k my_function $TEDANADIR/tedana/tests/test_file.py ``` from within your local `tedana` repository. -(**N.B.** It is possible that, depending on your Docker setup, you may need to increase the amount of memory available to Docker in order to run the `tedana` test suite. -You can either do this permanently by editing your Docker settings or temporarily by adding `--memory=4g` to the above `docker run` command.) - -This will print out a number of different status update messages as the tests run, but if you see `"FINISHED RUNNING ALL TESTS! GREAT SUCCESS"` then it means everything finished succesfully. 
-If not, there should be some helpful outputs that specify which tests failed. +The test run will indicate the number of passes and failures. +Most often, the failures give enough information to determine the cause; if not, you can +refer to the [pytest documentation][link_pytest] for more details on the failure. #### Changes to documentation @@ -189,11 +214,23 @@ from the `docs` directory in your local `tedana` repository. You should then be When opening the pull request, we ask that you follow some [specific conventions](#pull-requests). We outline these below. After you have submitted the pull request, a member of the development team will review your changes to confirm that they can be merged into the main code base. +When you have two approving reviewers and all tests are passing, your pull request may be merged. -After successful merging of the pull request, remember to [keep your fork up to date][link_updateupstreamwiki] with the master `tedana` repository and to delete the branch on your fork that was used for the merged pull request. ### Pull Requests +To push your changes to your remote, use + +``` +git push -u origin MYBRANCH +``` + +and GitHub will respond by giving you a link to open a pull request to +ME-ICA/tedana. +Once you have pushed changes to the repository, please do not use commands such as rebase and +amend, as they will rewrite your history and make it difficult for developers to work with you on +your pull request. You can read more about that [here][link_git_rewriting]. + To improve understanding pull requests "at a glance", we encourage the use of several standardized tags. When opening a pull request, please use at least one of the following prefixes: @@ -202,20 +239,32 @@ When opening a pull request, please use at least one of the following prefixes: * **[ENH]** for enhancements * **[FIX]** for bug fixes * **[REF]** for refactoring existing code -* **[STY]** for stylistic changes * **[TST]** for new or updated tests, and -* **[WIP]** for changes which are not yet ready to be merged +* **[MAINT]** for maintenance of code + +You can also combine the tags above, for example if you are updating both a test and +the documentation: **[TST, DOC]**. Pull requests should be submitted early and often! -If your pull request is not yet ready to be merged, please also include the **[WIP]** prefix. +If your pull request is not yet ready to be merged, please use [draft PRs][link_draftpr] This tells the development team that your pull request is a "work-in-progress", and that you plan to continue working on it. -We request that you do not use the Draft PR feature at this time, -as it interferes with our Continuous Integration tool, Travis. - -You can also combine the tags above, for example if you are updating both a test and -the documentation: **[TST, DOC]**. -If you're still working on the pull request that prefix would be **[WIP, TST, DOC]**. +If no comments or commits occur on an open Pull Request, stale-bot will comment in order to remind +both you and the maintainers that the pull request is open. +If at this time you are awaiting a developer response, please ping them to remind them. +If you are no longer interested in working on the pull request, let us know and we will ask to +continue working on your branch. +Thanks for contributing! 
+ +### Pull Request Checklist (For Fastest Review): +- [ ] Check that all tests are passing ("All tests passsed") +- [ ] Make sure you have docstrings for any new functions +- [ ] Make sure that docstrings are updated for edited functions +- [ ] Make sure you note any issues that will be closed by your PR +- [ ] Take a look at the automatically generated readthedocs for your PR (Show all checks -> continuous-documentation/readthedocs -> Details) + +### Comprehensive Developer Guide +For additional, in-depth information on contributing to `tedana`, please see our Developing Guidelines on [readthedocs][link_developing_rtd]. ## Style Guide @@ -270,7 +319,7 @@ You're awesome. :wave::smiley: [writing_formatting_github]: https://help.github.com/articles/getting-started-with-writing-and-formatting-on-github [markdown]: https://daringfireball.net/projects/markdown [rick_roll]: https://www.youtube.com/watch?v=dQw4w9WgXcQ -[restructuredtext]: http://docutils.sourceforge.net/rst.html#user-documentation +[restructuredtext]: http://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html [sphinx]: http://www.sphinx-doc.org/en/master/index.html [readthedocs]: https://docs.readthedocs.io/en/latest/index.html @@ -289,6 +338,7 @@ You're awesome. :wave::smiley: [link_kanban]: https://en.wikipedia.org/wiki/Kanban_board [link_pullrequest]: https://help.github.com/articles/creating-a-pull-request/ +[link_draftpr]: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests#draft-pull-requests [link_fork]: https://help.github.com/articles/fork-a-repo/ [link_pushpullblog]: https://www.igvita.com/2011/12/19/dont-push-your-pull-requests/ [link_updateupstreamwiki]: https://help.github.com/articles/syncing-a-fork/ @@ -306,3 +356,10 @@ You're awesome. 
:wave::smiley: [link_all-contributors-bot]: https://allcontributors.org/docs/en/bot/overview [link_all-contributors-bot-usage]: https://allcontributors.org/docs/en/bot/usage [link_stemmrolemodels]: https://github.com/KirstieJane/STEMMRoleModels +[link_pytest]: https://docs.pytest.org/en/latest/usage.html +[link_developing_rtd]: https://tedana.readthedocs.io/en/latest/developing.html + +[link_git_kraken]: https://www.gitkraken.com/ +[link_github_desktop]: https://desktop.github.com/ +[link_source_tree]: https://desktop.github.com/ +[link_git_rewriting]: https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History diff --git a/Makefile b/Makefile index ad706eac5..da526fd16 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .PHONY: all lint -all_tests: lint unittest three-echo five-echo +all_tests: lint unittest three-echo four-echo five-echo t2smap help: @echo "Please use 'make ' where is one of:" @@ -8,6 +8,7 @@ help: @echo " unittest to run unit tests on tedana" @echo " three-echo to run the three-echo test set on tedana" @echo " five-echo to run the five-echo test set on tedana" + @echo " t2smap to run the t2smap integration test set on tedana" @echo " all_tests to run 'lint', 'unittest', and 'integration'" lint: @@ -17,8 +18,13 @@ unittest: @py.test --skipintegration --cov-append --cov-report term-missing --cov=tedana tedana/ three-echo: - @py.test --cov-append --cov-report term-missing --cov=tedana -k test_integration_three_echo tedana/tests/test_integration.py + @py.test --log-cli-level=20 --cov-append --cov-report term-missing --cov=tedana -k test_integration_three_echo tedana/tests/test_integration.py + +four-echo: + @py.test --log-cli-level=20 --cov-append --cov-report term-missing --cov=tedana -k test_integration_four_echo tedana/tests/test_integration.py five-echo: - @py.test --cov-append --cov-report term-missing --cov=tedana -k test_integration_five_echo tedana/tests/test_integration.py + @py.test --log-cli-level=20 --cov-append --cov-report term-missing --cov=tedana -k test_integration_five_echo tedana/tests/test_integration.py +t2smap: + @py.test --log-cli-level=20 --cov-append --cov-report term-missing --cov=tedana -k test_integration_t2smap tedana/tests/test_integration.py diff --git a/README.md b/README.md index b92518d07..928207a83 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ multi-echo functional magnetic resonance imaging (fMRI) data. 
[![CircleCI](https://circleci.com/gh/ME-ICA/tedana.svg?style=shield)](https://circleci.com/gh/ME-ICA/tedana) [![Documentation Status](https://readthedocs.org/projects/tedana/badge/?version=latest)](http://tedana.readthedocs.io/en/latest/?badge=latest) [![Codecov](https://codecov.io/gh/me-ica/tedana/branch/master/graph/badge.svg)](https://codecov.io/gh/me-ica/tedana) +[![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/ME-ICA/tedana.svg)](http://isitmaintained.com/project/ME-ICA/tedana "Average time to resolve an issue") +[![Percentage of issues still open](http://isitmaintained.com/badge/open/ME-ICA/tedana.svg)](http://isitmaintained.com/project/ME-ICA/tedana "Percentage of issues still open") [![Join the chat at https://gitter.im/ME-ICA/tedana](https://badges.gitter.im/ME-ICA/tedana.svg)](https://gitter.im/ME-ICA/tedana?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Join our tinyletter mailing list](https://img.shields.io/badge/receive-our%20newsletter%20โค%EF%B8%8F-blueviolet.svg)](https://tinyletter.com/tedana-devs) [![All Contributors](https://img.shields.io/badge/all_contributors-14-orange.svg?style=flat-square)](#contributors) @@ -97,30 +99,37 @@ We ask that all contributors to ``tedana`` across all project-related spaces (in Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): - + + - - - - - + + + + + - - - - - + + + + + - - - - + + + + + + + +
[README.md contributors table: the HTML <table> markup and the diff's +/- prefixes were lost in extraction; the recoverable row content is listed below with names and emoji re-encoded.]

Removed rows:
Logan Dowdle: 💻 💬 🎨 🐛
Elizabeth DuPre: 💻 📖 🤔 🚇 👀 💡 ⚠️ 💬
Javier Gonzalez-Castillo: 🤔 💻 🎨
Dan Handwerker: 🎨 📖 💡 👀
Prantik Kundu: 💻 🤔
Ross Markello: 💻 🚇 💬
Taylor Salo: 💻 🤔 📖 ✅ 💬 🐛 ⚠️ 👀
Joshua Teves: 📆 📖 👀 🚧 💻
Kirstie Whitaker: 📖 📆 👀 📢
Monica Yao: 📖 ⚠️
Stephan Heunis: 📖
Benoît Béranger: 💻
Eneko Uruñuela: 💻
Cesar Caballero Gaudes: 📖 💻

Added rows:
Logan Dowdle: 💻 💬 🎨 🐛 👀
Elizabeth DuPre: 💻 📖 🤔 🚇 👀 💡 ⚠️ 💬
Javier Gonzalez-Castillo: 🤔 💻 🎨
Dan Handwerker: 🎨 📖 💡 👀
Prantik Kundu: 💻 🤔
Ross Markello: 💻 🚇 💬
Taylor Salo: 💻 🤔 📖 ✅ 💬 🐛 ⚠️ 👀
Joshua Teves: 📆 📖 👀 🚧 💻
Kirstie Whitaker: 📖 📆 👀 📢
Monica Yao: 📖 ⚠️
Stephan Heunis: 📖
Benoît Béranger: 💻
Eneko Uruñuela: 💻 👀 🤔
Cesar Caballero Gaudes: 📖 💻
Isla: 👀
mjversluis: 📖
+ + This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 000000000..61667f5f9 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,5 @@ +ignore: + - "tedana/tests/" + - "tedana/due.py" + - "tedana/_version.py" + - "tedana/info.py" diff --git a/dev_requirements.txt b/dev_requirements.txt index 608d2780b..429e89412 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -2,9 +2,10 @@ sphinx sphinx-argparse sphinx_rtd_theme -coverage +coverage<5.0 flake8>=3.7 numpydoc pytest pytest-cov requests +sphinx diff --git a/dev_tools/run_tests.sh b/dev_tools/run_tests.sh index 6d1181097..b771b89fd 100644 --- a/dev_tools/run_tests.sh +++ b/dev_tools/run_tests.sh @@ -27,6 +27,14 @@ run_three_echo_test() { cprint "THREE-ECHO TEST PASSED !" } +run_four_echo_test() { + # + # Runs tedana four-echo test + cprint "RUNNING four-ECHO TEST" + make four-echo + cprint "FOUR-ECHO TEST PASSED !" +} + run_five_echo_test() { # # Runs tedana five-echo test @@ -66,6 +74,7 @@ run_all_tests() { run_lint_tests run_unit_tests run_three_echo_test + run_four_echo_test run_five_echo_test cprint "FINISHED RUNNING ALL TESTS -- GREAT SUCCESS !" diff --git a/docs/acquisition.rst b/docs/acquisition.rst new file mode 100644 index 000000000..8cceb6803 --- /dev/null +++ b/docs/acquisition.rst @@ -0,0 +1,181 @@ +Acquiring multi-echo data +========================= + +Available multi-echo fMRI sequences +----------------------------------- +We have attempted to compile some basic multi-echo fMRI protocols in an `OSF project`_. +The parameter choices in these protocols run and seem reasonable, but they have +not been optimized for a specific situation. +They are a good starting point for someone designing a study, but should not be +considered canonical. +If you would like to use one of them, please customize it for your own purposes +and make sure to run pilot scans to test your choices. + +.. _OSF project: https://osf.io/ebkrp/ + +Siemens +``````` +**For Siemens** users, there are two options for Works In Progress (WIPs) Sequences. + +* | The Center for Magnetic Resonance Research at the University of Minnesota + | provides a custom MR sequence that allows users to collect multiple echoes + | (termed **Contrasts**). The sequence and documentation can be `found here`_. + | For details on obtaining a license follow `this link`_. + | By default the number of contrasts is 1, yielding a single-echo sequence. + | In order to collect multiple echoes, increase number of Contrasts on the + | **Sequence Tab, Part 1** on the MR console. +* | The Martinos Center at Harvard also has a MR sequence available, with the + | details `available here`_. The number of echoes can be specified on the + | **Sequence, Special** tab in this sequence. + +.. _found here: https://www.cmrr.umn.edu/multiband/ +.. _this link: http://license.umn.edu/technologies/cmrr_center-for-magnetic-resonance-research-software-for-siemens-mri-scanners +.. 
_available here: https://www.nmr.mgh.harvard.edu/software/c2p/sms + + +GE +`` +**For GE users**, there are currently two sharable pulse sequences: + +* Multi-echo EPI (MEPI) โ€“ Software releases: DV24, MP24 and DV25 (with offline recon) +* | Hyperband Multi-echo EPI (HyperMEPI) - Software releases: DV26, MP26, DV27, RX27 + | (here hyperband can be deactivated to do simple Multi-echo EPI โ€“ online recon) + +Please reach out to the GE Research Operation team or each pulse sequenceโ€™s +author to begin the process of obtaining this software. +More information can be found on the `GE Collaboration Portal`_ + +Once logged in, go to Groups > GE Works-in-Progress you can find the description +of the current ATSM (i.e. prototypes). + +.. _GE Collaboration Portal: https://collaborate.mr.gehealthcare.com + +Philips +``````` +**For Philips** users, sequences can be defined using product software. + +Multi-echo EPI (ME-EPI) can be acquired using the product software and can be combined with +SENSE parallel imaging and MultiBand. +The combination with MultiBand requires a SW release >R5.1 and MultiBand functionality to be present. +No default ME-EPI are provided, but existing single-echo EPI sequences from the BOLD fMRI folder can be +modified into multi-echo sequences by increasing the number of echoes. +As a starting point to develop a 3 echo EPI protocol start by opening the default fMRI protocol and +modify the following: increase number of echoes to 3 on the Contrast tab, set SENSE = 3, MB-SENSE = 3, +set to 3mm isotropic voxels and adjust TEs to your preference. + + +Other available multi-echo MRI sequences +---------------------------------------- +In addition to ME-fMRI, other MR sequences benefit from acquiring multiple +echoes, including T1-weighted imaging (MEMPRAGE) and susceptibility weighted imaging. +While most of these kinds of sequences fall outside the purview of this documentation, +quantitative T2* mapping is relevant since a baseline T2* map is used in several +processing steps including :ref:`optimal combination`. +While the T2* map estimated directly from fMRI time series is noisy, no current +study quantifies the benefit to optimal combination or tedana denoising if a +higher quality T2* map is used. +Some benefit is likely, so, if a T2* map is independently calculated, it can be +used as an input to many functions in the tedana workflow. + +.. warning:: + While tedana allows the input of a T2* map from any source, and a more + accurate T2* map should lead to better results, this hasn't been + systematically evaluated yet. + +There are many ways to calculate T2* maps, with some using multi-echo acquisitions. +We are not presenting an expansive review of this literature here, +but `Cohen-Adad et al. (2012)`_ and `Ruuth et al. (2019)`_ are good places to start +learning more about this topic. + +.. _Cohen-Adad et al. (2012): https://doi.org/10.1016/j.neuroimage.2012.01.053 +.. _Ruuth et al. (2019): https://doi.org/10.1016/j.ejro.2018.12.006 + + +Acquisition parameter recommendations +------------------------------------- +There is no empirically tested best parameter set for multi-echo fMRI acquisition. +The guidelines for optimizing parameters are similar to single-echo fMRI. +For multi-echo fMRI, the same factors that may guide priorities for single echo +fMRI sequences are also relevant. 
+Choose sequence parameters that meet the priorities of a study with regards to spatial resolution, +spatial coverage, sample rate, signal-to-noise ratio, signal drop-out, distortion, and artifacts. + +A minimum of 3 echoes is required for running the current implementation fo TE-dependent denoising in +``tedana``. +It may be useful to have at least one echo that is earlier and one echo that is later than the +TE one would use for single-echo T2* weighted fMRI. + +.. note:: + This is in contrast to the **dual echo** denoising method which uses a very early (~5ms) + first echo in order to clean data. For more information on this method, see `Bright and Murphy`_ (2013). + +.. _Bright and Murphy: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518782/ + +More than 3 echoes may be useful, because that would allow for more accurate +estimates of BOLD and non-BOLD weighted fluctuations, but more echoes have an +additional time cost, which would result in either less spatiotemporal coverage +or more acceleration. +Where the benefits of more echoes balance out the additional costs is an open research question. + +We are not recommending specific parameter options at this time. +There are multiple ways to balance the slight time cost from the added echoes that have +resulted in research publications. +We suggest new multi-echo fMRI users examine the :ref:`spreadsheet of publications` that use +multi-echo fMRI to identify studies with similar acquisition priorities, +and use the parameters from those studies as a starting point. +More complete recommendations +and guidelines are discussed in the `appendix`_ of Dipasquale et al, 2017. + +.. _appendix: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0173289 + +.. note:: + In order to increase the number of contrasts ("echoes") you may need to first increase the TR, shorten the + first TE and/or enable in-plane acceleration. + For typically used parameters see the **ME-fMRI parameters** section below. + + +.. _common multi-echo parameters: + +ME-fMRI parameters +------------------ +The following section highlights a selection of parameters collected from published papers that have +used multi-echo fMRI. +You can see the spreadsheet of publications at :ref:`spreadsheet of publications`. + +The following plots reflect the average values for studies conducted at 3 Tesla. + +.. plot:: + + import matplotlib.pyplot as plt + import pandas as pd + import numpy as np + # TODO deal with the issue that the plot doesn't regenterate (ie isn't alive) + # Unless the code is updated. 
+ metable = pd.read_csv('https://docs.google.com/spreadsheets/d/1WERojJyxFoqcg_tndUm5Kj0H1UfUc9Ban0jFGGfPaBk/export?gid=0&format=csv', + header=0) + TEs = [metable.TE1.mean(), metable.TE2.mean(), metable.TE3.mean(), metable.TE4.mean(), metable.TE5.mean()] + TE_labels = ['TE1', 'TE2', 'TE3', 'TE4', 'TE5'] + plt.bar([1, 2, 3, 4, 5], TEs) + plt.title('Echo Times', fontsize=18) + pub_count = metable.TE1.count() + plt.text(0.5,60, 'Average from {} studies'.format(pub_count)) + plt.xlabel('Echo Number') + plt.ylabel('Echo Time (ms)') + plt.show() + + + plt.hist(metable.TR.to_numpy()) + plt.title('Repetition Times', fontsize = 18) + plt.xlabel('Repetition Time (s)') + plt.ylabel('Count') + plt.show() + + + x_vox = metable.x.to_numpy() + y_vox = metable.y.to_numpy() + z_vox = metable.z.to_numpy() + plt.hist(np.nanmean([x_vox, y_vox, z_vox],0)) + plt.title('Voxel Dimensions', fontsize = 18) + plt.xlabel('Average Voxel dimension (mm)') + plt.ylabel('Count') + plt.show() diff --git a/docs/api.rst b/docs/api.rst index c44365ca7..e8627a9ad 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -93,11 +93,10 @@ API .. autosummary:: :toctree: generated/ - :template: function.rst - - tedana.metrics.dependence_metrics - tedana.metrics.kundu_metrics + :template: module.rst + tedana.metrics.collect + tedana.metrics.dependence .. _api_selection_ref: @@ -116,6 +115,7 @@ API tedana.selection.manual_selection tedana.selection.kundu_selection_v2 + tedana.selection.kundu_tedpca .. _api_gscontrol_ref: @@ -152,10 +152,13 @@ API :toctree: generated/ :template: function.rst - tedana.io.split_ts - tedana.io.filewrite tedana.io.load_data + tedana.io.filewrite tedana.io.new_nii_like + tedana.io.save_comptable + tedana.io.load_comptable + tedana.io.add_decomp_prefix + tedana.io.split_ts tedana.io.write_split_ts tedana.io.writefeats tedana.io.writeresults @@ -199,6 +202,10 @@ API tedana.utils.andb tedana.utils.dice + tedana.utils.get_spectrum tedana.utils.load_image tedana.utils.make_adaptive_mask + tedana.utils.threshold_map tedana.utils.unmask + tedana.utils.sec2millisec + tedana.utils.millisec2sec diff --git a/docs/approach.rst b/docs/approach.rst index 650796a9a..b41048893 100644 --- a/docs/approach.rst +++ b/docs/approach.rst @@ -1,7 +1,7 @@ The tedana pipeline =================== -``tedana`` works by decomposing multi-echo BOLD data via priniciple component analysis (PCA) +``tedana`` works by decomposing multi-echo BOLD data via principal component analysis (PCA) and independent component analysis (ICA). The resulting components are then analyzed to determine whether they are TE-dependent or -independent. @@ -13,8 +13,8 @@ and decompose the resulting data into components that can be classified as BOLD or non-BOLD. This is performed in a series of steps, including: -* Principal components analysis -* Independent components analysis +* Principal component analysis +* Independent component analysis * Component classification .. image:: /_static/tedana-workflow.png @@ -67,11 +67,11 @@ value for that voxel in the adaptive mask. Monoexponential decay model fit ``````````````````````````````` The next step is to fit a monoexponential decay model to the data in order to -estimate voxel-wise :math:`T_{2}^*` and :math:`S_0`. -:math:`S_0` corresponds to the total signal in each voxel before decay and can reflect coil sensivity. +estimate voxel-wise :math:`T_{2}^*` and :math:`S_0`. +:math:`S_0` corresponds to the total signal in each voxel before decay and can reflect coil sensivity. 
:math:`T_{2}^*` corresponds to the rate at which a voxel decays over time, which -is related to signal dropout and BOLD sensitivity. -Estimates of the parameters are saved as **t2sv.nii.gz** and **s0v.nii.gz**. +is related to signal dropout and BOLD sensitivity. +Estimates of the parameters are saved as **t2sv.nii.gz** and **s0v.nii.gz**. While :math:`T_{2}^*` and :math:`S_0` in fact fluctuate over time, estimating them on a volume-by-volume basis with only a small number of echoes is not @@ -90,10 +90,10 @@ The echo times are also multiplied by -1. .. note:: - It is now possible to do a nonlinear monoexponential fit to the original, untransformed - data values by specifiying ``--fittype curvefit``. + It is now possible to do a nonlinear monoexponential fit to the original, untransformed + data values by specifiying ``--fittype curvefit``. This method is slightly more computationally demanding but may obtain more - accurate fits. + accurate fits. .. image:: /_static/a04_echo_log_value_distributions.png @@ -150,14 +150,14 @@ For the example voxel, the resulting weights are: :align: center These normalized weights are then used to compute a weighted average that takes advantage -of the higher signal in earlier echoes and the heigher sensitivty at later echoes. +of the higher signal in earlier echoes and the higher sensitivity at later echoes. The distribution of values for the optimally combined data lands somewhere between the distributions for other echoes. .. image:: /_static/a09_optimal_combination_value_distributions.png The time series for the optimally combined data also looks like a combination -of the other echoes (which it is). +of the other echoes (which it is). This optimally combined data is written out as **ts_OC.nii.gz** `Optimal combination code`_ @@ -180,24 +180,25 @@ This optimally combined data is written out as **ts_OC.nii.gz** Denoising ````````` -The next step is an attempt to remove noise from the data. -This process can be -broadly seperated into three steps: **decomposition**, **metric calculation** and -**component selection**. -Decomposition reduces the dimensionality of the -optimally combined data using `Principal Components Analysis (PCA)`_ and then an `Independent Components Analysis (ICA)`_. -Metrics which highlights the -TE-dependence or indepence are derived from these components. -Component selection -uses these metrics in order to identify components that should be kept in the data -or discarded. -Unwanted components are then removed from the optimally combined data -to produce the denoised data output. +The next step is an attempt to remove noise from the data. +This process can be broadly separated into three steps: **decomposition**, +**metric calculation** and **component selection**. +Decomposition reduces the dimensionality of the optimally combined data using +`principal component analysis (PCA)`_ and then an `independent component analysis (ICA)`_. +Metrics which highlights the TE-dependence or independence are derived from these components. +Component selection uses these metrics in order to identify components that +should be kept in the data or discarded. +Unwanted components are then removed from the optimally combined data +to produce the denoised data output. + +.. _principal component analysis (PCA): https://en.wikipedia.org/wiki/Principal_component_analysis +.. 
_independent component Analysis (ICA): https://en.wikipedia.org/wiki/Independent_component_analysis + TEDPCA `````` The next step is to dimensionally reduce the data with TE-dependent principal -components analysis (PCA). +component analysis (PCA). The goal of this step is to make it easier for the later ICA decomposition to converge. Dimensionality reduction is a common step prior to ICA. TEDPCA applies PCA to the optimally combined data in order to decompose it into component maps and @@ -207,20 +208,28 @@ Here we can see time series for some example components (we don't really care ab .. image:: /_static/a11_pca_component_timeseries.png These components are subjected to component selection, the specifics of which -vary according to algorithm. +vary according to algorithm. Specifically, ``tedana`` offers two different approaches that perform this step. + +The simplest approach (the default `mdl`, `aic` and `kic` options for `--tedpca`) is based on a Moving Average (stationary Gaussian) process +proposed by `Li et al (2007)`_. A moving average process is the output of a linear system (which in this case is +a smoothing filter) that has an independent and identically distributed Gaussian process as the input. If we assume that the linear system is shift +invariant, the moving average process is a stationary Gaussian random process. Simply put, this process more optimally +selects the number of components for fMRI data following a subsampling scheme described in `Li et al (2007)`_. The +selection of components is performed with either of the three options provided by `--tedpca`: -In the simplest approach, ``tedana`` uses Minkaโ€™s MLE to estimate the -dimensionality of the data, which disregards low-variance components (the `mle` option in for `--tedpca`). +* `aic`: the Akaike Information Criterion, which is the least aggressive option; i.e., returns the largest number of components. +* `kic`: the Kullback-Leibler Information Criterion, which stands in the middle in terms of aggressiveness. +* `mdl`: the Minimum Description Length, which is the most aggressive (and recommended) option. A more complicated approach involves applying a decision tree (similar to the decision tree described in the TEDICA section below) to identify and discard PCA components which, in addition to not explaining much variance, are also not significantly TE-dependent (i.e., have low Kappa) or -TE-independent (i.e., have low Rho). -These approaches can be accessed using either the `kundu` or `kundu_stabilize` -options for the `--tedpca` flag. -For a more thorough explanation of this approach, consider the supplemental information -in `Kundu et al (2013)`_ +TE-independent (i.e., have low Rho). +These approaches can be accessed using either the `kundu` or `kundu_stabilize` +options for the `--tedpca` flag. +For a more thorough explanation of this approach, consider the supplemental information +in `Kundu et al (2013)`_ After component selection is performed, the retained components and their associated betas are used to reconstruct the optimally combined data, resulting @@ -235,23 +244,23 @@ in a dimensionally reduced version of the dataset which is then used in the `TED TEDICA `````` -Next, ``tedana`` applies TE-dependent independent components analysis (ICA) in +Next, ``tedana`` applies TE-dependent independent component analysis (ICA) in order to identify and remove TE-independent (i.e., non-BOLD noise) components. 
The dimensionally reduced optimally combined data are first subjected to ICA in -order to fit a mixing matrix to the whitened data. -This generates a number of -independent timeseries (saved as **meica_mix.1D**), as well as beta maps which show -the spatial loading of these components on the brain (**betas_OC.nii.gz**). +order to fit a mixing matrix to the whitened data. +This generates a number of +independent timeseries (saved as **meica_mix.1D**), as well as beta maps which show +the spatial loading of these components on the brain (**betas_OC.nii.gz**). .. image:: /_static/a13_ica_component_timeseries.png Linear regression is used to fit the component time series to each voxel in each -of the original, echo-specific data. -This results in echo- and voxel-specific +of the original, echo-specific data. +This results in echo- and voxel-specific betas for each of the components. -The beta values from the linear regression -can be used to determine how the fluctutations (in each component timeseries) change -across the echo times. +The beta values from the linear regression +can be used to determine how the fluctuations (in each component timeseries) change +across the echo times. TE-dependence (:math:`R_2` or :math:`1/T_{2}^*`) and TE-independence (:math:`S_0`) models can then be fit to these betas. @@ -270,14 +279,14 @@ The grey lines show how beta values (Parameter Estimates) change over time. Refe A decision tree is applied to :math:`\kappa`, :math:`\rho`, and other metrics in order to classify ICA components as TE-dependent (BOLD signal), TE-independent -(non-BOLD noise), or neither (to be ignored). -These classifications are saved in +(non-BOLD noise), or neither (to be ignored). +These classifications are saved in `comp_table_ica.txt`. The actual decision tree is dependent on the component selection algorithm employed. ``tedana`` includes the option `kundu` (which uses hardcoded thresholds applied to each of the metrics). -Components that are classified as noise are projected out of the optimally combined data, +Components that are classified as noise are projected out of the optimally combined data, yielding a denoised timeseries, which is saved as `dn_ts_OC.nii.gz`. `TEDICA code`_ @@ -313,3 +322,4 @@ Currently, ``tedana`` implements GSR and T1c-GSR. .. _physics section: https://tedana.readthedocs.io/en/latest/multi_echo.html .. _Kundu et al (2013): https://www.ncbi.nlm.nih.gov/pubmed/24038744 +.. _Li et al (2007): https://onlinelibrary.wiley.com/doi/abs/10.1002/hbm.20359 diff --git a/docs/conf.py b/docs/conf.py index aa4eb12eb..081fd9e78 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -114,8 +114,10 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -# -# html_theme_options = {} + +html_theme_options = { + 'includehidden': False, +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/considerations.rst b/docs/considerations.rst deleted file mode 100644 index b4c4c11f8..000000000 --- a/docs/considerations.rst +++ /dev/null @@ -1,262 +0,0 @@ -########################## -Considerations for ME-fMRI -########################## -Multi-echo fMRI acquisition sequences and analysis methods are rapidly maturing. -Someone who has access to a multi-echo fMRI sequence should seriously consider using it. 
- -The possible costs and benefits of multi-echo fMRI -================================================== -The following are a few points to consider when deciding whether or not to collect multi-echo data. - -Possible increase in TR ------------------------ -The one difference with multi-echo is a slight time cost. -For multi-echo fMRI, the shortest echo time (TE) is essentially free since it is collected in the -gap between the RF pulse and the single-echo acquisition. -The second echo tends to roughly match the single-echo TE. -Additional echoes require more time. -For example, on a 3T MRI, if the T2* weighted TE is 30ms for single echo fMRI, -a multi-echo sequence may have TEs of 15.4, 29.7, and 44.0ms. -In this example, the extra 14ms of acquisition time per RF pulse is the cost of multi-echo fMRI. - -One way to think about this cost is in comparison to single-echo fMRI. -If a multi-echo sequence has identical spatial resolution and acceleration as a single-echo sequence, -then a rough rule of thumb is that the multi-echo sequence will have 10% fewer slices or 10% longer TR. -Instead of compromising on slice coverage or TR, one can increase acceleration. -If one increases acceleration, it is worth doing an empirical comparison to make sure there -isn't a non-trivial loss in SNR or an increase of artifacts. - -Weighted Averaging may lead to an increase in SNR -------------------------------------------------- -Multiple studies have shown that a -weighted average of the echoes to optimize T2* weighting, sometimes called "optimally combined," -gives a reliable, modest boost in data quality. -The optimal combination of echoes can currently be calculated in several software packages including AFNI, -fMRIPrep, and tedana. In tedana, the weighted -average can be calculated with `t2smap`_ If no other -acquisition compromises are necessary to acquire multi-echo data, this boost is worthwhile. - -Consider the life of the dataset --------------------------------- -If other -compromises are necessary, consider the life of the data set. -If data is being acquired for a discrete -study that will be acquired, analyzed, and published in a year or two, it might not be worth making -compromises to acquire multi-echo data. -If a data set is expected to be used for future analyses in later -years, it is likely that more powerful approaches to multi-echo denoising will sufficiently mature and add -even more value to a data set. - -Other multi-echo denoising methods, such as MEICA, the predecessor to tedana, have shown the potential for -much greater data quality improvements, as well as the ability to more accurately separate visually similar -signal vs noise, such as scanner based drifts vs slow changes in BOLD signal. -More powerful methods are -still being improved, and associated algorithms are still being actively developed. -Users need to have the time and knowledge to look -at the denoising output from every run to make sure denoising worked as intended. - -You may recover signal in areas affected by dropout ---------------------------------------------------- -Typical single echo fMRI uses an echo time that is appropriate for signal across most of the brain. -While this is effective -it also leads to drop out in regions with low :math:T_2^* values. -This can lead to low or even no signal at all in some areas. 
-If your research question could benefit from having either -improved signal characteristics in regions such as the orbitofrontal cortex, ventral temporal cortex or -the ventral striatum them multi-echo fMRI may be beneficial. - -Consider the cost of added quality control ------------------------------------------- -The developers of ``tedana`` strongly support always examining data for quality concerns, whether -or not multi-echo fMRI is used. -Multi-echo data and denoising are no exception. -For this purpose, ``tedana`` currently produces basic diagnostic images by default, which can be -inspected in order to determine the quality of denoising. -`See outputs`_ for more information on these outputs. - -.. _t2smap: https://tedana.readthedocs.io/en/latest/usage.html#run-t2smap -.. _see outputs: https://tedana.readthedocs.io/en/latest/outputs.html - -Acquisition Parameter Recommendations -===================================== -There is no empirically tested best parameter set for multi-echo acquisition. -The guidelines for optimizing parameters are similar to single-echo fMRI. -For multi-echo fMRI, the same factors that may guide priorities for single echo -fMRI sequences are also relevant. -Choose sequence parameters that meet the priorities of a study with regards to spatial resolution, -spatial coverage, sample rate, signal-to-noise ratio, signal drop-out, distortion, and artifacts. - -A minimum of 3 echoes is required for running the current implementation fo TE-dependent denoising in -``tedana``. -It may be useful to have at least one echo that is earlier and one echo that is later than the -TE one would use for single-echo T2* weighted fMRI. - -.. note:: - This is in contrast to the **dual echo** denoising method which uses a very early (~5ms) - first echo in order to clean data. For more information on this method, see `Bright and Murphy`_ (2013). - -.. _Bright and Murphy: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518782/ - -More than 3 echoes may be useful, because that would allow for more accurate -estimates of BOLD and non-BOLD weighted fluctuations, but more echoes have an -additional time cost, which would result in either less spatiotemporal coverage -or more acceleration. -Where the benefits of more echoes balance out the additional costs is an open research question. - -We are not recommending specific parameter options at this time. -There are multiple ways to balance the slight time cost from the added echoes that have -resulted in research publications. -We suggest new multi-echo fMRI users examine the :ref:`spreadsheet of publications` that use -multi-echo fMRI to identify studies with similar acquisition priorities, -and use the parameters from those studies as a starting point. -More complete recomendations -and guidelines are discussed in the `appendix`_ of Dipasquale et al, 2017. - -.. _appendix: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0173289 - -.. _found here: https://www.cmrr.umn.edu/multiband/ -.. _this link: http://license.umn.edu/technologies/cmrr_center-for-magnetic-resonance-research-software-for-siemens-mri-scanners -.. _available here: https://www.nmr.mgh.harvard.edu/software/c2p/sms -.. _GE Collaboration Portal: https://collaborate.mr.gehealthcare.com -.. note:: - In order to increase the number of contrasts ("echoes") you may need to first increase the TR, shorten the - first TE and/or enable in-plane acceleration. - For typically used parameters see the `parameters and publications page`_ -.. 
_parameters and publications page: https://tedana.readthedocs.io/en/latest/publications.html - -Resources -========= - -Journal articles ----------------- -* | :ref:`spreadsheet of publications` catalogues papers using multi-echo fMRI, - with information about acquisition parameters. -* | `Multi-echo acquisition`_ - | Posse, NeuroImage 2012 - | Includes an historical overview of multi-echo acquisition and research -* | `Multi-Echo fMRI A Review of Applications in fMRI Denoising and Analysis of BOLD Signals`_ - | Kundu et al, NeuroImage 2017 - | A review of multi-echo denoising with a focus on the MEICA algorithm -* | `Enhanced identification of BOLD-like componenents with MESMS and MEICA`_ - | Olafsson et al, NeuroImage 2015 - | The appendix includes a good explanation of the math underlying MEICA denoising -* | `Comparing resting state fMRI de-noising approaches using multi- and single-echo acqusitions`_ - | Dipasquale et al, PLoS One 2017 - | The appendix includes some recommendations for multi-echo acqusition - -.. _Multi-echo acquisition: https://www.ncbi.nlm.nih.gov/pubmed/22056458 -.. _Multi-Echo fMRI A Review of Applications in fMRI Denoising and Analysis of BOLD Signals: https://www.ncbi.nlm.nih.gov/pubmed/28363836 -.. _Enhanced identification of BOLD-like componenents with MESMS and MEICA: https://www.ncbi.nlm.nih.gov/pubmed/25743045 -.. _Comparing resting state fMRI de-noising approaches using multi- and single-echo acqusitions: https://www.ncbi.nlm.nih.gov/pubmed/28323821 - -Videos ------- -* An `educational session from OHBM 2017`_ by Dr. Prantik Kundu about multi-echo denoising -* A `series of lectures from the OHBM 2017 multi-echo session`_ on multiple facets of multi-echo data analysis -* | Multi-echo fMRI lecture from the `2018 NIH FMRI Summer Course`_ by Javier Gonzalez-Castillo - | `Slides from 2018 NIH FMRI Summer Course`_ - -.. _educational session from OHBM 2017: https://www.pathlms.com/ohbm/courses/5158/sections/7788/video_presentations/75977 -.. _series of lectures from the OHBM 2017 multi-echo session: https://www.pathlms.com/ohbm/courses/5158/sections/7822 -.. _2018 NIH FMRI Summer Course: https://fmrif.nimh.nih.gov/course/fmrif_course/2018/14_Javier_20180713 -.. _Slides from 2018 NIH FMRI Summer Course: https://fmrif.nimh.nih.gov/COURSE/fmrif_course/2018/content/14_Javier_20180713.pdf - -Available multi-echo fMRI sequences for multiple vendors --------------------------------------------------------- - -**For Siemens** users, there are two options for Works In Progress (WIPs) Sequences. -The Center for Magnetic Resonance Research at the University of Minnesota -provides a custom MR sequence that allows users to collect multiple echoes -(termed **Contrasts**). -The sequence and documentation can be `found here`_. For details -on obtaining a license follow `this link`_. -By default the number of contrasts is 1, -yielding a signal echo sequence. -In order to collect multiple echoes, increase number of -Contrasts on the **Sequence Tab, Part 1** on the MR console. - -In addition, the Martinos Center at Harvard also has a MR sequence available, with the -details `available here`_. -The number of echoes can be specified on the **Sequence, Special** tab -in this sequence. 
- -**For GE users**, there are currently two sharable pulse sequences: - -Multi-echo EPI (MEPI) โ€“ Software releases: DV24, MP24 and DV25 (with offline recon) -Hyperband Multi-echo EPI (HyperMEPI) - Software releases: DV26, MP26, DV27, RX27 -(here Hyperband can be deactivated to do simple Multi-echo EPI โ€“ online recon) - -Please reach out to the GE Research Operation team or each pulse sequenceโ€™s -author to begin the process of obtaining this software. -More information can be -found on the `GE Collaboration Portal`_ - -Once logged-in, go to Groups > GE Works-in-Progress you can find the description of the current ATSM (i.e. prototypes) - -Multi-echo preprocessing software ---------------------------------- - -tedana requires data that has already been preprocessed for head motion, alignment, etc. - -AFNI can process multi-echo data natively as well as apply tedana denoising through the use of -**afni_proc.py**. To see various implementations, start with Example 12 in the `afni_proc.py help`_ - -.. _afni_proc.py help: https://afni.nimh.nih.gov/pub/dist/doc/program_help/afni_proc.py.html - -`fmriprep` can also process multi-echo data, but is currently limited to using the optimally combined -timeseries. -For more details, see the `fmriprep workflows page`_ - -.. _fmriprep workflows page: https://fmriprep.readthedocs.io/en/stable/workflows.html - -Currently SPM and FSL do not natively support mutli-echo fmri data processing. - -Other software that uses multi-echo fMRI -======================================== - -``tedana`` represents only one approach to processing multiecho data. -Currently there are a number of methods that can take advantage of or use the information contain in multi-echo data. -These include: - -`3dMEPFM`_: A multi-echo implemntation of 'paradigm free mapping', that is detection of neural events in the absense of -a prespecified model. -By leveraging the information present in multiecho data, changes in relaxation time can be directly esimated and -more events can be detected. For more information, see the `following paper`_. - -.. _3dMEPFM: https://afni.nimh.nih.gov/pub/dist/doc/program_help/3dMEPFM.html -.. _following paper: https://www.sciencedirect.com/science/article/pii/S105381191930669X - -`Bayesian approach to denoising`_: An alternative approach to seperating out BOLD and non-BOLD signals within a Bayesian -framework is currently under development. - -.. _Bayesian approach to denoising: https://ww5.aievolution.com/hbm1901/index.cfm?do=abs.viewAbs&abs=5026 - -`Multi-echo Group ICA`_: Current approches to ICA just use a single run of data in order to perform denoising. An alternative -approach is to use information from multiple subjects or multiple runs from a single subject in order to improve the -classification of BOLD and non-BOLD components. - -.. _Multi-echo Group ICA: https://ww5.aievolution.com/hbm1901/index.cfm?do=abs.viewAbs&abs=1286 - -`Dual Echo Denoising`_: If the first echo can be collected early enough, there are currently methods that take advantage of the -very limited BOLD weighting at these early echo times. - -.. _Dual Echo Denoising: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518782/ - - - -Datasets --------- -A number of multi-echo datasets have been made public so far. -This list is not necessarily up-to-date, so please check out OpenNeuro to potentially find more. 
-
-* `Multi-echo fMRI replication sample of autobiographical memory, prospection and theory of mind reasoning tasks`_
-* `Multi-echo Cambridge`_
-* `Multiband multi-echo imaging of simultaneous oxygenation and flow timeseries for resting state connectivity`_
-* `Valence processing differs across stimulus modalities`_
-* `Cambridge Centre for Ageing Neuroscience (Cam-CAN)`_
-
-.. _Multi-echo fMRI replication sample of autobiographical memory, prospection and theory of mind reasoning tasks: https://openneuro.org/datasets/ds000210/
-.. _Multi-echo Cambridge: https://openneuro.org/datasets/ds000258
-.. _Multiband multi-echo imaging of simultaneous oxygenation and flow timeseries for resting state connectivity: https://openneuro.org/datasets/ds000254
-.. _Valence processing differs across stimulus modalities: https://openneuro.org/datasets/ds001491
-.. _Cambridge Centre for Ageing Neuroscience (Cam-CAN): https://camcan-archive.mrc-cbu.cam.ac.uk/dataaccess/
diff --git a/docs/developing.rst b/docs/developing.rst
new file mode 100644
index 000000000..1cb47cc46
--- /dev/null
+++ b/docs/developing.rst
@@ -0,0 +1,227 @@
+====================
+Developer Guidelines
+====================
+
+This webpage is intended to guide users through making changes to
+``tedana``'s codebase, in particular working with tests.
+The worked example also offers some guidelines on approaching testing when
+adding new functions.
+Please check out our `contributing guide`_ for getting started.
+
+
+Monthly Developer Calls
+=======================
+We run monthly developer calls via Zoom.
+You can see the schedule via the tedana `google calendar`_.
+
+Everyone is welcome.
+We look forward to meeting you there!
+
+
+Adding and Modifying Tests
+==========================
+Testing is an important component of development.
+For simplicity, we have migrated all tests to ``pytest``.
+There are two basic kinds of tests: unit and integration tests.
+Unit tests focus on testing individual functions, whereas integration tests focus on making sure
+that the whole workflow runs correctly.
+
+Unit Tests
+----------
+For unit tests, we try to keep tests from the same module grouped into one file.
+Make sure the function you're testing is imported, then write your test.
+Good tests will make sure that edge cases are accounted for as well as common cases.
+You may also use ``pytest.raises`` to ensure that errors are thrown for invalid inputs to a
+function.
+
+Integration Tests
+-----------------
+Adding integration tests is relatively rare.
+An integration test runs a complete multi-echo dataset through the workflow with some set of options to ensure
+end-to-end pipeline functionality.
+These tests are relatively computationally expensive but aid us in making sure the pipeline is
+stable during large sets of changes.
+If you believe you have a dataset that will test ``tedana`` more completely, please open an issue
+before attempting to add an integration test.
+After securing the appropriate permission from the dataset owner to share it with ``tedana``, you
+can use the following procedure:
+
+(1) Make a ``tar.gz`` file which will unzip to contain only the files you'd like to
+run a workflow on.
+You can do this with the following, which would make an archive ``my_data.tar.gz``:
+
+.. code-block:: bash
+
+    tar czf my_data.tar.gz my_data/*.nii.gz
+
+(2) Run the workflow with a known-working version, and put the outputs into a text file inside
+``$TEDANADIR/tedana/tests/data/``, where ``TEDANADIR`` is your local ``tedana`` repository.
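+For example, assuming the workflow wrote its outputs to a directory called ``outputs/`` (the
+directory and output file names below are placeholders, not required names), a file list along
+these lines could be generated with:
+
+.. code-block:: bash
+
+    cd outputs && find . -type f | sort > $TEDANADIR/tedana/tests/data/my_data_3echo_outputs.txt
+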
+We encourage using the convention ``__echo_outputs.txt``, appending ``verbose`` +to the filename if the integration test uses ``tedana`` in the verbose mode. + +(3) Write a test function in ``test_integration.py``. +To write the test function you can follow the model of our `five echo set`_, which takes the following steps: + +1. Check if a pytest user is skipping integration, skip if so +#. Use ``download_test_data`` to retrieve the test data from OSF +#. Run a workflow +#. Use ``resources_filename`` and ``check_integration_outputs`` to compare your expected output to + actual output. + +(4) If you need to upload new data, you will need to contact the maintainers and ask them to either add +it to the `tedana OSF project`_ or give you permission to add it. + +(5) Once you've tested your integration test locally and it is working, you will need to add it to the +CircleCI config and the ``Makefile``. +Following the model of the three-echo and five-echo sets, define a name for your integration test +and on an indented line below put + +.. code-block:: bash + + @py.test --cov-append --cov-report term-missing --cov=tedana -k TEST + +with ``TEST`` your test function's name. +This call basically adds code coverage reports to account for the new test, and runs the actual +test in addition. + +(6) Using the five-echo set as a template, you should then edit ``.circlec/config.yml`` to add your +test, calling the same name you define in the ``Makefile``. + +Viewing CircleCI Outputs +------------------------ +If you need to take a look at a failed test on CircleCI rather than locally, you can use the +following block to retrieve artifacts (see CircleCI documentation here_) + +.. code-block:: bash + + export CIRCLE_TOKEN=':your_token' + + curl https://circleci.com/api/v1.1/project/:vcs-type/:username/:project/$build_number/artifacts?circle-token=$CIRCLE_TOKEN \ + | grep -o 'https://[^"]*' \ + | sed -e "s/$/?circle-token=$CIRCLE_TOKEN/" \ + | wget -v -i - + +To get a CircleCI token, follow the instructions for `getting one`_. +You cannot do this unless you are part of the ME-ICA/tedana organization. +If you don't want all of the artifacts, you can go to the test details and use the browser to +manually select the files you would like. + +Worked Example +============== +Suppose we want to add a function in ``tedana`` that creates a file called ```hello_world.txt`` to +be stored along the outputs of the ``tedana`` workflow. + +First, we merge the repository's ``master`` branch into our own to make sure we're up to date, and +then we make a new branch called something like ``feature/say_hello``. +Any changes we make will stay on this branch. +We make the new function and call it ``say_hello`` and locate this function inside of ``io.py``. +We'll also need to make a unit test. +(Some developers actually make the unit test before the new function; this is a great way to make +sure you don't forget to create it!) +Since the function lives in ``io.py``, its unit test should go into ``test_io.py``. +The job of this test is exclusively to tell if the function we wrote does what it claims to do +without errors. +So, we define a new function in ``test_io.py`` that looks something like this: + +.. code-block:: python + + def test_say_hello(): + # run the function + say_hello() + # test the function + assert op.exists('hello_world.txt') + # clean up + os.remove('hello_world.txt') + +We should see that our unit test is successful via + +.. 
code-block:: bash + + pytest $TEDANADIR/tedana/tests/test_io.py -k test_say_hello + +If not, we should continue editing the function until it passes our test. +Let's suppose that suddenly, you realize that what would be even more useful is a function that +takes an argument, ``place``, so that the output filename is actually ``hello_PLACE``, with +``PLACE`` the value passed and ``'world'`` as the default value. +We merge any changes from the upstream master branch into our branch via + +.. code-block:: bash + + git checkout feature/say_hello + git fetch upstream master + git merge upstream/master + +and then begin work on our test. +We need to our unit test to be more complete, so we update it to look more like the following, +adding several cases to make sure our function is robust to the name supplied: + +.. code-block:: python + + def test_say_hello(): + # prefix of all files to be checked + prefix = 'hello_' + # suffix of all files to be checked + suffix = '.txt' + # run the function with several cases + for x in ['world', 'solar system', 'galaxy', 'universe']: + # current test name + outname = prefix + x + suffix + # call the function + say_hello(x) + # test the function + assert op.exists(outname) + # clean up from this call + os.remove(outname) + +Once that test is passing, we may need to adjust the integration test. +Our program creates a file, ``hello_world.txt``, which the older version would not have produced. +Therefore, we need to add the file to ``$TEDANADIR/tedana/tests/data/tedana_outputs.txt`` and its +counterpart, R2-D2-- uh, we mean, ``tedana_outputs_verbose.txt``. +With that edit complete, we can run the full ``pytest`` suite via + +.. code-block:: bash + + pytest $TEDANADIR/tedana/tests + +Once that filename is added, all of the tests should be passing and we should open a PR to have our +change reviewed. + +From here, others working on the project may request changes and we'll have to make sure that our +tests are kept up to date with any changes made as we did before updating the unit test. +For example, if a new parameter is added, ``greeting``, with a default of ``hello``, we'll need to +adjust the unit test. +However, since this doesn't change the typical workflow of ``tedana``, there's no need to change +the integration test; we're still matching the original filename. +Once we are happy with the changes and some members of ``tedana`` have approved the changes, our +changes will be merged! + +We should then do the following cleanup with our git repository: + +.. code-block:: bash + + git checkout master + git fetch upstream master + git merge upstream/master + git branch -d feature/say_hello + git push --delete origin feature/say_hello + +and we're good to go! + + +.. _`tedana OSF project`: https://osf.io/bpe8h/ +.. _git: https://git-scm.com/ +.. _`git pro`: https://git-scm.com/book/en/v2 +.. _repository: https://github.com/ME-ICA/tedana +.. _Fork: https://help.github.com/en/github/getting-started-with-github/fork-a-repo +.. _`pull request`: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request +.. _GitKraken: https://www.gitkraken.com/ +.. _`GitHub Desktop`: https://desktop.github.com/ +.. _SourceTree: https://www.sourcetreeapp.com/ +.. _`GitHub UI`: https://help.github.com/en/github/managing-files-in-a-repository/editing-files-in-your-repository +.. _this: https://github.com/ME-ICA/tedana/tree/master/docs +.. _ReStructuredText: http://docutils.sourceforge.net/rst.html#user-documentation +.. 
_`five echo set`: https://github.com/ME-ICA/tedana/blob/37368f802f77b4327fc8d3f788296ca0f01074fd/tedana/tests/test_integration.py#L71-L95 +.. _here: https://circleci.com/docs/2.0/artifacts/#downloading-all-artifacts-for-a-build-on-circleci +.. _`getting one`: https://circleci.com/docs/2.0/managing-api-tokens/?gclid=CjwKCAiAqqTuBRBAEiwA7B66heDkdw6l68GAYAHtR2xS1xvDNNUzy7l1fmtwQWvVN0OIa97QL8yfhhoCejoQAvD_BwE#creating-a-personal-api-token +.. _`google calendar`: https://calendar.google.com/calendar/embed?src=pl6vb4t9fck3k6mdo2mok53iss%40group.calendar.google.com +.. _`contributing guide`: https://github.com/ME-ICA/tedana/blob/master/CONTRIBUTING.md diff --git a/docs/faq.rst b/docs/faq.rst index ff80b6154..7ace84e2f 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -36,11 +36,11 @@ Anyone interested in using v3.2 may compile and install an earlier release (<=0. What is the warning about ``duecredit``? ````````````````````````````````````````` -``duecredit`` is a python package that is used, but not required by ``tedana``. -These warnings do not affect any of the processing within the ``tedana``. -To avoide this warning, you can install ``duecredit`` with ``pip install duecredit``. -For more information about ``duecredit`` and concerns about -the citation and visibility of software or methods, visit the `duecredit`_ github. +``duecredit`` is a python package that is used, but not required by ``tedana``. +These warnings do not affect any of the processing within the ``tedana``. +To avoid this warning, you can install ``duecredit`` with ``pip install duecredit``. +For more information about ``duecredit`` and concerns about +the citation and visibility of software or methods, visit the `duecredit`_ GitHub repository. .. _duecredit: https://github.com/duecredit/duecredit @@ -56,14 +56,14 @@ Multi-echo fMRI Does multi-echo fMRI require more radio frequency pulses? ````````````````````````````````````````````````````````` While multi-echo does lead to collecting more images during each TR (one per echo), there is still only a single -radiofrequency pulse per TR. This means that there is no change in the `specific absorbtion rate`_ (SAR) limits -for the participant. +radiofrequency pulse per TR. This means that there is no change in the `specific absorption rate`_ (SAR) limits +for the participant. -.. _specific absorbtion rate: https://www.mr-tip.com/serv1.php?type=db1&dbs=Specific%20Absorption%20Rate +.. _specific absorption rate: https://www.mr-tip.com/serv1.php?type=db1&dbs=Specific%20Absorption%20Rate Can I combine multiband (simultaneous multislice) with multi-echo fMRI? ``````````````````````````````````````````````````````````````````````` -Yes, these techniques are complementary. -Mutliband fMRI leads to collecting multiple slices within a volume simultaneouly, while multi-echo -fMRI is instead related to collecting multiple unique volumes. -These techniques can be combined to reduce the TR in a multi-echo sequence. +Yes, these techniques are complementary. +Multiband fMRI leads to collecting multiple slices within a volume simultaneously, while multi-echo +fMRI is instead related to collecting multiple unique volumes. +These techniques can be combined to reduce the TR in a multi-echo sequence. diff --git a/docs/index.rst b/docs/index.rst index 7f02eb45e..0d52ea263 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -36,6 +36,14 @@ multi-echo functional magnetic resonance imaging (fMRI) data. :target: https://codecov.io/gh/me-ica/tedana :alt: Codecov +.. 
image:: http://isitmaintained.com/badge/resolution/ME-ICA/tedana.svg + :target: http://isitmaintained.com/project/ME-ICA/tedana + :alt: Average time to resolve an issue + +.. image:: http://isitmaintained.com/badge/open/ME-ICA/tedana.svg + :target: http://isitmaintained.com/project/ME-ICA/tedana + :alt: Percentage of issues still open + .. image:: https://badges.gitter.im/ME-ICA/tedana.svg :target: https://gitter.im/ME-ICA/tedana :alt: Join the chat @@ -141,20 +149,27 @@ tedana is licensed under GNU Lesser General Public License version 2.1. .. toctree:: :maxdepth: 2 :caption: Contents: - + installation multi-echo - considerations - publications + acquisition + resources usage approach outputs faq support contributing + developing roadmap api +.. toctree:: + :hidden: + :name: hiddentoc + + dependence_metrics + Indices and tables ------------------ diff --git a/docs/multi-echo.rst b/docs/multi-echo.rst index 58725899f..169e0beaa 100644 --- a/docs/multi-echo.rst +++ b/docs/multi-echo.rst @@ -1,9 +1,9 @@ -What is multi-echo fMRI -======================= -Most echo-planar image (EPI) sequences collect a single brain image following -a radio frequency (RF) pulse, at a rate known as the repetition time (TR). -This typical approach is known as single-echo fMRI. -In contrast, multi-echo (ME) fMRI refers to collecting data at multiple echo times, +What is multi-echo fMRI? +======================== +Most echo-planar image (EPI) sequences collect a single brain image following +a radio frequency (RF) pulse, at a rate known as the repetition time (TR). +This typical approach is known as single-echo fMRI. +In contrast, multi-echo (ME) fMRI refers to collecting data at multiple echo times, resulting in multiple volumes with varying levels of contrast acquired per RF pulse. The physics of multi-echo fMRI @@ -18,53 +18,53 @@ Because the BOLD signal is known to decay at a set rate, collecting multiple echos allows us to assess non-BOLD. The image below shows the basic relationship between echo times and the image acquired at -3T (top, A) and 7T (bottom, B). Note that the earliest echo time is the brightest, as the -signal has only had a limited amount of time to decay. -In addition, the latter echo times show areas in which is the signal has decayed completely ('drop out') -due to inhomgeneity in the magnetic field. -By using the information across multiple echoes these images can be combined in -an optimal manner to take advantage of the signal +3T (top, A) and 7T (bottom, B). Note that the earliest echo time is the brightest, as the +signal has only had a limited amount of time to decay. +In addition, the latter echo times show areas in which is the signal has decayed completely ('drop out') +due to inhomogeneity in the magnetic field. +By using the information across multiple echoes these images can be combined in +an optimal manner to take advantage of the signal in the earlier echoes (see `processing pipeline details`_). .. image:: /_static/physics_kundu_2017_multiple_echoes.jpg - + Adapted from `Kundu et al. (2017)`_. -In order to classify the relationship between the signal and the echo time we can consider a -single voxel at two timepoints (x and y) and the measured signal measured at three different echo times - :math:`S(TE_n)`. +In order to classify the relationship between the signal and the echo time we can consider a +single voxel at two timepoints (x and y) and the measured signal measured at three different echo times - :math:`S(TE_n)`. .. 
image:: /_static/physics_kundu_2017_TE_dependence.jpg - + Adapted from `Kundu et al. (2017)`_. For the left column, we are observing a change that we term :math:`{\Delta}{S_0}` - that is a change -in the intercept or raw signal intensity. -A common example of this is participant movement, in which the voxel (which is at a static -location within the scanner) now contains different tissue or even an area outside of the brain. - -As we have collected three seperate echoes, we can compare the change in signal at each echo time, :math:`{\Delta}{S(TE_n)}`. -For :math:`{\Delta}{S_0}` we see that this produces a decaying curve. -If we compare this to the original signal, as in :math:`\frac{{\Delta}{S(TE_n)}}{S(TE_n)}` -we see that there is no echo time dependence, as the final plot is a flat line. - -In the right column, we consider changes that are related to brain activity. -For example, imagine that the two brain states here (x and y) are a baseline and task activated state respectively. -This effect is a change in in :math:`{\Delta}{R_2^*}` which is equivilent -to the inverse of :math:`{T_2^*}`. -We typically observe this change in signal amplitude occuring over volumes with -the hemodynamic response, while here we are examining the change in signal over echo times. -Again we can plot the difference in the signal between these two states as a function of echo time, -finding that the signal rises and falls. -If we compare this curve to the original signal we find +in the intercept or raw signal intensity. +A common example of this is participant movement, in which the voxel (which is at a static +location within the scanner) now contains different tissue or even an area outside of the brain. + +As we have collected three separate echoes, we can compare the change in signal at each echo time, :math:`{\Delta}{S(TE_n)}`. +For :math:`{\Delta}{S_0}` we see that this produces a decaying curve. +If we compare this to the original signal, as in :math:`\frac{{\Delta}{S(TE_n)}}{S(TE_n)}` +we see that there is no echo time dependence, as the final plot is a flat line. + +In the right column, we consider changes that are related to brain activity. +For example, imagine that the two brain states here (x and y) are a baseline and task activated state respectively. +This effect is a change in in :math:`{\Delta}{R_2^*}` which is equivalent +to the inverse of :math:`{T_2^*}`. +We typically observe this change in signal amplitude occurring over volumes with +the hemodynamic response, while here we are examining the change in signal over echo times. +Again we can plot the difference in the signal between these two states as a function of echo time, +finding that the signal rises and falls. +If we compare this curve to the original signal we find that the magnitude of the changes is dependent on the echo time. For a more comprehensive review of these topics and others, see `Kundu et al. (2017)`_. - .. _TEs: http://mriquestions.com/tr-and-te.html .. _BOLD signal: http://www.fil.ion.ucl.ac.uk/spm/course/slides10-zurich/Kerstin_BOLD.pdf .. _Kundu et al. (2017): https://www.sciencedirect.com/science/article/pii/S1053811917302410?via%3Dihub + Why use multi-echo? ------------------- There are many potential reasons an investigator would be interested in using multi-echo EPI (ME-EPI). @@ -72,8 +72,8 @@ Among these are the different levels of analysis ME-EPI enables. 
Specifically, by collecting multi-echo data, researchers are able to: **Compare results across different echoes**: currently, field standards are largely set using single-echo EPI. -Because multi-echo is composed of multiple single-echo time series, each of these can be analyzed separately -and compared to one another. +Because multi-echo is composed of multiple single-echo time series, each of these can be analyzed separately +and compared to one another. **Combine the results by weighted averaging**: Rather than analyzing single-echo time series separately, we can combine them into an "optimally combined time series". @@ -81,13 +81,13 @@ For more information on this combination, see `processing pipeline details`_. Optimally combined data exhibits higher SNR and improves statistical power of analyses in regions traditionally affected by drop-out. -**Denoise the data based on information contained in the echoes**: Collecting multi-echo data allows -access to unique denoising methods. +**Denoise the data based on information contained in the echoes**: Collecting multi-echo data allows +access to unique denoising methods. ICA-based denoising methods like ICA-AROMA (`Pruim et al. (2015)`_) -have been shown to significantly improve the quality of cleaned signal. +have been shown to significantly improve the quality of cleaned signal. These methods, however, have comparably limited information, as they are designed to work with single-echo EPI. -``tedana`` is an ICA-based denoising pipeline built especially for +``tedana`` is an ICA-based denoising pipeline built especially for multi-echo data. Collecting multi-echo EPI allows us to leverage all of the information available for single-echo datasets, as well as additional information only available when looking at signal decay across multiple TEs. We can use this information to denoise the optimally combined time series. @@ -95,3 +95,80 @@ We can use this information to denoise the optimally combined time series. .. _processing pipeline details: https://tedana.readthedocs.io/en/latest/approach.html#optimal-combination .. _Pruim et al. (2015): https://www.sciencedirect.com/science/article/pii/S1053811915001822 + +Considerations for ME-fMRI +-------------------------- +Multi-echo fMRI acquisition sequences and analysis methods are rapidly maturing. +Someone who has access to a multi-echo fMRI sequence should seriously consider using it. + +Costs and benefits of multi-echo fMRI +------------------------------------- +The following are a few points to consider when deciding whether or not to collect multi-echo data. + +Possible increase in TR +``````````````````````` +The one difference with multi-echo is a slight time cost. +For multi-echo fMRI, the shortest echo time (TE) is essentially free since it is collected in the +gap between the RF pulse and the single-echo acquisition. +The second echo tends to roughly match the single-echo TE. +Additional echoes require more time. +For example, on a 3T MRI, if the T2* weighted TE is 30ms for single echo fMRI, +a multi-echo sequence may have TEs of 15.4, 29.7, and 44.0ms. +In this example, the extra 14ms of acquisition time per RF pulse is the cost of multi-echo fMRI. + +One way to think about this cost is in comparison to single-echo fMRI. +If a multi-echo sequence has identical spatial resolution and acceleration as a single-echo sequence, +then a rough rule of thumb is that the multi-echo sequence will have 10% fewer slices or 10% longer TR. 
+Instead of compromising on slice coverage or TR, one can increase acceleration.
+If one increases acceleration, it is worth doing an empirical comparison to make sure there
+isn't a non-trivial loss in SNR or an increase in artifacts.
+
+Weighted averaging may lead to an increase in SNR
+`````````````````````````````````````````````````
+Multiple studies have shown that a
+weighted average of the echoes that optimizes T2* weighting, sometimes called "optimally combined,"
+gives a reliable, modest boost in data quality.
+The optimal combination of echoes can currently be calculated in several software packages including AFNI,
+fMRIPrep, and tedana. In tedana, the weighted
+average can be calculated with `t2smap`_. If no other
+acquisition compromises are necessary to acquire multi-echo data, this boost is worthwhile.
+
+Consider the life of the dataset
+````````````````````````````````
+If other compromises are necessary, consider the life of the dataset.
+If data is being acquired for a discrete
+study that will be analyzed and published in a year or two, it might not be worth making
+compromises to acquire multi-echo data.
+If a dataset is expected to be used for future analyses in later
+years, it is likely that more powerful approaches to multi-echo denoising will sufficiently mature and add
+even more value to a dataset.
+
+Other multi-echo denoising methods, such as MEICA, the predecessor to tedana, have shown the potential for
+much greater data quality improvements, as well as the ability to more accurately separate visually similar
+signal and noise, such as scanner-based drifts versus slow changes in BOLD signal.
+More powerful methods are
+still being improved, and associated algorithms are still being actively developed.
+Users need to have the time and knowledge to look
+at the denoising output from every run to make sure denoising worked as intended.
+
+You may recover signal in areas affected by dropout
+```````````````````````````````````````````````````
+Typical single-echo fMRI uses an echo time that is appropriate for signal across most of the brain.
+While this is effective,
+it also leads to dropout in regions with low :math:`T_2^*` values.
+This can lead to low or even no signal at all in some areas.
+If your research question could benefit from having
+improved signal characteristics in regions such as the orbitofrontal cortex, ventral temporal cortex, or
+the ventral striatum, then multi-echo fMRI may be beneficial.
+
+Consider the cost of added quality control
+``````````````````````````````````````````
+The developers of ``tedana`` strongly support always examining data for quality concerns, whether
+or not multi-echo fMRI is used.
+Multi-echo data and denoising are no exception.
+For this purpose, ``tedana`` currently produces basic diagnostic images by default, which can be
+inspected in order to determine the quality of denoising.
+`See outputs`_ for more information on these outputs.
+
+.. _t2smap: https://tedana.readthedocs.io/en/latest/usage.html#run-t2smap
+.. _see outputs: https://tedana.readthedocs.io/en/latest/outputs.html
diff --git a/docs/outputs.rst b/docs/outputs.rst
index bddc75aa2..1a819ff25 100644
--- a/docs/outputs.rst
+++ b/docs/outputs.rst
@@ -1,5 +1,5 @@
 Outputs of tedana
-===========================
+=================
 
 tedana derivatives
 ------------------
@@ -8,6 +8,7 @@ tedana derivatives
 Filename               Content
 ====================== =====================================================
 t2sv.nii.gz            Limited estimated T2* 3D map.
+ Values are in seconds. The difference between the limited and full maps is that, for voxels affected by dropout where only one echo contains good data, the full map @@ -61,9 +62,6 @@ ica_components.nii.gz Component weight maps from ICA decomposition. betas_OC.nii.gz Full ICA coefficient feature set. betas_hik_OC.nii.gz High-kappa ICA coefficient feature set feats_OC2.nii.gz Z-normalized spatial component maps -comp_table_ica.txt TEDICA component table. A tab-delimited file with - summary metrics and inclusion/exclusion information - for each component from the ICA decomposition. report.txt A summary report for the workflow with relevant citations. ====================== ===================================================== @@ -73,12 +71,13 @@ If ``verbose`` is set to True: ====================== ===================================================== Filename Content ====================== ===================================================== -t2svG.nii.gz Full T2* map/time series. The difference between - the limited and full maps is that, for voxels - affected by dropout where only one echo contains - good data, the full map uses the single echo's - value while the limited map has a NaN. Only used - for optimal combination. +t2svG.nii.gz Full T2* map/time series. + Values are in seconds. + The difference between the limited and full maps is + that, for voxels affected by dropout where only one + echo contains good data, the full map uses the + single echo's value while the limited map has a NaN. + Only used for optimal combination. s0vG.nii.gz Full S0 map/time series. Only used for optimal combination. hik_ts_e[echo].nii.gz High-Kappa time series for echo number ``echo`` diff --git a/docs/publications.rst b/docs/publications.rst deleted file mode 100644 index ece4b3c84..000000000 --- a/docs/publications.rst +++ /dev/null @@ -1,56 +0,0 @@ -.. _spreadsheet of publications: - -ME-fMRI Parameters & Publications -================================= - -The following page highlights a selection of parameters collected from published papers that have -used multi-echo fMRI. -The subsequent spreadsheet is an on-going effort to track all of these publication. -This is a volunteer-led effort so, if you know of a excluded publication, whether or not it is yours, -please add it. - -The following plots reflect the average values for studies conducted at 3 Tesla. - -.. plot:: - - import matplotlib.pyplot as plt - import pandas as pd - import numpy as np - # TODO deal with the issue that the plot doesn't regenterate (ie isn't alive) - # Unless the code is updated. 
- metable = pd.read_csv('https://docs.google.com/spreadsheets/d/1WERojJyxFoqcg_tndUm5Kj0H1UfUc9Ban0jFGGfPaBk/export?gid=0&format=csv', - header=0) - TEs = [metable.TE1.mean(), metable.TE2.mean(), metable.TE3.mean(), metable.TE4.mean(), metable.TE5.mean()] - TE_labels = ['TE1', 'TE2', 'TE3', 'TE4', 'TE5'] - plt.bar([1, 2, 3, 4, 5], TEs) - plt.title('Echo Times', fontsize=18) - pub_count = metable.TE1.count() - plt.text(0.5,60, 'Average from {} studies'.format(pub_count)) - plt.xlabel('Echo Number') - plt.ylabel('Echo Time (ms)') - plt.show() - - - plt.hist(metable.TR.to_numpy()) - plt.title('Repetition Times', fontsize = 18) - plt.xlabel('Repetition Time (s)') - plt.ylabel('Count') - plt.show() - - - x_vox = metable.x.to_numpy() - y_vox = metable.y.to_numpy() - z_vox = metable.z.to_numpy() - plt.hist(np.nanmean([x_vox, y_vox, z_vox],0)) - plt.title('Voxel Dimensions', fontsize = 18) - plt.xlabel('Average Voxel dimension (mm)') - plt.ylabel('Count') - plt.show() - -You can view and suggest additions to this spreadsheet `here`_ - -.. raw:: html - - - -.. _here: https://docs.google.com/spreadsheets/d/1WERojJyxFoqcg_tndUm5Kj0H1UfUc9Ban0jFGGfPaBk/edit#gid=0 \ No newline at end of file diff --git a/docs/resources.rst b/docs/resources.rst new file mode 100644 index 000000000..3879374c4 --- /dev/null +++ b/docs/resources.rst @@ -0,0 +1,118 @@ +Resources +========= + +Journal articles describing multi-echo methods +---------------------------------------------- +* | :ref:`spreadsheet of publications` catalogues papers using multi-echo fMRI, + | with information about acquisition parameters. +* | `Multi-echo acquisition`_ + | Posse, NeuroImage 2012 + | Includes an historical overview of multi-echo acquisition and research +* | `Multi-Echo fMRI A Review of Applications in fMRI Denoising and Analysis of BOLD Signals`_ + | Kundu et al, NeuroImage 2017 + | A review of multi-echo denoising with a focus on the MEICA algorithm +* | `Enhanced identification of BOLD-like components with MESMS and MEICA`_ + | Olafsson et al, NeuroImage 2015 + | The appendix includes a good explanation of the math underlying MEICA denoising +* | `Comparing resting state fMRI de-noising approaches using multi- and single-echo acquisitions`_ + | Dipasquale et al, PLoS One 2017 + | The appendix includes some recommendations for multi-echo acquisition + +.. _Multi-echo acquisition: https://www.ncbi.nlm.nih.gov/pubmed/22056458 +.. _Multi-Echo fMRI A Review of Applications in fMRI Denoising and Analysis of BOLD Signals: https://www.ncbi.nlm.nih.gov/pubmed/28363836 +.. _Enhanced identification of BOLD-like components with MESMS and MEICA: https://www.ncbi.nlm.nih.gov/pubmed/25743045 +.. _Comparing resting state fMRI de-noising approaches using multi- and single-echo acquisitions: https://www.ncbi.nlm.nih.gov/pubmed/28323821 + +Videos +------ +* An `educational session from OHBM 2017`_ by Dr. Prantik Kundu about multi-echo denoising +* A `series of lectures from the OHBM 2017 multi-echo session`_ on multiple facets of multi-echo data analysis +* | Multi-echo fMRI lecture from the `2018 NIH FMRI Summer Course`_ by Javier Gonzalez-Castillo + | `Slides from 2018 NIH FMRI Summer Course`_ + +.. _educational session from OHBM 2017: https://www.pathlms.com/ohbm/courses/5158/sections/7788/video_presentations/75977 +.. _series of lectures from the OHBM 2017 multi-echo session: https://www.pathlms.com/ohbm/courses/5158/sections/7822 +.. _2018 NIH FMRI Summer Course: https://fmrif.nimh.nih.gov/course/fmrif_course/2018/14_Javier_20180713 +.. 
_Slides from 2018 NIH FMRI Summer Course: https://fmrif.nimh.nih.gov/COURSE/fmrif_course/2018/content/14_Javier_20180713.pdf + +Multi-echo preprocessing software +--------------------------------- +tedana requires data that has already been preprocessed for head motion, alignment, etc. + +AFNI can process multi-echo data natively as well as apply tedana denoising through the use of +**afni_proc.py**. To see various implementations, start with Example 12 in the `afni_proc.py help`_ + +.. _afni_proc.py help: https://afni.nimh.nih.gov/pub/dist/doc/program_help/afni_proc.py.html + +`fmriprep` can also process multi-echo data, but is currently limited to using the optimally combined +timeseries. +For more details, see the `fmriprep workflows page`_. + +.. _fmriprep workflows page: https://fmriprep.readthedocs.io/en/stable/workflows.html + +Currently SPM and FSL do not natively support multi-echo fmri data processing. + +Other software that uses multi-echo fMRI +---------------------------------------- +``tedana`` represents only one approach to processing multi-echo data. +Currently there are a number of methods that can take advantage of or use the +information contained in multi-echo data. +These include: + +* | `3dMEPFM`_: A multi-echo implementation of 'paradigm free mapping', that is + | detection of neural events in the absence of a prespecified model. By + | leveraging the information present in multi-echo data, changes in relaxation + | time can be directly estimated and more events can be detected. + | For more information, see the `following paper`_. +* | `Bayesian approach to denoising`_: An alternative approach to separating out + | BOLD and non-BOLD signals within a Bayesian framework is currently under + | development. +* | `Multi-echo Group ICA`_: Current approaches to ICA just use a single run of + | data in order to perform denoising. An alternative approach is to use + | information from multiple subjects or multiple runs from a single subject + | in order to improve the classification of BOLD and non-BOLD components. +* | `Dual Echo Denoising`_: If the first echo can be collected early enough, + | there are currently methods that take advantage of the very limited BOLD + | weighting at these early echo times. +* | `qMRLab`_: This is a MATLAB software package for quantitative magnetic + | resonance imaging. While it does not support ME-fMRI, it does include methods + | for estimating T2*/S0 from high-resolution, complex-valued multi-echo GRE + | data with correction for background field gradients. + +.. _3dMEPFM: https://afni.nimh.nih.gov/pub/dist/doc/program_help/3dMEPFM.html +.. _following paper: https://www.sciencedirect.com/science/article/pii/S105381191930669X +.. _Bayesian approach to denoising: https://ww5.aievolution.com/hbm1901/index.cfm?do=abs.viewAbs&abs=5026 +.. _Multi-echo Group ICA: https://ww5.aievolution.com/hbm1901/index.cfm?do=abs.viewAbs&abs=1286 +.. _Dual Echo Denoising: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518782/ +.. _qMRLab: https://github.com/qMRLab/qMRLab + +Datasets +-------- +A number of multi-echo datasets have been made public so far. +This list is not necessarily up to date, so please check out OpenNeuro to potentially find more. 
+ +* `Multi-echo fMRI replication sample of autobiographical memory, prospection and theory of mind reasoning tasks`_ +* `Multi-echo Cambridge`_ +* `Multiband multi-echo imaging of simultaneous oxygenation and flow timeseries for resting state connectivity`_ +* `Valence processing differs across stimulus modalities`_ +* `Cambridge Centre for Ageing Neuroscience (Cam-CAN)`_ + +.. _Multi-echo fMRI replication sample of autobiographical memory, prospection and theory of mind reasoning tasks: https://openneuro.org/datasets/ds000210/ +.. _Multi-echo Cambridge: https://openneuro.org/datasets/ds000258 +.. _Multiband multi-echo imaging of simultaneous oxygenation and flow timeseries for resting state connectivity: https://openneuro.org/datasets/ds000254 +.. _Valence processing differs across stimulus modalities: https://openneuro.org/datasets/ds001491 +.. _Cambridge Centre for Ageing Neuroscience (Cam-CAN): https://camcan-archive.mrc-cbu.cam.ac.uk/dataaccess/ + +.. _spreadsheet of publications: + +Publications using multi-echo fMRI +---------------------------------- +You can view and suggest additions to this spreadsheet `here`_ +This is a volunteer-led effort so, if you know of a excluded publication, whether or not it is yours, +please add it. + +.. raw:: html + + + +.. _here: https://docs.google.com/spreadsheets/d/1WERojJyxFoqcg_tndUm5Kj0H1UfUc9Ban0jFGGfPaBk/edit#gid=0 diff --git a/docs/usage.rst b/docs/usage.rst index c82541300..8ce67dfc3 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,4 +1,4 @@ -tedana Usage +Using tedana ============ ``tedana`` minimally requires: @@ -20,8 +20,8 @@ for recommendations on doing so, see our general guidelines for .. _fMRIPrep: https://fmriprep.readthedocs.io .. _afni_proc.py: https://afni.nimh.nih.gov/pub/dist/doc/program_help/afni_proc.py.html -Run tedana ----------- +Running tedana +-------------- This is the full tedana workflow, which runs multi-echo ICA and outputs multi-echo denoised data along with many other derivatives. To see which files are generated by this workflow, check out the outputs page: @@ -34,15 +34,15 @@ https://tedana.readthedocs.io/en/latest/outputs.html .. note:: The ``--mask`` argument is not intended for use with very conservative region-of-interest - analyses. + analyses. One of the ways by which components are assessed as BOLD or non-BOLD is their spatial pattern, so overly conservative masks will invalidate several steps in the tedana - workflow. + workflow. To examine regions-of-interest with multi-echo data, apply masks after TE Dependent ANAlysis. -Run t2smap ----------- +Running t2smap +-------------- This workflow uses multi-echo data to optimally combine data across echoes and to estimate T2* and S0 maps or time series. To see which files are generated by this workflow, check out the workflow @@ -93,9 +93,9 @@ Instead, we recommend that researchers apply the same transforms to all echoes i That is, that they calculate head motion correction parameters from one echo and apply the resulting transformation to all echoes. -.. note:: +.. note:: Any intensity normalization or nuisance regressors should be applied to the data - *after* ``tedana`` calculates the BOLD and non-BOLD weighting of components. + *after* ``tedana`` calculates the BOLD and non-BOLD weighting of components. 
If this is not considered, resulting intensity gradients (e.g., in the case of scaling) or alignment parameters (e.g., in the case of motion correction, normalization) are likely to differ across echos, diff --git a/requirements.txt b/requirements.txt index 774e8fd8e..56135e864 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,10 @@ duecredit matplotlib -nibabel>=2.1.0 -nilearn -numpy>=1.14 +nibabel>=2.5.1 +nilearn>=0.5.2 +numpy>=1.15 pandas -scikit-learn -scipy +scikit-learn>=0.22 +scipy>=1.3.3 +threadpoolctl +bokeh diff --git a/setup.cfg b/setup.cfg index e618c6e50..d00a567c0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,3 +12,6 @@ exclude=*build/ ignore = E126,E402,W504 per-file-ignores = */__init__.py:F401 + +[tool:pytest] +log_cli = true diff --git a/setup.py b/setup.py index d5a4b82ef..fbcb8e963 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,8 @@ def main(): pkg_data = { 'tedana': [ 'tests/data/*', + 'reporting/data/*', + 'reporting/data/html/*', ] } diff --git a/tedana/combine.py b/tedana/combine.py index 89e995f62..db758d246 100644 --- a/tedana/combine.py +++ b/tedana/combine.py @@ -91,15 +91,19 @@ def _combine_paid(data, tes): "International Society for Magnetic Resonance in Medicine, " "55(6), 1227-1235.") n_vols = data.shape[-1] - alpha = data.mean(axis=-1) * tes + snr = data.mean(axis=-1) / data.std(axis=-1) + alpha = snr * tes alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_vols)) combined = np.average(data, axis=1, weights=alpha) return combined -def make_optcom(data, tes, mask, t2s=None, combmode='t2s', verbose=True): +def make_optcom(data, tes, adaptive_mask, t2s=None, combmode='t2s', verbose=True): """ - Optimally combine BOLD data across TEs. + Optimally combine BOLD data across TEs, using only those echos with reliable signal + across at least three echos. If the number of echos providing reliable signal is greater + than three but less than the total number of collected echos, we assume that later + echos do not provided meaningful signal. Parameters ---------- @@ -107,8 +111,8 @@ def make_optcom(data, tes, mask, t2s=None, combmode='t2s', verbose=True): Concatenated BOLD data. tes : (E,) :obj:`numpy.ndarray` Array of TEs, in seconds. - mask : (S,) :obj:`numpy.ndarray` - Brain mask in 3D array. + adaptive_mask : (S,) :obj:`numpy.ndarray` + Adaptive mask of the data indicating the number of echos with signal at each voxel t2s : (S [x T]) :obj:`numpy.ndarray` or None, optional Estimated T2* values. Only required if combmode = 't2s'. Default is None. @@ -142,12 +146,12 @@ def make_optcom(data, tes, mask, t2s=None, combmode='t2s', verbose=True): 'dimension of input data: {0} != ' '{1}'.format(len(tes), data.shape[1])) - if mask.ndim != 1: + if adaptive_mask.ndim != 1: raise ValueError('Mask is not 1D') - elif mask.shape[0] != data.shape[0]: + elif adaptive_mask.shape[0] != data.shape[0]: raise ValueError('Mask and data do not have same number of ' - 'voxels/samples: {0} != {1}'.format(mask.shape[0], - data.shape[0])) + 'voxels/samples: {0} != {1}'.format( + adaptive_mask.shape[0], data.shape[0])) if combmode not in ['t2s', 'paid']: raise ValueError("Argument 'combmode' must be either 't2s' or 'paid'") @@ -158,23 +162,32 @@ def make_optcom(data, tes, mask, t2s=None, combmode='t2s', verbose=True): LGR.warning("Argument 't2s' is not required if 'combmode' is 'paid'. " "'t2s' array will not be used.") - data = data[mask, :, :] # mask out empty voxels/samples - tes = np.array(tes)[np.newaxis, ...] 
# (1 x E) array_like - if combmode == 'paid': - LGR.info('Optimally combining data with parallel-acquired inhomogeneity ' - 'desensitized (PAID) method') - combined = _combine_paid(data, tes) + LGR.info('Optimally combining data with parallel-acquired ' + 'inhomogeneity desensitized (PAID) method') else: if t2s.ndim == 1: - msg = 'Optimally combining data with voxel-wise T2 estimates' + msg = 'Optimally combining data with voxel-wise T2* estimates' else: - msg = ('Optimally combining data with voxel- and volume-wise T2 ' + msg = ('Optimally combining data with voxel- and volume-wise T2* ' 'estimates') - t2s = t2s[mask, ..., np.newaxis] # mask out empty voxels/samples - LGR.info(msg) - combined = _combine_t2s(data, tes, t2s) + + mask = adaptive_mask >= 3 + data = data[mask, :, :] # mask out unstable voxels/samples + tes = np.array(tes)[np.newaxis, ...] # (1 x E) array_like + combined = np.zeros((data.shape[0], data.shape[2])) + for echo in np.unique(adaptive_mask[mask]): + echo_idx = adaptive_mask[mask] == echo + + if combmode == 'paid': + combined[echo_idx, :] = _combine_paid(data[echo_idx, :echo, :], + tes[:echo]) + else: + t2s_ = t2s[mask, ..., np.newaxis] # mask out empty voxels/samples + + combined[echo_idx, :] = _combine_t2s( + data[echo_idx, :echo, :], tes[:, :echo], t2s_[echo_idx, ...]) combined = unmask(combined, mask) return combined diff --git a/tedana/decay.py b/tedana/decay.py index 8304d30e6..a01d1e8ef 100644 --- a/tedana/decay.py +++ b/tedana/decay.py @@ -2,8 +2,10 @@ Functions to estimate S0 and T2* from multi-echo data. """ import logging -import scipy import numpy as np +import scipy +from scipy import stats + from tedana import utils LGR = logging.getLogger(__name__) @@ -268,6 +270,13 @@ def fit_decay(data, tes, mask, adaptive_mask, fittype): t2s_full = utils.unmask(t2s_full, mask) s0_full = utils.unmask(s0_full, mask) + # set a hard cap for the T2* map + # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile + cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5, + interpolation_method='lower') + LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10)) + t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s + return t2s_limited, s0_limited, t2s_full, s0_full diff --git a/tedana/decomposition/__init__.py b/tedana/decomposition/__init__.py index 25b79bdde..e984088b6 100644 --- a/tedana/decomposition/__init__.py +++ b/tedana/decomposition/__init__.py @@ -3,6 +3,6 @@ from .pca import tedpca from .ica import tedica +from .ma_pca import ma_pca, ent_rate_sp - -__all__ = ['tedpca', 'tedica'] +__all__ = ['tedpca', 'tedica', 'ma_pca', 'ent_rate_sp'] diff --git a/tedana/decomposition/ma_pca.py b/tedana/decomposition/ma_pca.py new file mode 100644 index 000000000..23f84359f --- /dev/null +++ b/tedana/decomposition/ma_pca.py @@ -0,0 +1,614 @@ +""" +PCA based on Moving Average (stationary Gaussian) process +""" +import logging + +import numpy as np + +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler + +from scipy.linalg import svd +from scipy.signal import detrend, fftconvolve +from scipy.fftpack import fftshift, fftn + +LGR = logging.getLogger(__name__) + + +def _autocorr(data): + """ + Calculates the auto correlation of a given array. 
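+
+    Only the non-negative lags (the second half of the full correlation) are
+    returned.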
+ + Parameters + ---------- + data : array-like + The array to calculate the autocorrelation of + + Returns + ------- + u : ndarray + The array of autocorrelations + """ + u = np.correlate(data, data, mode='full') + # Take upper half of correlation matrix + return u[u.size // 2:] + + +def _check_order(order_in): + """ + Checks the order passed to the window functions. + + Parameters + ---------- + order_in : int + The order to be passed to the window function + + Returns + ------- + n_out : ndarray + An integer order array + w : list + The window to be used + trivialwin : boolean + Whether the window is trivial (w in [0,1]) + """ + + w = [] + trivialwin = False + + # Special case of negative orders: + if order_in < 0: + raise ValueError('Order cannot be less than zero.') + + order_out = np.round(order_in) + if not np.array_equal(order_in, order_out): + LGR.warning('Rounded order to nearest integer') + + # Special cases: + if not order_out or order_out == 0: + w = np.zeros((0, 1)) # Empty matrix: 0-by-1 + trivialwin = True + elif order_out == 1: + w = 1 + trivialwin = True + + return order_out, w, trivialwin + + +def _parzen_win(n_points): + """ + Returns the N-point Parzen (de la Valle-Poussin) window in a column vector. + + Parameters + ---------- + n_points : int + Number of non-zero points the window must contain + + Returns + ------- + parzen_w : 1D array + The Parzen window + + Notes + ----- + Maths are described in the following MATLAB documentation page: + https://www.mathworks.com/help/signal/ref/parzenwin.html + + References + ---------- + Harris, Fredric J. โ€œOn the Use of Windows for Harmonic Analysis + with the Discrete Fourier Transform.โ€ Proceedings of the IEEE. + Vol. 66, January 1978, pp. 51โ€“83. + """ + + # Check for valid window length (i.e., n < 0) + n_points, parzen_w, trivialwin = _check_order(n_points) + if trivialwin: + return parzen_w + + # Index vectors + k = np.arange(-(n_points - 1) / 2, ((n_points - 1) / 2) + 1) + k1 = k[k < -(n_points - 1) / 4] + k2 = k[abs(k) <= (n_points - 1) / 4] + + # Equation 37 of [1]: window defined in three sections + parzen_w1 = 2 * (1 - abs(k1) / (n_points / 2))**3 + parzen_w2 = 1 - 6 * (abs(k2) / (n_points / 2))**2 + 6 * (abs(k2) / (n_points / 2))**3 + parzen_w = np.hstack((parzen_w1, parzen_w2, parzen_w1[::-1])).T + + return parzen_w + + +def ent_rate_sp(data, sm_window): + """ + Calculate the entropy rate of a stationary Gaussian random process using + spectrum estimation with smoothing window. + + Parameters + ---------- + data : ndarray + Data to calculate the entropy rate of and smooth + sm_window : boolean + Whether there is a Parzen window to use + + Returns + ------- + ent_rate : float + The entropy rate + + Notes + ----- + This function attempts to calculate the entropy rate following + + References + ---------- + Li, Y.O., Adalฤฑ, T. and Calhoun, V.D., (2007). + Estimating the number of independent components for + functional magnetic resonance imaging data. + Human brain mapping, 28(11), pp.1251-1266. 
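+
+    Examples
+    --------
+    A minimal, illustrative call on random data (the array shape here is arbitrary):
+
+    >>> import numpy as np
+    >>> data = np.random.RandomState(0).rand(16, 16, 16)
+    >>> ent_rate = ent_rate_sp(data, sm_window=1)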
+ """ + + dims = data.shape + + if data.ndim == 3 and min(dims) != 1: + pass + else: + raise ValueError('Incorrect matrix dimensions.') + + # Normalize x_sb to be unit variance + data_std = np.std(np.reshape(data, (-1, 1))) + + # Make sure we do not divide by zero + if data_std == 0: + raise ValueError('Divide by zero encountered.') + data = data / data_std + + if sm_window: + M = [int(i) for i in np.ceil(np.array(dims) / 10)] + + # Get Parzen window for each spatial direction + parzen_w_3 = np.zeros((2 * dims[2] - 1, )) + parzen_w_3[(dims[2] - M[2] - 1):(dims[2] + M[2])] = _parzen_win(2 * M[2] + 1) + + parzen_w_2 = np.zeros((2 * dims[1] - 1, )) + parzen_w_2[(dims[1] - M[1] - 1):(dims[1] + M[1])] = _parzen_win(2 * M[1] + 1) + + parzen_w_1 = np.zeros((2 * dims[0] - 1, )) + parzen_w_1[(dims[0] - M[0] - 1):(dims[0] + M[0])] = _parzen_win(2 * M[0] + 1) + + # Apply windows to 3D + # TODO: replace correlate2d with 3d if possible + data_corr = np.zeros((2 * dims[0] - 1, 2 * dims[1] - 1, 2 * dims[2] - 1)) + for m3 in range(dims[2] - 1): + temp = np.zeros((2 * dims[0] - 1, 2 * dims[1] - 1)) + for k in range(dims[2] - m3): + temp += fftconvolve(data[:, :, k + m3], data[::-1, ::-1, k]) + # default option: + # computes raw correlations with NO normalization + # -- Matlab help on xcorr + data_corr[:, :, (dims[2] - 1) - m3] = temp + data_corr[:, :, (dims[2] - 1) + m3] = temp + + # Create bias-correcting vectors + v1 = np.hstack((np.arange(1, dims[0] + 1), + np.arange(dims[0] - 1, 0, -1)))[np.newaxis, :] + v2 = np.hstack((np.arange(1, dims[1] + 1), + np.arange(dims[1] - 1, 0, -1)))[np.newaxis, :] + v3 = np.arange(dims[2], 0, -1) + + vd = np.dot(v1.T, v2) + vcu = np.zeros((2 * dims[0] - 1, 2 * dims[1] - 1, 2 * dims[2] - 1)) + for m3 in range(dims[2]): + vcu[:, :, (dims[2] - 1) - m3] = vd * v3[m3] + vcu[:, :, (dims[2] - 1) + m3] = vd * v3[m3] + + data_corr /= vcu + + # Scale Parzen windows + parzen_window_2D = np.dot(parzen_w_1[np.newaxis, :].T, + parzen_w_2[np.newaxis, :]) + parzen_window_3D = np.zeros((2 * dims[0] - 1, 2 * dims[1] - 1, 2 * dims[2] - 1)) + for m3 in range(dims[2] - 1): + parzen_window_3D[:, :, (dims[2] - 1) - m3] = np.dot( + parzen_window_2D, parzen_w_3[dims[2] - 1 - m3]) + parzen_window_3D[:, :, (dims[2] - 1) + m3] = np.dot( + parzen_window_2D, parzen_w_3[dims[2] - 1 + m3]) + + # Apply 3D Parzen Window + data_corr *= parzen_window_3D + data_fft = abs(fftshift(fftn(data_corr))) + data_fft[data_fft < 1e-4] = 1e-4 + + # Estimation of the entropy rate + ent_rate = 0.5 * np.log(2 * np.pi * np.exp(1)) + np.sum(np.log(abs( + (data_fft)))[:]) / 2 / np.sum(abs(data_fft)[:]) + + return ent_rate + + +def _est_indp_sp(data): + """ + Estimate the effective number of independent samples based on the maximum + entropy rate principle of stationary random process. + + Parameters + ---------- + data : ndarray + The data to have the number of samples estimated + + Returns + ------- + n_iters : int + Number of iterations required to estimate entropy rate + ent_rate : float + The entropy rate of the data + + Notes + ----- + This function estimates the effective number of independent samples by omitting + the least significant components with the subsampling scheme (Li et al., 2007) + """ + + dims = data.shape + n_iters_0 = None + + for j in range(np.min(dims) - 1): + data_sb = _subsampling(data, j + 1) + ent_rate = ent_rate_sp(data_sb, 1) + + # Upper-bound. 
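+        # For a unit-variance i.i.d. Gaussian sequence, the entropy rate is
+        # 0.5 * ln(2 * pi * e) ~= 1.42, so 1.41 is used as the reference value.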
+ ent_ref = 1.41 + + # If entropy rate of a subsampled Gaussian sequence reaches the upper bound + # of the entropy rate, the subsampled sequence is an i.i.d. sequence. + if ent_rate > ent_ref: + n_iters_0 = j + break + + if n_iters_0 is None: + raise ValueError('Ill conditioned data, can not estimate ' + 'independent samples.') + n_iters = n_iters_0 + LGR.debug('Estimated the entropy rate of the Gaussian component ' + 'with subsampling depth {}'.format(j + 1)) + + return n_iters, ent_rate + + +def _subsampling(data, sub_depth): + """ + Subsampling the data evenly with space 'sub_depth'. + + Parameters + ---------- + data : ndarray + The data to be subsampled + sub_depth : int + The subsampling depth + + Returns + ------- + out : ndarray + Subsampled data + """ + + # First index from which to start subsampling for each dimension + idx_0 = [0, 0, 0] + ndims = data.shape + + if data.ndim == 3 and np.min(ndims) != 1: # 3D + out = data[np.arange( + idx_0[0], ndims[0], sub_depth), :, :][:, np.arange( + idx_0[1], ndims[1], sub_depth), :][:, :, np.arange(idx_0[2], ndims[2], sub_depth)] + else: + raise ValueError('Unrecognized matrix dimension! )' + 'Input array must be 3D with min dimension > 1.') + + return out + + +def _kurtn(data): + """ + Normalized kurtosis funtion so that for a Gaussian r.v. the kurtn(g) = 0. + + Parameters + ---------- + data : ndarray + The data to calculate the kurtosis of + + Returns + ------- + kurt : (1:N) array-like + The kurtosis of each vector in x along the second dimension. For + tedana, this will be the kurtosis of each PCA component. + """ + + kurt = np.zeros((data.shape[1], 1)) + + for i in range(data.shape[1]): + data_norm = detrend(data[:, i], type='constant') + data_norm /= np.std(data_norm) + kurt[i] = np.mean(data_norm**4) - 3 + + kurt[kurt < 0] = 0 + + return kurt + + +def _icatb_svd(data, n_comps=None): + """ + Run Singular Value Decomposition (SVD) on input data and extracts the + given number of components (n_comps). + + Parameters + ---------- + data : array + The data to compute SVD for + n_comps : int + Number of PCA components to be kept + + Returns + ------- + V : 2D array + Eigenvectors from SVD + Lambda : float + Eigenvalues + """ + + if not n_comps: + n_comps = np.min((data.shape[0], data.shape[1])) + + _, Lambda, vh = svd(data, full_matrices=False) + + # Sort eigen vectors in Ascending order + V = vh.T + Lambda = Lambda / np.sqrt(data.shape[0] - 1) # Whitening (sklearn) + inds = np.argsort(np.power(Lambda, 2)) + Lambda = np.power(Lambda, 2)[inds] + V = V[:, inds] + sumAll = np.sum(Lambda) + + # Return only the extracted components + V = V[:, (V.shape[1] - n_comps):] + Lambda = Lambda[Lambda.shape[0] - n_comps:] + sumUsed = np.sum(Lambda) + retained = (sumUsed / sumAll) * 100 + LGR.debug('{ret}% of non-zero components retained'.format(ret=retained)) + + return V, Lambda + + +def _eigensp_adj(lam, n, p): + """ + Eigen spectrum adjustment for EVD on finite samples. + + Parameters + ---------- + lam : [Px1] array-like + Component eigenvalues + n : int + Effective number of i.i.d. samples. + p : int + Number of eigen values. + + Returns + ------- + lam_adj : (p,) array-like + adjusted eigen values. + + Notes + ----- + Adjusts the eigen spectrum to account for the finite samples + after subsampling (Li et al., 2007) + + References + ---------- + Li, Y.O., Adalฤฑ, T. and Calhoun, V.D., (2007). + Estimating the number of independent components for + functional magnetic resonance imaging data. + Human brain mapping, 28(11), pp.1251-1266. 
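The adjustment implemented below builds its empirical eigenvalue spectrum from what looks like the Marchenko-Pastur density with aspect ratio r = p / n; the bm and bp terms in the code are the edges of that density's support, (1 - sqrt(r))^2 and (1 + sqrt(r))^2. A toy illustration with made-up sizes:

import numpy as np

n, p = 400, 100                # hypothetical effective sample count and eigenvalue count
r = p / n
bm = (1 - np.sqrt(r)) ** 2     # lower edge of the limiting eigenvalue support
bp = (1 + np.sqrt(r)) ** 2     # upper edge
print(bm, bp)                  # 0.25 2.25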
+ """ + + r = p / n + bp = np.power((1 + np.sqrt(r)), 2) + bm = np.power((1 - np.sqrt(r)), 2) + vv_step = (bp - bm) / (5 * p - 1) + vv = np.arange(bm, bp + vv_step, vv_step) + gv = (1 / (2 * np.pi * r * vv)) * np.sqrt(abs((vv - bm) * (bp - vv))) + gvd = np.zeros(gv.shape) + for i in range(gv.shape[0]): + gvd[i] = sum(gv[0:i]) + + gvd /= np.max(gvd) + + lam_emp = np.zeros(lam.shape) + for idx, i in enumerate(np.arange(1, p + 1)): + i_norm = (i) / p + minx = np.argmin(abs(i_norm - gvd)) + lam_emp[idx] = vv[minx] + + lam_emp = np.flip(lam_emp) + + lam_adj = lam / lam_emp + + return lam_adj + + +def ma_pca(data_nib, mask_nib, criteria='mdl'): + """ + Run Singular Value Decomposition (SVD) on input data, + automatically select components based on a Moving Average + (stationary Gaussian) process. Finally perform PCA with + selected number of components. + + Parameters + ---------- + data_nib : 4D nibabel + Unmasked data to compute the PCA on. + mask_nib : 4D nibabel + Mask to apply on data_nib. + criteria : string in ['aic', 'kic', mdl'] + Criteria to select the number of components; + default='mdl'. + + Returns + ------- + u : (S [*E] x C) array-like + Component weight map for each component. + s : (C,) array-like + Variance explained for each component. + varex_norm : (n_components,) array-like + Explained variance ratio. + v : (T x C) array-like + Component timeseries. + + Notes + ----- + aic : Akaike Information Criterion. Least aggressive option. + kic : Kullback-Leibler Information Criterion. Stands in the + middle in terms of aggressiveness. + mdl : Minimum Description Length. Most aggressive + (and recommended) option. + """ + + data_nib = data_nib.get_data() + mask_nib = mask_nib.get_data() + [Nx, Ny, Nz, Nt] = data_nib.shape + data_nib_V = np.reshape(data_nib, (Nx * Ny * Nz, Nt), order='F') + maskvec = np.reshape(mask_nib, Nx * Ny * Nz, order='F') + data_non_normalized = data_nib_V[maskvec == 1, :] + scaler = StandardScaler(with_mean=True, with_std=True) + # TODO: determine if tedana is already normalizing before this + data = scaler.fit_transform(data_non_normalized) # This was X_sc + data = data_non_normalized + + LGR.info('Performing SVD on original OC data...') + V, EigenValues = _icatb_svd(data, Nt) + LGR.info('SVD done on original OC data') + + # Reordering of values + EigenValues = EigenValues[::-1] + dataN = np.dot(data, V[:, ::-1]) + # Potentially the small differences come from the different signs on V + + # Using 12 gaussian components from middle, top and bottom gaussian + # components to determine the subsampling depth. Final subsampling depth is + # determined using median + kurtv1 = _kurtn(dataN) + kurtv1[EigenValues > np.mean(EigenValues)] = 1000 + idx_gauss = np.where( + ((kurtv1[:, 0] < 0.3) & (kurtv1[:, 0] > 0) & (EigenValues > np.finfo(float).eps) + ) == 1)[0] # DOUBT: make sure np.where is giving us just one tuple + idx = np.array(idx_gauss[:]).T + dfs = np.sum(EigenValues > np.finfo(float).eps) # degrees of freedom + minTp = 12 + + if (len(idx) >= minTp): + middle = int(np.round(len(idx) / 2)) + idx = np.hstack([idx[0:4], idx[middle - 1:middle + 3], idx[-4:]]) + else: + minTp = np.min([minTp, dfs]) + idx = np.arange(dfs - minTp, dfs) + + idx = np.unique(idx) + + # Estimate the subsampling depth for effectively i.i.d. 
samples + LGR.info('Estimating the subsampling depth for effective i.i.d samples...') + mask_ND = np.reshape(maskvec, (Nx, Ny, Nz), order='F') + sub_depth = len(idx) + sub_iid_sp = np.zeros((sub_depth, )) + for i in range(sub_depth): + x_single = np.zeros(Nx * Ny * Nz) + x_single[maskvec == 1] = dataN[:, idx[i]] + x_single = np.reshape(x_single, (Nx, Ny, Nz), order='F') + sub_iid_sp[i] = _est_indp_sp(x_single)[0] + 1 + if i > 6: + tmp_sub_sp = sub_iid_sp[0:i] + tmp_sub_median = np.round(np.median(tmp_sub_sp)) + if np.sum(tmp_sub_sp == tmp_sub_median) > 6: + sub_iid_sp = tmp_sub_sp + break + dim_n = x_single.ndim + + sub_iid_sp_median = int(np.round(np.median(sub_iid_sp))) + if np.floor(np.power(np.sum(maskvec) / Nt, 1 / dim_n)) < sub_iid_sp_median: + sub_iid_sp_median = int(np.floor(np.power(np.sum(maskvec) / Nt, 1 / dim_n))) + N = np.round(np.sum(maskvec) / np.power(sub_iid_sp_median, dim_n)) + + if sub_iid_sp_median != 1: + mask_s = _subsampling(mask_ND, sub_iid_sp_median) + mask_s_1d = np.reshape(mask_s, np.prod(mask_s.shape), order='F') + dat = np.zeros((int(np.sum(mask_s_1d)), Nt)) + LGR.info('Generating subsampled i.i.d. OC data...') + for i in range(Nt): + x_single = np.zeros((Nx * Ny * Nz, )) + x_single[maskvec == 1] = data[:, i] + x_single = np.reshape(x_single, (Nx, Ny, Nz), order='F') + dat0 = _subsampling(x_single, sub_iid_sp_median) + dat0 = np.reshape(dat0, np.prod(dat0.shape), order='F') + dat[:, i] = dat0[mask_s_1d == 1] + + # Perform Variance Normalization + dat = scaler.fit_transform(dat) + + # (completed) + LGR.info('Performing SVD on subsampled i.i.d. OC data...') + [V, EigenValues] = _icatb_svd(dat, Nt) + LGR.info('SVD done on subsampled i.i.d. OC data') + EigenValues = EigenValues[::-1] + + LGR.info('Effective number of i.i.d. samples %d' % N) + + # Make eigen spectrum adjustment + LGR.info('Perform eigen spectrum adjustment ...') + EigenValues = _eigensp_adj(EigenValues, N, EigenValues.shape[0]) + # (completed) + if np.sum(np.imag(EigenValues)): + raise ValueError('Invalid eigen value found for the subsampled data.') + + # Correction on the ill-conditioned results (when tdim is large, + # some least significant eigenvalues become small negative numbers) + if EigenValues[np.real(EigenValues) <= np.finfo(float).eps].shape[0] > 0: + EigenValues[np.real(EigenValues) <= np.finfo(float).eps] = np.min( + EigenValues[np.real(EigenValues) >= np.finfo(float).eps]) + LGR.info('Estimating the dimension ...') + p = Nt + aic = np.zeros(p - 1) + kic = np.zeros(p - 1) + mdl = np.zeros(p - 1) + + for k_idx, k in enumerate(np.arange(1, p)): + LH = np.log(np.prod(np.power(EigenValues[k:], 1 / (p - k))) / np.mean(EigenValues[k:])) + mlh = 0.5 * N * (p - k) * LH + df = 1 + 0.5 * k * (2 * p - k + 1) + aic[k_idx] = (-2 * mlh) + (2 * df) + kic[k_idx] = (-2 * mlh) + (3 * df) + mdl[k_idx] = -mlh + (0.5 * df * np.log(N)) + + itc = np.row_stack([aic, kic, mdl]) + + if criteria == 'aic': + criteria_idx = 0 + elif criteria == 'kic': + criteria_idx = 1 + elif criteria == 'mdl': + criteria_idx = 2 + + dlap = np.diff(itc[criteria_idx, :]) + a = np.where(dlap > 0)[0] + 1 # Plus 1 to + if a.size == 0: + comp_est = itc[criteria_idx, :].shape[0] + else: + comp_est = a[0] + + LGR.info('Estimated components is found out to be %d' % comp_est) + + # PCA with estimated number of components + ppca = PCA(n_components=comp_est, svd_solver='full', copy=False) + ppca.fit(data) + v = ppca.components_.T + s = ppca.explained_variance_ + u = np.dot(np.dot(data, v), np.diag(1. 
/ s)) + varex_norm = ppca.explained_variance_ratio_ + + return u, s, varex_norm, v diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py index 5ddc657cb..41ee405d8 100644 --- a/tedana/decomposition/pca.py +++ b/tedana/decomposition/pca.py @@ -10,57 +10,15 @@ from sklearn.decomposition import PCA from tedana import metrics, utils, io -from tedana.decomposition._utils import eimask +from tedana.decomposition import ma_pca from tedana.stats import computefeats2 from tedana.selection import kundu_tedpca -from tedana.due import due, BibTeX LGR = logging.getLogger(__name__) RepLGR = logging.getLogger('REPORT') RefLGR = logging.getLogger('REFERENCES') -@due.dcite(BibTeX( - """ - @inproceedings{minka2001automatic, - title={Automatic choice of dimensionality for PCA}, - author={Minka, Thomas P}, - booktitle={Advances in neural information processing systems}, - pages={598--604}, - year={2001} - } - """), - description='Introduces method for choosing PCA dimensionality ' - 'automatically') -def run_mlepca(data): - """ - Run Singular Value Decomposition (SVD) on input data, - automatically select components on MLE variance cut-off. - - Parameters - ---------- - data : (S [*E] x T) array_like - Optimally combined (S x T) or full multi-echo (S*E x T) data. - - Returns - ------- - u : (S [*E] x C) array_like - Component weight map for each component. - s : (C,) array_like - Variance explained for each component. - v : (T x C) array_like - Component timeseries. - """ - # do PC dimension selection and get eigenvalue cutoff - ppca = PCA(n_components='mle', svd_solver='full', copy=False) - ppca.fit(data) - v = ppca.components_.T - s = ppca.explained_variance_ - u = np.dot(np.dot(data, v), np.diag(1. / s)) - varex_norm = ppca.explained_variance_ratio_ - return u, s, varex_norm, v - - def low_mem_pca(data): """ Run Singular Value Decomposition (SVD) on input data. @@ -88,8 +46,8 @@ def low_mem_pca(data): return u, s, v -def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, - ref_img, tes, algorithm='mle', source_tes=-1, kdaw=10., rdaw=1., +def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, + ref_img, tes, algorithm='mdl', kdaw=10., rdaw=1., out_dir='.', verbose=False, low_mem=False): """ Use principal components analysis (PCA) to identify and remove thermal @@ -107,22 +65,19 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, Poser 2006 mask : (S,) array_like Boolean mask array - t2s : (S,) array_like - Map of voxel-wise T2* estimates. + adaptive_mask : (S,) array_like + Adaptive mask of the data indicating the number of echos with signal at each voxel t2sG : (S,) array_like Map of voxel-wise T2* estimates. ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk tes : :obj:`list` List of echo times associated with `data_cat`, in milliseconds - algorithm : {'mle', 'kundu', 'kundu-stabilize'}, optional - Method with which to select components in TEDPCA. Default is 'mle'. - source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional - Which echos to use in PCA. Values -1 and 0 are special, where a value - of -1 will indicate using the optimal combination of the echos - and 0 will indicate using all the echos. A list can be provided - to indicate a subset of echos. - Default: -1 + algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional + Method with which to select components in TEDPCA. Default is 'mdl'. 
PCA + decomposition with the mdl, kic and aic options are based on a Moving Average + (stationary Gaussian) process and are ordered from most to least aggresive. + See (Li et al., 2007). kdaw : :obj:`float`, optional Dimensionality augmentation weight for Kappa calculations. Must be a non-negative float, or -1 (a special value). Default is 10. @@ -193,18 +148,7 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, pca_components.nii.gz Component weight maps. ====================== ================================================= """ - if low_mem and algorithm == 'mle': - LGR.warning('Low memory option is not compatible with MLE ' - 'dimensionality estimation. Switching to Kundu decision ' - 'tree.') - algorithm = 'kundu' - - if algorithm == 'mle': - alg_str = "using MLE dimensionality estimation (Minka, 2001)" - RefLGR.info("Minka, T. P. (2001). Automatic choice of dimensionality " - "for PCA. In Advances in neural information processing " - "systems (pp. 598-604).") - elif algorithm == 'kundu': + if algorithm == 'kundu': alg_str = ("followed by the Kundu component selection decision " "tree (Kundu et al., 2013)") RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., " @@ -222,38 +166,31 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, "connectivity mapping using multiecho fMRI. Proceedings " "of the National Academy of Sciences, 110(40), " "16187-16192.") - - if source_tes == -1: - dat_str = "the optimally combined data" - elif source_tes == 0: - dat_str = "the z-concatenated multi-echo data" else: - dat_str = "a z-concatenated subset of echoes from the input data" + alg_str = ("based on the PCA component estimation with a Moving Average" + "(stationary Gaussian) process (Li et al., 2007)") + RefLGR.info("Li, Y.O., Adalฤฑ, T. and Calhoun, V.D., (2007). " + "Estimating the number of independent components for " + "functional magnetic resonance imaging data. " + "Human brain mapping, 28(11), pp.1251-1266.") RepLGR.info("Principal component analysis {0} was applied to " - "{1} for dimensionality reduction.".format(alg_str, dat_str)) + "the optimally combined data for dimensionality " + "reduction.".format(alg_str)) n_samp, n_echos, n_vols = data_cat.shape - source_tes = np.array([int(ee) for ee in str(source_tes).split(',')]) - - if len(source_tes) == 1 and source_tes[0] == -1: - LGR.info('Computing PCA of optimally combined multi-echo data') - data = data_oc[mask, :][:, np.newaxis, :] - elif len(source_tes) == 1 and source_tes[0] == 0: - LGR.info('Computing PCA of spatially concatenated multi-echo data') - data = data_cat[mask, ...] 
- else: - LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes]))) - data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1) - eim = np.squeeze(eimask(data)) - data = np.squeeze(data[eim]) + LGR.info('Computing PCA of optimally combined multi-echo data') + data = data_oc[mask, :] data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T # var normalize ts data_z = (data_z - data_z.mean()) / data_z.std() # var normalize everything - if algorithm == 'mle': - voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z) + if algorithm in ['mdl', 'aic', 'kic']: + data_img = io.new_nii_like(ref_img, utils.unmask(data, mask)) + mask_img = io.new_nii_like(ref_img, mask.astype(int)) + voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca( + data_img, mask_img, algorithm) elif low_mem: voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z) varex_norm = varex / varex.sum() @@ -267,19 +204,17 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, varex_norm = varex / varex.sum() # Compute Kappa and Rho for PCA comps - eimum = np.atleast_2d(eim) - eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1]) - eimum = eimum.prod(axis=1) - o = np.zeros((mask.shape[0], *eimum.shape[1:])) - o[mask, ...] = eimum - eimum = np.squeeze(o).astype(bool) - - # Normalize each component's time series - vTmixN = stats.zscore(comp_ts, axis=0) - comptable, _, _, _ = metrics.dependence_metrics( - data_cat, data_oc, comp_ts, t2s, tes, ref_img, - reindex=False, mmixN=vTmixN, algorithm=None, - label='mepca_', out_dir=out_dir, verbose=verbose) + required_metrics = [ + 'kappa', 'rho', 'countnoise', 'countsigFT2', 'countsigFS0', + 'dice_FT2', 'dice_FS0', 'signal-noise_t', + 'variance explained', 'normalized variance explained', + 'd_table_score' + ] + comptable, _ = metrics.collect.generate_metrics( + data_cat, data_oc, comp_ts, mask, adaptive_mask, + tes, ref_img, + metrics=required_metrics, sort_by=None + ) # varex_norm from PCA overrides varex_norm from dependence_metrics, # but we retain the original @@ -297,9 +232,9 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False) elif algorithm == 'kundu-stabilize': comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True) - elif algorithm == 'mle': - LGR.info('Selected {0} components with MLE dimensionality ' - 'detection'.format(comptable.shape[0])) + elif algorithm in ['mdl', 'aic', 'kic']: + LGR.info('Selected {0} components with {1} dimensionality ' + 'detection'.format(comptable.shape[0], algorithm)) comptable['classification'] = 'accepted' comptable['rationale'] = '' @@ -310,8 +245,7 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names) mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False) - data_type = 'optimally combined data' if source_tes == -1 else 'z-concatenated data' - comptable['Description'] = 'PCA fit to {0}.'.format(data_type) + comptable['Description'] = 'PCA fit to optimally combined data.' mmix_dict = {} mmix_dict['Method'] = ('Principal components analysis implemented by ' 'sklearn. 
Components are sorted by variance ' @@ -323,8 +257,7 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, acc = comptable[comptable.classification == 'accepted'].index.values n_components = acc.size - voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * - varex[None, acc]) + voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc]) kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T) kept_data = stats.zscore(kept_data, axis=1) # variance normalize time series diff --git a/tedana/gscontrol.py b/tedana/gscontrol.py index 286c70eb7..be8a1a106 100644 --- a/tedana/gscontrol.py +++ b/tedana/gscontrol.py @@ -2,6 +2,7 @@ Global signal control methods """ import logging +import os.path as op import numpy as np from numpy.linalg import lstsq @@ -15,7 +16,7 @@ RefLGR = logging.getLogger('REFERENCES') -def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): +def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4): """ Removes global signal from individual echo `catd` and `optcom` time series @@ -35,6 +36,8 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): Number of echos in data. Should be the same as `E` dimension of `catd` ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk + out_dir : :obj:`str`, optional + Output directory. dtrank : :obj:`int`, optional Specifies degree of Legendre polynomial basis function for estimating spatial global signal. Default: 4 @@ -75,13 +78,13 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): detr = dat - np.dot(sol.T, Lmix.T)[0] sphis = (detr).min(axis=1) sphis -= sphis.mean() - io.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img) + io.filewrite(utils.unmask(sphis, Gmask), op.join(out_dir, 'T1gs'), ref_img) # find time course ofc the spatial global signal # make basis with the Legendre basis glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0] glsig = stats.zscore(glsig, axis=None) - np.savetxt('glsig.1D', glsig) + np.savetxt(op.join(out_dir, 'glsig.1D'), glsig) glbase = np.hstack([Lmix, glsig.T]) # Project global signal out of optimally combined data @@ -89,9 +92,9 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis] - io.filewrite(optcom, 'tsoc_orig', ref_img) + io.filewrite(optcom, op.join(out_dir, 'tsoc_orig'), ref_img) dm_optcom = utils.unmask(tsoc_nogs, Gmask) - io.filewrite(dm_optcom, 'tsoc_nogs', ref_img) + io.filewrite(dm_optcom, op.join(out_dir, 'tsoc_nogs'), ref_img) # Project glbase out of each echo dm_catd = catd.copy() # don't overwrite catd @@ -105,7 +108,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): return dm_catd, dm_optcom -def gscontrol_mmix(optcom_ts, mmix, mask, comptable, ref_img): +def gscontrol_mmix(optcom_ts, mmix, mask, comptable, ref_img, out_dir='.'): """ Perform global signal regression. @@ -123,6 +126,8 @@ def gscontrol_mmix(optcom_ts, mmix, mask, comptable, ref_img): each metric. The index should be the component number. ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk + out_dir : :obj:`str`, optional + Output directory. 
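Both global-signal-control functions now accept an out_dir argument, so their derived files are written inside the requested output directory instead of the current working directory. A sketch of the intended call pattern (the data arrays and reference image are placeholders, so the call itself is left commented):

import os.path as op
# from tedana.gscontrol import gscontrol_raw
# dm_catd, dm_optcom = gscontrol_raw(catd, optcom, n_echos=3, ref_img=ref_img,
#                                    out_dir='/scratch/tedana_out')
# With that out_dir, for example the global-signal time course ends up at:
print(op.join('/scratch/tedana_out', 'glsig.1D'))   # /scratch/tedana_out/glsig.1D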
Notes ----- @@ -165,7 +170,7 @@ def gscontrol_mmix(optcom_ts, mmix, mask, comptable, ref_img): bold_ts = np.dot(cbetas[:, acc], mmix[:, acc].T) t1_map = bold_ts.min(axis=-1) t1_map -= t1_map.mean() - io.filewrite(utils.unmask(t1_map, mask), 'sphis_hik', ref_img) + io.filewrite(utils.unmask(t1_map, mask), op.join(out_dir, 'sphis_hik'), ref_img) t1_map = t1_map[:, np.newaxis] """ @@ -179,13 +184,14 @@ def gscontrol_mmix(optcom_ts, mmix, mask, comptable, ref_img): bold_noT1gs = bold_ts - np.dot(lstsq(glob_sig.T, bold_ts.T, rcond=None)[0].T, glob_sig) hik_ts = bold_noT1gs * optcom_std - io.filewrite(utils.unmask(hik_ts, mask), 'hik_ts_OC_T1c.nii', ref_img) + io.filewrite(utils.unmask(hik_ts, mask), op.join(out_dir, 'hik_ts_OC_T1c'), + ref_img) """ Make denoised version of T1-corrected time series """ medn_ts = optcom_mu + ((bold_noT1gs + resid) * optcom_std) - io.filewrite(utils.unmask(medn_ts, mask), 'dn_ts_OC_T1c.nii', ref_img) + io.filewrite(utils.unmask(medn_ts, mask), op.join(out_dir, 'dn_ts_OC_T1c'), ref_img) """ Orthogonalize mixing matrix w.r.t. T1-GS @@ -203,5 +209,5 @@ def gscontrol_mmix(optcom_ts, mmix, mask, comptable, ref_img): """ cbetas_norm = lstsq(mmixnogs_norm.T, data_norm.T, rcond=None)[0].T io.filewrite(utils.unmask(cbetas_norm[:, 2:], mask), - 'betas_hik_OC_T1c.nii', ref_img) - np.savetxt('meica_mix_T1c.1D', mmixnogs) + op.join(out_dir, 'betas_hik_OC_T1c'), ref_img) + np.savetxt(op.join(out_dir, 'meica_mix_T1c.1D'), mmixnogs) diff --git a/tedana/info.py b/tedana/info.py index 79c825f53..9cf59709e 100644 --- a/tedana/info.py +++ b/tedana/info.py @@ -33,9 +33,9 @@ 'nilearn', 'nibabel>=2.1.0', 'scipy', - 'versioneer', 'pandas', - 'matplotlib' + 'matplotlib', + 'threadpoolctl' ] TESTS_REQUIRES = [ @@ -45,6 +45,7 @@ ] EXTRA_REQUIRES = { + 'dev': ['versioneer'], 'doc': [ 'sphinx>=1.5.3', 'sphinx_rtd_theme', diff --git a/tedana/io.py b/tedana/io.py index 9eb756ba6..b8611f331 100644 --- a/tedana/io.py +++ b/tedana/io.py @@ -60,7 +60,7 @@ def split_ts(data, mmix, mask, comptable): return hikts, resid -def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''): +def write_split_ts(data, mmix, mask, comptable, ref_img, out_dir='.', suffix=''): """ Splits `data` into denoised / noise / ignored time series and saves to disk @@ -75,6 +75,8 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''): Boolean mask array ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk + out_dir : :obj:`str`, optional + Output directory. suffix : :obj:`str`, optional Appended to name of saved files (before extension). 
Default: '' @@ -116,22 +118,21 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''): if len(acc) != 0: fout = filewrite(utils.unmask(hikts, mask), - 'hik_ts_{0}'.format(suffix), ref_img) + op.join(out_dir, 'hik_ts_{0}'.format(suffix)), ref_img) LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout))) if len(rej) != 0: fout = filewrite(utils.unmask(lowkts, mask), - 'lowk_ts_{0}'.format(suffix), ref_img) + op.join(out_dir, 'lowk_ts_{0}'.format(suffix)), ref_img) LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout))) fout = filewrite(utils.unmask(dnts, mask), - 'dn_ts_{0}'.format(suffix), ref_img) + op.join(out_dir, 'dn_ts_{0}'.format(suffix)), ref_img) LGR.info('Writing denoised time series: {}'.format(op.abspath(fout))) - return varexpl -def writefeats(data, mmix, mask, ref_img, suffix=''): +def writefeats(data, mmix, mask, ref_img, out_dir='.', suffix=''): """ Converts `data` to component space with `mmix` and saves to disk @@ -146,6 +147,8 @@ def writefeats(data, mmix, mask, ref_img, suffix=''): Boolean mask array ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk + out_dir : :obj:`str`, optional + Output directory. suffix : :obj:`str`, optional Appended to name of saved files (before extension). Default: '' @@ -167,12 +170,11 @@ def writefeats(data, mmix, mask, ref_img, suffix=''): # write feature versions of components feats = utils.unmask(computefeats2(data, mmix, mask), mask) - fname = filewrite(feats, 'feats_{0}'.format(suffix), ref_img) - + fname = filewrite(feats, op.join(out_dir, 'feats_{0}'.format(suffix)), ref_img) return fname -def writeresults(ts, mask, comptable, mmix, n_vols, ref_img): +def writeresults(ts, mask, comptable, mmix, n_vols, ref_img, out_dir='.'): """ Denoises `ts` and saves all resulting files to disk @@ -193,6 +195,8 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img): Number of volumes in original time series ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk + out_dir : :obj:`str`, optional + Output directory. Notes ----- @@ -201,7 +205,6 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img): ====================== ================================================= Filename Content ====================== ================================================= - ts_OC.nii Optimally combined 4D time series. hik_ts_OC.nii High-Kappa time series. Generated by :py:func:`tedana.utils.io.write_split_ts`. midk_ts_OC.nii Mid-Kappa time series. Generated by @@ -214,28 +217,30 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img): betas_hik_OC.nii Denoised ICA coefficient feature set. feats_OC2.nii Z-normalized spatial component maps. Generated by :py:func:`tedana.utils.io.writefeats`. + ts_OC.nii Optimally combined 4D time series. 
====================== ================================================= """ acc = comptable[comptable.classification == 'accepted'].index.values - fout = filewrite(ts, 'ts_OC', ref_img) + fout = filewrite(ts, op.join(out_dir, 'ts_OC'), ref_img) LGR.info('Writing optimally combined time series: {}'.format(op.abspath(fout))) - write_split_ts(ts, mmix, mask, comptable, ref_img, suffix='OC') + write_split_ts(ts, mmix, mask, comptable, ref_img, out_dir=out_dir, suffix='OC') ts_B = get_coeffs(ts, mmix, mask) - fout = filewrite(ts_B, 'betas_OC', ref_img) + fout = filewrite(ts_B, op.join(out_dir, 'betas_OC'), ref_img) LGR.info('Writing full ICA coefficient feature set: {}'.format(op.abspath(fout))) if len(acc) != 0: - fout = filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img) + fout = filewrite(ts_B[:, acc], op.join(out_dir, 'betas_hik_OC'), ref_img) LGR.info('Writing denoised ICA coefficient feature set: {}'.format(op.abspath(fout))) fout = writefeats(split_ts(ts, mmix, mask, comptable)[0], - mmix[:, acc], mask, ref_img, suffix='OC2') + mmix[:, acc], mask, ref_img, out_dir=out_dir, + suffix='OC2') LGR.info('Writing Z-normalized spatial component maps: {}'.format(op.abspath(fout))) -def writeresults_echoes(catd, mmix, mask, comptable, ref_img): +def writeresults_echoes(catd, mmix, mask, comptable, ref_img, out_dir='.'): """ Saves individually denoised echos to disk @@ -253,6 +258,8 @@ def writeresults_echoes(catd, mmix, mask, comptable, ref_img): each metric. The index should be the component number. ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk + out_dir : :obj:`str`, optional + Output directory. Notes ----- @@ -279,7 +286,7 @@ def writeresults_echoes(catd, mmix, mask, comptable, ref_img): for i_echo in range(catd.shape[1]): LGR.info('Writing Kappa-filtered echo #{:01d} timeseries'.format(i_echo + 1)) write_split_ts(catd[:, i_echo, :], mmix, mask, comptable, ref_img, - suffix='e%i' % (i_echo + 1)) + out_dir=out_dir, suffix='e%i' % (i_echo + 1)) def new_nii_like(ref_img, data, affine=None, copy_header=True): @@ -351,7 +358,10 @@ def filewrite(data, filename, ref_img, gzip=True, copy_header=True): # FIXME: we only handle writing to nifti right now # get root of desired output file and save as nifti image - root, ext, add = splitext_addext(filename) + root = op.dirname(filename) + base = op.basename(filename) + base, ext, add = splitext_addext(base) + root = op.join(root, base) name = '{}.{}'.format(root, 'nii.gz' if gzip else 'nii') out.to_filename(name) diff --git a/tedana/metrics/__init__.py b/tedana/metrics/__init__.py index 95261dd14..8088e708c 100644 --- a/tedana/metrics/__init__.py +++ b/tedana/metrics/__init__.py @@ -1,9 +1,10 @@ # emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- # ex: set sts=4 ts=4 sw=4 et: -from .kundu_fit import ( - dependence_metrics, kundu_metrics, get_coeffs, computefeats2 +from .collect import ( + generate_metrics ) __all__ = [ - 'dependence_metrics', 'kundu_metrics', 'get_coeffs', 'computefeats2'] + 'generate_metrics' +] diff --git a/tedana/metrics/_utils.py b/tedana/metrics/_utils.py new file mode 100644 index 000000000..63e3318f8 --- /dev/null +++ b/tedana/metrics/_utils.py @@ -0,0 +1,183 @@ +""" +Misc. utils for metric calculation. 
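The filewrite change above now separates the directory from the basename before stripping the image extension, so a caller can pass op.join(out_dir, 'name') or even a path ending in .nii and still have the file written inside out_dir with a .nii.gz suffix. A rough illustration of that path handling (splitext_addext is nibabel's helper; the import path is assumed here):

import os.path as op
from nibabel.filename_parser import splitext_addext   # assumed location of the helper

filename = '/tmp/out/dn_ts_OC_T1c.nii'
root = op.dirname(filename)                            # '/tmp/out'
base, ext, add = splitext_addext(op.basename(filename))
print(op.join(root, base) + '.nii.gz')                 # /tmp/out/dn_ts_OC_T1c.nii.gz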
+""" +import logging + +import numpy as np +from scipy import stats + +LGR = logging.getLogger(__name__) + + +def dependency_resolver(dict_, requested_metrics, base_inputs): + """ + Identify all necessary metrics based on a list of requested metrics and + the metrics each one requires to be calculated, as defined in a dictionary. + + Parameters + ---------- + dict_ : :obj:`dict` + Dictionary containing lists, where each key is a metric name and its + associated value is the list of metrics or inputs required to calculate + it. + requested_metrics : :obj:`list` + Child metrics for which the function will determine parents. + base_inputs : :obj:`list` + A list of inputs to the metric collection function, to differentiate + them from metrics to be calculated. + + Returns + ------- + required_metrics :obj:`list` + A comprehensive list of all metrics and inputs required to generate all + of the requested inputs. + """ + not_found = [k for k in requested_metrics if k not in dict_.keys()] + if not_found: + raise ValueError('Unknown metric(s): {}'.format(', '.join(not_found))) + + required_metrics = requested_metrics + escape_counter = 0 + while True: + required_metrics_new = required_metrics[:] + for k in required_metrics: + if k in dict_.keys(): + new_metrics = dict_[k] + elif k not in base_inputs: + print("Warning: {} not found".format(k)) + required_metrics_new += new_metrics + if set(required_metrics) == set(required_metrics_new): + # There are no more parent metrics to calculate + break + else: + required_metrics = required_metrics_new + escape_counter += 1 + if escape_counter >= 10: + LGR.warning('dependency_resolver in infinite loop. Escaping early.') + break + return required_metrics + + +def determine_signs(weights, axis=0): + """ + Determine component-wise optimal signs using voxel-wise parameter estimates. + + Parameters + ---------- + weights : (S x C) array_like + Parameter estimates for optimally combined data against the mixing + matrix. + + Returns + ------- + signs : (C) array_like + Array of 1 and -1 values corresponding to the appropriate flips for the + mixing matrix's component time series. + """ + # compute skews to determine signs based on unnormalized weights, + signs = stats.skew(weights, axis=axis) + signs /= np.abs(signs) + return signs + + +def flip_components(*args, signs): + """ + Flip an arbitrary set of input arrays based on a set of signs. + + Parameters + ---------- + *args : array_like + Any number of arrays with one dimension the same length as signs. + If multiple dimensions share the same size as signs, behavior of this + function will be unpredictable. + signs : array_like of :obj:`int` + Array of +/- 1 by which to flip the values in each argument. + + Returns + ------- + *args : array_like + Input arrays after sign flipping. + """ + assert signs.ndim == 1, 'Argument "signs" must be one-dimensional.' + for arg in args: + assert len(signs) in arg.shape, \ + ('Size of argument "signs" must match size of one dimension in ' + 'each of the input arguments.') + assert sum(x == len(signs) for x in arg.shape) == 1, \ + ('Only one dimension of each input argument can match the length ' + 'of argument "signs".') + # correct mixing & weights signs based on spatial distribution tails + return [arg * signs for arg in args] + + +def sort_df(df, by='kappa', ascending=False): + """ + Sort DataFrame and get index. + + Parameters + ---------- + df : :obj:`pandas.DataFrame` + DataFrame to sort. + by : :obj:`str` or None, optional + Column by which to sort the DataFrame. 
Default is 'kappa'. + ascending : :obj:`bool`, optional + Whether to sort the DataFrame in ascending (True) or descending (False) + order. Default is False. + + Returns + ------- + df : :obj:`pandas.DataFrame` + DataFrame after sorting, with index resetted. + argsort : array_like + Sorting index. + """ + if by is None: + return df, df.index.values + + # Order of kwargs is preserved at 3.6+ + argsort = df[by].argsort() + if not ascending: + argsort = argsort[::-1] + df = df.loc[argsort].reset_index(drop=True) + return df, argsort + + +def apply_sort(*args, sort_idx, axis=0): + """ + Apply a sorting index to an arbitrary set of arrays. + """ + for arg in args: + assert arg.shape[axis] == len(sort_idx) + return [np.take(arg, sort_idx, axis=axis) for arg in args] + + +def check_mask(data, mask): + """ + Check that no zero-variance voxels remain in masked data. + + Parameters + ---------- + data : (S [x E] x T) array_like + Data to be masked and evaluated. + mask : (S) array_like + Boolean mask. + + Raises + ------ + ValueError + """ + assert data.ndim <= 3 + assert mask.shape[0] == data.shape[0] + masked_data = data[mask, ...] + dims_to_check = list(range(1, data.ndim)) + for dim in dims_to_check: + # ignore singleton dimensions + if masked_data.shape[dim] == 1: + continue + + masked_data_std = masked_data.std(axis=dim) + zero_idx = np.where(masked_data_std == 0) + n_bad_voxels = len(zero_idx[0]) + if n_bad_voxels > 0: + raise ValueError('{0} voxels in masked data have zero variance. ' + 'Mask is too liberal.'.format(n_bad_voxels)) diff --git a/tedana/metrics/collect.py b/tedana/metrics/collect.py new file mode 100644 index 000000000..17e5c3e1d --- /dev/null +++ b/tedana/metrics/collect.py @@ -0,0 +1,281 @@ +""" +Collect metrics. +""" +import logging + +import numpy as np +import pandas as pd + +from . import dependence +from ._utils import (determine_signs, flip_components, sort_df, apply_sort, + dependency_resolver) +from tedana.stats import getfbounds + + +LGR = logging.getLogger(__name__) +RepLGR = logging.getLogger('REPORT') +RefLGR = logging.getLogger('REFERENCES') + + +def generate_metrics(data_cat, data_optcom, mixing, mask, adaptive_mask, + tes, ref_img, + metrics=None, sort_by='kappa', ascending=False): + """ + Fit TE-dependence and -independence models to components. + + Parameters + ---------- + data_cat : (S x E x T) array_like + Input data, where `S` is samples, `E` is echos, and `T` is time + data_optcom : (S x T) array_like + Optimally combined data + mixing : (T x C) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `data_cat` + mask : (S) array_like + Boolean mask + adaptive_mask : (S) array_like + Adaptive mask, where each voxel's value is the number of echoes with + "good signal". + tes : list + List of echo times associated with `data_cat`, in milliseconds + ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + metrics : list + List of metrics to return + sort_by : str, optional + Metric to sort component table by. Default is 'kappa'. + ascending : bool, optional + Whether to sort the table in ascending or descending order. + Default is False. + + Returns + ------- + comptable : (C x X) :obj:`pandas.DataFrame` + Component metric table. One row for each component, with a column for + each metric. The index is the component number. + mixing : :obj:`numpy.ndarray` + Mixing matrix after sign flipping and sorting. 
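The metric bookkeeping in this function leans on dependency_resolver from tedana.metrics._utils (added above), which keeps expanding the requested metrics with their parents until the set stops growing. A toy example of the resolution it is expected to perform (the dictionary here is a trimmed-down stand-in, not the full table defined in the function body):

deps = {
    'kappa': ['map FT2', 'map Z'],
    'map FT2': ['map Z', 'mixing'],
    'map Z': ['map weight'],
    'map weight': ['data_optcom', 'mixing'],
}
inputs = ['data_optcom', 'mixing']
# Requesting only 'kappa' should resolve to the full chain of parents plus inputs:
# {'kappa', 'map FT2', 'map Z', 'map weight', 'data_optcom', 'mixing'}
# from tedana.metrics._utils import dependency_resolver
# print(set(dependency_resolver(deps, ['kappa'], inputs)))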
+ """ + if metrics is None: + metrics = ['map weight'] + RepLGR.info('The following metrics were calculated: {}.'.format(', '.join(metrics))) + + if not (data_cat.shape[0] == data_optcom.shape[0] == adaptive_mask.shape[0] == + mask.shape[0]): + raise ValueError('First dimensions (number of samples) of data_cat ({0}), ' + 'data_optcom ({1}), adaptive_mask ({2}), and mask ({3}) do not ' + 'match'.format(data_cat.shape[0], data_optcom.shape[0], + adaptive_mask.shape[0], mask.shape[0])) + elif data_cat.shape[1] != len(tes): + raise ValueError('Second dimension of data_cat ({0}) does not match ' + 'number of echoes provided (tes; ' + '{1})'.format(data_cat.shape[1], len(tes))) + elif not (data_cat.shape[2] == data_optcom.shape[1] == mixing.shape[0]): + raise ValueError('Number of volumes in data_cat ({0}), ' + 'data_optcom ({1}), and mixing ({2}) do not ' + 'match.'.format(data_cat.shape[2], data_optcom.shape[1], + mixing.shape[0])) + + INPUTS = ['data_cat', 'data_optcom', 'mixing', 'adaptive_mask', + 'mask', 'tes', 'ref_img'] + METRIC_DEPENDENCIES = { + 'kappa': ['map FT2', 'map Z'], + 'rho': ['map FS0', 'map Z'], + 'countnoise': ['map Z', 'map Z clusterized'], + 'countsigFT2': ['map FT2 clusterized'], + 'countsigFS0': ['map FS0 clusterized'], + 'dice_FT2': ['map beta T2 clusterized', 'map FT2 clusterized'], + 'dice_FS0': ['map beta S0 clusterized', 'map FS0 clusterized'], + 'signal-noise_t': ['map Z', 'map Z clusterized', 'map FT2'], + 'variance explained': ['map optcom betas'], + 'normalized variance explained': ['map weight'], + 'd_table_score': ['kappa', 'dice_FT2', 'signal-noise_t', + 'countnoise', 'countsigFT2'], + 'map FT2': ['map Z', 'mixing', 'tes', 'data_cat', 'adaptive_mask'], + 'map FS0': ['map Z', 'mixing', 'tes', 'data_cat', 'adaptive_mask'], + 'map Z': ['map weight'], + 'map weight': ['data_optcom', 'mixing'], + 'map optcom betas': ['data_optcom', 'mixing'], + 'map percent signal change': ['data_optcom', 'map optcom betas'], + 'map Z clusterized': ['map Z', 'mask', 'ref_img', 'tes'], + 'map FT2 clusterized': ['map FT2', 'mask', 'ref_img', 'tes'], + 'map FS0 clusterized': ['map FS0', 'mask', 'ref_img', 'tes'], + 'map beta T2 clusterized': ['map FT2 clusterized', 'map optcom betas', + 'countsigFT2', 'mask', 'ref_img', 'tes'], + 'map beta S0 clusterized': ['map FS0 clusterized', 'map optcom betas', + 'countsigFS0', 'mask', 'ref_img', 'tes'], + } + # Apply masks before anything else + data_cat = data_cat[mask, ...] 
+ data_optcom = data_optcom[mask, :] + adaptive_mask = adaptive_mask[mask] + + required_metrics = dependency_resolver(METRIC_DEPENDENCIES, metrics, INPUTS) + + # Use copy to avoid changing the original variable outside of this function + mixing = mixing.copy() + + # Generate the component table, which will be filled out, column by column, + # throughout this function + n_components = mixing.shape[1] + comptable = pd.DataFrame(index=np.arange(n_components, dtype=int)) + + # Metric maps + # Maps will be stored as arrays in an easily-indexable dictionary + metric_maps = {} + if 'map weight' in required_metrics: + LGR.info('Calculating weight maps') + metric_maps['map weight'] = dependence.calculate_weights(data_optcom, mixing) + signs = determine_signs(metric_maps['map weight'], axis=0) + metric_maps['map weight'], mixing = flip_components( + metric_maps['map weight'], mixing, signs=signs) + + if 'map optcom betas' in required_metrics: + LGR.info('Calculating parameter estimate maps for optimally combined data') + metric_maps['map optcom betas'] = dependence.calculate_betas(data_optcom, mixing) + + if 'map percent signal change' in required_metrics: + LGR.info('Calculating percent signal change maps') + # used in kundu v3.2 tree + metric_maps['map percent signal change'] = dependence.calculate_psc( + data_optcom, + metric_maps['map optcom betas']) + + if 'map Z' in required_metrics: + LGR.info('Calculating z-statistic maps') + metric_maps['map Z'] = dependence.calculate_z_maps(metric_maps['map weight']) + + if ('map FT2' in required_metrics) or ('map FS0' in required_metrics): + LGR.info('Calculating F-statistic maps') + metric_maps['map FT2'], metric_maps['map FS0'] = dependence.calculate_f_maps( + data_cat, metric_maps['map Z'], mixing, adaptive_mask, tes) + + if 'map Z clusterized' in required_metrics: + LGR.info('Thresholding z-statistic maps') + z_thresh = 1.95 + metric_maps['map Z clusterized'] = dependence.threshold_map( + metric_maps['map Z'], mask, ref_img, z_thresh) + + if 'map FT2 clusterized' in required_metrics: + LGR.info('Calculating T2* F-statistic maps') + f_thresh, _, _ = getfbounds(len(tes)) + metric_maps['map FT2 clusterized'] = dependence.threshold_map( + metric_maps['map FT2'], mask, ref_img, f_thresh) + + if 'map FS0 clusterized' in required_metrics: + LGR.info('Calculating S0 F-statistic maps') + f_thresh, _, _ = getfbounds(len(tes)) + metric_maps['map FS0 clusterized'] = dependence.threshold_map( + metric_maps['map FS0'], mask, ref_img, f_thresh) + + # Intermediate metrics + if 'countsigFT2' in required_metrics: + LGR.info('Counting significant voxels in T2* F-statistic maps') + comptable['countsigFT2'] = dependence.compute_countsignal( + metric_maps['map FT2 clusterized']) + + if 'countsigFS0' in required_metrics: + LGR.info('Counting significant voxels in S0 F-statistic maps') + comptable['countsigFS0'] = dependence.compute_countsignal( + metric_maps['map FS0 clusterized']) + + # Back to maps + if 'map beta T2 clusterized' in required_metrics: + LGR.info('Thresholding optimal combination beta maps to match T2* F-statistic maps') + metric_maps['map beta T2 clusterized'] = dependence.threshold_to_match( + metric_maps['map optcom betas'], + comptable['countsigFT2'], + mask, ref_img) + + if 'map beta S0 clusterized' in required_metrics: + LGR.info('Thresholding optimal combination beta maps to match S0 F-statistic maps') + metric_maps['map beta S0 clusterized'] = dependence.threshold_to_match( + metric_maps['map optcom betas'], + comptable['countsigFS0'], + mask, 
ref_img) + + # Dependence metrics + if ('kappa' in required_metrics) or ('rho' in required_metrics): + LGR.info('Calculating kappa and rho') + comptable['kappa'], comptable['rho'] = dependence.calculate_dependence_metrics( + F_T2_maps=metric_maps['map FT2'], + F_S0_maps=metric_maps['map FS0'], + Z_maps=metric_maps['map Z']) + + # Generic metrics + if 'variance explained' in required_metrics: + LGR.info('Calculating variance explained') + comptable['variance explained'] = dependence.calculate_varex( + metric_maps['map optcom betas']) + + if 'normalized variance explained' in required_metrics: + LGR.info('Calculating normalized variance explained') + comptable['normalized variance explained'] = dependence.calculate_varex_norm( + metric_maps['map weight']) + + # Spatial metrics + if 'dice_FT2' in required_metrics: + LGR.info('Calculating DSI between thresholded T2* F-statistic and ' + 'optimal combination beta maps') + comptable['dice_FT2'] = dependence.compute_dice( + metric_maps['map beta T2 clusterized'], + metric_maps['map FT2 clusterized'], axis=0) + + if 'dice_FS0' in required_metrics: + LGR.info('Calculating DSI between thresholded S0 F-statistic and ' + 'optimal combination beta maps') + comptable['dice_FS0'] = dependence.compute_dice( + metric_maps['map beta S0 clusterized'], + metric_maps['map FS0 clusterized'], axis=0) + + if 'signal-noise_t' in required_metrics: + LGR.info('Calculating signal-noise t-statistics') + RepLGR.info('A t-test was performed between the distributions of T2*-model ' + 'F-statistics associated with clusters (i.e., signal) and ' + 'non-cluster voxels (i.e., noise) to generate a t-statistic ' + '(metric signal-noise_z) and p-value (metric signal-noise_p) ' + 'measuring relative association of the component to signal ' + 'over noise.') + (comptable['signal-noise_t'], + comptable['signal-noise_p']) = dependence.compute_signal_minus_noise_t( + Z_maps=metric_maps['map Z'], + Z_clmaps=metric_maps['map Z clusterized'], + F_T2_maps=metric_maps['map FT2']) + + if 'signal-noise_z' in required_metrics: + LGR.info('Calculating signal-noise z-statistics') + RepLGR.info('A t-test was performed between the distributions of T2*-model ' + 'F-statistics associated with clusters (i.e., signal) and ' + 'non-cluster voxels (i.e., noise) to generate a z-statistic ' + '(metric signal-noise_z) and p-value (metric signal-noise_p) ' + 'measuring relative association of the component to signal ' + 'over noise.') + (comptable['signal-noise_z'], + comptable['signal-noise_p']) = dependence.compute_signal_minus_noise_z( + Z_maps=metric_maps['map Z'], + Z_clmaps=metric_maps['map Z clusterized'], + F_T2_maps=metric_maps['map FT2']) + + if 'countnoise' in required_metrics: + LGR.info('Counting significant noise voxels from z-statistic maps') + RepLGR.info('The number of significant voxels not from clusters was ' + 'calculated for each component.') + comptable['countnoise'] = dependence.compute_countnoise( + metric_maps['map Z'], + metric_maps['map Z clusterized']) + + # Composite metrics + if 'd_table_score' in required_metrics: + LGR.info('Calculating decision table score') + comptable['d_table_score'] = dependence.generate_decision_table_score( + comptable['kappa'], + comptable['dice_FT2'], + comptable['signal-noise_t'], + comptable['countnoise'], + comptable['countsigFT2']) + + # Sort the component table and mixing matrix + comptable, sort_idx = sort_df(comptable, by=sort_by, ascending=ascending) + mixing, = apply_sort(mixing, sort_idx=sort_idx, axis=1) + return comptable, mixing diff 
--git a/tedana/metrics/dependence.py b/tedana/metrics/dependence.py new file mode 100644 index 000000000..e27a29d12 --- /dev/null +++ b/tedana/metrics/dependence.py @@ -0,0 +1,574 @@ +""" +Fit models. +""" +import logging + +import numpy as np +from scipy import stats + +from tedana import io, utils +from tedana.stats import computefeats2, get_coeffs, t_to_z + + +LGR = logging.getLogger(__name__) +RepLGR = logging.getLogger('REPORT') +RefLGR = logging.getLogger('REFERENCES') + + +def calculate_weights(data_optcom, mixing): + """ + Calculate standardized parameter estimates between data and mixing matrix. + + Parameters + ---------- + data_optcom : (M x T) array_like + Optimally combined data, already masked. + mixing : (T x C) array_like + Mixing matrix + + Returns + ------- + weights : (M x C) array_like + Standardized parameter estimates for optimally combined data against + the mixing matrix. + """ + assert data_optcom.shape[1] == mixing.shape[0] + mixing_z = stats.zscore(mixing, axis=0) + # compute un-normalized weight dataset (features) + weights = computefeats2(data_optcom, mixing_z, normalize=False) + return weights + + +def calculate_betas(data_optcom, mixing): + """ + Calculate unstandardized parameter estimates between data and mixing + matrix. + + Parameters + ---------- + data_optcom : (M x T) array_like + Optimally combined data + mixing : (T x C) array_like + Mixing matrix + + Returns + ------- + betas : (M x C) array_like + Unstandardized parameter estimates + """ + assert data_optcom.shape[1] == mixing.shape[0] + # demean optimal combination + data_optcom_dm = data_optcom - data_optcom.mean(axis=-1, keepdims=True) + # compute PSC dataset - shouldn't have to refit data + betas = get_coeffs(data_optcom_dm, mixing) + return betas + + +def calculate_psc(data_optcom, optcom_betas): + """ + Calculate percent signal change maps for components against optimally + combined data. + + Parameters + ---------- + data_optcom : (M x T) array_like + Optimally combined data, already masked. + optcom_betas : (M x C) array_like + Component-wise, unstandardized parameter estimates from the regression + of the optimally combined data against component time series. + + Returns + ------- + psc : (M x C) array_like + Component-wise percent signal change maps. + """ + assert data_optcom.shape[0] == optcom_betas.shape[0] + psc = 100 * optcom_betas / data_optcom.mean(axis=-1, keepdims=True) + return psc + + +def calculate_z_maps(weights, z_max=8): + """ + Calculate z-statistic maps by z-scoring standardized parameter estimate + maps and cropping extreme values. + + Parameters + ---------- + weights : (M x C) array_like + Standardized parameter estimate maps for components. + z_max : float, optional + Maximum z-statistic, used to crop extreme values. Values in the + z-statistic maps greater than this value are set to it. + + Returns + ------- + Z_maps : (M x C) array_like + Z-statistic maps for components, reflecting voxel-wise component loadings. + """ + Z_maps = stats.zscore(weights, axis=0) + extreme_idx = np.abs(Z_maps) > z_max + Z_maps[extreme_idx] = z_max * np.sign(Z_maps[extreme_idx]) + return Z_maps + + +def calculate_f_maps(data_cat, Z_maps, mixing, adaptive_mask, tes, f_max=500): + """ + Calculate pseudo-F-statistic maps (per component) for TE-dependence + and -independence models. + + Parameters + ---------- + data_cat : (M x E x T) array_like + Multi-echo data, already masked. + Z_maps : (M x C) array_like + Z-statistic maps for components, reflecting voxel-wise component loadings. 
+ mixing : (T x C) array_like + Mixing matrix + adaptive_mask : (M) array_like + Adaptive mask, where each voxel's value is the number of echoes with + "good signal". Limited to masked voxels. + tes : (E) array_like + Echo times in milliseconds, in the same order as the echoes in data_cat. + f_max : float, optional + Maximum F-statistic, used to crop extreme values. Values in the + F-statistic maps greater than this value are set to it. + + Returns + ------- + F_T2_maps, F_S0_maps : (M x C) array_like + Pseudo-F-statistic maps for TE-dependence and -independence models, + respectively. + """ + # TODO: Remove mask arg from get_coeffs + me_betas = get_coeffs(data_cat, mixing, mask=np.ones(data_cat.shape[:2], bool), + add_const=True) + n_voxels, n_echos, n_components = me_betas.shape + mu = data_cat.mean(axis=-1, dtype=float) + tes = np.reshape(tes, (n_echos, 1)) + + # set up Xmats + X1 = mu.T # Model 1 + X2 = np.tile(tes, (1, n_voxels)) * mu.T # Model 2 + + F_T2_maps = np.zeros([n_voxels, n_components]) + F_S0_maps = np.zeros([n_voxels, n_components]) + + for i_comp in range(n_components): + # size of comp_betas is (n_echoes, n_samples) + comp_betas = np.atleast_3d(me_betas)[:, :, i_comp].T + alpha = (np.abs(comp_betas)**2).sum(axis=0) + + # Only analyze good echoes at each voxel + for j_echo in np.unique(adaptive_mask[adaptive_mask >= 3]): + mask_idx = adaptive_mask == j_echo + alpha = (np.abs(comp_betas[:j_echo])**2).sum(axis=0) + + # S0 Model + # (S,) model coefficient map + coeffs_S0 = (comp_betas[:j_echo] * X1[:j_echo, :]).sum(axis=0) /\ + (X1[:j_echo, :]**2).sum(axis=0) + pred_S0 = X1[:j_echo, :] * np.tile(coeffs_S0, (j_echo, 1)) + SSE_S0 = (comp_betas[:j_echo] - pred_S0)**2 + SSE_S0 = SSE_S0.sum(axis=0) # (S,) prediction error map + F_S0 = (alpha - SSE_S0) * (j_echo - 1) / (SSE_S0) + F_S0[F_S0 > f_max] = f_max + F_S0_maps[mask_idx, i_comp] = F_S0[mask_idx] + + # T2 Model + coeffs_T2 = (comp_betas[:j_echo] * X2[:j_echo, :]).sum(axis=0) /\ + (X2[:j_echo, :]**2).sum(axis=0) + pred_T2 = X2[:j_echo] * np.tile(coeffs_T2, (j_echo, 1)) + SSE_T2 = (comp_betas[:j_echo] - pred_T2)**2 + SSE_T2 = SSE_T2.sum(axis=0) + F_T2 = (alpha - SSE_T2) * (j_echo - 1) / (SSE_T2) + F_T2[F_T2 > f_max] = f_max + F_T2_maps[mask_idx, i_comp] = F_T2[mask_idx] + + return F_T2_maps, F_S0_maps + + +def threshold_map(maps, mask, ref_img, threshold, csize=None): + """ + Perform cluster-extent thresholding. + + Parameters + ---------- + maps : (S x C) array_like + Statistical maps to be thresholded. + mask : (S) array_like + Binary mask. + ref_img : img_like + Reference image to convert to niimgs with. + threshold : :obj:`float` + Value threshold to apply to maps. + csize : :obj:`int` or :obj:`None`, optional + Minimum cluster size. If None, standard thresholding (non-cluster-extent) will be done. + Default is None. 
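When csize is left at None, the default minimum cluster extent scales with the size of the map and is floored at 20 voxels, per the function body below. A quick check of that rule for a typical whole-brain mask:

import numpy as np

n_voxels = 60000
csize = np.max([int(n_voxels * 0.0005) + 5, 20])
print(csize)   # 35 -- default minimum cluster size for ~60k masked voxels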
+ + Returns + ------- + maps_thresh + """ + n_voxels, n_components = maps.shape + maps_thresh = np.zeros([n_voxels, n_components], bool) + if csize is None: + csize = np.max([int(n_voxels * 0.0005) + 5, 20]) + else: + csize = int(csize) + + for i_comp in range(n_components): + # Cluster-extent threshold and binarize F-maps + ccimg = io.new_nii_like( + ref_img, + np.squeeze(utils.unmask(maps[:, i_comp], mask))) + + maps_thresh[:, i_comp] = utils.threshold_map( + ccimg, min_cluster_size=csize, threshold=threshold, mask=mask, + binarize=True) + return maps_thresh + + +def threshold_to_match(maps, n_sig_voxels, mask, ref_img, csize=None): + """ + Cluster-extent threshold a map to have roughly some requested number of + significant voxels (with clusters accounted for). + + Parameters + ---------- + maps : (S x C) array_like + Statistical maps to be thresholded. + n_sig_voxels : (C) array_like + Number of significant voxels to threshold to, for each map in maps. + mask : (S) array_like + Binary mask. + ref_img : img_like + Reference image to convert to niimgs with. + csize : :obj:`int` or :obj:`None`, optional + Minimum cluster size. If None, standard thresholding (non-cluster-extent) will be done. + Default is None. + + Returns + ------- + clmaps : (S x C) array_like + Cluster-extent thresholded and binarized maps. + """ + n_voxels, n_components = maps.shape + abs_maps = np.abs(maps) + if csize is None: + csize = np.max([int(n_voxels * 0.0005) + 5, 20]) + else: + csize = int(csize) + + clmaps = np.zeros([n_voxels, n_components], bool) + for i_comp in range(n_components): + # Initial cluster-defining threshold is defined based on the number + # of significant voxels from the F-statistic maps. This threshold + # will be relaxed until the number of significant voxels from both + # maps is roughly equal. + ccimg = io.new_nii_like( + ref_img, + utils.unmask(stats.rankdata(abs_maps[:, i_comp]), mask)) + step = int(n_sig_voxels[i_comp] / 10) + rank_thresh = n_voxels - n_sig_voxels[i_comp] + + while True: + clmap = utils.threshold_map( + ccimg, min_cluster_size=csize, + threshold=rank_thresh, mask=mask, + binarize=True) + if rank_thresh <= 0: # all voxels significant + break + + diff = n_sig_voxels[i_comp] - clmap.sum() + if diff < 0 or clmap.sum() == 0: + rank_thresh += step + clmap = utils.threshold_map( + ccimg, min_cluster_size=csize, + threshold=rank_thresh, mask=mask, + binarize=True) + break + else: + rank_thresh -= step + clmaps[:, i_comp] = clmap + return clmaps + + +def calculate_dependence_metrics(F_T2_maps, F_S0_maps, Z_maps): + """ + Calculate Kappa and Rho metrics from F-statistic maps. + Just a weighted average over voxels. + + Parameters + ---------- + F_T2_maps, F_S0_maps : (S x C) array_like + Pseudo-F-statistic maps for TE-dependence and -independence models, + respectively. + Z_maps : (S x C) array_like + Z-statistic maps for components, reflecting voxel-wise component loadings. + + Returns + ------- + kappas, rhos : (C) array_like + Averaged pseudo-F-statistics for TE-dependence and -independence + models, respectively. + """ + assert F_T2_maps.shape == F_S0_maps.shape == Z_maps.shape + RepLGR.info('Kappa (kappa) and Rho (rho) were calculated as measures of ' + 'TE-dependence and TE-independence, respectively.') + + weight_maps = Z_maps ** 2. 
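Kappa and rho are simply Z^2-weighted averages of the two F-statistic maps, as the loop just below computes; a tiny worked example with made-up numbers:

import numpy as np

F_T2 = np.array([10.0, 40.0, 40.0])     # per-voxel TE-dependence F-statistics
Z = np.array([0.5, 2.0, 2.0])           # per-voxel component loadings
kappa = np.average(F_T2, weights=Z ** 2)
print(round(kappa, 2))                  # 39.09 -- high-|Z| voxels dominate the average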
+ n_components = Z_maps.shape[1] + kappas, rhos = np.zeros(n_components), np.zeros(n_components) + for i_comp in range(n_components): + kappas[i_comp] = np.average(F_T2_maps[:, i_comp], weights=weight_maps[:, i_comp]) + rhos[i_comp] = np.average(F_S0_maps[:, i_comp], weights=weight_maps[:, i_comp]) + return kappas, rhos + + +def calculate_varex(optcom_betas): + """ + Calculate unnormalized(?) variance explained from unstandardized + parameter estimate maps. + + Parameters + ---------- + optcom_betas : (S x C) array_like + Component-wise, unstandardized parameter estimates from the regression + of the optimally combined data against component time series. + + Returns + ------- + varex : (C) array_like + Unnormalized variance explained for each component. + """ + compvar = (optcom_betas ** 2).sum(axis=0) + varex = 100 * (compvar / compvar.sum()) + return varex + + +def calculate_varex_norm(weights): + """ + Calculate normalized variance explained from standardized parameter + estimate maps. + + Parameters + ---------- + weights : (S x C) array_like + Standardized parameter estimate maps for components. + + Returns + ------- + varex_norm : (C) array_like + Normalized variance explained scaled from 0 to 1. + """ + compvar = (weights ** 2).sum(axis=0) + varex_norm = compvar / compvar.sum() + return varex_norm + + +def compute_dice(clmaps1, clmaps2, axis=0): + """ + Compute the Dice similarity index between two thresholded and binarized maps. + NaNs are converted automatically to zeroes. + + Parameters + ---------- + clmaps1, clmaps2 : (S x C) array_like + Thresholded and binarized arrays. + axis : int or None, optional + Axis along which to calculate DSI. Default is 0. + + Returns + ------- + dice_values : array_like + DSI values. + """ + assert clmaps1.shape == clmaps2.shape + dice_values = utils.dice(clmaps1, clmaps2, axis=axis) + dice_values = np.nan_to_num(dice_values, 0) + return dice_values + + +def compute_signal_minus_noise_z(Z_maps, Z_clmaps, F_T2_maps, z_thresh=1.95): + """ + Divide voxel-level thresholded F-statistic maps into distributions of + signal (voxels in significant clusters) and noise (voxels from + non-significant clusters) statistics, then compare these distributions + with a two-sample t-test. Convert the resulting t-statistics (per map) + to normally distributed z-statistics. + + Parameters + ---------- + Z_maps : (S x C) array_like + Z-statistic maps for components, reflecting voxel-wise component loadings. + Z_clmaps : (S x C) array_like + Cluster-extent thresholded Z-statistic maps for components. + F_T2_maps : (S x C) array_like + Pseudo-F-statistic maps for components from TE-dependence models. + Each voxel reflects the model fit for the component weights to the + TE-dependence model across echoes. + z_thresh : float, optional + Z-statistic threshold for voxel-wise significance. Default is 1.95. + + Returns + ------- + signal_minus_noise_z : (C) array_like + Z-statistics from component-wise signal > noise paired t-tests. + signal_minus_noise_p : (C) array_like + P-values from component-wise signal > noise paired t-tests. 
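+
+    Notes
+    -----
+    The signal and noise distributions are compared with Welch's (unequal
+    variance) two-sample t-test on log-transformed F-statistics, and the
+    resulting t-statistic is converted to a z-statistic with ``t_to_z``.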
+ """ + assert Z_maps.shape == Z_clmaps.shape == F_T2_maps.shape + n_components = Z_maps.shape[1] + signal_minus_noise_z = np.zeros(n_components) + signal_minus_noise_p = np.zeros(n_components) + noise_idx = (np.abs(Z_maps) > z_thresh) & (Z_clmaps == 0) + countnoise = noise_idx.sum(axis=0) + countsignal = Z_clmaps.sum(axis=0) + for i_comp in range(n_components): + noise_FT2_Z = 0.5 * np.log(F_T2_maps[noise_idx[:, i_comp], i_comp]) + signal_FT2_Z = 0.5 * np.log(F_T2_maps[Z_clmaps[:, i_comp] == 1, i_comp]) + n_noise_dupls = noise_FT2_Z.size - np.unique(noise_FT2_Z).size + if n_noise_dupls: + LGR.debug('For component {}, {} duplicate noise F-values ' + 'detected.'.format(i_comp, n_noise_dupls)) + n_signal_dupls = signal_FT2_Z.size - np.unique(signal_FT2_Z).size + if n_signal_dupls: + LGR.debug('For component {}, {} duplicate signal F-values ' + 'detected.'.format(i_comp, n_signal_dupls)) + dof = countnoise[i_comp] + countsignal[i_comp] - 2 + + t_value, signal_minus_noise_p[i_comp] = stats.ttest_ind( + signal_FT2_Z, noise_FT2_Z, equal_var=False) + signal_minus_noise_z[i_comp] = t_to_z(t_value, dof) + + signal_minus_noise_z = np.nan_to_num(signal_minus_noise_z, 0) + signal_minus_noise_p = np.nan_to_num(signal_minus_noise_p, 0) + return signal_minus_noise_z, signal_minus_noise_p + + +def compute_signal_minus_noise_t(Z_maps, Z_clmaps, F_T2_maps, z_thresh=1.95): + """ + Divide voxel-level thresholded F-statistic maps into distributions of + signal (voxels in significant clusters) and noise (voxels from + non-significant clusters) statistics, then compare these distributions + with a two-sample t-test. + + Parameters + ---------- + Z_maps : (S x C) array_like + Z-statistic maps for components, reflecting voxel-wise component loadings. + Z_clmaps : (S x C) array_like + Cluster-extent thresholded Z-statistic maps for components. + F_T2_maps : (S x C) array_like + Pseudo-F-statistic maps for components from TE-dependence models. + Each voxel reflects the model fit for the component weights to the + TE-dependence model across echoes. + z_thresh : float, optional + Z-statistic threshold for voxel-wise significance. Default is 1.95. + + Returns + ------- + signal_minus_noise_t : (C) array_like + T-statistics from component-wise signal > noise paired t-tests. + signal_minus_noise_p : (C) array_like + P-values from component-wise signal > noise paired t-tests. + """ + assert Z_maps.shape == Z_clmaps.shape == F_T2_maps.shape + n_components = Z_maps.shape[1] + signal_minus_noise_t = np.zeros(n_components) + signal_minus_noise_p = np.zeros(n_components) + noise_idx = (np.abs(Z_maps) > z_thresh) & (Z_clmaps == 0) + for i_comp in range(n_components): + # NOTE: Why only compare distributions of *unique* F-statistics? + noise_FT2_Z = np.log10(np.unique(F_T2_maps[noise_idx[:, i_comp], i_comp])) + signal_FT2_Z = np.log10(np.unique(F_T2_maps[Z_clmaps[:, i_comp] == 1, i_comp])) + (signal_minus_noise_t[i_comp], + signal_minus_noise_p[i_comp]) = stats.ttest_ind( + signal_FT2_Z, noise_FT2_Z, equal_var=False) + + signal_minus_noise_t = np.nan_to_num(signal_minus_noise_t, 0) + signal_minus_noise_p = np.nan_to_num(signal_minus_noise_p, 0) + return signal_minus_noise_t, signal_minus_noise_p + + +def compute_countsignal(stat_cl_maps): + """ + Count the number of significant voxels, per map, in a set of cluster-extent + thresholded maps. + + Parameters + ---------- + stat_cl_maps : (S x C) array_like + Statistical map after cluster-extent thresholding and binarization. 
+
+    Returns
+    -------
+    countsignal : (C) array_like
+        Number of significant (non-zero) voxels for each map in stat_cl_maps.
+    """
+    countsignal = stat_cl_maps.sum(axis=0)
+    return countsignal
+
+
+def compute_countnoise(stat_maps, stat_cl_maps, stat_thresh=1.95):
+    """
+    Count the number of significant voxels (after application of the
+    cluster-defining threshold) that fall outside of significant clusters
+    (after cluster-extent thresholding).
+
+    Parameters
+    ----------
+    stat_maps : (S x C) array_like
+        Unthresholded statistical maps.
+    stat_cl_maps : (S x C) array_like
+        Cluster-extent thresholded and binarized version of stat_maps.
+    stat_thresh : float, optional
+        Statistical threshold. Default is 1.95 (Z-statistic threshold
+        corresponding to p < 0.05).
+
+    Returns
+    -------
+    countnoise : (C) array_like
+        Number of significant non-cluster voxels for each map in stat_maps.
+    """
+    noise_idx = (np.abs(stat_maps) > stat_thresh) & (stat_cl_maps == 0)
+    countnoise = noise_idx.sum(axis=0)
+    return countnoise
+
+
+def generate_decision_table_score(kappa, dice_FT2, signal_minus_noise_t,
+                                  countnoise, countsigFT2):
+    """
+    Generate a five-metric decision table. Metrics are ranked in descending
+    order if they measure TE-dependence, or in ascending order if they
+    measure TE-independence, and the ranks are then averaged for each
+    component.
+
+    Parameters
+    ----------
+    kappa : (C) array_like
+        Pseudo-F-statistics for TE-dependence model.
+    dice_FT2 : (C) array_like
+        Dice similarity index for cluster-extent thresholded beta maps and
+        cluster-extent thresholded TE-dependence F-statistic maps.
+    signal_minus_noise_t : (C) array_like
+        Signal-noise t-statistic metrics.
+    countnoise : (C) array_like
+        Numbers of significant non-cluster voxels from the thresholded beta
+        maps.
+    countsigFT2 : (C) array_like
+        Numbers of significant voxels from clusters from the thresholded
+        TE-dependence F-statistic maps.
+
+    Returns
+    -------
+    d_table_score : (C) array_like
+        Decision table metric scores.
+    """
+    d_table_rank = np.vstack([
+        len(kappa) - stats.rankdata(kappa),
+        len(kappa) - stats.rankdata(dice_FT2),
+        len(kappa) - stats.rankdata(signal_minus_noise_t),
+        stats.rankdata(countnoise),
+        len(kappa) - stats.rankdata(countsigFT2)]).T
+    d_table_score = d_table_rank.mean(axis=1)
+    return d_table_score
diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py
deleted file mode 100644
index d3ca732d9..000000000
--- a/tedana/metrics/kundu_fit.py
+++ /dev/null
@@ -1,417 +0,0 @@
-"""
-Fit models.
-"""
-import logging
-import os.path as op
-
-import numpy as np
-import pandas as pd
-from scipy import stats
-
-from tedana import io, utils
-from tedana.stats import getfbounds, computefeats2, get_coeffs
-
-
-LGR = logging.getLogger(__name__)
-RepLGR = logging.getLogger('REPORT')
-RefLGR = logging.getLogger('REFERENCES')
-
-F_MAX = 500
-Z_MAX = 8
-
-
-def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img,
-                       reindex=False, mmixN=None, algorithm=None, label=None,
-                       out_dir='.', verbose=False):
-    """
-    Fit TE-dependence and -independence models to components.
-
-    Parameters
-    ----------
-    catd : (S x E x T) array_like
-        Input data, where `S` is samples, `E` is echos, and `T` is time
-    tsoc : (S x T) array_like
-        Optimally combined data
-    mmix : (T x C) array_like
-        Mixing matrix for converting input data to component space, where `C`
-        is components and `T` is the same as in `catd`
-    t2s : (S [x T]) array_like
-        Limited T2* map or timeseries.
-    tes : list
-        List of echo times associated with `catd`, in milliseconds
-    ref_img : str or img_like
-        Reference image to dictate how outputs are saved to disk
-    reindex : bool, optional
-        Whether to sort components in descending order by Kappa.
Default: False - mmixN : (T x C) array_like, optional - Z-scored mixing matrix. Default: None - algorithm : {'kundu_v2', 'kundu_v3', None}, optional - Decision tree to be applied to metrics. Determines which maps will be - generated and stored in seldict. Default: None - label : :obj:`str` or None, optional - Prefix to apply to generated files. Default is None. - out_dir : :obj:`str`, optional - Output directory for generated files. Default is current working - directory. - verbose : :obj:`bool`, optional - Whether or not to generate additional files. Default is False. - - Returns - ------- - comptable : (C x X) :obj:`pandas.DataFrame` - Component metric table. One row for each component, with a column for - each metric. The index is the component number. - seldict : :obj:`dict` or None - Dictionary containing component-specific metric maps to be used for - component selection. If `algorithm` is None, then seldict will be None as - well. - betas : :obj:`numpy.ndarray` - mmix_new : :obj:`numpy.ndarray` - """ - # Use t2s as mask - mask = t2s != 0 - if not (catd.shape[0] == t2s.shape[0] == mask.shape[0] == tsoc.shape[0]): - raise ValueError('First dimensions (number of samples) of catd ({0}), ' - 'tsoc ({1}), and t2s ({2}) do not ' - 'match'.format(catd.shape[0], tsoc.shape[0], - t2s.shape[0])) - elif catd.shape[1] != len(tes): - raise ValueError('Second dimension of catd ({0}) does not match ' - 'number of echoes provided (tes; ' - '{1})'.format(catd.shape[1], len(tes))) - elif not (catd.shape[2] == tsoc.shape[1] == mmix.shape[0]): - raise ValueError('Number of volumes in catd ({0}), ' - 'tsoc ({1}), and mmix ({2}) do not ' - 'match.'.format(catd.shape[2], tsoc.shape[1], mmix.shape[0])) - elif t2s.ndim == 2: - if catd.shape[2] != t2s.shape[1]: - raise ValueError('Number of volumes in catd ' - '({0}) does not match number of volumes in ' - 't2s ({1})'.format(catd.shape[2], t2s.shape[1])) - - RepLGR.info("A series of TE-dependence metrics were calculated for " - "each ICA component, including Kappa, Rho, and variance " - "explained.") - - # mask everything we can - tsoc = tsoc[mask, :] - catd = catd[mask, ...] - t2s = t2s[mask] - - # demean optimal combination - tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) - - # compute un-normalized weight dataset (features) - if mmixN is None: - mmixN = mmix - WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False) - - # compute PSC dataset - shouldn't have to refit data - tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None) - del tsoc_dm - tsoc_Babs = np.abs(tsoc_B) - PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 - - # compute skews to determine signs based on unnormalized weights, - # correct mmix & WTS signs based on spatial distribution tails - signs = stats.skew(WTS, axis=0) - signs /= np.abs(signs) - mmix = mmix.copy() - mmix *= signs - WTS *= signs - PSC *= signs - totvar = (tsoc_B**2).sum() - totvar_norm = (WTS**2).sum() - - # compute Betas and means over TEs for TE-dependence analysis - betas = get_coeffs(utils.unmask(catd, mask), - mmix, - np.repeat(mask[:, np.newaxis], len(tes), axis=1)) - betas = betas[mask, ...] 
- n_voxels, n_echos, n_components = betas.shape - mu = catd.mean(axis=-1, dtype=float) - tes = np.reshape(tes, (n_echos, 1)) - fmin, _, _ = getfbounds(n_echos) - - # set up Xmats - X1 = mu.T # Model 1 - X2 = np.tile(tes, (1, n_voxels)) * mu.T / t2s.T # Model 2 - - # tables for component selection - kappas = np.zeros([n_components]) - rhos = np.zeros([n_components]) - varex = np.zeros([n_components]) - varex_norm = np.zeros([n_components]) - Z_maps = np.zeros([n_voxels, n_components]) - F_R2_maps = np.zeros([n_voxels, n_components]) - F_S0_maps = np.zeros([n_voxels, n_components]) - pred_R2_maps = np.zeros([n_voxels, n_echos, n_components]) - pred_S0_maps = np.zeros([n_voxels, n_echos, n_components]) - - LGR.info('Fitting TE- and S0-dependent models to components') - for i_comp in range(n_components): - # size of comp_betas is (n_echoes, n_samples) - comp_betas = np.atleast_3d(betas)[:, :, i_comp].T - alpha = (np.abs(comp_betas)**2).sum(axis=0) - varex[i_comp] = (tsoc_B[:, i_comp]**2).sum() / totvar * 100. - varex_norm[i_comp] = (WTS[:, i_comp]**2).sum() / totvar_norm - - # S0 Model - # (S,) model coefficient map - coeffs_S0 = (comp_betas * X1).sum(axis=0) / (X1**2).sum(axis=0) - pred_S0 = X1 * np.tile(coeffs_S0, (n_echos, 1)) - pred_S0_maps[:, :, i_comp] = pred_S0.T - SSE_S0 = (comp_betas - pred_S0)**2 - SSE_S0 = SSE_S0.sum(axis=0) # (S,) prediction error map - F_S0 = (alpha - SSE_S0) * (n_echos - 1) / (SSE_S0) - F_S0_maps[:, i_comp] = F_S0 - - # R2 Model - coeffs_R2 = (comp_betas * X2).sum(axis=0) / (X2**2).sum(axis=0) - pred_R2 = X2 * np.tile(coeffs_R2, (n_echos, 1)) - pred_R2_maps[:, :, i_comp] = pred_R2.T - SSE_R2 = (comp_betas - pred_R2)**2 - SSE_R2 = SSE_R2.sum(axis=0) - F_R2 = (alpha - SSE_R2) * (n_echos - 1) / (SSE_R2) - F_R2_maps[:, i_comp] = F_R2 - - # compute weights as Z-values - wtsZ = (WTS[:, i_comp] - WTS[:, i_comp].mean()) / WTS[:, i_comp].std() - wtsZ[np.abs(wtsZ) > Z_MAX] = (Z_MAX * (np.abs(wtsZ) / wtsZ))[ - np.abs(wtsZ) > Z_MAX] - Z_maps[:, i_comp] = wtsZ - - # compute Kappa and Rho - F_S0[F_S0 > F_MAX] = F_MAX - F_R2[F_R2 > F_MAX] = F_MAX - norm_weights = np.abs(wtsZ ** 2.) - kappas[i_comp] = np.average(F_R2, weights=norm_weights) - rhos[i_comp] = np.average(F_S0, weights=norm_weights) - del SSE_S0, SSE_R2, wtsZ, F_S0, F_R2, norm_weights, comp_betas - if algorithm != 'kundu_v3': - del WTS, PSC, tsoc_B - - # tabulate component values - comptable = np.vstack([kappas, rhos, varex, varex_norm]).T - if reindex: - # re-index all components in descending Kappa order - sort_idx = comptable[:, 0].argsort()[::-1] - comptable = comptable[sort_idx, :] - mmix_new = mmix[:, sort_idx] - betas = betas[..., sort_idx] - pred_R2_maps = pred_R2_maps[:, :, sort_idx] - pred_S0_maps = pred_S0_maps[:, :, sort_idx] - F_R2_maps = F_R2_maps[:, sort_idx] - F_S0_maps = F_S0_maps[:, sort_idx] - Z_maps = Z_maps[:, sort_idx] - tsoc_Babs = tsoc_Babs[:, sort_idx] - if algorithm == 'kundu_v3': - WTS = WTS[:, sort_idx] - PSC = PSC[:, sort_idx] - tsoc_B = tsoc_B[:, sort_idx] - else: - mmix_new = mmix - del mmix - - if verbose: - # Echo-specific weight maps for each of the ICA components. - io.filewrite(utils.unmask(betas, mask), - op.join(out_dir, '{0}betas_catd.nii'.format(label)), - ref_img) - - # Echo-specific maps of predicted values for R2 and S0 models for each - # component. 
- io.filewrite(utils.unmask(pred_R2_maps, mask), - op.join(out_dir, '{0}R2_pred.nii'.format(label)), ref_img) - io.filewrite(utils.unmask(pred_S0_maps, mask), - op.join(out_dir, '{0}S0_pred.nii'.format(label)), ref_img) - # Weight maps used to average metrics across voxels - io.filewrite(utils.unmask(Z_maps ** 2., mask), - op.join(out_dir, '{0}metric_weights.nii'.format(label)), - ref_img) - del pred_R2_maps, pred_S0_maps - - comptable = pd.DataFrame(comptable, - columns=['kappa', 'rho', - 'variance explained', - 'normalized variance explained']) - comptable.index.name = 'component' - - # Generate clustering criteria for component selection - if algorithm in ['kundu_v2', 'kundu_v3']: - Z_clmaps = np.zeros([n_voxels, n_components], bool) - F_R2_clmaps = np.zeros([n_voxels, n_components], bool) - F_S0_clmaps = np.zeros([n_voxels, n_components], bool) - Br_R2_clmaps = np.zeros([n_voxels, n_components], bool) - Br_S0_clmaps = np.zeros([n_voxels, n_components], bool) - - LGR.info('Performing spatial clustering of components') - csize = np.max([int(n_voxels * 0.0005) + 5, 20]) - LGR.debug('Using minimum cluster size: {}'.format(csize)) - for i_comp in range(n_components): - # Cluster-extent threshold and binarize F-maps - ccimg = io.new_nii_like( - ref_img, - np.squeeze(utils.unmask(F_R2_maps[:, i_comp], mask))) - F_R2_clmaps[:, i_comp] = utils.threshold_map( - ccimg, min_cluster_size=csize, threshold=fmin, mask=mask, - binarize=True) - countsigFR2 = F_R2_clmaps[:, i_comp].sum() - - ccimg = io.new_nii_like( - ref_img, - np.squeeze(utils.unmask(F_S0_maps[:, i_comp], mask))) - F_S0_clmaps[:, i_comp] = utils.threshold_map( - ccimg, min_cluster_size=csize, threshold=fmin, mask=mask, - binarize=True) - countsigFS0 = F_S0_clmaps[:, i_comp].sum() - - # Cluster-extent threshold and binarize Z-maps with CDT of p < 0.05 - ccimg = io.new_nii_like( - ref_img, - np.squeeze(utils.unmask(Z_maps[:, i_comp], mask))) - Z_clmaps[:, i_comp] = utils.threshold_map( - ccimg, min_cluster_size=csize, threshold=1.95, mask=mask, - binarize=True) - - # Cluster-extent threshold and binarize ranked signal-change map - ccimg = io.new_nii_like( - ref_img, - utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), mask)) - Br_R2_clmaps[:, i_comp] = utils.threshold_map( - ccimg, min_cluster_size=csize, - threshold=(max(tsoc_Babs.shape) - countsigFR2), mask=mask, - binarize=True) - Br_S0_clmaps[:, i_comp] = utils.threshold_map( - ccimg, min_cluster_size=csize, - threshold=(max(tsoc_Babs.shape) - countsigFS0), mask=mask, - binarize=True) - del ccimg, tsoc_Babs - - if algorithm == 'kundu_v2': - # WTS, tsoc_B, PSC, and F_S0_maps are not used by Kundu v2.5 - selvars = ['Z_maps', 'F_R2_maps', - 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', - 'Br_R2_clmaps', 'Br_S0_clmaps'] - elif algorithm == 'kundu_v3': - selvars = ['WTS', 'tsoc_B', 'PSC', - 'Z_maps', 'F_R2_maps', 'F_S0_maps', - 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', - 'Br_R2_clmaps', 'Br_S0_clmaps'] - elif algorithm is None: - selvars = [] - else: - raise ValueError('Algorithm "{0}" not recognized.'.format(algorithm)) - - seldict = {} - for vv in selvars: - seldict[vv] = eval(vv) - else: - seldict = None - - return comptable, seldict, betas, mmix_new - - -def kundu_metrics(comptable, metric_maps): - """ - Compute metrics used by Kundu v2.5 and v3.2 decision trees. 
- - Parameters - ---------- - comptable : (C x M) :obj:`pandas.DataFrame` - Component metric table, where `C` is components and `M` is metrics - metric_maps : :obj:`dict` - A dictionary with component-specific feature maps used for - classification. The value for each key is a (S x C) array, where `S` is - voxels and `C` is components. Generated by `dependence_metrics` - - Returns - ------- - comptable : (C x M) :obj:`pandas.DataFrame` - Component metrics to be used for component selection, with new metrics - added. - """ - Z_maps = metric_maps['Z_maps'] - Z_clmaps = metric_maps['Z_clmaps'] - F_R2_maps = metric_maps['F_R2_maps'] - F_S0_clmaps = metric_maps['F_S0_clmaps'] - F_R2_clmaps = metric_maps['F_R2_clmaps'] - Br_S0_clmaps = metric_maps['Br_S0_clmaps'] - Br_R2_clmaps = metric_maps['Br_R2_clmaps'] - - """ - Tally number of significant voxels for cluster-extent thresholded R2 and S0 - model F-statistic maps. - """ - comptable['countsigFR2'] = F_R2_clmaps.sum(axis=0) - comptable['countsigFS0'] = F_S0_clmaps.sum(axis=0) - - """ - Generate Dice values for R2 and S0 models - - dice_FR2: Dice value of cluster-extent thresholded maps of R2-model betas - and F-statistics. - - dice_FS0: Dice value of cluster-extent thresholded maps of S0-model betas - and F-statistics. - """ - comptable['dice_FR2'] = np.zeros(comptable.shape[0]) - comptable['dice_FS0'] = np.zeros(comptable.shape[0]) - for i_comp in comptable.index: - comptable.loc[i_comp, 'dice_FR2'] = utils.dice(Br_R2_clmaps[:, i_comp], - F_R2_clmaps[:, i_comp]) - comptable.loc[i_comp, 'dice_FS0'] = utils.dice(Br_S0_clmaps[:, i_comp], - F_S0_clmaps[:, i_comp]) - - comptable.loc[np.isnan(comptable['dice_FR2']), 'dice_FR2'] = 0 - comptable.loc[np.isnan(comptable['dice_FS0']), 'dice_FS0'] = 0 - - """ - Generate three metrics of component noise: - - countnoise: Number of "noise" voxels (voxels highly weighted for - component, but not from clusters) - - signal-noise_t: T-statistic for two-sample t-test of F-statistics from - "signal" voxels (voxels in clusters) against "noise" voxels (voxels not - in clusters) for R2 model. - - signal-noise_p: P-value from t-test. - """ - comptable['countnoise'] = 0 - comptable['signal-noise_t'] = 0 - comptable['signal-noise_p'] = 0 - for i_comp in comptable.index: - # index voxels significantly loading on component but not from clusters - comp_noise_sel = ((np.abs(Z_maps[:, i_comp]) > 1.95) & - (Z_clmaps[:, i_comp] == 0)) - comptable.loc[i_comp, 'countnoise'] = np.array( - comp_noise_sel, dtype=np.int).sum() - # NOTE: Why only compare distributions of *unique* F-statistics? 
- noise_FR2_Z = np.log10(np.unique(F_R2_maps[comp_noise_sel, i_comp])) - signal_FR2_Z = np.log10(np.unique( - F_R2_maps[Z_clmaps[:, i_comp] == 1, i_comp])) - (comptable.loc[i_comp, 'signal-noise_t'], - comptable.loc[i_comp, 'signal-noise_p']) = stats.ttest_ind( - signal_FR2_Z, noise_FR2_Z, equal_var=False) - - comptable.loc[np.isnan(comptable['signal-noise_t']), 'signal-noise_t'] = 0 - comptable.loc[np.isnan(comptable['signal-noise_p']), 'signal-noise_p'] = 0 - - """ - Assemble decision table with five metrics: - - Kappa values ranked from largest to smallest - - R2-model F-score map/beta map Dice scores ranked from largest to smallest - - Signal F > Noise F t-statistics ranked from largest to smallest - - Number of "noise" voxels (voxels highly weighted for component, but not - from clusters) ranked from smallest to largest - - Number of voxels with significant R2-model F-scores within clusters - ranked from largest to smallest - - Smaller values (i.e., higher ranks) across metrics indicate more BOLD - dependence and less noise. - """ - d_table_rank = np.vstack([ - comptable.shape[0] - stats.rankdata(comptable['kappa']), - comptable.shape[0] - stats.rankdata(comptable['dice_FR2']), - comptable.shape[0] - stats.rankdata(comptable['signal-noise_t']), - stats.rankdata(comptable['countnoise']), - comptable.shape[0] - stats.rankdata(comptable['countsigFR2'])]).T - comptable['d_table_score'] = d_table_rank.mean(axis=1) - - return comptable diff --git a/tedana/reporting/__init__.py b/tedana/reporting/__init__.py new file mode 100644 index 000000000..d8c855a14 --- /dev/null +++ b/tedana/reporting/__init__.py @@ -0,0 +1,8 @@ +""" +Reporting code for tedana +""" + +from .html_report import generate_report +from .static_figures import comp_figures + +__all__ = ['generate_report', 'comp_figures'] diff --git a/tedana/reporting/data/README.md b/tedana/reporting/data/README.md new file mode 100644 index 000000000..8d5d5ba52 --- /dev/null +++ b/tedana/reporting/data/README.md @@ -0,0 +1,3 @@ +This directory contains data required for tedana reporting. + +html/ : HTML templates \ No newline at end of file diff --git a/tedana/reporting/data/html/__init__.py b/tedana/reporting/data/html/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tedana/reporting/data/html/report_body_template.html b/tedana/reporting/data/html/report_body_template.html new file mode 100644 index 000000000..1066b530e --- /dev/null +++ b/tedana/reporting/data/html/report_body_template.html @@ -0,0 +1,25 @@ + +
+ $content +
+
+ $about +
+ +$javascript \ No newline at end of file diff --git a/tedana/reporting/data/html/report_head_template.html b/tedana/reporting/data/html/report_head_template.html new file mode 100644 index 000000000..2df19b5d0 --- /dev/null +++ b/tedana/reporting/data/html/report_head_template.html @@ -0,0 +1,38 @@ + + + + +tedana report + + + + + + + + + + + + + + $body + + \ No newline at end of file diff --git a/tedana/reporting/dynamic_figures.py b/tedana/reporting/dynamic_figures.py new file mode 100644 index 000000000..6e5b749b8 --- /dev/null +++ b/tedana/reporting/dynamic_figures.py @@ -0,0 +1,284 @@ +import numpy as np +from math import pi +import pandas as pd +from sklearn.preprocessing import MinMaxScaler +from bokeh import (events, models, plotting, transform) + +color_mapping = {'accepted': '#2ecc71', + 'rejected': '#e74c3c', + 'ignored': '#3498db'} + +tap_callback_jscode = """ + // Accessing the selected component ID + var data = source_comp_table.data; + var selected_idx = source_comp_table.selected.indices; + if(selected_idx > 0) { + // A component has been selected + // ----------------------------- + var components = data['component'] + var selected = components[selected_idx] + var selected_padded = '' + selected; + while (selected_padded.length < 2) { + selected_padded = '0' + selected_padded; + } + var selected_padded_forIMG = '0' + selected_padded + var selected_padded_C = 'ica_' + selected_padded + + // Find color for selected component + var colors = data['color'] + var this_component_color = colors[selected_idx] + + // Image Below Plots + div.text = "" + var line = "Component Map\\n"; + console.log('Linea: ' + line) + var text = div.text.concat(line); + var lines = text.split("\\n") + if (lines.length > 35) + lines.shift(); + div.text = lines.join("\\n"); + + } else { + // No component has been selected + // ------------------------------ + // Set Component color to Black + var this_component_color = '#000000' + + // Image Below Plots + div.text = "" + var line = "

Please select an individual component to view it in more detail

\\n" + var text = div.text.concat(line); + + } + """ + + +def _create_data_struct(comptable_path, color_mapping=color_mapping): + """ + Create Bokeh ColumnDataSource with all info dynamic plots need + + Parameters + ---------- + comptable: str + file path to component table, JSON format + + Returns + ------- + cds: bokeh.models.ColumnDataSource + Data structure with all the fields to plot or hover over + """ + unused_cols = ['normalized variance explained', + 'countsigFR2', 'countsigFS0', + 'dice_FS0', 'countnoise', 'dice_FR2', + 'signal-noise_t', 'signal-noise_p', + 'd_table_score', 'kappa ratio', + 'rationale', 'd_table_score_scrub'] + + df = pd.read_json(comptable_path) + df.drop('Description', axis=0, inplace=True) + df.drop('Method', axis=1, inplace=True) + df = df.T + n_comps = df.shape[0] + + # remove space from column name + df.rename(columns={'variance explained': 'var_exp'}, inplace=True) + + # For providing sizes based on Var Explained that are visible + mm_scaler = MinMaxScaler(feature_range=(4, 20)) + df['var_exp_size'] = mm_scaler.fit_transform( + df[['var_exp', 'normalized variance explained']])[:, 0] + + # Calculate Kappa and Rho ranks + df['rho_rank'] = df['rho'].rank(ascending=False).values + df['kappa_rank'] = df['kappa'].rank(ascending=False).values + df['var_exp_rank'] = df['var_exp'].rank(ascending=False).values + + # Remove unused columns to decrease size of final HTML + # set errors to 'ignore' in case some columns do not exist in + # a given data frame + df.drop(unused_cols, axis=1, inplace=True, errors='ignore') + + # Create additional Column with colors based on final classification + df['color'] = [color_mapping[i] for i in df['classification']] + + # Create additional column with component ID + df['component'] = np.arange(n_comps) + + # Compute angle and re-sort data for Pie plots + df['angle'] = df['var_exp'] / df['var_exp'].sum() * 2 * pi + df.sort_values(by=['classification', 'var_exp'], inplace=True) + + cds = models.ColumnDataSource(data=dict( + kappa=df['kappa'], + rho=df['rho'], + varexp=df['var_exp'], + kappa_rank=df['kappa_rank'], + rho_rank=df['rho_rank'], + varexp_rank=df['var_exp_rank'], + component=[str(i) for i in df['component']], + color=df['color'], + size=df['var_exp_size'], + classif=df['classification'], + angle=df['angle'])) + + return cds + + +def _create_kr_plt(comptable_cds): + """ + Create Dymamic Kappa/Rho Scatter Plot + + Parameters + ---------- + comptable_cds: bokeh.models.ColumnDataSource + Data structure containing a limited set of columns from the comp_table + + Returns + ------- + fig: bokeh.plotting.figure.Figure + Bokeh scatter plot of kappa vs. rho + """ + # Create Panel for the Kappa - Rho Scatter + kr_hovertool = models.HoverTool(tooltips=[('Component ID', '@component'), + ('Kappa', '@kappa{0.00}'), + ('Rho', '@rho{0.00}'), + ('Var. 
Expl.', '@varexp{0.00}%')]) + fig = plotting.figure(plot_width=400, plot_height=400, + tools=["tap,wheel_zoom,reset,pan,crosshair,save", kr_hovertool], + title="Kappa / Rho Plot") + fig.circle('kappa', 'rho', size='size', color='color', alpha=0.5, source=comptable_cds, + legend_group='classif') + fig.xaxis.axis_label = 'Kappa' + fig.yaxis.axis_label = 'Rho' + fig.toolbar.logo = None + fig.legend.background_fill_alpha = 0.5 + fig.legend.orientation = 'horizontal' + fig.legend.location = 'bottom_right' + return fig + + +def _create_sorted_plt(comptable_cds, n_comps, x_var, y_var, title=None, + x_label=None, y_label=None): + """ + Create dynamic sorted plots + + Parameters + ---------- + comptable_ds: bokeh.models.ColumnDataSource + Data structure containing a limited set of columns from the comp_table + + x_var: str + Name of variable for the x-axis + + y_var: str + Name of variable for the y-axis + + title: str + Plot title + + x_label: str + X-axis label + + y_label: str + Y-axis label + + Returns + ------- + fig: bokeh.plotting.figure.Figure + Bokeh plot of components ranked by a given feature + """ + hovertool = models.HoverTool(tooltips=[('Component ID', '@component'), + ('Kappa', '@kappa{0.00}'), + ('Rho', '@rho{0.00}'), + ('Var. Expl.', '@varexp{0.00}%')]) + fig = plotting.figure(plot_width=400, plot_height=400, + tools=["tap,wheel_zoom,reset,pan,crosshair,save", hovertool], + title=title) + fig.line(x=np.arange(1, n_comps + 1), + y=comptable_cds.data[y_var].sort_values(ascending=False).values, + color='black') + fig.circle(x_var, y_var, source=comptable_cds, + size=5, color='color', alpha=0.7) + fig.xaxis.axis_label = x_label + fig.yaxis.axis_label = y_label + fig.x_range = models.Range1d(-1, n_comps + 1) + fig.toolbar.logo = None + + return fig + + +def _create_varexp_pie_plt(comptable_cds, n_comps): + fig = plotting.figure(plot_width=400, plot_height=400, title='Variance Explained View', + tools=['hover,tap,save'], + tooltips=[('Component ID', ' @component'), + ('Kappa', '@kappa{0.00}'), + ('Rho', '@rho{0.00}'), + ('Var. Exp.', '@varexp{0.00}%')]) + fig.wedge(x=0, y=1, radius=.9, + start_angle=transform.cumsum('angle', include_zero=True), + end_angle=transform.cumsum('angle'), + line_color="white", + fill_color='color', source=comptable_cds, fill_alpha=0.7) + fig.axis.visible = False + fig.grid.visible = False + fig.toolbar.logo = None + + circle = models.Circle(x=0, y=1, size=150, fill_color='white', line_color='white') + fig.add_glyph(circle) + + return fig + + +def _tap_callback(comptable_cds, div_content, out_dir): + """ + Javacript function to animate tap events and show component info on the right + + Parameters + ---------- + CDS: bokeh.models.ColumnDataSource + Data structure containing a limited set of columns from the comp_table + div: bokeh.models.Div + Target Div element where component images will be loaded + + Returns + ------- + CustomJS: bokeh.models.CustomJS + Javascript function that adds the tapping functionality + """ + return models.CustomJS(args=dict(source_comp_table=comptable_cds, + div=div_content, + outdir=out_dir), code=tap_callback_jscode) + + +def _link_figures(fig, comptable_ds, div_content, out_dir): + """ + Links figures and adds interaction on mouse-click. 
+ + Parameters + ---------- + fig : bokeh.plotting.figure + Figure containing a given plot + + comptable_ds : bokeh.models.ColumnDataSource + Data structure with a limited version of the comptable + suitable for dynamic plot purposes + + div_content : bokeh.models.Div + Div element for additional HTML content. + + out_dir : str + Output directory of tedana results. + + Returns + ------- + fig : bokeh.plotting.figure + Same as input figure, but with a linked method to + its Tap event. + + """ + fig.js_on_event(events.Tap, + _tap_callback(comptable_ds, + div_content, + out_dir)) + return fig diff --git a/tedana/reporting/html_report.py b/tedana/reporting/html_report.py new file mode 100644 index 000000000..79f4e2b3b --- /dev/null +++ b/tedana/reporting/html_report.py @@ -0,0 +1,119 @@ +import pandas as pd +from bokeh import (embed, layouts, models) +from pathlib import Path +from os.path import join as opj +from string import Template +from tedana.info import __version__ +from tedana.reporting import dynamic_figures as df + + +def _update_template_bokeh(bokeh_id, about, bokeh_js): + """ + Populate a report with content. + + Parameters + ---------- + bokeh_id : str + HTML div created by bokeh.embed.components + about : str + Reporting information for a given run + bokeh_js : str + Javascript created by bokeh.embed.components + Returns + ------- + HTMLReport : an instance of a populated HTML report + """ + resource_path = Path(__file__).resolve().parent.joinpath('data', 'html') + + body_template_name = 'report_body_template.html' + body_template_path = resource_path.joinpath(body_template_name) + with open(str(body_template_path), 'r') as body_file: + body_tpl = Template(body_file.read()) + body = body_tpl.substitute(content=bokeh_id, + about=about, + javascript=bokeh_js) + return body + + +def _save_as_html(body): + """ + Save an HTML report out to a file. 
+ + Parameters + ---------- + body : str + Body for HTML report with embedded figures + """ + resource_path = Path(__file__).resolve().parent.joinpath('data', 'html') + head_template_name = 'report_head_template.html' + head_template_path = resource_path.joinpath(head_template_name) + with open(str(head_template_path), 'r') as head_file: + head_tpl = Template(head_file.read()) + + html = head_tpl.substitute(version=__version__, body=body) + return html + + +def generate_report(out_dir, tr): + """ + Parameters + ---------- + out_dir : str + File path to a completed tedana output directory + tr : float + The repetition time (TR) for the collected multi-echo + sequence + + Returns + ------- + HTML : file + A generated HTML report + """ + # Load the component time series + comp_ts_path = opj(out_dir, 'ica_mixing.tsv') + comp_ts_df = pd.read_csv(comp_ts_path, sep='\t', encoding='utf=8') + n_vols, n_comps = comp_ts_df.shape + + # Load the component table + comptable_path = opj(out_dir, 'ica_decomposition.json') + comptable_cds = df._create_data_struct(comptable_path) + + # Create kappa rho plot + kappa_rho_plot = df._create_kr_plt(comptable_cds) + + # Create sorted plots + kappa_sorted_plot = df._create_sorted_plt(comptable_cds, n_comps, + 'kappa_rank', 'kappa', + 'Kappa Rank', 'Kappa') + rho_sorted_plot = df._create_sorted_plt(comptable_cds, n_comps, + 'rho_rank', 'rho', + 'Rho Rank', 'Rho') + varexp_pie_plot = df._create_varexp_pie_plt(comptable_cds, n_comps) + + # link all dynamic figures + figs = [kappa_rho_plot, kappa_sorted_plot, + rho_sorted_plot, varexp_pie_plot] + + div_content = models.Div(width=500, height=750, height_policy='fixed') + + for fig in figs: + df._link_figures(fig, comptable_cds, div_content, out_dir=out_dir) + + # Create a layout + app = layouts.gridplot([[ + layouts.column(layouts.row(kappa_rho_plot, varexp_pie_plot), + layouts.row(rho_sorted_plot, kappa_sorted_plot)), + layouts.column(div_content)]], + toolbar_location='left') + + # Embed for reporting and save out HTML + kr_script, kr_div = embed.components(app) + + # Read in relevant methods + with open(opj(out_dir, 'report.txt'), 'r+') as f: + about = f.read() + + body = _update_template_bokeh(kr_div, about, kr_script) + html = _save_as_html(body) + with open(opj(out_dir, 'tedana_report.html'), 'wb') as f: + f.write(html.encode('utf-8')) diff --git a/tedana/viz.py b/tedana/reporting/static_figures.py similarity index 52% rename from tedana/viz.py rename to tedana/reporting/static_figures.py index 92c664872..fb6a0e44b 100644 --- a/tedana/viz.py +++ b/tedana/reporting/static_figures.py @@ -9,7 +9,7 @@ matplotlib.use('AGG') import matplotlib.pyplot as plt -from tedana import metrics +from tedana import stats from tedana.utils import get_spectrum LGR = logging.getLogger(__name__) @@ -19,7 +19,7 @@ RefLGR = logging.getLogger('REFERENCES') -def trim_edge_zeros(arr): +def _trim_edge_zeros(arr): """ Trims away the zero-filled slices that surround many 3/4D arrays @@ -43,8 +43,7 @@ def trim_edge_zeros(arr): return arr[bounding_box] -def write_comp_figs(ts, mask, comptable, mmix, ref_img, out_dir, - png_cmap): +def comp_figures(ts, mask, comptable, mmix, ref_img, out_dir, png_cmap): """ Creates static figures that highlight certain aspects of tedana processing This includes a figure for each component showing the component time course, @@ -66,23 +65,16 @@ def write_comp_figs(ts, mask, comptable, mmix, ref_img, out_dir, Reference image to dictate how outputs are saved to disk out_dir : :obj:`str` Figures folder within output 
directory - png_cmap : :obj:`str` - The name of a matplotlib colormap to use when making figures. Optional. - Default colormap is 'coolwarm' """ # Get the lenght of the timeseries n_vols = len(mmix) - # Check that colormap provided exists - if png_cmap not in plt.colormaps(): - LGR.warning('Provided colormap is not recognized, proceeding with default') - png_cmap = 'coolwarm' # regenerate the beta images - ts_B = metrics.get_coeffs(ts, mmix, mask) + ts_B = stats.get_coeffs(ts, mmix, mask) ts_B = ts_B.reshape(ref_img.shape[:3] + ts_B.shape[1:]) # trim edges from ts_B array - ts_B = trim_edge_zeros(ts_B) + ts_B = _trim_edge_zeros(ts_B) # Mask out remaining zeros ts_B = np.ma.masked_where(ts_B == 0, ts_B) @@ -154,7 +146,7 @@ def write_comp_figs(ts, mask, comptable, mmix, ref_img, out_dir, imgmax = 0.1 * np.abs(ts_B[:, :, :, compnum]).max() imgmin = imgmax * -1 - for idx, cut in enumerate(cuts): + for idx, _ in enumerate(cuts): for imgslice in range(1, 6): ax = plt.subplot2grid((5, 6), (idx + 1, imgslice - 1), rowspan=1, colspan=1) ax.axis('off') @@ -193,130 +185,3 @@ def write_comp_figs(ts, mask, comptable, mmix, ref_img, out_dir, compplot_name = os.path.join(out_dir, plot_name) plt.savefig(compplot_name) plt.close() - - -def write_kappa_scatter(comptable, out_dir): - """ - Creates a scatter plot of Kappa vs Rho values. The shape and size of the - points is based on classification and variance explained, respectively. - - Parameters - ---------- - comptable : (C x X) :obj:`pandas.DataFrame` - Component metric table. One row for each component, with a column for - each metric. Requires at least four columns: "classification", - "kappa", "rho", and "variance explained". - out_dir : :obj:`str` - Figures folder within output directory - - """ - - # Creating Kappa Vs Rho plot - ax_scatter = plt.gca() - - # Set up for varying marker shape and color - mkr_dict = {'accepted': ['*', 'g'], 'rejected': ['v', 'r'], - 'ignored': ['d', 'k']} - - # Prebuild legend so that the marker sizes are uniform - for kind in mkr_dict: - plt.scatter([], [], s=1, marker=mkr_dict[kind][0], - c=mkr_dict[kind][1], label=kind, alpha=0.5) - # Create legend - ax_scatter.legend(markerscale=10) - - # Plot actual values - for kind in mkr_dict: - d = comptable[comptable.classification == kind] - plt.scatter(d.kappa, d.rho, - s=150 * d['variance explained'], marker=mkr_dict[kind][0], - c=mkr_dict[kind][1], alpha=0.5) - - # Finish labeling the plot. - ax_scatter.set_xlabel('kappa') - ax_scatter.set_ylabel('rho') - ax_scatter.set_title('Kappa vs Rho') - ax_scatter.xaxis.label.set_fontsize(20) - ax_scatter.yaxis.label.set_fontsize(20) - ax_scatter.title.set_fontsize(25) - scatter_title = os.path.join(out_dir, 'Kappa_vs_Rho_Scatter.png') - plt.savefig(scatter_title) - - plt.close() - - -def write_summary_fig(comptable, out_dir): - """ - Creates a pie chart showing 1) The total variance explained by each - component in the outer ring, 2) the variance explained by each - individual component in the inner ring, 3) counts of each classification - and 4) the amount of unexplained variance. - - Parameters - ---------- - comptable : (C x X) :obj:`pandas.DataFrame` - Component metric table. One row for each component, with a column for - each metric. Requires at least two columns: "variance explained" and - "classification". 
- out_dir : :obj:`str` - Figures folder within output directory - """ - - var_expl = [] - ind_var_expl = {} - counts = {} - # Get overall variance explained, each components variance and counts of comps - for clf in ['accepted', 'rejected', 'ignored']: - var_expl.append(np.sum(comptable[comptable.classification == clf]['variance explained'])) - ind_var_expl[clf] = comptable[comptable.classification == clf]['variance explained'].values - counts[clf] = '{0} {1}'.format(comptable[comptable.classification == clf].count()[0], clf) - - # Generate Colormaps for individual components - acc_colors = plt.cm.Greens(np.linspace(0.2, .6, len(ind_var_expl['accepted'].tolist()))) - rej_colors = plt.cm.Reds(np.linspace(0.2, .6, len(ind_var_expl['rejected'].tolist()))) - ign_colors = plt.cm.Greys(np.linspace(0.2, .8, len(ind_var_expl['ignored'].tolist()))) - unxp_colors = np.atleast_2d(np.array(plt.cm.Greys(0))) - - # Shuffle the colors so that neighboring wedges are (perhaps) visually seperable - np.random.shuffle(rej_colors) - np.random.shuffle(acc_colors) - np.random.shuffle(ign_colors) - - # Decision on whether to include the unexplained variance in figure - unexpl_var = [100 - np.sum(var_expl)] - all_var_expl = [] - if unexpl_var >= [0.001]: - var_expl += unexpl_var - counts['unexplained'] = 'unexplained variance' - # Combine individual variances from giant list - for value in ind_var_expl.values(): - all_var_expl += value.tolist() - # Add in unexplained variance - all_var_expl += unexpl_var - outer_colors = np.stack((plt.cm.Greens(0.7), plt.cm.Reds(0.7), - plt.cm.Greys(0.7), plt.cm.Greys(0))) - inner_colors = np.concatenate((acc_colors, rej_colors, ign_colors, unxp_colors), axis=0) - else: - for value in ind_var_expl.values(): - all_var_expl += value.tolist() - outer_colors = np.stack((plt.cm.Greens(0.7), plt.cm.Reds(0.7), plt.cm.Greys(0.7))) - inner_colors = np.concatenate((acc_colors, rej_colors, ign_colors), axis=0) - - labels = counts.values() - - fig, ax = plt.subplots(figsize=(16, 10)) - size = 0.3 - # Build outer, overall pie chart, and then inner individual comp pie - ax.pie(var_expl, radius=1, colors=outer_colors, labels=labels, - autopct='%1.1f%%', pctdistance=0.85, textprops={'fontsize': 20}, - wedgeprops=dict(width=size, edgecolor='w')) - - ax.pie(all_var_expl, radius=1 - size, colors=inner_colors, - wedgeprops=dict(width=size)) - - ax.set(aspect="equal") - ax.set_title('Variance Explained By Classification', fontdict={'fontsize': 28}) - if unexpl_var < [0.001]: - plt.text(1, -1, '*Unexplained Variance less than 0.001', fontdict={'fontsize': 12}) - sumfig_title = os.path.join(out_dir, 'Component_Overview.png') - plt.savefig(sumfig_title) diff --git a/tedana/selection/tedica.py b/tedana/selection/tedica.py index 46c029124..95bdf063f 100644 --- a/tedana/selection/tedica.py +++ b/tedana/selection/tedica.py @@ -153,17 +153,17 @@ def kundu_selection_v2(comptable, n_echos, n_vols): comptable.loc[temp_rej0a, 'classification'] = 'rejected' comptable.loc[temp_rej0a, 'rationale'] += 'I002;' - # Number of significant voxels for S0 model is higher than number for R2 - # model *and* number for R2 model is greater than zero. - temp_rej0b = all_comps[((comptable['countsigFS0'] > comptable['countsigFR2']) & - (comptable['countsigFR2'] > 0))] + # Number of significant voxels for S0 model is higher than number for T2 + # model *and* number for T2 model is greater than zero. 
+ temp_rej0b = all_comps[((comptable['countsigFS0'] > comptable['countsigFT2']) & + (comptable['countsigFT2'] > 0))] comptable.loc[temp_rej0b, 'classification'] = 'rejected' comptable.loc[temp_rej0b, 'rationale'] += 'I003;' rej = np.union1d(temp_rej0a, temp_rej0b) - # Dice score for S0 maps is higher than Dice score for R2 maps and variance + # Dice score for S0 maps is higher than Dice score for T2 maps and variance # explained is higher than the median across components. - temp_rej1 = all_comps[(comptable['dice_FS0'] > comptable['dice_FR2']) & + temp_rej1 = all_comps[(comptable['dice_FS0'] > comptable['dice_FT2']) & (comptable['variance explained'] > np.median(comptable['variance explained']))] comptable.loc[temp_rej1, 'classification'] = 'rejected' @@ -198,8 +198,8 @@ def kundu_selection_v2(comptable, n_echos, n_vols): a. Not outlier variance b. Kappa>kappa_elbow c. Rho kappa_elbow) & diff --git a/tedana/stats.py b/tedana/stats.py index 601d800a0..988fa03c0 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -7,6 +7,7 @@ from scipy import stats from tedana import utils +from tedana.due import due, BibTeX, Doi LGR = logging.getLogger(__name__) RepLGR = logging.getLogger('REPORT') @@ -74,10 +75,14 @@ def computefeats2(data, mmix, mask=None, normalize=True): # demean masked data if mask is not None: data = data[mask, ...] + # normalize data (subtract mean and divide by standard deviation) in the last dimension + # so that least-squares estimates represent "approximate" correlation values (data_R) + # assuming mixing matrix (mmix) values are also normalized data_vn = stats.zscore(data, axis=-1) # get betas of `data`~`mmix` and limit to range [-0.999, 0.999] data_R = get_coeffs(data_vn, mmix, mask=None) + # Avoid abs(data_R) => 1, otherwise Fisher's transform will return Inf or -Inf data_R[data_R < -0.999] = -0.999 data_R[data_R > 0.999] = 0.999 @@ -86,9 +91,11 @@ def computefeats2(data, mmix, mask=None, normalize=True): if data_Z.ndim == 1: data_Z = np.atleast_2d(data_Z).T - # normalize data + # normalize data (only division by std) if normalize: + # subtract mean and dividing by standard deviation data_Zm = stats.zscore(data_Z, axis=0) + # adding back the mean data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True)) @@ -152,3 +159,86 @@ def get_coeffs(data, X, mask=None, add_const=False): betas = utils.unmask(betas, mask) return betas + + +@due.dcite(BibTeX(""" + @article{hughett2007accurate, + title={Accurate Computation of the F-to-z and t-to-z Transforms + for Large Arguments}, + author={Hughett, Paul}, + journal={Journal of Statistical Software}, + volume={23}, + number={1}, + pages={1--5}, + year={2007}, + publisher={Foundation for Open Access Statistics} + } + """), + description='Introduces T-to-Z transform.') +@due.dcite(Doi('10.5281/zenodo.32508'), + description='Python implementation of T-to-Z transform.') +def t_to_z(t_values, dof): + """ + Convert t-values to z-values. + + Parameters + ---------- + t_values + dof + + Returns + ------- + out + + Notes + ----- + From Vanessa Sochat's TtoZ package. + https://github.com/vsoch/TtoZ + """ + if not isinstance(t_values, np.ndarray): + ret_float = True + t_values = np.array([t_values]) + else: + ret_float = False + + RepLGR.info("T-statistics were converted to z-statistics using Dr. " + "Vanessa Sochat's implementation (Sochat, 2015) of the method " + "described in Hughett (2007).") + RefLGR.info('Sochat, V. (2015). TtoZ Original Release. Zenodo. 
' + 'http://doi.org/10.5281/zenodo.32508.') + RefLGR.info('Hughett, P. (2007). Accurate Computation of the F-to-z and ' + 't-to-z Transforms for Large Arguments. Journal of ' + 'Statistical Software, 23(1), 1-5.') + + # Select just the nonzero voxels + nonzero = t_values[t_values != 0] + + # We will store our results here + z_values = np.zeros(len(nonzero)) + + # Select values less than or == 0, and greater than zero + c = np.zeros(len(nonzero)) + k1 = (nonzero <= c) + k2 = (nonzero > c) + + # Subset the data into two sets + t1 = nonzero[k1] + t2 = nonzero[k2] + + # Calculate p values for <=0 + p_values_t1 = stats.t.cdf(t1, df=dof) + z_values_t1 = stats.norm.ppf(p_values_t1) + + # Calculate p values for > 0 + p_values_t2 = stats.t.cdf(-t2, df=dof) + z_values_t2 = -stats.norm.ppf(p_values_t2) + z_values[k1] = z_values_t1 + z_values[k2] = z_values_t2 + + # Write new image to file + out = np.zeros(t_values.shape) + out[t_values != 0] = z_values + + if ret_float: + out = out[0] + return out diff --git a/tedana/tests/data/tedana_outputs.txt b/tedana/tests/data/cornell_three_echo_outputs.txt similarity index 64% rename from tedana/tests/data/tedana_outputs.txt rename to tedana/tests/data/cornell_three_echo_outputs.txt index 6e65c9579..abe7e92df 100644 --- a/tedana/tests/data/tedana_outputs.txt +++ b/tedana/tests/data/cornell_three_echo_outputs.txt @@ -1,5 +1,3 @@ -figures/Component_Overview.png -figures/Kappa_vs_Rho_Scatter.png adaptive_mask.nii.gz betas_OC.nii.gz betas_hik_OC.nii.gz @@ -42,6 +40,35 @@ figures/comp_035.png figures/comp_036.png figures/comp_037.png figures/comp_038.png +figures/comp_039.png +figures/comp_040.png +figures/comp_041.png +figures/comp_042.png +figures/comp_043.png +figures/comp_044.png +figures/comp_045.png +figures/comp_046.png +figures/comp_047.png +figures/comp_048.png +figures/comp_049.png +figures/comp_050.png +figures/comp_051.png +figures/comp_052.png +figures/comp_053.png +figures/comp_054.png +figures/comp_055.png +figures/comp_056.png +figures/comp_057.png +figures/comp_058.png +figures/comp_059.png +figures/comp_060.png +figures/comp_061.png +figures/comp_062.png +figures/comp_063.png +figures/comp_064.png +figures/comp_065.png +figures/comp_066.png +figures/comp_067.png dn_ts_OC.nii.gz feats_OC2.nii.gz figures @@ -56,4 +83,5 @@ pca_mixing.tsv report.txt s0v.nii.gz t2sv.nii.gz +tedana_report.html ts_OC.nii.gz diff --git a/tedana/tests/data/fiu_four_echo_outputs.txt b/tedana/tests/data/fiu_four_echo_outputs.txt new file mode 100644 index 000000000..7f9b8b17b --- /dev/null +++ b/tedana/tests/data/fiu_four_echo_outputs.txt @@ -0,0 +1,57 @@ +T1gs.nii.gz +adaptive_mask.nii.gz +betas_OC.nii.gz +betas_hik_OC_T1c.nii.gz +dn_ts_OC.nii.gz +dn_ts_OC_T1c.nii.gz +dn_ts_e1.nii.gz +dn_ts_e2.nii.gz +dn_ts_e3.nii.gz +dn_ts_e4.nii.gz +glsig.1D +hik_ts_OC_T1c.nii.gz +ica_components.nii.gz +ica_decomposition.json +ica_mixing.tsv +lowk_ts_OC.nii.gz +lowk_ts_e1.nii.gz +lowk_ts_e2.nii.gz +lowk_ts_e3.nii.gz +lowk_ts_e4.nii.gz +meica_mix_T1c.1D +pca_components.nii.gz +pca_decomposition.json +pca_mixing.tsv +report.txt +s0v.nii.gz +s0vG.nii.gz +sphis_hik.nii.gz +t2sv.nii.gz +t2svG.nii.gz +tedana_report.html +ts_OC.nii.gz +ts_OC_whitened.nii.gz +tsoc_nogs.nii.gz +tsoc_orig.nii.gz +figures +figures/comp_000.png +figures/comp_001.png +figures/comp_002.png +figures/comp_003.png +figures/comp_004.png +figures/comp_005.png +figures/comp_006.png +figures/comp_007.png +figures/comp_008.png +figures/comp_009.png +figures/comp_010.png +figures/comp_011.png +figures/comp_012.png 
+figures/comp_013.png +figures/comp_014.png +figures/comp_015.png +figures/comp_016.png +figures/comp_017.png +figures/comp_018.png +figures/comp_019.png +figures/comp_020.png diff --git a/tedana/tests/data/nih_five_echo_outputs_t2smap.txt b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt new file mode 100644 index 000000000..1d2c6c46b --- /dev/null +++ b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt @@ -0,0 +1,5 @@ +desc-full_S0map.nii.gz +desc-full_T2starmap.nii.gz +desc-optcom_bold.nii.gz +S0map.nii.gz +T2starmap.nii.gz diff --git a/tedana/tests/data/tedana_outputs_verbose.txt b/tedana/tests/data/nih_five_echo_outputs_verbose.txt similarity index 86% rename from tedana/tests/data/tedana_outputs_verbose.txt rename to tedana/tests/data/nih_five_echo_outputs_verbose.txt index cd5df4334..93496d5ee 100644 --- a/tedana/tests/data/tedana_outputs_verbose.txt +++ b/tedana/tests/data/nih_five_echo_outputs_verbose.txt @@ -17,20 +17,13 @@ hik_ts_e5.nii.gz ica_components.nii.gz ica_decomposition.json ica_mixing.tsv +ica_orth_mixing.tsv lowk_ts_OC.nii.gz lowk_ts_e1.nii.gz lowk_ts_e2.nii.gz lowk_ts_e3.nii.gz lowk_ts_e4.nii.gz lowk_ts_e5.nii.gz -meica_R2_pred.nii.gz -meica_S0_pred.nii.gz -meica_betas_catd.nii.gz -meica_metric_weights.nii.gz -mepca_R2_pred.nii.gz -mepca_S0_pred.nii.gz -mepca_betas_catd.nii.gz -mepca_metric_weights.nii.gz pca_components.nii.gz pca_decomposition.json pca_mixing.tsv @@ -39,11 +32,10 @@ s0v.nii.gz s0vG.nii.gz t2sv.nii.gz t2svG.nii.gz +tedana_report.html ts_OC.nii.gz ts_OC_whitened.nii.gz figures -figures/Component_Overview.png -figures/Kappa_vs_Rho_Scatter.png figures/comp_000.png figures/comp_001.png figures/comp_002.png @@ -115,6 +107,3 @@ figures/comp_067.png figures/comp_068.png figures/comp_069.png figures/comp_070.png -figures/comp_071.png -figures/comp_072.png -figures/comp_073.png diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index ed1fefaa2..457757a3c 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -15,7 +15,8 @@ import requests import pandas as pd -from tedana.workflows import tedana_workflow +from tedana.workflows import tedana as tedana_cli +from tedana.workflows import t2smap as t2smap_cli from tedana import io @@ -37,8 +38,8 @@ def check_integration_outputs(fname, outpath): # Checks for log file log_regex = ('^tedana_' - '[12][0-9]{3}-[0-9]{2}-[0-9]{2}T[0-9]{2}:' - '[0-9]{2}:[0-9]{2}.tsv$') + '[12][0-9]{3}-[0-9]{2}-[0-9]{2}T[0-9]{2}' + '[0-9]{2}[0-9]{2}.tsv$') logfiles = [out for out in existing if re.match(log_regex, out)] assert len(logfiles) == 1 @@ -86,19 +87,68 @@ def test_integration_five_echo(skip_integration): prepend = '/tmp/data/five-echo/p06.SBJ01_S09_Task11_e' suffix = '.sm.nii.gz' datalist = [prepend + str(i + 1) + suffix for i in range(5)] - tedana_workflow( + echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] + tedana_cli.tedana_workflow( data=datalist, - tes=[15.4, 29.7, 44.0, 58.3, 72.6], + tes=echo_times, out_dir=out_dir, - debug=True, verbose=True) + tedpca='aic', + fittype='curvefit', + fixed_seed=49, + tedort=True, + verbose=True) # Just a check on the component table pending a unit test of load_comptable comptable = os.path.join(out_dir, 'ica_decomposition.json') df = io.load_comptable(comptable) assert isinstance(df, pd.DataFrame) + # Test re-running, but use the CLI + out_dir2 = '/tmp/data/five-echo/TED.five-echo-manual' + acc_comps = df.loc[df['classification'] == 'accepted'].index.values + mixing = os.path.join(out_dir, 'ica_mixing.tsv') + t2smap = 
os.path.join(out_dir, 't2sv.nii.gz') + args = (['-d'] + datalist + ['-e'] + [str(te) for te in echo_times] + + ['--out-dir', out_dir2, '--debug', '--verbose', + '--manacc', ','.join(acc_comps.astype(str)), + '--ctab', comptable, '--mix', mixing, '--t2smap', t2smap]) + tedana_cli._main(args) + # compare the generated output files - fn = resource_filename('tedana', 'tests/data/tedana_outputs_verbose.txt') + fn = resource_filename('tedana', + 'tests/data/nih_five_echo_outputs_verbose.txt') + check_integration_outputs(fn, out_dir) + + +def test_integration_four_echo(skip_integration): + """ Integration test of the full tedana workflow using four-echo test data + """ + + if skip_integration: + pytest.skip('Skipping four-echo integration test') + out_dir = '/tmp/data/four-echo/TED.four-echo' + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + # download data and run the test + download_test_data('https://osf.io/gnj73/download', + os.path.dirname(out_dir)) + prepend = '/tmp/data/four-echo/' + prepend += 'sub-PILOT_ses-01_task-localizerDetection_run-01_echo-' + suffix = '_space-sbref_desc-preproc_bold+orig.HEAD' + datalist = [prepend + str(i + 1) + suffix for i in range(4)] + tedana_cli.tedana_workflow( + data=datalist, + tes=[11.8, 28.04, 44.28, 60.52], + out_dir=out_dir, + tedpca='kundu-stabilize', + gscontrol=['gsr', 't1c'], + png_cmap='bone', + debug=True, + verbose=True) + + # compare the generated output files + fn = resource_filename('tedana', 'tests/data/fiu_four_echo_outputs.txt') check_integration_outputs(fn, out_dir) @@ -109,18 +159,62 @@ def test_integration_three_echo(skip_integration): if skip_integration: pytest.skip('Skipping three-echo integration test') out_dir = '/tmp/data/three-echo/TED.three-echo' + out_dir2 = '/tmp/data/three-echo/TED.three-echo-rerun' if os.path.exists(out_dir): shutil.rmtree(out_dir) # download data and run the test download_test_data('https://osf.io/rqhfc/download', os.path.dirname(out_dir)) - tedana_workflow( + tedana_cli.tedana_workflow( data='/tmp/data/three-echo/three_echo_Cornell_zcat.nii.gz', tes=[14.5, 38.5, 62.5], out_dir=out_dir, - tedpca='kundu') + low_mem=True, + tedpca='mdl') + + # Test re-running, but use the CLI + args = (['-d', '/tmp/data/three-echo/three_echo_Cornell_zcat.nii.gz', + '-e', '14.5', '38.5', '62.5', + '--out-dir', out_dir2, '--debug', '--verbose', + '--ctab', os.path.join(out_dir, 'ica_decomposition.json'), + '--mix', os.path.join(out_dir, 'ica_mixing.tsv')]) + tedana_cli._main(args) # compare the generated output files - fn = resource_filename('tedana', 'tests/data/tedana_outputs.txt') + fn = resource_filename('tedana', + 'tests/data/cornell_three_echo_outputs.txt') check_integration_outputs(fn, out_dir) + + +def test_integration_t2smap(skip_integration): + """Integration test of the full t2smap workflow using five-echo test data + """ + if skip_integration: + pytest.skip('Skipping t2smap integration test') + out_dir = '/tmp/data/five-echo/t2smap_five-echo' + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + # download data and run the test + download_test_data('https://osf.io/9c42e/download', + os.path.dirname(out_dir)) + prepend = '/tmp/data/five-echo/p06.SBJ01_S09_Task11_e' + suffix = '.sm.nii.gz' + datalist = [prepend + str(i + 1) + suffix for i in range(5)] + echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] + args = (['-d'] + datalist + ['-e'] + [str(te) for te in echo_times] + + ['--out-dir', out_dir, '--fittype', 'curvefit']) + t2smap_cli._main(args) + + # compare the generated output files + fname = 
resource_filename('tedana', + 'tests/data/nih_five_echo_outputs_t2smap.txt') + # Gets filepaths generated by integration test + existing = [os.path.relpath(f, out_dir) for f in + glob.glob(os.path.join(out_dir, '**'), recursive=True)[1:]] + + # Compares remaining files with those expected + with open(fname, 'r') as f: + tocheck = f.read().splitlines() + assert sorted(tocheck) == sorted(existing) diff --git a/tedana/tests/test_mapca.py b/tedana/tests/test_mapca.py new file mode 100644 index 000000000..e0ccb2913 --- /dev/null +++ b/tedana/tests/test_mapca.py @@ -0,0 +1,173 @@ +""" +Tests for maPCA +""" + +import numpy as np +import nibabel as nib +from tedana import decomposition +from pytest import raises +from tedana.decomposition.ma_pca import _autocorr, _check_order, _parzen_win +from tedana.decomposition.ma_pca import _subsampling, _kurtn, _icatb_svd, _eigensp_adj + + +def test_autocorr(): + """ + Unit test on _autocorr function + """ + test_data = np.array([1, 2, 3, 4]) + test_result = np.array([30, 20, 11, 4]) + autocorr = _autocorr(test_data) + assert np.array_equal(autocorr, test_result) + + +def test_check_order(): + """ + Unit test on _check_order function + """ + test_order = -1 + with raises(ValueError) as errorinfo: + ord_out, w, trivwin = _check_order(test_order) + assert 'Order cannot be less than zero' in str(errorinfo.value) + + test_order = 0 + ord_out, w, trivwin = _check_order(test_order) + assert ord_out == test_order + assert trivwin + + test_order = 1 + ord_out, w, trivwin = _check_order(test_order) + assert ord_out == test_order + assert w == 1 + assert trivwin + + test_order = 4 + ord_out, w, trivwin = _check_order(test_order) + assert ord_out == test_order + assert not trivwin + + +def test_parzen_win(): + test_npoints = 3 + test_result = np.array([0.07407407, 1, 0.07407407]) + win = _parzen_win(test_npoints) + assert np.allclose(win, test_result) + + test_npoints = 1 + win = _parzen_win(test_npoints) + assert win == 1 + + +def test_ent_rate_sp(): + """ + Check that ent_rate_sp runs correctly, i.e. returns a float + """ + test_data = np.random.rand(200, 10, 10) + ent_rate = decomposition.ent_rate_sp(test_data, 1) + assert isinstance(ent_rate, float) + assert ent_rate.ndim == 0 + assert ent_rate.size == 1 + + # Checks ValueError with std = 0 + test_data = np.ones((200, 10, 10)) + with raises(ValueError) as errorinfo: + ent_rate = decomposition.ent_rate_sp(test_data, 1) + assert 'Divide by zero encountered' in str(errorinfo.value) + + # Checks ValueError with incorrect matrix dimensions + test_data = np.ones((200, 10, 10, 200)) + with raises(ValueError) as errorinfo: + ent_rate = decomposition.ent_rate_sp(test_data, 1) + assert 'Incorrect matrix dimensions' in str(errorinfo.value) + + +def test_subsampling(): + """ + Unit test for subsampling function + """ + test_data = np.array([1]) + with raises(ValueError) as errorinfo: + sub_data = _subsampling(test_data, 1) + assert 'Unrecognized matrix dimension' in str(errorinfo.value) + + test_data = np.random.rand(2, 3, 4) + sub_data = _subsampling(test_data, sub_depth=2) + assert sub_data.shape == (1, 2, 2) + + +def test_kurtn(): + """ + Unit test for _kurtn function + """ + test_data = np.random.rand(2, 3, 4) + kurt = _kurtn(test_data) + assert kurt.shape == (3, 1) + + +def test_icatb_svd(): + """ + Unit test for icatb_svd function. 
+ """ + test_data = np.diag(np.random.rand(5)) + V, Lambda = _icatb_svd(test_data) + assert np.allclose(np.sum(V, axis=0), np.ones((5,))) + + +def test_eigensp_adj(): + """ + Unit test for eigensp_adj function + """ + test_eigen = np.array([0.9, 0.5, 0.2, 0.1, 0]) + n_effective = 2 + test_result = np.array([0.13508894, 0.11653465, 0.06727316, 0.05211424, 0.]) + lambd_adj = _eigensp_adj(test_eigen, n_effective, p=test_eigen.shape[0]) + assert np.allclose(lambd_adj, test_result) + + +def test_ma_pca(): + """ + Check that ma_pca runs correctly with all three options + """ + + timepoints = 200 + nvox = 20 + n_vox_total = nvox ** 3 + + # Creates fake data to test with + test_data = np.random.random((nvox, nvox, nvox, timepoints)) + time = np.linspace(0, 400, timepoints) + freq = 1 + test_data = test_data + np.sin(2 * np.pi * freq * time) + xform = np.eye(4) * 2 + test_img = nib.nifti1.Nifti1Image(test_data, xform) + + # Creates mask + test_mask = np.ones((nvox, nvox, nvox)) + test_mask_img = nib.nifti1.Nifti1Image(test_mask, xform) + + # Testing AIC option + u, s, varex_norm, v = decomposition.ma_pca(test_img, test_mask_img, 'aic') + + assert u.shape[0] == n_vox_total + assert s.shape[0] == 1 + assert varex_norm.shape[0] == 1 + assert v.shape[0] == timepoints + + del u, s, varex_norm, v + + # Testing KIC option + u, s, varex_norm, v = decomposition.ma_pca(test_img, test_mask_img, 'kic') + + assert u.shape[0] == n_vox_total + assert s.shape[0] == 1 + assert varex_norm.shape[0] == 1 + assert v.shape[0] == timepoints + + del u, s, varex_norm, v + + # Testing MDL option + u, s, varex_norm, v = decomposition.ma_pca(test_img, test_mask_img, 'mdl') + + assert u.shape[0] == n_vox_total + assert s.shape[0] == 1 + assert varex_norm.shape[0] == 1 + assert v.shape[0] == timepoints diff --git a/tedana/tests/test_metrics.py b/tedana/tests/test_metrics.py new file mode 100644 index 000000000..b737fb26e --- /dev/null +++ b/tedana/tests/test_metrics.py @@ -0,0 +1,215 @@ +""" +Tests for tedana.metrics +""" +import os.path as op + +import pytest +import numpy as np +import pandas as pd + +from tedana import io, utils +from tedana.metrics import dependence, collect +from tedana.tests.utils import get_test_data_path + + +@pytest.fixture(scope='module') +def testdata1(): + tes = np.array([14.5, 38.5, 62.5]) + in_files = [op.join(get_test_data_path(), 'echo{0}.nii.gz'.format(i + 1)) + for i in range(3)] + data_cat, ref_img = io.load_data(in_files, n_echos=len(tes)) + mask, adaptive_mask = utils.make_adaptive_mask(data_cat, getsum=True) + data_optcom = np.mean(data_cat, axis=1) + mixing = np.random.random((data_optcom.shape[1], 50)) + data_dict = {'data_cat': data_cat, + 'tes': tes, + 'mask': mask, + 'data_optcom': data_optcom, + 'adaptive_mask': adaptive_mask, + 'ref_img': ref_img, + 'mixing': mixing, + } + return data_dict + + +def test_smoke_generate_metrics(testdata1): + """ + Smoke test for tedana.metrics.collect.generate_metrics + """ + metrics = ['kappa', 'rho', 'countnoise', 'countsigFT2', 'countsigFS0', + 'dice_FT2', 'dice_FS0', 'signal-noise_t', 'variance explained', + 'normalized variance explained', 'd_table_score'] + comptable, mixing = collect.generate_metrics( + testdata1['data_cat'], testdata1['data_optcom'], testdata1['mixing'], + testdata1['mask'], testdata1['adaptive_mask'], + testdata1['tes'], testdata1['ref_img'], + metrics=metrics, sort_by='kappa', ascending=False) + assert isinstance(comptable, pd.DataFrame) + + +def test_smoke_calculate_weights(): + """Smoke test for 
tedana.metrics.dependence.calculate_weights + """ + n_voxels, n_volumes, n_components = 1000, 100, 50 + data_optcom = np.random.random((n_voxels, n_volumes)) + mixing = np.random.random((n_volumes, n_components)) + weights = dependence.calculate_weights(data_optcom, mixing) + assert weights.shape == (n_voxels, n_components) + + +def test_smoke_calculate_betas(): + """Smoke test for tedana.metrics.dependence.calculate_betas + """ + n_voxels, n_volumes, n_components = 1000, 100, 50 + data_optcom = np.random.random((n_voxels, n_volumes)) + mixing = np.random.random((n_volumes, n_components)) + betas = dependence.calculate_betas(data_optcom, mixing) + assert betas.shape == (n_voxels, n_components) + + +def test_smoke_calculate_psc(): + """Smoke test for tedana.metrics.dependence.calculate_psc + """ + n_voxels, n_volumes, n_components = 1000, 100, 50 + data_optcom = np.random.random((n_voxels, n_volumes)) + optcom_betas = np.random.random((n_voxels, n_components)) + psc = dependence.calculate_psc(data_optcom, optcom_betas) + assert psc.shape == (n_voxels, n_components) + + +def test_smoke_calculate_z_maps(): + """Smoke test for tedana.metrics.dependence.calculate_psc + """ + n_voxels, n_components = 1000, 50 + weights = np.random.random((n_voxels, n_components)) + z_maps = dependence.calculate_z_maps(weights, z_max=4) + assert z_maps.shape == (n_voxels, n_components) + + +def test_smoke_calculate_f_maps(): + """Smoke test for tedana.metrics.dependence.calculate_f_maps + """ + n_voxels, n_echos, n_volumes, n_components = 1000, 5, 100, 50 + data_cat = np.random.random((n_voxels, n_echos, n_volumes)) + Z_maps = np.random.normal(size=(n_voxels, n_components)) + mixing = np.random.random((n_volumes, n_components)) + adaptive_mask = np.random.randint(1, n_echos + 1, size=n_voxels) + tes = np.array([15, 25, 35, 45, 55]) + F_T2_maps, F_S0_maps = dependence.calculate_f_maps( + data_cat, + Z_maps, + mixing, + adaptive_mask, + tes, + f_max=500 + ) + assert F_T2_maps.shape == F_S0_maps.shape == (n_voxels, n_components) + + +def test_smoke_calculate_varex(): + """Smoke test for tedana.metrics.dependence.calculate_varex + """ + n_voxels, n_components = 1000, 50 + optcom_betas = np.random.random((n_voxels, n_components)) + varex = dependence.calculate_varex(optcom_betas) + assert varex.shape == (n_components,) + + +def test_smoke_calculate_varex_norm(): + """Smoke test for tedana.metrics.dependence.calculate_varex_norm + """ + n_voxels, n_components = 1000, 50 + weights = np.random.random((n_voxels, n_components)) + varex_norm = dependence.calculate_varex_norm(weights) + assert varex_norm.shape == (n_components,) + + +def test_smoke_compute_dice(): + """Smoke test for tedana.metrics.dependence.compute_dice + """ + n_voxels, n_components = 1000, 50 + clmaps1 = np.random.randint(0, 2, size=(n_voxels, n_components)) + clmaps2 = np.random.randint(0, 2, size=(n_voxels, n_components)) + dice = dependence.compute_dice(clmaps1, clmaps2, axis=0) + assert dice.shape == (n_components,) + dice = dependence.compute_dice(clmaps1, clmaps2, axis=1) + assert dice.shape == (n_voxels,) + + +def test_smoke_compute_signal_minus_noise_z(): + """Smoke test for tedana.metrics.dependence.compute_signal_minus_noise_z + """ + n_voxels, n_components = 1000, 50 + Z_maps = np.random.normal(size=(n_voxels, n_components)) + Z_clmaps = np.random.randint(0, 2, size=(n_voxels, n_components)) + F_T2_maps = np.random.random((n_voxels, n_components)) + signal_minus_noise_z, signal_minus_noise_p = dependence.compute_signal_minus_noise_z( + Z_maps, 
+ Z_clmaps, + F_T2_maps, + z_thresh=1.95 + ) + assert signal_minus_noise_z.shape == signal_minus_noise_p.shape == (n_components,) + + +def test_smoke_compute_signal_minus_noise_t(): + """Smoke test for tedana.metrics.dependence.compute_signal_minus_noise_t + """ + n_voxels, n_components = 1000, 50 + Z_maps = np.random.normal(size=(n_voxels, n_components)) + Z_clmaps = np.random.randint(0, 2, size=(n_voxels, n_components)) + F_T2_maps = np.random.random((n_voxels, n_components)) + signal_minus_noise_t, signal_minus_noise_p = dependence.compute_signal_minus_noise_t( + Z_maps, + Z_clmaps, + F_T2_maps, + z_thresh=1.95 + ) + assert signal_minus_noise_t.shape == signal_minus_noise_p.shape == (n_components,) + + +def test_smoke_compute_countsignal(): + """Smoke test for tedana.metrics.dependence.compute_countsignal + """ + n_voxels, n_components = 1000, 50 + stat_cl_maps = np.random.randint(0, 2, size=(n_voxels, n_components)) + countsignal = dependence.compute_countsignal(stat_cl_maps) + assert countsignal.shape == (n_components,) + + +def test_smoke_compute_countnoise(): + """Smoke test for tedana.metrics.dependence.compute_countnoise + """ + n_voxels, n_components = 1000, 50 + stat_maps = np.random.normal(size=(n_voxels, n_components)) + stat_cl_maps = np.random.randint(0, 2, size=(n_voxels, n_components)) + countnoise = dependence.compute_countnoise(stat_maps, stat_cl_maps, stat_thresh=1.95) + assert countnoise.shape == (n_components,) + + +def test_smoke_generate_decision_table_score(): + """Smoke test for tedana.metrics.dependence.generate_decision_table_score + """ + n_voxels, n_components = 1000, 50 + kappa = np.random.random(n_components) + dice_FT2 = np.random.random(n_components) + signal_minus_noise_t = np.random.normal(size=n_components) + countnoise = np.random.randint(0, n_voxels, size=n_components) + countsigFT2 = np.random.randint(0, n_voxels, size=n_components) + decision_table_score = dependence.generate_decision_table_score( + kappa, dice_FT2, signal_minus_noise_t, + countnoise, countsigFT2 + ) + assert decision_table_score.shape == (n_components,) + + +def test_smoke_calculate_dependence_metrics(): + """Smoke test for tedana.metrics.dependence.calculate_dependence_metrics + """ + n_voxels, n_components = 1000, 50 + F_T2_maps = np.random.random((n_voxels, n_components)) + F_S0_maps = np.random.random((n_voxels, n_components)) + Z_maps = np.random.random((n_voxels, n_components)) + kappas, rhos = dependence.calculate_dependence_metrics( + F_T2_maps, F_S0_maps, Z_maps) + assert kappas.shape == rhos.shape == (n_components,) diff --git a/tedana/tests/test_model_fit_dependence_metrics.py b/tedana/tests/test_model_fit_dependence_metrics.py deleted file mode 100644 index e6dd2dacd..000000000 --- a/tedana/tests/test_model_fit_dependence_metrics.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Tests for tedana.metrics.fit -""" - -import numpy as np -import pytest - -from tedana.metrics import kundu_fit - - -def test_break_dependence_metrics(): - """ - Ensure that dependence_metrics fails when input data do not have the right - shapes. 
- """ - n_samples, n_echos, n_vols, n_comps = 10000, 4, 100, 50 - catd = np.empty((n_samples, n_echos, n_vols)) - tsoc = np.empty((n_samples, n_vols)) - mmix = np.empty((n_vols, n_comps)) - t2s = np.empty((n_samples, n_vols)) - tes = np.empty((n_echos)) - ref_img = '' - - # Shape of catd is wrong - catd = np.empty((n_samples + 1, n_echos, n_vols)) - with pytest.raises(ValueError): - kundu_fit.dependence_metrics(catd=catd, tsoc=tsoc, mmix=mmix, - t2s=t2s, tes=tes, ref_img=ref_img, - reindex=False, mmixN=None, - algorithm='kundu_v3') - - # Shape of t2s is wrong - catd = np.empty((n_samples, n_echos, n_vols)) - t2s = np.empty((n_samples + 1, n_vols)) - with pytest.raises(ValueError): - kundu_fit.dependence_metrics(catd=catd, tsoc=tsoc, mmix=mmix, - t2s=t2s, tes=tes, ref_img=ref_img, - reindex=False, mmixN=None, - algorithm='kundu_v3') - - # Shape of tsoc is wrong - t2s = np.empty((n_samples, n_vols)) - tsoc = np.empty((n_samples + 1, n_vols)) - with pytest.raises(ValueError): - kundu_fit.dependence_metrics(catd=catd, tsoc=tsoc, mmix=mmix, - t2s=t2s, tes=tes, ref_img=ref_img, - reindex=False, mmixN=None, - algorithm='kundu_v3') - - # Shape of catd is wrong - catd = np.empty((n_samples, n_echos + 1, n_vols)) - tsoc = np.empty((n_samples, n_vols)) - with pytest.raises(ValueError): - kundu_fit.dependence_metrics(catd=catd, tsoc=tsoc, mmix=mmix, - t2s=t2s, tes=tes, ref_img=ref_img, - reindex=False, mmixN=None, - algorithm='kundu_v3') - - # Shape of catd is wrong - catd = np.empty((n_samples, n_echos, n_vols + 1)) - with pytest.raises(ValueError): - kundu_fit.dependence_metrics(catd=catd, tsoc=tsoc, mmix=mmix, - t2s=t2s, tes=tes, ref_img=ref_img, - reindex=False, mmixN=None, - algorithm='kundu_v3') - - # Shape of t2s is wrong - catd = np.empty((n_samples, n_echos, n_vols)) - t2s = np.empty((n_samples, n_vols + 1)) - with pytest.raises(ValueError): - kundu_fit.dependence_metrics(catd=catd, tsoc=tsoc, mmix=mmix, - t2s=t2s, tes=tes, ref_img=ref_img, - reindex=False, mmixN=None, - algorithm='kundu_v3') diff --git a/tedana/tests/test_model_kundu_metrics.py b/tedana/tests/test_model_kundu_metrics.py deleted file mode 100644 index 2e4ad75b5..000000000 --- a/tedana/tests/test_model_kundu_metrics.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Tests for tedana.model.fit -""" - -import numpy as np -import pandas as pd - -from tedana.metrics import kundu_fit - - -def test_smoke_kundu_metrics(): - """ - Smoke test for kundu metrics function. Just make sure that kundu_metrics - runs without breaking when fed random data in the right formats. 
- """ - n_comps = 100 - n_voxels = 10000 - comptable = pd.DataFrame(columns=['kappa', 'rho', 'variance explained', - 'normalized variance explained'], - data=np.random.random((100, 4)), - index=np.arange(100)) - metric_maps = {} - metric_maps['Z_maps'] = np.random.random((n_voxels, n_comps)) - metric_maps['Z_clmaps'] = np.random.randint(low=0, high=2, - size=(n_voxels, n_comps)) - metric_maps['F_R2_maps'] = np.random.random((n_voxels, n_comps)) - metric_maps['F_S0_clmaps'] = np.random.randint(low=0, high=2, - size=(n_voxels, n_comps)) - metric_maps['F_R2_clmaps'] = np.random.randint(low=0, high=2, - size=(n_voxels, n_comps)) - metric_maps['Br_S0_clmaps'] = np.random.randint(low=0, high=2, - size=(n_voxels, n_comps)) - metric_maps['Br_R2_clmaps'] = np.random.randint(low=0, high=2, - size=(n_voxels, n_comps)) - - comptable = kundu_fit.kundu_metrics(comptable, metric_maps) - assert comptable is not None diff --git a/tedana/tests/test_viz.py b/tedana/tests/test_reporting.py similarity index 51% rename from tedana/tests/test_viz.py rename to tedana/tests/test_reporting.py index 4b15af7ca..c71cb7be2 100644 --- a/tedana/tests/test_viz.py +++ b/tedana/tests/test_reporting.py @@ -1,10 +1,8 @@ """ -Tests for tedana.viz +Tests for tedana.reporting """ - import numpy as np - -from tedana import viz +from tedana import reporting def test_smoke_trim_edge_zeros(): @@ -12,7 +10,6 @@ def test_smoke_trim_edge_zeros(): Ensures that trim_edge_zeros works with random inputs """ arr = np.random.random((100, 100)) + assert reporting.static_figures._trim_edge_zeros(arr) is not None - assert viz.trim_edge_zeros(arr) is not None - -# TODO: Test other functions in viz that generate images? +# TODO: Test other functions in reporting? diff --git a/tedana/tests/test_t2smap.py b/tedana/tests/test_t2smap.py index 8250dd223..2dddc5754 100644 --- a/tedana/tests/test_t2smap.py +++ b/tedana/tests/test_t2smap.py @@ -21,21 +21,21 @@ def test_basic_t2smap1(self): data = [op.join(data_dir, 'echo1.nii.gz'), op.join(data_dir, 'echo2.nii.gz'), op.join(data_dir, 'echo3.nii.gz')] - workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='t2s', - fitmode='all', label='t2smap') out_dir = 'TED.echo1.t2smap' + workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='t2s', + fitmode='all', out_dir=out_dir) # Check outputs - assert op.isfile(op.join(out_dir, 'ts_OC.nii.gz')) - img = nib.load(op.join(out_dir, 't2sv.nii.gz')) + assert op.isfile(op.join(out_dir, 'desc-optcom_bold.nii.gz')) + img = nib.load(op.join(out_dir, 'T2starmap.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 's0v.nii.gz')) + img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 't2svG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 's0vG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'ts_OC.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 def test_basic_t2smap2(self): @@ -47,21 +47,21 @@ def test_basic_t2smap2(self): data = [op.join(data_dir, 'echo1.nii.gz'), op.join(data_dir, 'echo2.nii.gz'), op.join(data_dir, 'echo3.nii.gz')] - workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='t2s', - fitmode='ts', label='t2smap') out_dir = 'TED.echo1.t2smap' + workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='t2s', + fitmode='ts', out_dir=out_dir) # 
Check outputs - assert op.isfile(op.join(out_dir, 'ts_OC.nii.gz')) - img = nib.load(op.join(out_dir, 't2sv.nii.gz')) + assert op.isfile(op.join(out_dir, 'desc-optcom_bold.nii.gz')) + img = nib.load(op.join(out_dir, 'T2starmap.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 's0v.nii.gz')) + img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 't2svG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 's0vG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 'ts_OC.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 def test_basic_t2smap3(self): @@ -73,49 +73,73 @@ def test_basic_t2smap3(self): data = [op.join(data_dir, 'echo1.nii.gz'), op.join(data_dir, 'echo2.nii.gz'), op.join(data_dir, 'echo3.nii.gz')] - workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='paid', - fitmode='all', label='t2smap') out_dir = 'TED.echo1.t2smap' + workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='paid', + fitmode='all', out_dir=out_dir) # Check outputs - assert op.isfile(op.join(out_dir, 'ts_OC.nii.gz')) - img = nib.load(op.join(out_dir, 't2sv.nii.gz')) + assert op.isfile(op.join(out_dir, 'desc-optcom_bold.nii.gz')) + img = nib.load(op.join(out_dir, 'T2starmap.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 's0v.nii.gz')) + img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 't2svG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 's0vG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'ts_OC.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 def test_basic_t2smap4(self): """ A very simple test, to confirm that t2smap creates output files when combmode is set to 'paid' and fitmode is set to 'ts'. - - Not sure why this fails. """ data_dir = get_test_data_path() data = [op.join(data_dir, 'echo1.nii.gz'), op.join(data_dir, 'echo2.nii.gz'), op.join(data_dir, 'echo3.nii.gz')] - workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='paid', - fitmode='ts', label='t2smap') out_dir = 'TED.echo1.t2smap' + workflows.t2smap_workflow(data, [14.5, 38.5, 62.5], combmode='paid', + fitmode='ts', out_dir=out_dir) # Check outputs - assert op.isfile(op.join(out_dir, 'ts_OC.nii.gz')) - img = nib.load(op.join(out_dir, 't2sv.nii.gz')) + assert op.isfile(op.join(out_dir, 'desc-optcom_bold.nii.gz')) + img = nib.load(op.join(out_dir, 'T2starmap.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 's0v.nii.gz')) + img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 't2svG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 's0vG.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 'ts_OC.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) + assert len(img.shape) == 4 + + def test_t2smap_cli(self): + """ + Run test_basic_t2smap1, but use the CLI method. 
+ """ + data_dir = get_test_data_path() + data = [op.join(data_dir, 'echo1.nii.gz'), + op.join(data_dir, 'echo2.nii.gz'), + op.join(data_dir, 'echo3.nii.gz')] + out_dir = 'TED.echo1.t2smap' + args = (['-d'] + data + + ['-e', '14.5', '38.5', '62.5', '--combmode', 't2s', + '--fitmode', 'all', '--out-dir', out_dir]) + workflows.t2smap._main(args) + + # Check outputs + img = nib.load(op.join(out_dir, 'T2starmap.nii.gz')) + assert len(img.shape) == 3 + img = nib.load(op.join(out_dir, 'S0map.nii.gz')) + assert len(img.shape) == 3 + img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) + assert len(img.shape) == 3 + img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) + assert len(img.shape) == 3 + img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 def teardown_method(self): diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py index 3e660982e..1984f28a2 100644 --- a/tedana/tests/test_utils.py +++ b/tedana/tests/test_utils.py @@ -86,9 +86,9 @@ def test_make_adaptive_mask(): assert np.allclose(mask, utils.make_adaptive_mask(data)) # shapes are all the same assert mask.shape == masksum.shape == (64350,) - assert np.allclose(mask, masksum.astype(bool)) + assert np.allclose(mask, (masksum >= 3).astype(bool)) # mask has correct # of entries - assert mask.sum() == 50786 + assert mask.sum() == 41749 # masksum has correct values vals, counts = np.unique(masksum, return_counts=True) assert np.allclose(vals, np.array([0, 1, 2, 3])) @@ -99,7 +99,7 @@ def test_make_adaptive_mask(): mask, masksum = utils.make_adaptive_mask(data, mask=pjoin(datadir, 'mask.nii.gz'), getsum=True) - assert np.allclose(mask, masksum.astype(bool)) + assert np.allclose(mask, (masksum >= 3).astype(bool)) # SMOKE TESTS @@ -207,4 +207,20 @@ def test_smoke_threshold_map(): assert utils.threshold_map(img, min_cluster_size, sided='bi') is not None +def test_sec2millisec(): + """ + Ensure that sec2millisec returns 1000x the input values. + """ + assert utils.sec2millisec(5) == 5000 + assert utils.sec2millisec(np.array([5])) == np.array([5000]) + + +def test_millisec2sec(): + """ + Ensure that millisec2sec returns 1/1000x the input values. + """ + assert utils.millisec2sec(5000) == 5 + assert utils.millisec2sec(np.array([5000])) == np.array([5]) + + # TODO: "BREAK" AND UNIT TESTS diff --git a/tedana/utils.py b/tedana/utils.py index 94e555d74..1e4f63290 100644 --- a/tedana/utils.py +++ b/tedana/utils.py @@ -91,15 +91,16 @@ def make_adaptive_mask(data, mask=None, getsum=False): if mask is None: # make it a boolean mask to (where we have at least 1 echo with good signal) - mask = masksum.astype(bool) + mask = (masksum >= 3).astype(bool) else: # if the user has supplied a binary mask mask = load_image(mask).astype(bool) masksum = masksum * mask + mask = (masksum >= 3).astype(bool) # reduce mask based on masksum # TODO: Use visual report to make checking the reduced mask easier - if np.any(masksum[mask] == 0): - n_bad_voxels = np.sum(masksum[mask] == 0) + if np.any(masksum[mask] < 3): + n_bad_voxels = np.sum(masksum[mask] < 3) LGR.warning('{0} voxels in user-defined mask do not have good ' 'signal. Removing voxels from mask.'.format(n_bad_voxels)) mask = masksum.astype(bool) @@ -154,7 +155,7 @@ def unmask(data, mask): 'volume={5},' 'pages={1--34}}'), description='Introduction of Sorenson-Dice index by Sorenson in 1948.') -def dice(arr1, arr2): +def dice(arr1, arr2, axis=None): """ Compute Dice's similarity index between two numpy arrays. Arrays will be binarized before comparison. 
@@ -163,6 +164,9 @@ def dice(arr1, arr2): ---------- arr1, arr2 : array_like Input arrays, arrays to binarize and compare. + axis : None or int, optional + Axis along which the DSIs are computed. + The default is to compute the DSI of the flattened arrays. Returns ------- @@ -181,12 +185,15 @@ def dice(arr1, arr2): if arr1.shape != arr2.shape: raise ValueError('Shape mismatch: arr1 and arr2 must have the same shape.') - arr_sum = arr1.sum() + arr2.sum() - if arr_sum == 0: - dsi = 0 + if axis is not None and axis > (arr1.ndim - 1): + raise ValueError('Axis provided {} not supported by the input arrays.'.format(axis)) + + arr_sum = arr1.sum(axis=axis) + arr2.sum(axis=axis) + if np.all(arr_sum == 0): + dsi = np.zeros(arr_sum.shape) else: intersection = np.logical_and(arr1, arr2) - dsi = (2. * intersection.sum()) / arr_sum + dsi = (2. * intersection.sum(axis=axis)) / arr_sum return dsi @@ -238,7 +245,7 @@ def get_spectrum(data: np.array, tr: float = 1.0): def threshold_map(img, min_cluster_size, threshold=None, mask=None, - binarize=True, sided='two'): + binarize=True, sided='bi'): """ Cluster-extent threshold and binarize image. @@ -255,10 +262,15 @@ def threshold_map(img, min_cluster_size, threshold=None, mask=None, Boolean array for masking resultant data array. Default is None. binarize : bool, optional Default is True. - sided : {'two', 'one', 'bi'}, optional + sided : {'bi', 'two', 'one'}, optional How to apply thresholding. One-sided thresholds on the positive side. Two-sided thresholds positive and negative values together. Bi-sided - thresholds positive and negative values separately. Default is 'two'. + thresholds positive and negative values separately. Default is 'bi'. + + Returns + ------- + clust_thresholded : (M) :obj:`numpy.ndarray` + Cluster-extent thresholded (and optionally binarized) map. """ if not isinstance(img, np.ndarray): arr = img.get_data() @@ -324,3 +336,37 @@ def threshold_map(img, min_cluster_size, threshold=None, mask=None, clust_thresholded = clust_thresholded[mask] return clust_thresholded + + +def sec2millisec(arr): + """ + Convert seconds to milliseconds. + + Parameters + ---------- + arr : array_like + Values in seconds. + + Returns + ------- + array_like + Values in milliseconds. + """ + return arr * 1000 + + +def millisec2sec(arr): + """ + Convert milliseconds to seconds. + + Parameters + ---------- + arr : array_like + Values in milliseconds. + + Returns + ------- + array_like + Values in seconds. + """ + return arr / 1000. diff --git a/tedana/workflows/t2smap.py b/tedana/workflows/t2smap.py index 162b1f643..d6ab8ca75 100644 --- a/tedana/workflows/t2smap.py +++ b/tedana/workflows/t2smap.py @@ -8,6 +8,7 @@ import argparse import numpy as np from scipy import stats +from threadpoolctl import threadpool_limits from tedana import (combine, decay, io, utils) from tedana.workflows.parser_utils import is_valid_file @@ -48,6 +49,12 @@ def _get_parser(): type=float, help='Echo times (in ms). E.g., 15.0 39.0 63.0', required=True) + optional.add_argument('--out-dir', + dest='out_dir', + type=str, + metavar='PATH', + help='Output directory.', + default='.') optional.add_argument('--mask', dest='mask', metavar='FILE', @@ -56,6 +63,17 @@ def _get_parser(): 'Dependent ANAlysis. 
Must be in the same ' 'space as `data`.'), default=None) + optional.add_argument('--fittype', + dest='fittype', + action='store', + choices=['loglin', 'curvefit'], + help='Desired Fitting Method' + '"loglin" means that a linear model is fit' + ' to the log of the data, default' + '"curvefit" means that a more computationally' + 'demanding monoexponential model is fit' + 'to the raw data', + default='loglin') optional.add_argument('--fitmode', dest='fitmode', action='store', @@ -73,22 +91,16 @@ def _get_parser(): help=('Combination scheme for TEs: ' 't2s (Posse 1999, default), paid (Poser)'), default='t2s') - optional.add_argument('--label', - dest='label', - type=str, - help='Label for output directory.', - default=None) - optional.add_argument('--fittype', - dest='fittype', + optional.add_argument('--n-threads', + dest='n_threads', + type=int, action='store', - choices=['loglin', 'curvefit'], - help='Desired Fitting Method' - '"loglin" means that a linear model is fit' - ' to the log of the data, default' - '"curvefit" means that a more computationally' - 'demanding monoexponential model is fit' - 'to the raw data', - default='loglin') + help=('Number of threads to use. Used by ' + 'threadpoolctl to set the parameter outside ' + 'of the workflow function. Higher numbers of ' + 'threads tend to slow down performance on ' + 'typical datasets. Default is 1.'), + default=1) optional.add_argument('--debug', dest='debug', help=argparse.SUPPRESS, @@ -103,8 +115,9 @@ def _get_parser(): return parser -def t2smap_workflow(data, tes, mask=None, fitmode='all', combmode='t2s', - label=None, debug=False, fittype='loglin', quiet=False): +def t2smap_workflow(data, tes, out_dir='.', mask=None, + fittype='loglin', fitmode='all', combmode='t2s', + debug=False, quiet=False): """ Estimate T2 and S0, and optimally combine data across TEs. @@ -115,9 +128,17 @@ def t2smap_workflow(data, tes, mask=None, fitmode='all', combmode='t2s', list of echo-specific files, in ascending order. tes : :obj:`list` List of echo times associated with data in milliseconds. + out_dir : :obj:`str`, optional + Output directory. mask : :obj:`str`, optional Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially aligned with `data`. + fittype : {'loglin', 'curvefit'}, optional + Monoexponential fitting method. + 'loglin' means to use the the default linear fit to the log of + the data. + 'curvefit' means to use a monoexponential fit to the raw data, + which is slightly slower but may be more accurate. fitmode : {'all', 'ts'}, optional Monoexponential model fitting scheme. 'all' means that the model is fit, per voxel, across all timepoints. @@ -125,48 +146,48 @@ def t2smap_workflow(data, tes, mask=None, fitmode='all', combmode='t2s', Default is 'all'. combmode : {'t2s', 'paid'}, optional Combination scheme for TEs: 't2s' (Posse 1999, default), 'paid' (Poser). - label : :obj:`str` or :obj:`None`, optional - Label for output directory. Default is None. - fittype : {'loglin', 'curvefit'}, optional - Monoexponential fitting method. - 'loglin' means to use the the default linear fit to the log of - the data. - 'curvefit' means to use a monoexponential fit to the raw data, - which is slightly slower but may be more accurate. Other Parameters ---------------- debug : :obj:`bool`, optional Whether to run in debugging mode or not. Default is False. quiet : :obj:`bool`, optional - If True, suppresses logging/printing of messages. Default is False. + If True, suppress logging/printing of messages. Default is False. 
Notes ----- - This workflow writes out several files, which are written out to a folder - named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label] - if not. ``ref_label`` is determined based on the name of the first ``data`` - file. - - Files are listed below: - - ====================== ================================================= - Filename Content - ====================== ================================================= - t2sv.nii Limited estimated T2* 3D map or 4D timeseries. - Will be a 3D map if ``fitmode`` is 'all' and a - 4D timeseries if it is 'ts'. - s0v.nii Limited S0 3D map or 4D timeseries. - t2svG.nii Full T2* map/timeseries. The difference between - the limited and full maps is that, for voxels - affected by dropout where only one echo contains - good data, the full map uses the single echo's - value while the limited map has a NaN. - s0vG.nii Full S0 map/timeseries. - ts_OC.nii Optimally combined timeseries. - ====================== ================================================= + This workflow writes out several files, which are described below: + + ========================== ================================================= + Filename Content + ========================== ================================================= + T2starmap.nii.gz Limited estimated T2* 3D map or 4D timeseries. + Will be a 3D map if ``fitmode`` is 'all' and a + 4D timeseries if it is 'ts'. + S0map.nii.gz Limited S0 3D map or 4D timeseries. + desc-full_T2starmap.nii.gz Full T2* map/timeseries. The difference between + the limited and full maps is that, for voxels + affected by dropout where only one echo contains + good data, the full map uses the single echo's + value while the limited map has a NaN. + desc-full_S0map.nii.gz Full S0 map/timeseries. + desc-optcom_bold.nii.gz Optimally combined timeseries. 
+ ========================== ================================================= """ + out_dir = op.abspath(out_dir) + if not op.isdir(out_dir): + os.mkdir(out_dir) + + if debug and not quiet: + logging.basicConfig(level=logging.DEBUG) + elif quiet: + logging.basicConfig(level=logging.WARNING) + else: + logging.basicConfig(level=logging.INFO) + + LGR.info('Using output directory: {}'.format(out_dir)) + # ensure tes are in appropriate format tes = [float(te) for te in tes] n_echos = len(tes) @@ -180,27 +201,11 @@ def t2smap_workflow(data, tes, mask=None, fitmode='all', combmode='t2s', n_samp, n_echos, n_vols = catd.shape LGR.debug('Resulting data shape: {}'.format(catd.shape)) - try: - ref_label = op.basename(ref_img).split('.')[0] - except (TypeError, AttributeError): - ref_label = op.basename(str(data[0])).split('.')[0] - - if label is not None: - out_dir = 'TED.{0}.{1}'.format(ref_label, label) - else: - out_dir = 'TED.{0}'.format(ref_label) - out_dir = op.abspath(out_dir) - if not op.isdir(out_dir): - LGR.info('Creating output directory: {}'.format(out_dir)) - os.mkdir(out_dir) - else: - LGR.info('Using output directory: {}'.format(out_dir)) - if mask is None: LGR.info('Computing adaptive mask') else: LGR.info('Using user-defined mask') - mask, masksum = utils.make_adaptive_mask(catd, getsum=True) + mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True) LGR.info('Computing adaptive T2* map') if fitmode == 'all': @@ -216,12 +221,13 @@ def t2smap_workflow(data, tes, mask=None, fitmode='all', combmode='t2s', # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5, interpolation_method='lower') - LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10)) + cap_t2s_sec = utils.millisec2sec(cap_t2s * 10.) 
+ LGR.debug('Setting cap on T2* map at {:.5f}s'.format(cap_t2s_sec)) t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s LGR.info('Computing optimal combination') # optimally combine data - OCcatd = combine.make_optcom(catd, tes, mask, t2s=t2s_full, + OCcatd = combine.make_optcom(catd, tes, masksum, t2s=t2s_full, combmode=combmode) # clean up numerical errors @@ -231,24 +237,23 @@ def t2smap_workflow(data, tes, mask=None, fitmode='all', combmode='t2s', s0_limited[s0_limited < 0] = 0 t2s_limited[t2s_limited < 0] = 0 - io.filewrite(t2s_limited, op.join(out_dir, 't2sv.nii'), ref_img) - io.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img) - io.filewrite(t2s_full, op.join(out_dir, 't2svG.nii'), ref_img) - io.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img) - io.filewrite(OCcatd, op.join(out_dir, 'ts_OC.nii'), ref_img) + io.filewrite(utils.millisec2sec(t2s_limited), + op.join(out_dir, 'T2starmap.nii.gz'), ref_img) + io.filewrite(s0_limited, op.join(out_dir, 'S0map.nii.gz'), ref_img) + io.filewrite(utils.millisec2sec(t2s_full), + op.join(out_dir, 'desc-full_T2starmap.nii.gz'), ref_img) + io.filewrite(s0_full, op.join(out_dir, 'desc-full_S0map.nii.gz'), ref_img) + io.filewrite(OCcatd, op.join(out_dir, 'desc-optcom_bold.nii.gz'), ref_img) def _main(argv=None): """T2smap entry point""" options = _get_parser().parse_args(argv) - if options.debug and not options.quiet: - logging.basicConfig(level=logging.DEBUG) - elif options.quiet: - logging.basicConfig(level=logging.WARNING) - else: - logging.basicConfig(level=logging.INFO) - - t2smap_workflow(**vars(options)) + kwargs = vars(options) + n_threads = kwargs.pop('n_threads') + n_threads = None if n_threads == -1 else n_threads + with threadpool_limits(limits=n_threads, user_api=None): + t2smap_workflow(**kwargs) if __name__ == '__main__': diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index f7a441e7b..5455ddbdb 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -2,27 +2,22 @@ Run the "canonical" TE-Dependent ANAlysis workflow. """ import os - -os.environ['MKL_NUM_THREADS'] = '1' -os.environ['NUMEXPR_NUM_THREADS'] = '1' -os.environ['OMP_NUM_THREADS'] = '1' -os.environ['VECLIB_MAXIMUM_THREADS'] = '1' -os.environ['OPENBLAS_NUM_THREADS'] = '1' - +import sys +import os.path as op import shutil import logging -import os.path as op -from glob import glob import datetime +from glob import glob import argparse import numpy as np import pandas as pd from scipy import stats +from threadpoolctl import threadpool_limits from nilearn.masking import compute_epi_mask -from tedana import (decay, combine, decomposition, io, metrics, selection, utils, - viz) +from tedana import (decay, combine, decomposition, io, metrics, + reporting, selection, utils) import tedana.gscontrol as gsc from tedana.stats import computefeats2 from tedana.workflows.parser_utils import is_valid_file, ContextFilter @@ -65,6 +60,12 @@ def _get_parser(): type=float, help='Echo times (in ms). E.g., 15.0 39.0 63.0', required=True) + optional.add_argument('--out-dir', + dest='out_dir', + type=str, + metavar='PATH', + help='Output directory.', + default='.') optional.add_argument('--mask', dest='mask', metavar='FILE', @@ -76,32 +77,18 @@ def _get_parser(): "function will be used to derive a mask " "from the first echo's data."), default=None) - optional.add_argument('--mix', - dest='mixm', - metavar='FILE', - type=lambda x: is_valid_file(parser, x), - help=('File containing mixing matrix. 
If not '
-                                'provided, ME-PCA & ME-ICA is done.'),
-                          default=None)
-    optional.add_argument('--ctab',
-                          dest='ctab',
-                          metavar='FILE',
-                          type=lambda x: is_valid_file(parser, x),
-                          help=('File containing a component table from which '
-                                'to extract pre-computed classifications.'),
-                          default=None)
-    optional.add_argument('--manacc',
-                          dest='manacc',
-                          help=('Comma separated list of manually '
-                                'accepted components'),
-                          default=None)
-    optional.add_argument('--sourceTEs',
-                          dest='source_tes',
-                          type=str,
-                          help=('Source TEs for models. E.g., 0 for all, '
-                                '-1 for opt. com., and 1,2 for just TEs 1 and '
-                                '2. Default=-1.'),
-                          default=-1)
+    optional.add_argument('--fittype',
+                          dest='fittype',
+                          action='store',
+                          choices=['loglin', 'curvefit'],
+                          help=('Desired T2*/S0 fitting method. '
+                                '"loglin" means that a linear model is fit '
+                                'to the log of the data. '
+                                '"curvefit" means that a more computationally '
+                                'demanding monoexponential model is fit '
+                                'to the raw data. '
+                                'Default is "loglin".'),
+                          default='loglin')
     optional.add_argument('--combmode',
                           dest='combmode',
                           action='store',
@@ -109,11 +96,41 @@
                           help=('Combination scheme for TEs: '
                                 't2s (Posse 1999, default)'),
                           default='t2s')
-    optional.add_argument('--verbose',
-                          dest='verbose',
-                          action='store_true',
-                          help='Generate intermediate and additional files.',
-                          default=False)
+    optional.add_argument('--tedpca',
+                          dest='tedpca',
+                          help=('Method with which to select components in TEDPCA. '
+                                'PCA decomposition with the mdl, kic and aic options '
+                                'is based on a Moving Average (stationary Gaussian) '
+                                'process. The options are ordered from most to least '
+                                'aggressive. Default=\'mdl\'.'),
+                          choices=['kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'],
+                          default='mdl')
+    optional.add_argument('--seed',
+                          dest='fixed_seed',
+                          metavar='INT',
+                          type=int,
+                          help=('Value used for random initialization of ICA '
+                                'algorithm. Set to an integer value for '
+                                'reproducible ICA results. Set to -1 for '
+                                'varying results across ICA calls. '
+                                'Default=42.'),
+                          default=42)
+    optional.add_argument('--maxit',
+                          dest='maxit',
+                          metavar='INT',
+                          type=int,
+                          help=('Maximum number of iterations for ICA.'),
+                          default=500)
+    optional.add_argument('--maxrestart',
+                          dest='maxrestart',
+                          metavar='INT',
+                          type=int,
+                          help=('Maximum number of attempts for ICA. If ICA '
+                                'fails to converge, the fixed seed will be '
+                                'updated and ICA will be run again. If '
+                                'convergence is achieved before maxrestart '
+                                'attempts, ICA will finish early.'),
+                          default=10)
     optional.add_argument('--tedort',
                           dest='tedort',
                           action='store_true',
@@ -131,50 +148,24 @@
                                 'delimited list'),
                           choices=['t1c', 'gsr'],
                           default=None)
-    optional.add_argument('--tedpca',
-                          dest='tedpca',
-                          help='Method with which to select components in TEDPCA',
-                          choices=['mle', 'kundu', 'kundu-stabilize'],
-                          default='mle')
-    optional.add_argument('--out-dir',
-                          dest='out_dir',
-                          type=str,
-                          help='Output directory.',
-                          default='.')
-    optional.add_argument('--seed',
-                          dest='fixed_seed',
-                          type=int,
-                          help=('Value used for random initialization of ICA algorithm. '
-                                'Set to an integer value for reproducible ICA results. '
-                                'Set to -1 for varying results across ICA calls. 
'
-                                'Default=42.'),
-                          default=42)
-    optional.add_argument('--no-png',
-                          dest='no_png',
+    optional.add_argument('--no-reports',
+                          dest='no_reports',
                           action='store_true',
                           help=('Creates a figures folder with static component '
                                 'maps, timecourse plots and other diagnostic '
-                                'images'),
+                                'images and an interactive HTML report. Pass '
+                                'this flag to skip generating them.'),
                           default=False)
     optional.add_argument('--png-cmap',
                           dest='png_cmap',
                           type=str,
-                          help=('Colormap for figures'),
+                          help='Colormap for figures',
                           default='coolwarm')
-    optional.add_argument('--maxit',
-                          dest='maxit',
-                          type=int,
-                          help=('Maximum number of iterations for ICA.'),
-                          default=500)
-    optional.add_argument('--maxrestart',
-                          dest='maxrestart',
-                          type=int,
-                          help=('Maximum number of attempts for ICA. If ICA '
-                                'fails to converge, the fixed seed will be '
-                                'updated and ICA will be run again. If '
-                                'convergence is achieved before maxrestart '
-                                'attempts, ICA will finish early.'),
-                          default=10)
+    optional.add_argument('--verbose',
+                          dest='verbose',
+                          action='store_true',
+                          help='Generate intermediate and additional files.',
+                          default=False)
     optional.add_argument('--lowmem',
                           dest='low_mem',
                           action='store_true',
@@ -182,17 +173,16 @@
                                 'use of IncrementalPCA. May increase workflow '
                                 'duration.'),
                           default=False)
-    optional.add_argument('--fittype',
-                          dest='fittype',
+    optional.add_argument('--n-threads',
+                          dest='n_threads',
+                          type=int,
                           action='store',
-                          choices=['loglin', 'curvefit'],
-                          help='Desired Fitting Method '
-                               '"loglin" means that a linear model is fit '
-                               'to the log of the data, default '
-                               '"curvefit" means that a more computationally '
-                               'demanding monoexponential model is fit '
-                               'to the raw data',
-                          default='loglin')
+                          help=('Number of threads to use. Used by '
+                                'threadpoolctl to set the parameter outside '
+                                'of the workflow function. Higher numbers of '
+                                'threads tend to slow down performance on '
+                                'typical datasets. Default is 1.'),
+                          default=1)
     optional.add_argument('--debug',
                           dest='debug',
                           action='store_true',
@@ -207,16 +197,45 @@
                           default=False)
     optional.add_argument('-v', '--version', action='version', version=verstr)
     parser._action_groups.append(optional)
+
+    rerungrp = parser.add_argument_group('arguments for rerunning the workflow')
+    rerungrp.add_argument('--t2smap',
+                          dest='t2smap',
+                          metavar='FILE',
+                          type=lambda x: is_valid_file(parser, x),
+                          help=('Precalculated T2* map in the same space as '
+                                'the input data.'),
+                          default=None)
+    rerungrp.add_argument('--mix',
+                          dest='mixm',
+                          metavar='FILE',
+                          type=lambda x: is_valid_file(parser, x),
+                          help=('File containing mixing matrix. 
If not ' + 'provided, ME-PCA & ME-ICA is done.'), + default=None) + rerungrp.add_argument('--ctab', + dest='ctab', + metavar='FILE', + type=lambda x: is_valid_file(parser, x), + help=('File containing a component table from which ' + 'to extract pre-computed classifications.'), + default=None) + rerungrp.add_argument('--manacc', + dest='manacc', + help=('Comma separated list of manually ' + 'accepted components'), + default=None) + return parser -def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None, - tedort=False, gscontrol=None, tedpca='mle', - source_tes=-1, combmode='t2s', verbose=False, stabilize=False, - out_dir='.', fixed_seed=42, maxit=500, maxrestart=10, - debug=False, quiet=False, no_png=False, - png_cmap='coolwarm', - low_mem=False, fittype='loglin'): +def tedana_workflow(data, tes, out_dir='.', mask=None, + fittype='loglin', combmode='t2s', tedpca='mdl', + fixed_seed=42, maxit=500, maxrestart=10, + tedort=False, gscontrol=None, + no_reports=False, png_cmap='coolwarm', + verbose=False, low_mem=False, debug=False, quiet=False, + t2smap=None, mixm=None, ctab=None, manacc=None): """ Run the "canonical" TE-Dependent ANAlysis workflow. @@ -227,49 +246,50 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None, list of echo-specific files, in ascending order. tes : :obj:`list` List of echo times associated with data in milliseconds. - mask : :obj:`str`, optional + out_dir : :obj:`str`, optional + Output directory. + mask : :obj:`str` or None, optional Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially aligned with `data`. If an explicit mask is not provided, then Nilearn's compute_epi_mask function will be used to derive a mask from the first echo's data. - mixm : :obj:`str`, optional - File containing mixing matrix. If not provided, ME-PCA and ME-ICA are - done. - ctab : :obj:`str`, optional - File containing component table from which to extract pre-computed - classifications. - manacc : :obj:`list`, :obj:`str`, or None, optional - List of manually accepted components. Can be a list of the components, - a comma-separated string with component numbers, or None. Default is - None. + fittype : {'loglin', 'curvefit'}, optional + Monoexponential fitting method. 'loglin' uses the the default linear + fit to the log of the data. 'curvefit' uses a monoexponential fit to + the raw data, which is slightly slower but may be more accurate. + Default is 'loglin'. + combmode : {'t2s'}, optional + Combination scheme for TEs: 't2s' (Posse 1999, default). + tedpca : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional + Method with which to select components in TEDPCA. Default is 'mdl'. tedort : :obj:`bool`, optional Orthogonalize rejected components w.r.t. accepted ones prior to denoising. Default is False. gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional Perform additional denoising to remove spatially diffuse noise. Default is None. - tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional - Method with which to select components in TEDPCA. Default is 'mle'. - source_tes : :obj:`int`, optional - Source TEs for models. 0 for all, -1 for optimal combination. - Default is -1. - combmode : {'t2s'}, optional - Combination scheme for TEs: 't2s' (Posse 1999, default). - fittype : {'loglin', 'curvefit'}, optional - Monoexponential fitting method. - 'loglin' means to use the the default linear fit to the log of - the data. 
- 'curvefit' means to use a monoexponential fit to the raw data, - which is slightly slower but may be more accurate. verbose : :obj:`bool`, optional Generate intermediate and additional files. Default is False. - no_png : obj:'bool', optional - Do not generate .png plots and figures. Default is false. + no_reports : obj:'bool', optional + Do not generate .html reports and .png plots. Default is false such + that reports are generated. png_cmap : obj:'str', optional - Name of a matplotlib colormap to be used when generating figures. - Cannot be used with --no-png. Default 'coolwarm' - out_dir : :obj:`str`, optional - Output directory. + Name of a matplotlib colormap to be used when generating figures. + Cannot be used with --no-png. Default is 'coolwarm'. + t2smap : :obj:`str`, optional + Precalculated T2* map in the same space as the input data. Values in + the map must be in seconds. + mixm : :obj:`str` or None, optional + File containing mixing matrix, to be used when re-running the workflow. + If not provided, ME-PCA and ME-ICA are done. Default is None. + ctab : :obj:`str` or None, optional + File containing component table from which to extract pre-computed + classifications, to be used with 'mixm' when re-running the workflow. + Default is None. + manacc : :obj:`list`, :obj:`str`, or None, optional + List of manually accepted components. Can be a list of the components, + a comma-separated string with component numbers, or None. Default is + None. Other Parameters ---------------- @@ -318,8 +338,8 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None, # create logfile name basename = 'tedana_' extension = 'tsv' - isotime = datetime.datetime.now().replace(microsecond=0).isoformat() - logname = op.join(out_dir, (basename + isotime + '.' + extension)) + start_time = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S') + logname = op.join(out_dir, (basename + start_time + '.' + extension)) # set logging format log_formatter = logging.Formatter( @@ -366,24 +386,14 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None, if not isinstance(gscontrol, list): gscontrol = [gscontrol] - # coerce data to samples x echos x time array - if isinstance(data, str): - if not op.exists(data): - raise ValueError('Zcat file {} does not exist'.format(data)) - data = [data] - LGR.info('Loading input data: {}'.format([f for f in data])) catd, ref_img = io.load_data(data, n_echos=n_echos) n_samp, n_echos, n_vols = catd.shape LGR.debug('Resulting data shape: {}'.format(catd.shape)) - if no_png and (png_cmap != 'coolwarm'): - LGR.warning('Overriding --no-png since --png-cmap provided.') - no_png = False - # check if TR is 0 img_t_r = ref_img.header.get_zooms()[-1] - if img_t_r == 0 and not no_png: + if img_t_r == 0: raise IOError('Dataset has a TR of 0. This indicates incorrect' ' header information. 
To correct this, we recommend' ' using this snippet:' @@ -416,61 +426,80 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None, if ctab and not mixm: LGR.warning('Argument "ctab" requires argument "mixm".') ctab = None - elif ctab and (manacc is None): - LGR.warning('Argument "ctab" requires argument "manacc".') - ctab = None elif manacc is not None and not mixm: LGR.warning('Argument "manacc" requires argument "mixm".') manacc = None + if t2smap is not None and op.isfile(t2smap): + t2smap = op.abspath(t2smap) + # Allow users to re-run on same folder + if t2smap != op.join(out_dir, 't2sv.nii.gz'): + shutil.copyfile(t2smap, op.join(out_dir, 't2sv.nii.gz')) + shutil.copyfile(t2smap, op.join(out_dir, op.basename(t2smap))) + elif t2smap is not None: + raise IOError('Argument "t2smap" must be an existing file.') + RepLGR.info("TE-dependence analysis was performed on input data.") - if mask is None: + if mask and not t2smap: + # TODO: add affine check + LGR.info('Using user-defined mask') + RepLGR.info("A user-defined mask was applied to the data.") + elif t2smap and not mask: + LGR.info('Using user-defined T2* map to generate mask') + t2s_limited_sec = utils.load_image(t2smap) + t2s_limited = utils.sec2millisec(t2s_limited_sec) + t2s_full = t2s_limited.copy() + mask = (t2s_limited != 0).astype(int) + elif t2smap and mask: + LGR.info('Combining user-defined mask and T2* map to generate mask') + t2s_limited_sec = utils.load_image(t2smap) + t2s_limited = utils.sec2millisec(t2s_limited_sec) + t2s_full = t2s_limited.copy() + mask = utils.load_image(mask) + mask[t2s_limited == 0] = 0 # reduce mask based on T2* map + else: LGR.info('Computing EPI mask from first echo') first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :]) mask = compute_epi_mask(first_echo_img) RepLGR.info("An initial mask was generated from the first echo using " "nilearn's compute_epi_mask function.") - else: - # TODO: add affine check - LGR.info('Using user-defined mask') - RepLGR.info("A user-defined mask was applied to the data.") mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True) LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp)) io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img) - os.chdir(out_dir) - - LGR.info('Computing T2* map') - t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay( - catd, tes, mask, masksum, fittype) - - # set a hard cap for the T2* map - # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile - cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5, - interpolation_method='lower') - LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10)) - t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s - io.filewrite(t2s_limited, op.join(out_dir, 't2sv.nii'), ref_img) - io.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img) - - if verbose: - io.filewrite(t2s_full, op.join(out_dir, 't2svG.nii'), ref_img) - io.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img) + if t2smap is None: + LGR.info('Computing T2* map') + t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay( + catd, tes, mask, masksum, fittype) + + # set a hard cap for the T2* map + # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile + cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5, + interpolation_method='lower') + LGR.debug('Setting cap on T2* map at {:.5f}s'.format( + utils.millisec2sec(cap_t2s))) + t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s + 
io.filewrite(utils.millisec2sec(t2s_limited), op.join(out_dir, 't2sv.nii'), ref_img) + io.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img) + + if verbose: + io.filewrite(utils.millisec2sec(t2s_full), op.join(out_dir, 't2svG.nii'), ref_img) + io.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img) # optimally combine data - data_oc = combine.make_optcom(catd, tes, mask, t2s=t2s_full, combmode=combmode) + data_oc = combine.make_optcom(catd, tes, masksum, t2s=t2s_full, combmode=combmode) # regress out global signal unless explicitly not desired if 'gsr' in gscontrol: - catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img) + catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img, + out_dir=out_dir) if mixm is None: # Identify and remove thermal noise from data dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask, - t2s_limited, t2s_full, ref_img, + masksum, t2s_full, ref_img, tes=tes, algorithm=tedpca, - source_tes=source_tes, kdaw=10., rdaw=1., out_dir=out_dir, verbose=verbose, @@ -478,51 +507,59 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None, mmix_orig = decomposition.tedica(dd, n_components, fixed_seed, maxit, maxrestart) - if verbose and (source_tes == -1): + if verbose: io.filewrite(utils.unmask(dd, mask), - op.join(out_dir, 'ts_OC_whitened.nii'), ref_img) + op.join(out_dir, 'ts_OC_whitened.nii.gz'), ref_img) LGR.info('Making second component selection guess from ICA results') - # Estimate betas and compute selection metrics for mixing matrix - # generated from dimensionally reduced data using full data (i.e., data - # with thermal noise) - comptable, metric_maps, betas, mmix = metrics.dependence_metrics( - catd, data_oc, mmix_orig, t2s_limited, tes, - ref_img, reindex=True, label='meica_', out_dir=out_dir, - algorithm='kundu_v2', verbose=verbose) + required_metrics = [ + 'kappa', 'rho', 'countnoise', 'countsigFT2', 'countsigFS0', + 'dice_FT2', 'dice_FS0', 'signal-noise_t', + 'variance explained', 'normalized variance explained', + 'd_table_score' + ] + comptable, mmix = metrics.collect.generate_metrics( + catd, data_oc, mmix_orig, mask, masksum, tes, ref_img, + metrics=required_metrics, sort_by='kappa', ascending=False + ) + comp_names = [io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.index.max()) for comp in comptable.index.values] mixing_df = pd.DataFrame(data=mmix, columns=comp_names) - mixing_df.to_csv('ica_mixing.tsv', sep='\t', index=False) + mixing_df.to_csv(op.join(out_dir, 'ica_mixing.tsv'), sep='\t', index=False) betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) io.filewrite(betas_oc, op.join(out_dir, 'ica_components.nii.gz'), ref_img) - - comptable = metrics.kundu_metrics(comptable, metric_maps) comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols) else: LGR.info('Using supplied mixing matrix from ICA') mmix_orig = pd.read_table(op.join(out_dir, 'ica_mixing.tsv')).values - comptable, metric_maps, betas, mmix = metrics.dependence_metrics( - catd, data_oc, mmix_orig, t2s_limited, tes, - ref_img, label='meica_', out_dir=out_dir, - algorithm='kundu_v2', verbose=verbose) - betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) - io.filewrite(betas_oc, - op.join(out_dir, 'ica_components.nii.gz'), - ref_img) if ctab is None: - comptable = metrics.kundu_metrics(comptable, metric_maps) + required_metrics = [ + 'kappa', 'rho', 'countnoise', 'countsigFT2', 'countsigFS0', + 'dice_FT2', 'dice_FS0', 'signal-noise_t', + 'variance explained', 
+                'normalized variance explained',
+                'd_table_score'
+            ]
+            comptable, mmix = metrics.collect.generate_metrics(
+                catd, data_oc, mmix_orig, mask, masksum, tes, ref_img,
+                metrics=required_metrics, sort_by='kappa', ascending=False
+            )
             comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
         else:
-            comptable = pd.read_csv(ctab, sep='\t', index_col='component')
-            comptable = selection.manual_selection(comptable, acc=manacc)
+            mmix = mmix_orig.copy()
+            comptable = io.load_comptable(ctab)
+            if manacc is not None:
+                comptable = selection.manual_selection(comptable, acc=manacc)
+        betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask)
+        io.filewrite(betas_oc,
+                     op.join(out_dir, 'ica_components.nii.gz'),
+                     ref_img)
 
     # Save decomposition
-    data_type = 'optimally combined data' if source_tes == -1 else 'z-concatenated data'
-    comptable['Description'] = 'ICA fit to dimensionally reduced {0}.'.format(data_type)
+    comptable['Description'] = 'ICA fit to dimensionally-reduced optimally combined data.'
     mmix_dict = {}
     mmix_dict['Method'] = ('Independent components analysis with FastICA '
                            'algorithm implemented by sklearn. Components '
@@ -538,10 +575,10 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
     mmix_orig = mmix.copy()
     if tedort:
-        acc_idx = comptable.loc[
-            ~comptable.classification.str.contains('rejected')].index.values
-        rej_idx = comptable.loc[
-            comptable.classification.str.contains('rejected')].index.values
+        acc_idx = comptable.loc[~comptable.classification.str.
+                                contains('rejected')].index.values
+        rej_idx = comptable.loc[comptable.classification.str.contains(
+            'rejected')].index.values
         acc_ts = mmix[:, acc_idx]
         rej_ts = mmix[:, rej_idx]
         betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
@@ -551,39 +588,47 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
         comp_names = [io.add_decomp_prefix(comp, prefix='ica',
                                            max_value=comptable.index.max())
                       for comp in comptable.index.values]
         mixing_df = pd.DataFrame(data=mmix, columns=comp_names)
-        mixing_df.to_csv('ica_orth_mixing.tsv', sep='\t', index=False)
+        mixing_df.to_csv(op.join(out_dir, 'ica_orth_mixing.tsv'), sep='\t', index=False)
         RepLGR.info("Rejected components' time series were then "
                     "orthogonalized with respect to accepted components' time "
                     "series.")
 
-    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
-                    n_vols=n_vols, ref_img=ref_img)
+    io.writeresults(data_oc,
+                    mask=mask,
+                    comptable=comptable,
+                    mmix=mmix,
+                    n_vols=n_vols,
+                    ref_img=ref_img,
+                    out_dir=out_dir)
 
     if 't1c' in gscontrol:
-        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)
+        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img, out_dir=out_dir)
 
     if verbose:
-        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)
+        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img, out_dir=out_dir)
 
-    if not no_png:
+    if not no_reports:
         LGR.info('Making figures folder with static component maps and '
                  'timecourse plots.')
         # make figure folder first
         if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))
 
-        viz.write_comp_figs(data_oc, mask=mask, comptable=comptable,
-                            mmix=mmix_orig, ref_img=ref_img,
-                            out_dir=op.join(out_dir, 'figures'),
-                            png_cmap=png_cmap)
-
-        LGR.info('Making Kappa vs Rho scatter plot')
-        viz.write_kappa_scatter(comptable=comptable,
-                                out_dir=op.join(out_dir, 'figures'))
-
-        LGR.info('Making overall summary figure')
-        viz.write_summary_fig(comptable=comptable,
-                              out_dir=op.join(out_dir, 'figures'))
+        reporting.static_figures.comp_figures(data_oc, mask=mask,
+                                              comptable=comptable,
+                                              mmix=mmix_orig,
+                                              ref_img=ref_img,
+                                              out_dir=op.join(out_dir,
+                                                              'figures'),
+                                              png_cmap=png_cmap)
+
+        if sys.version_info.major == 3 and sys.version_info.minor < 6:
+            warn_msg = ("Reports requested but Python version is less than "
+                        "3.6.0. Dynamic reports will not be generated.")
+            LGR.warn(warn_msg)
+        else:
+            LGR.info('Generating dynamic report')
+            reporting.generate_report(out_dir=out_dir, tr=img_t_r)
 
     LGR.info('Workflow completed')
@@ -622,7 +667,7 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
     with open(refname, 'r') as fo:
         reference_list = sorted(list(set(fo.readlines())))
         references = '\n'.join(reference_list)
-    report += '\n\nReferences\n' + references
+    report += '\n\nReferences:\n\n' + references
     with open(repname, 'w') as fo:
         fo.write(report)
     os.remove(refname)
@@ -634,7 +679,11 @@ def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
 def _main(argv=None):
     """Tedana entry point"""
     options = _get_parser().parse_args(argv)
-    tedana_workflow(**vars(options))
+    kwargs = vars(options)
+    n_threads = kwargs.pop('n_threads')
+    n_threads = None if n_threads == -1 else n_threads
+    with threadpool_limits(limits=n_threads, user_api=None):
+        tedana_workflow(**kwargs)
 
 
 if __name__ == '__main__':
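Note on the _main() change above: the entry point now pops the thread-count option off the parsed arguments and wraps the workflow in threadpoolctl's threadpool_limits context manager, treating -1 as "no limit" (limits=None leaves BLAS/OpenMP pools untouched). A minimal standalone sketch of that pattern, assuming the option is exposed by the parser added elsewhere in this PR and that threadpool_limits is imported from the threadpoolctl package:

    import numpy as np
    from threadpoolctl import threadpool_limits

    def run_with_thread_cap(n_threads):
        # Mirror the workflow entry point: -1 means "do not cap threads"
        limit = None if n_threads == -1 else n_threads
        # user_api=None applies the cap to all supported thread pools (BLAS, OpenMP)
        with threadpool_limits(limits=limit, user_api=None):
            a = np.random.rand(500, 500)
            return a @ a  # BLAS-heavy work inside the block respects the cap

    run_with_thread_cap(1)   # single-threaded linear algebra
    run_with_thread_cap(-1)  # no cap applied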