From b8ab4a99e9bdcf1b09531f10b78970e23a6467c2 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 4 Nov 2021 21:16:33 -0600 Subject: [PATCH 1/5] fix Overflow in download analysis metadata --- qiita_db/handlers/analysis.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/qiita_db/handlers/analysis.py b/qiita_db/handlers/analysis.py index a3cf77d2e..a95fa8c45 100644 --- a/qiita_db/handlers/analysis.py +++ b/qiita_db/handlers/analysis.py @@ -6,7 +6,10 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from tornado import gen from tornado.web import HTTPError +from iostream import StreamClosedError +from json import dumps import qiita_db as qdb from .oauth2 import OauthBaseHandler, authenticate_oauth @@ -44,7 +47,7 @@ def _get_analysis(a_id): class APIAnalysisMetadataHandler(OauthBaseHandler): @authenticate_oauth - def get(self, analysis_id): + async def get(self, analysis_id): """Retrieves the analysis metadata Parameters @@ -56,15 +59,34 @@ def get(self, analysis_id): ------- dict The contents of the analysis keyed by sample id + + Notes + ----- + This response needed to be broken into chunks because we were hitting + the max size of a response: 2G; based on: https://bit.ly/3CPvyjd """ + chunk_len = 1024 * 1024 * 1 # 1 MiB + with qdb.sql_connection.TRN: a = _get_analysis(analysis_id) mf_fp = qdb.util.get_filepath_information( a.mapping_file)['fullpath'] - response = None if mf_fp is not None: df = qdb.metadata_template.util.load_template_to_dataframe( mf_fp, index='#SampleID') - response = df.to_dict(orient='index') + response = dumps(df.to_dict(orient='index')) - self.write(response) + crange = range(chunk_len, len(response)+chunk_len, chunk_len) + for i, (win) in enumerate(crange): + chunk = response[i*chunk_len:win] + try: + self.write(chunk) + await self.flush() + except StreamClosedError: + break 
finally: + del chunk + # pause the coroutine so other handlers can run + await gen.sleep(0.000000001) # 1 nanosecond + else: + self.write(None) From 554bd63bef2ee36677f9c98c998649bfb81893ba Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 4 Nov 2021 21:42:56 -0600 Subject: [PATCH 2/5] iostream -> tornado.iostream --- qiita_db/handlers/analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_db/handlers/analysis.py b/qiita_db/handlers/analysis.py index a95fa8c45..3eadcc824 100644 --- a/qiita_db/handlers/analysis.py +++ b/qiita_db/handlers/analysis.py @@ -8,7 +8,7 @@ from tornado import gen from tornado.web import HTTPError -from iostream import StreamClosedError +from tornado.iostream import StreamClosedError from json import dumps import qiita_db as qdb From 7b9cc9a4d6a17c0cf915f21e9f2eb43d489c0e41 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Mon, 8 Nov 2021 06:35:51 -0700 Subject: [PATCH 3/5] addressing @wasade review --- qiita_db/handlers/analysis.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/qiita_db/handlers/analysis.py b/qiita_db/handlers/analysis.py index 3eadcc824..56a3f7104 100644 --- a/qiita_db/handlers/analysis.py +++ b/qiita_db/handlers/analysis.py @@ -8,7 +8,6 @@ from tornado import gen from tornado.web import HTTPError -from tornado.iostream import StreamClosedError from json import dumps import qiita_db as qdb @@ -67,6 +66,7 @@ async def get(self, analysis_id): """ chunk_len = 1024 * 1024 * 1 # 1 MiB + respose = None with qdb.sql_connection.TRN: a = _get_analysis(analysis_id) mf_fp = qdb.util.get_filepath_information( @@ -76,17 +76,19 @@ async def get(self, analysis_id): mf_fp, index='#SampleID') response = dumps(df.to_dict(orient='index')) - crange = range(chunk_len, len(response)+chunk_len, chunk_len) - for i, (win) in enumerate(crange): - chunk = response[i*chunk_len:win] - try: - self.write(chunk) - await self.flush() - except StreamClosedError: 
- break - finally: - del chunk - # pause the coroutine so other handlers can run - await gen.sleep(0.000000001) # 1 nanosecond - else: - self.write(None) + if respose is not None: + crange = range(chunk_len, len(response)+chunk_len, chunk_len) + for i, (win) in enumerate(crange): + # sending the chunk and flushing + chunk = response[i*chunk_len:win] + self.write(chunk) + await self.flush() + + # cleaning chunk and pause the coroutine so other handlers + # can run, note that this is required/important based on the + # original implementation in https://bit.ly/3CPvyjd + del chunk + await gen.sleep(0.000000001) # 1 nanosecond + + else: + respose.write(None) From 2894850241116c9af223cf35ceb9e04046f97ba1 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Tue, 9 Nov 2021 07:30:01 -0700 Subject: [PATCH 4/5] setup.py --- scripts/qiita-test-install | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/qiita-test-install b/scripts/qiita-test-install index b1f3a0c35..7ffa7d9b4 100755 --- a/scripts/qiita-test-install +++ b/scripts/qiita-test-install @@ -232,11 +232,11 @@ class QiitaConfig(TestCase): string_acceptable_version)) def test_redbiom_version(self): - acceptable_version = (0, 3, 5) + acceptable_version = (0, 3, 8) string_acceptable_version = '.'.join(map(str, acceptable_version)) version = tuple(map(int, redbiom_lib_version.split('.'))) - self.assertTrue(acceptable_version == version, + self.assertTrue(acceptable_version >= version, 'Unsupported redbiom version. 
You have %s but the ' 'minimum required version is %s' % ('.'.join(map(str, version)), diff --git a/setup.py b/setup.py index 0991acb6a..808791617 100644 --- a/setup.py +++ b/setup.py @@ -105,7 +105,7 @@ install_requires=['psycopg2', 'click', 'bcrypt', 'pandas', 'biom-format', 'tornado<6.0', 'toredis', 'redis', 'scp', 'pyparsing', 'h5py', 'natsort', 'nose', 'pep8', - 'networkx', 'humanize', 'scikit-bio', 'wtforms', + 'networkx', 'humanize', 'scikit-bio', 'wtforms<3.0.0', 'openpyxl', 'sphinx-bootstrap-theme', 'Sphinx', 'nltk', 'gitpython', 'redbiom', 'pyzmq', 'sphinx_rtd_theme', 'paramiko', 'seaborn', 'matplotlib', 'scipy', 'nose', From 3635b063a2fe1ed7bbfc65f54ec802ce8950c120 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Tue, 9 Nov 2021 11:00:11 -0700 Subject: [PATCH 5/5] respose -> response --- qiita_db/handlers/analysis.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qiita_db/handlers/analysis.py b/qiita_db/handlers/analysis.py index 56a3f7104..1d4dddd5e 100644 --- a/qiita_db/handlers/analysis.py +++ b/qiita_db/handlers/analysis.py @@ -66,7 +66,7 @@ async def get(self, analysis_id): """ chunk_len = 1024 * 1024 * 1 # 1 MiB - respose = None + response = None with qdb.sql_connection.TRN: a = _get_analysis(analysis_id) mf_fp = qdb.util.get_filepath_information( @@ -76,7 +76,7 @@ async def get(self, analysis_id): mf_fp, index='#SampleID') response = dumps(df.to_dict(orient='index')) - if respose is not None: + if response is not None: crange = range(chunk_len, len(response)+chunk_len, chunk_len) for i, (win) in enumerate(crange): # sending the chunk and flushing @@ -91,4 +91,4 @@ async def get(self, analysis_id): await gen.sleep(0.000000001) # 1 nanosecond else: - respose.write(None) + self.write(None)