diff --git a/app/osparc/job_api.py b/app/osparc/job_api.py
index 951b131..9397bdf 100644
--- a/app/osparc/job_api.py
+++ b/app/osparc/job_api.py
@@ -58,9 +58,7 @@ def start_python_osparc_job(dataset_info):
         "input_2": path_for_input_json,
     }
 
-    payload = start_osparc_job("python", input_file_paths)
-
-    return payload
+    return start_osparc_job("python", input_file_paths)
 
 
 def start_matlab_osparc_job(matlab_zip_filepath):
@@ -71,9 +69,7 @@ def start_matlab_osparc_job(matlab_zip_filepath):
         "input_1": matlab_zip_filepath,
     }
 
-    payload = start_osparc_job("matlab", input_file_paths)
-
-    return payload
+    return start_osparc_job("matlab", input_file_paths)
 
 
 def start_osparc_job(job_type, input_file_paths):
     """
@@ -298,11 +294,11 @@ def check_job_status(job_type, job_id):
     # output_2 = 4.0
     # we're only taking the first one
-    print(f"Now downloading to disk path:")
+    print("Now downloading to disk path:")
     results_file: File = outputs.results[output_result_to_use]
     #print(f"file id: {results_file.id}")
     download_path: str = files_api.download_file(file_id=results_file.id)
-    
+
     print(f"Download path: {download_path}")
 
     payload = {
@@ -425,6 +421,4 @@ def get_static_dir_for_job(job_id):
     """
     takes job_id and returns the static dir for that job, where frontend can access it
     """
-    dir_path_for_job_outputs = os.path.join(static_dir, "jobs-results", job_id)
-
-    return dir_path_for_job_outputs
+    return os.path.join(static_dir, "jobs-results", job_id)
diff --git a/app/routes.py b/app/routes.py
index 16631d8..032422a 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -35,15 +37,17 @@ def create_job():
     dataset_dict = request.json
 
     if dataset_dict.get("datasetIds", False) == False:
-        error_message = make_response("Invalid data: need a json with key 'datasetIds' and value an array of integers", 400)
-        return error_message
+        return make_response(
+            "Invalid data: need a json with key 'datasetIds' and value an array of integers",
+            400,
+        )
+
     print("json:", request.json)
 
     payload = job_api.start_python_osparc_job(dataset_dict)
 
-    resp = make_response(json.dumps(payload), payload["status_code"])
-    return resp
+    return make_response(json.dumps(payload), payload["status_code"])
 
 
 # letting cors get setup in settings.py instead
@@ -61,8 +63,7 @@ def check_job_status(job_type, job_id):
     elif job_type == "matlab":
         payload = job_api.check_matlab_job_status(job_id)
 
-    resp = make_response(json.dumps(payload), payload["status_code"])
-    return resp
+    return make_response(json.dumps(payload), payload["status_code"])
 
 
 # e.g., http://localhost:5000/api/results-images/example-job-id/Plots-3.x.png
 @app.route('/api/results-images//', methods=['GET'])
diff --git a/assets/INPUT_FOLDER/main.py b/assets/INPUT_FOLDER/main.py
index 9976ebd..af9554a 100644
--- a/assets/INPUT_FOLDER/main.py
+++ b/assets/INPUT_FOLDER/main.py
@@ -54,8 +54,7 @@ def keywords_finder(text):
     """Return keywords after removing list of not required words."""
-    words = nlp(text).ents
-    return words
+    return nlp(text).ents
 
 
 def NestedDictValues(d):
@@ -111,8 +110,6 @@ def summariser(merged_text, top_n=5):
     sentences = sent_tokenize(merged_text)
     stop_words = stopwords.words('english')
-    summarize_text = []
-
     sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)
     sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
@@ -120,11 +117,7 @@
     ranked_sentence = sorted(
         ((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
-    # print("Indexes of top ranked_sentence order are ", ranked_sentence)
-
-    for i in range(top_n):
-        summarize_text.append(ranked_sentence[i][1])
-
+    summarize_text = [ranked_sentence[i][1] for i in range(top_n)]
     return " ".join(summarize_text)
@@ -169,11 +162,7 @@ def get_dataset_latest_version(datasetId):
     headers = {"Accept": "application/json"}
     response = requests.request("GET", url, headers=headers)
     response_json = json.loads(response.text)
-    if response.status_code == 200:
-        versionId = str(response_json['version'])
-    else:
-        versionId = ""
-    return versionId
+    return str(response_json['version']) if response.status_code == 200 else ""
 
 
 def get_dataset_file_response(datasetId, filepath):
@@ -188,10 +177,7 @@
         }}
     headers = {"Content-Type": "application/json"}
     response = requests.request("POST", url, json=payload, headers=headers)
-    if response.status_code == 200:
-        return response
-    else:
-        return response.reason
+    return response if response.status_code == 200 else response.reason
 
 
 def get_dataset_file_download(datasetId, filepath):
@@ -206,8 +192,7 @@
         }}
     headers = {"Content-Type": "application/json"}
 
-    response = requests.request("POST", url, json=payload, headers=headers)
-    return response
+    return requests.request("POST", url, json=payload, headers=headers)
 
 
 def get_dataset_description_text(datasetId):
@@ -239,11 +224,10 @@ def get_dataset_protocolsio_link(datasetId):
 
 
 def get_protocolsio_text(datasetId):
     data_protocol = {}
-    protocol_url = get_dataset_protocolsio_link(datasetId)
-    if protocol_url:
+    if protocol_url := get_dataset_protocolsio_link(datasetId):
         doi = protocol_url.rsplit('/', 1)[-1]
-        url = "https://www.protocols.io/api/v3/protocols/" + str(doi)
+        url = f"https://www.protocols.io/api/v3/protocols/{str(doi)}"
         querystring = {
             "Authorization": "76d6ca8285076f48fe611091fd97eab4bc1c65051da75d7dc70ce746bd64dbe6"}
         headers = {
@@ -321,13 +305,11 @@ def get_image_files(datasetId):
             # Create an in-memory stream of the content
             sio = io.BytesIO(response.content)
             img = Image.open(sio)
-            image_name = str(datasetId) + "-" + \
-                str(os.path.basename(filepath))
+            image_name = (f"{str(datasetId)}-" + str(os.path.basename(filepath)))
             # img.save(image_name)
             datafile_image[filepath] = img
         except:
             print("NOT SAVED")
-            pass
     return datafile_image
@@ -338,8 +320,7 @@ def get_image_files_biolucida(datasetId):
         'token': ''
     }
     response = requests.request("GET", url, headers=headers, data=payload)
-    datafile_image = json.loads(response.text)
-    return datafile_image
+    return json.loads(response.text)
 
 
 def get_images_all_datasets(list_datasetId):
@@ -365,16 +346,12 @@ def get_images_all_datasets(list_datasetId):
 
 
 def get_knowledge_graph_data(datasetId):
-    # get species information from subjects file
-    # get specimen type and specimen anatomical location from samples.xlsx
-    data_knowledge_graph = {}
     filepath = "files/subjects.xlsx"
     response = get_dataset_file_response(datasetId, filepath)
     with io.BytesIO(response.content) as fh:
         df = pd.io.excel.read_excel(fh, engine='openpyxl')
     df.dropna(axis=0, how='all', inplace=True)
-    data_knowledge_graph['Species'] = df['species'].values[0]
-
+    data_knowledge_graph = {'Species': df['species'].values[0]}
     filepath = "files/samples.xlsx"
     response = get_dataset_file_response(datasetId, filepath)
     with io.BytesIO(response.content) as fh:
@@ -395,15 +372,13 @@ def alphanum_key(key): return [convert(c)
 
 
 def get_summary_table_data(datasetId):
-    # manifest.json: get dataset title, subtitle, publication date
-    # subjects.xlsx: species, n subjects, age range, sex
-    # samples.xlsx: n samples, specimen type, specimen anatomical location
-    data_table_summary = {}
     manifest_json = get_dataset_main_manifest(datasetId)
-    data_table_summary['Dataset id'] = datasetId
-    data_table_summary['Title'] = manifest_json['name']
-    data_table_summary['Subtitle'] = manifest_json['description']
-    data_table_summary['Publication_date'] = manifest_json['datePublished']
+    data_table_summary = {
+        'Dataset id': datasetId,
+        'Title': manifest_json['name'],
+        'Subtitle': manifest_json['description'],
+        'Publication_date': manifest_json['datePublished'],
+    }
 
     # subjects file
     filepath = "files/subjects.xlsx"
@@ -462,10 +437,7 @@ def get_all_datasets_text(list_datasetId):
     # protocol, and any text files in the datasets
     data_text = {}
     for datasetId in list_datasetId:
-        data_text[datasetId] = {}
-        # text from dataset description
-        data_text[datasetId]['description'] = get_dataset_description_text(
-            datasetId)
+        data_text[datasetId] = {'description': get_dataset_description_text(datasetId)}
         # text from protocol all nice and clean, includes title, description
         # and protocol steps
         data_text[datasetId]['protocol'] = get_protocolsio_text(datasetId)
@@ -520,10 +492,8 @@
     # text_to_summarise = " ".join(text_to_summarise)
     text_to_summarise = " ".join(list(NestedDictValues(data_text)))
 
-    abstract = summariser(text_to_summarise, top_n=10)
-
-    # abstract = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
-    return abstract
+    return summariser(text_to_summarise, top_n=10)
 
 
 def get_text_correlation(data_text):
@@ -602,8 +572,7 @@ def get_all_datasets_mat_files(list_datasetId):
     filepath_list = []
     for datasetId in list_datasetId:
         if datasetId in ['60', '64', '65']:
-            dataset_mat = get_dataset_mat_files(datasetId)
-            if dataset_mat:
+            if dataset_mat := get_dataset_mat_files(datasetId):
                 datasetId_path = os.path.join(
                     matlab_data_folder, str(datasetId))
                 os.makedirs(datasetId_path)
@@ -626,7 +595,7 @@
                     f.write(response.content)
                 # with open(mat_file_path, 'w', encoding="utf-8") as f:
                 #     f.write(response.text)
-    if len(full_datasetId_list) > 0:
+    if full_datasetId_list:
         df["datasetId"] = full_datasetId_list
         df["filepath"] = filepath_list
         matlab_excel_file = os.path.join(
@@ -690,12 +659,6 @@
 datasetIdsinput = json.load(open(input_file))
 list_datasetId = datasetIdsinput['datasetIds']
 list_datasetId = [str(x) for x in list_datasetId]
-#list_datasetId = ['60', '64', '65', '16', '61', '89', '97']
-#list_datasetId = ['60', '64', '65']
-
-# storage dict to be saved as a json and returned to front-end
-dataset_data = {}
-
 # knowledge graph data
 #dataset_data['knowledge_graph'] = {}
 # for datasetId in list_datasetId:
@@ -704,10 +667,12 @@
 
 # summary table
 print("summary table")
-dataset_data['summary table'] = {}
-for datasetId in list_datasetId:
-    dataset_data['summary table'][datasetId] = get_summary_table_data(
-        datasetId)
+dataset_data = {
+    'summary table': {
+        datasetId: get_summary_table_data(datasetId)
+        for datasetId in list_datasetId
+    }
+}
 
 # keywords
 print("dataset text")