-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sourcery refactored main branch #16
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,9 +58,7 @@ def start_python_osparc_job(dataset_info): | |
"input_2": path_for_input_json, | ||
} | ||
|
||
payload = start_osparc_job("python", input_file_paths) | ||
|
||
return payload | ||
return start_osparc_job("python", input_file_paths) | ||
|
||
|
||
def start_matlab_osparc_job(matlab_zip_filepath): | ||
|
@@ -71,9 +69,7 @@ def start_matlab_osparc_job(matlab_zip_filepath): | |
"input_1": matlab_zip_filepath, | ||
} | ||
|
||
payload = start_osparc_job("matlab", input_file_paths) | ||
|
||
return payload | ||
return start_osparc_job("matlab", input_file_paths) | ||
Comment on lines
-74
to
+72
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
def start_osparc_job(job_type, input_file_paths): | ||
""" | ||
|
@@ -298,11 +294,11 @@ def check_job_status(job_type, job_id): | |
# output_2 = 4.0 | ||
|
||
# we're only taking the first one | ||
print(f"Now downloading to disk path:") | ||
print("Now downloading to disk path:") | ||
results_file: File = outputs.results[output_result_to_use] | ||
#print(f"file id: {results_file.id}") | ||
download_path: str = files_api.download_file(file_id=results_file.id) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
print(f"Download path: {download_path}") | ||
|
||
payload = { | ||
|
@@ -425,6 +421,4 @@ def get_static_dir_for_job(job_id): | |
""" | ||
takes job_id and returns the static dir for that job, where frontend can access it | ||
""" | ||
dir_path_for_job_outputs = os.path.join(static_dir, "jobs-results", job_id) | ||
|
||
return dir_path_for_job_outputs | ||
return os.path.join(static_dir, "jobs-results", job_id) | ||
Comment on lines
-428
to
+424
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,15 +35,17 @@ def create_job(): | |
|
||
dataset_dict = request.json | ||
if dataset_dict.get("datasetIds", False) == False: | ||
error_message = make_response("Invalid data: need a json with key 'datasetIds' and value an array of integers", 400) | ||
return error_message | ||
return make_response( | ||
"Invalid data: need a json with key 'datasetIds' and value an array of integers", | ||
400, | ||
) | ||
|
||
|
||
print("json:", request.json) | ||
|
||
payload = job_api.start_python_osparc_job(dataset_dict) | ||
|
||
resp = make_response(json.dumps(payload), payload["status_code"]) | ||
return resp | ||
return make_response(json.dumps(payload), payload["status_code"]) | ||
Comment on lines
-38
to
+48
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
# letting cors get setup in settings.py instead | ||
|
@@ -61,8 +63,7 @@ def check_job_status(job_type, job_id): | |
elif job_type == "matlab": | ||
payload = job_api.check_matlab_job_status(job_id) | ||
|
||
resp = make_response(json.dumps(payload), payload["status_code"]) | ||
return resp | ||
return make_response(json.dumps(payload), payload["status_code"]) | ||
Comment on lines
-64
to
+66
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
# e.g., http://localhost:5000/api/results-images/example-job-id/Plots-3.x.png | ||
@app.route('/api/results-images/<string:job_id>/<string:image_name>', methods=['GET']) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,8 +54,7 @@ | |
|
||
def keywords_finder(text): | ||
"""Return keywords after removing list of not required words.""" | ||
words = nlp(text).ents | ||
return words | ||
return nlp(text).ents | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
def NestedDictValues(d): | ||
|
@@ -111,20 +110,14 @@ def build_similarity_matrix(sentences, stop_words): | |
def summariser(merged_text, top_n=5): | ||
sentences = sent_tokenize(merged_text) | ||
stop_words = stopwords.words('english') | ||
summarize_text = [] | ||
|
||
Comment on lines
-114
to
-115
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ):
|
||
sentence_similarity_martix = build_similarity_matrix(sentences, stop_words) | ||
|
||
sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix) | ||
scores = nx.pagerank(sentence_similarity_graph) | ||
|
||
ranked_sentence = sorted( | ||
((scores[i], s) for i, s in enumerate(sentences)), reverse=True) | ||
# print("Indexes of top ranked_sentence order are ", ranked_sentence) | ||
|
||
for i in range(top_n): | ||
summarize_text.append(ranked_sentence[i][1]) | ||
|
||
summarize_text = [ranked_sentence[i][1] for i in range(top_n)] | ||
return " ".join(summarize_text) | ||
|
||
|
||
|
@@ -169,11 +162,7 @@ def get_dataset_latest_version(datasetId): | |
headers = {"Accept": "application/json"} | ||
response = requests.request("GET", url, headers=headers) | ||
response_json = json.loads(response.text) | ||
if response.status_code == 200: | ||
versionId = str(response_json['version']) | ||
else: | ||
versionId = "" | ||
return versionId | ||
return str(response_json['version']) if response.status_code == 200 else "" | ||
Comment on lines
-172
to
+165
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
def get_dataset_file_response(datasetId, filepath): | ||
|
@@ -188,10 +177,7 @@ def get_dataset_file_response(datasetId, filepath): | |
}} | ||
headers = {"Content-Type": "application/json"} | ||
response = requests.request("POST", url, json=payload, headers=headers) | ||
if response.status_code == 200: | ||
return response | ||
else: | ||
return response.reason | ||
return response if response.status_code == 200 else response.reason | ||
Comment on lines
-191
to
+180
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
def get_dataset_file_download(datasetId, filepath): | ||
|
@@ -206,8 +192,7 @@ def get_dataset_file_download(datasetId, filepath): | |
}} | ||
headers = {"Content-Type": "application/json"} | ||
|
||
response = requests.request("POST", url, json=payload, headers=headers) | ||
return response | ||
return requests.request("POST", url, json=payload, headers=headers) | ||
Comment on lines
-209
to
+195
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
def get_dataset_description_text(datasetId): | ||
|
@@ -239,11 +224,10 @@ def get_dataset_protocolsio_link(datasetId): | |
|
||
def get_protocolsio_text(datasetId): | ||
data_protocol = {} | ||
protocol_url = get_dataset_protocolsio_link(datasetId) | ||
if protocol_url: | ||
if protocol_url := get_dataset_protocolsio_link(datasetId): | ||
doi = protocol_url.rsplit('/', 1)[-1] | ||
|
||
url = "https://www.protocols.io/api/v3/protocols/" + str(doi) | ||
url = f"https://www.protocols.io/api/v3/protocols/{str(doi)}" | ||
Comment on lines
-242
to
+230
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
querystring = { | ||
"Authorization": "76d6ca8285076f48fe611091fd97eab4bc1c65051da75d7dc70ce746bd64dbe6"} | ||
headers = { | ||
|
@@ -321,13 +305,11 @@ def get_image_files(datasetId): | |
# Create an in-memory stream of the content | ||
sio = io.BytesIO(response.content) | ||
img = Image.open(sio) | ||
image_name = str(datasetId) + "-" + \ | ||
str(os.path.basename(filepath)) | ||
image_name = (f"{str(datasetId)}-" + str(os.path.basename(filepath))) | ||
# img.save(image_name) | ||
datafile_image[filepath] = img | ||
except: | ||
print("NOT SAVED") | ||
pass | ||
Comment on lines
-324
to
-330
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
return datafile_image | ||
|
||
|
||
|
@@ -338,8 +320,7 @@ def get_image_files_biolucida(datasetId): | |
'token': '' | ||
} | ||
response = requests.request("GET", url, headers=headers, data=payload) | ||
datafile_image = json.loads(response.text) | ||
return datafile_image | ||
return json.loads(response.text) | ||
Comment on lines
-341
to
+323
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
def get_images_all_datasets(list_datasetId): | ||
|
@@ -365,16 +346,12 @@ def get_images_all_datasets(list_datasetId): | |
|
||
|
||
def get_knowledge_graph_data(datasetId): | ||
# get species information from subjects file | ||
# get specimen type and specimen anatomical location from samples.xlsx | ||
data_knowledge_graph = {} | ||
filepath = "files/subjects.xlsx" | ||
response = get_dataset_file_response(datasetId, filepath) | ||
with io.BytesIO(response.content) as fh: | ||
df = pd.io.excel.read_excel(fh, engine='openpyxl') | ||
df.dropna(axis=0, how='all', inplace=True) | ||
data_knowledge_graph['Species'] = df['species'].values[0] | ||
|
||
data_knowledge_graph = {'Species': df['species'].values[0]} | ||
Comment on lines
-368
to
+354
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ):
|
||
filepath = "files/samples.xlsx" | ||
response = get_dataset_file_response(datasetId, filepath) | ||
with io.BytesIO(response.content) as fh: | ||
|
@@ -395,15 +372,13 @@ def alphanum_key(key): return [convert(c) | |
|
||
|
||
def get_summary_table_data(datasetId): | ||
# manifest.json: get dataset title, subtitle, publication date | ||
# subjects.xlsx: species, n subjects, age range, sex | ||
# samples.xlsx: n samples, specimen type, specimen anatomical location | ||
data_table_summary = {} | ||
manifest_json = get_dataset_main_manifest(datasetId) | ||
data_table_summary['Dataset id'] = datasetId | ||
data_table_summary['Title'] = manifest_json['name'] | ||
data_table_summary['Subtitle'] = manifest_json['description'] | ||
data_table_summary['Publication_date'] = manifest_json['datePublished'] | ||
data_table_summary = { | ||
'Dataset id': datasetId, | ||
'Title': manifest_json['name'], | ||
'Subtitle': manifest_json['description'], | ||
'Publication_date': manifest_json['datePublished'], | ||
} | ||
Comment on lines
-398
to
+381
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ):
|
||
|
||
# subjects file | ||
filepath = "files/subjects.xlsx" | ||
|
@@ -462,10 +437,7 @@ def get_all_datasets_text(list_datasetId): | |
# protocol, and any text files in the datasets | ||
data_text = {} | ||
for datasetId in list_datasetId: | ||
data_text[datasetId] = {} | ||
# text from dataset description | ||
data_text[datasetId]['description'] = get_dataset_description_text( | ||
datasetId) | ||
data_text[datasetId] = {'description': get_dataset_description_text(datasetId)} | ||
Comment on lines
-465
to
+440
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ):
|
||
# text from protocol all nice and clean, includes title, description | ||
# and protocol steps | ||
data_text[datasetId]['protocol'] = get_protocolsio_text(datasetId) | ||
|
@@ -520,10 +492,8 @@ def get_abstract(data_text): | |
# text_to_summarise = " ".join(text_to_summarise) | ||
|
||
text_to_summarise = " ".join(list(NestedDictValues(data_text))) | ||
abstract = summariser(text_to_summarise, top_n=10) | ||
|
||
# abstract = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." | ||
return abstract | ||
return summariser(text_to_summarise, top_n=10) | ||
Comment on lines
-523
to
+496
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
||
|
||
def get_text_correlation(data_text): | ||
|
@@ -602,8 +572,7 @@ def get_all_datasets_mat_files(list_datasetId): | |
filepath_list = [] | ||
for datasetId in list_datasetId: | ||
if datasetId in ['60', '64', '65']: | ||
dataset_mat = get_dataset_mat_files(datasetId) | ||
if dataset_mat: | ||
if dataset_mat := get_dataset_mat_files(datasetId): | ||
Comment on lines
-605
to
+575
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
datasetId_path = os.path.join( | ||
matlab_data_folder, str(datasetId)) | ||
os.makedirs(datasetId_path) | ||
|
@@ -626,7 +595,7 @@ def get_all_datasets_mat_files(list_datasetId): | |
f.write(response.content) | ||
# with open(mat_file_path, 'w', encoding="utf-8") as f: | ||
# f.write(response.text) | ||
if len(full_datasetId_list) > 0: | ||
if full_datasetId_list: | ||
df["datasetId"] = full_datasetId_list | ||
df["filepath"] = filepath_list | ||
matlab_excel_file = os.path.join( | ||
|
@@ -690,12 +659,6 @@ def get_all_datasets_mat_files(list_datasetId): | |
datasetIdsinput = json.load(open(input_file)) | ||
list_datasetId = datasetIdsinput['datasetIds'] | ||
list_datasetId = [str(x) for x in list_datasetId] | ||
#list_datasetId = ['60', '64', '65', '16', '61', '89', '97'] | ||
#list_datasetId = ['60', '64', '65'] | ||
|
||
# storage dict to be saved as a json and returned to front-end | ||
dataset_data = {} | ||
|
||
Comment on lines
-693
to
-698
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
This removes the following comments ( why? ):
|
||
# knowledge graph data | ||
#dataset_data['knowledge_graph'] = {} | ||
# for datasetId in list_datasetId: | ||
|
@@ -704,10 +667,12 @@ def get_all_datasets_mat_files(list_datasetId): | |
|
||
# summary table | ||
print("summary table") | ||
dataset_data['summary table'] = {} | ||
for datasetId in list_datasetId: | ||
dataset_data['summary table'][datasetId] = get_summary_table_data( | ||
datasetId) | ||
dataset_data = { | ||
'summary table': { | ||
datasetId: get_summary_table_data(datasetId) | ||
for datasetId in list_datasetId | ||
} | ||
} | ||
|
||
# keywords | ||
print("dataset text") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Function `start_python_osparc_job` refactored with the following changes: inline-immediately-returned-variable