Sourcery refactored main branch #16

Open · wants to merge 1 commit into base: main
app/osparc/job_api.py (16 changes: 5 additions & 11 deletions)
@@ -58,9 +58,7 @@ def start_python_osparc_job(dataset_info):
"input_2": path_for_input_json,
}

payload = start_osparc_job("python", input_file_paths)

return payload
return start_osparc_job("python", input_file_paths)
Function start_python_osparc_job refactored with the following changes:
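
This is the inline-immediately-returned-variable pattern: a temporary that is returned on the next line adds a name but no information. A minimal sketch with hypothetical names (submit_job stands in for the real start_osparc_job):

def submit_job(job_type, input_file_paths):  # stub so the sketch runs
    return {"status_code": 200}

# Before: the intermediate variable only echoes the call
def start_job(job_type, input_file_paths):
    payload = submit_job(job_type, input_file_paths)
    return payload

# After: return the call result directly
def start_job(job_type, input_file_paths):
    return submit_job(job_type, input_file_paths)

The inlining is safe as long as nothing reads the variable between the assignment and the return.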



def start_matlab_osparc_job(matlab_zip_filepath):
@@ -71,9 +69,7 @@ def start_matlab_osparc_job(matlab_zip_filepath):
"input_1": matlab_zip_filepath,
}

payload = start_osparc_job("matlab", input_file_paths)

return payload
return start_osparc_job("matlab", input_file_paths)
Comment on lines -74 to +72
Function start_matlab_osparc_job refactored with the following changes:


def start_osparc_job(job_type, input_file_paths):
"""
@@ -298,11 +294,11 @@ def check_job_status(job_type, job_id):
    # output_2 = 4.0

    # we're only taking the first one
-    print(f"Now downloading to disk path:")
+    print("Now downloading to disk path:")
    results_file: File = outputs.results[output_result_to_use]
    #print(f"file id: {results_file.id}")
    download_path: str = files_api.download_file(file_id=results_file.id)

Function check_job_status refactored with the following changes:
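
One change in this hunk drops the f prefix from a literal with no placeholders; an f-string without substitutions implies interpolation that never happens. A sketch (download_path is a hypothetical value):

download_path = "/tmp/results"
print(f"Now downloading to disk path:")    # before: nothing to interpolate
print("Now downloading to disk path:")     # after: a plain literal
print(f"Download path: {download_path}")   # f-strings stay where a value is embedded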

print(f"Download path: {download_path}")

payload = {
@@ -425,6 +421,4 @@ def get_static_dir_for_job(job_id):
"""
takes job_id and returns the static dir for that job, where frontend can access it
"""
dir_path_for_job_outputs = os.path.join(static_dir, "jobs-results", job_id)

return dir_path_for_job_outputs
return os.path.join(static_dir, "jobs-results", job_id)
Comment on lines -428 to +424
Function get_static_dir_for_job refactored with the following changes:

app/routes.py (13 changes: 7 additions & 6 deletions)
@@ -35,15 +35,17 @@ def create_job():

    dataset_dict = request.json
    if dataset_dict.get("datasetIds", False) == False:
-        error_message = make_response("Invalid data: need a json with key 'datasetIds' and value an array of integers", 400)
-        return error_message
+        return make_response(
+            "Invalid data: need a json with key 'datasetIds' and value an array of integers",
+            400,
+        )

    print("json:", request.json)

    payload = job_api.start_python_osparc_job(dataset_dict)

-    resp = make_response(json.dumps(payload), payload["status_code"])
-    return resp
+    return make_response(json.dumps(payload), payload["status_code"])
Comment on lines -38 to +48
Function set_routes.create_job refactored with the following changes:



# letting cors get setup in settings.py instead
@@ -61,8 +63,7 @@ def check_job_status(job_type, job_id):
elif job_type == "matlab":
payload = job_api.check_matlab_job_status(job_id)

resp = make_response(json.dumps(payload), payload["status_code"])
return resp
return make_response(json.dumps(payload), payload["status_code"])
Comment on lines -64 to +66
Function set_routes.check_job_status refactored with the following changes:


# e.g., http://localhost:5000/api/results-images/example-job-id/Plots-3.x.png
@app.route('/api/results-images/<string:job_id>/<string:image_name>', methods=['GET'])
assets/INPUT_FOLDER/main.py (87 changes: 26 additions & 61 deletions)
@@ -54,8 +54,7 @@

def keywords_finder(text):
    """Return keywords after removing list of not required words."""
-    words = nlp(text).ents
-    return words
+    return nlp(text).ents
Function keywords_finder refactored with the following changes:



def NestedDictValues(d):
@@ -111,20 +110,14 @@ def build_similarity_matrix(sentences, stop_words):
def summariser(merged_text, top_n=5):
    sentences = sent_tokenize(merged_text)
    stop_words = stopwords.words('english')
-    summarize_text = []
-
Comment on lines -114 to -115
Function summariser refactored with the following changes:

This removes the following comments (why?):

# print("Indexes of top ranked_sentence order are ", ranked_sentence)

    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    ranked_sentence = sorted(
        ((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
-    # print("Indexes of top ranked_sentence order are ", ranked_sentence)
-
-    for i in range(top_n):
-        summarize_text.append(ranked_sentence[i][1])
-
+    summarize_text = [ranked_sentence[i][1] for i in range(top_n)]
    return " ".join(summarize_text)


@@ -169,11 +162,7 @@ def get_dataset_latest_version(datasetId):
headers = {"Accept": "application/json"}
response = requests.request("GET", url, headers=headers)
response_json = json.loads(response.text)
if response.status_code == 200:
versionId = str(response_json['version'])
else:
versionId = ""
return versionId
return str(response_json['version']) if response.status_code == 200 else ""
Comment on lines -172 to +165
Function get_dataset_latest_version refactored with the following changes:
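
The if/else that assigns versionId and then returns it folds into a single conditional expression. A self-contained sketch with a stubbed response object (not the real requests client):

import json

class StubResponse:
    status_code = 200
    text = '{"version": 3}'

def latest_version(response):
    response_json = json.loads(response.text)
    # value_if_true if condition else value_if_false
    return str(response_json['version']) if response.status_code == 200 else ""

assert latest_version(StubResponse()) == "3"

Note that json.loads still runs on non-200 bodies in both the old and new versions; the refactor preserves that behavior rather than fixing it.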



def get_dataset_file_response(datasetId, filepath):
@@ -188,10 +177,7 @@ def get_dataset_file_response(datasetId, filepath):
    }}
    headers = {"Content-Type": "application/json"}
    response = requests.request("POST", url, json=payload, headers=headers)
-    if response.status_code == 200:
-        return response
-    else:
-        return response.reason
+    return response if response.status_code == 200 else response.reason
Comment on lines -191 to +180
Function get_dataset_file_response refactored with the following changes:



def get_dataset_file_download(datasetId, filepath):
@@ -206,8 +192,7 @@
    }}
    headers = {"Content-Type": "application/json"}

-    response = requests.request("POST", url, json=payload, headers=headers)
-    return response
+    return requests.request("POST", url, json=payload, headers=headers)
Comment on lines -209 to +195
Function get_dataset_file_download refactored with the following changes:



def get_dataset_description_text(datasetId):
@@ -239,11 +224,10 @@ def get_dataset_protocolsio_link(datasetId):

def get_protocolsio_text(datasetId):
    data_protocol = {}
-    protocol_url = get_dataset_protocolsio_link(datasetId)
-    if protocol_url:
+    if protocol_url := get_dataset_protocolsio_link(datasetId):
        doi = protocol_url.rsplit('/', 1)[-1]

-        url = "https://www.protocols.io/api/v3/protocols/" + str(doi)
+        url = f"https://www.protocols.io/api/v3/protocols/{str(doi)}"
Comment on lines -242 to +230
Function get_protocolsio_text refactored with the following changes:
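
Two patterns land in this hunk: the assignment folds into the condition via the walrus operator (:=, Python 3.8+), and string concatenation becomes an f-string. A standalone sketch (find_link stands in for get_dataset_protocolsio_link):

def find_link(datasetId):
    return "https://www.protocols.io/view/some-protocol-17"

# Before: assign, then test
protocol_url = find_link(60)
if protocol_url:
    doi = protocol_url.rsplit('/', 1)[-1]
    url = "https://www.protocols.io/api/v3/protocols/" + str(doi)

# After: assign and test in one step, interpolate instead of concatenating
if protocol_url := find_link(60):
    doi = protocol_url.rsplit('/', 1)[-1]
    url = f"https://www.protocols.io/api/v3/protocols/{doi}"

The str(doi) kept inside the generated f-string is redundant: rsplit already returns a string, and f-strings format non-strings anyway.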

querystring = {
"Authorization": "76d6ca8285076f48fe611091fd97eab4bc1c65051da75d7dc70ce746bd64dbe6"}
headers = {
@@ -321,13 +305,11 @@ def get_image_files(datasetId):
            # Create an in-memory stream of the content
            sio = io.BytesIO(response.content)
            img = Image.open(sio)
-            image_name = str(datasetId) + "-" + \
-                str(os.path.basename(filepath))
+            image_name = (f"{str(datasetId)}-" + str(os.path.basename(filepath)))
            # img.save(image_name)
            datafile_image[filepath] = img
        except:
            print("NOT SAVED")
-            pass
Comment on lines -324 to -330
Function get_image_files refactored with the following changes:
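
Alongside the f-string rewrite, the hunk deletes a pass that followed a real statement; pass is only needed when a block would otherwise be empty. Sketch:

# Before: pass is a no-op after print
try:
    img = open("missing.png", "rb")
except OSError:
    print("NOT SAVED")
    pass

# After: identical behavior without the dead statement
try:
    img = open("missing.png", "rb")
except OSError:
    print("NOT SAVED")

The bare except: in the original survives the refactor; narrowing it to a concrete exception class, as sketched here, would be a separate, worthwhile change.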

return datafile_image


@@ -338,8 +320,7 @@ def get_image_files_biolucida(datasetId):
        'token': ''
    }
    response = requests.request("GET", url, headers=headers, data=payload)
-    datafile_image = json.loads(response.text)
-    return datafile_image
+    return json.loads(response.text)
Comment on lines -341 to +323
Function get_image_files_biolucida refactored with the following changes:



def get_images_all_datasets(list_datasetId):
@@ -365,16 +346,12 @@ def get_images_all_datasets(list_datasetId):


def get_knowledge_graph_data(datasetId):
-    # get species information from subjects file
-    # get specimen type and specimen anatomical location from samples.xlsx
-    data_knowledge_graph = {}
    filepath = "files/subjects.xlsx"
    response = get_dataset_file_response(datasetId, filepath)
    with io.BytesIO(response.content) as fh:
        df = pd.io.excel.read_excel(fh, engine='openpyxl')
        df.dropna(axis=0, how='all', inplace=True)
-        data_knowledge_graph['Species'] = df['species'].values[0]
-
+    data_knowledge_graph = {'Species': df['species'].values[0]}
Comment on lines -368 to +354
Function get_knowledge_graph_data refactored with the following changes:

This removes the following comments (why?):

# get specimen type and specimen anatomical location from samples.xlsx
# get species information from subjects file
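
The merge-dict-assign pattern replaces create-empty-then-fill with a single literal. A toy version (species is a hypothetical value read from subjects.xlsx):

species = "Rattus norvegicus"

# Before: empty dict, populated key by key
data_knowledge_graph = {}
data_knowledge_graph['Species'] = species

# After: one literal, built where the value is known
data_knowledge_graph = {'Species': species}

The two provenance comments were dropped because they sat on rewritten lines; the information they carried may be worth restoring by hand.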

filepath = "files/samples.xlsx"
response = get_dataset_file_response(datasetId, filepath)
with io.BytesIO(response.content) as fh:
@@ -395,15 +372,13 @@ def alphanum_key(key): return [convert(c)


def get_summary_table_data(datasetId):
-    # manifest.json: get dataset title, subtitle, publication date
-    # subjects.xlsx: species, n subjects, age range, sex
-    # samples.xlsx: n samples, specimen type, specimen anatomical location
-    data_table_summary = {}
    manifest_json = get_dataset_main_manifest(datasetId)
-    data_table_summary['Dataset id'] = datasetId
-    data_table_summary['Title'] = manifest_json['name']
-    data_table_summary['Subtitle'] = manifest_json['description']
-    data_table_summary['Publication_date'] = manifest_json['datePublished']
+    data_table_summary = {
+        'Dataset id': datasetId,
+        'Title': manifest_json['name'],
+        'Subtitle': manifest_json['description'],
+        'Publication_date': manifest_json['datePublished'],
+    }
Comment on lines -398 to +381
Function get_summary_table_data refactored with the following changes:

This removes the following comments (why?):

# subjects.xlsx: species, n subjects, age range, sex
# samples.xlsx: n samples, specimen type, specimen anatomical location
# manifest.json: get dataset title, subtitle, publication date


# subjects file
filepath = "files/subjects.xlsx"
@@ -462,10 +437,7 @@ def get_all_datasets_text(list_datasetId):
    # protocol, and any text files in the datasets
    data_text = {}
    for datasetId in list_datasetId:
-        data_text[datasetId] = {}
-        # text from dataset description
-        data_text[datasetId]['description'] = get_dataset_description_text(
-            datasetId)
+        data_text[datasetId] = {'description': get_dataset_description_text(datasetId)}
Comment on lines -465 to +440
Function get_all_datasets_text refactored with the following changes:

This removes the following comments (why?):

# text from dataset description

# text from protocol all nice and clean, includes title, description
# and protocol steps
data_text[datasetId]['protocol'] = get_protocolsio_text(datasetId)
@@ -520,10 +492,8 @@ def get_abstract(data_text):
    # text_to_summarise = " ".join(text_to_summarise)

    text_to_summarise = " ".join(list(NestedDictValues(data_text)))
-    abstract = summariser(text_to_summarise, top_n=10)
-
-    # abstract = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
-    return abstract
+    return summariser(text_to_summarise, top_n=10)
Comment on lines -523 to +496
Function get_abstract refactored with the following changes:



def get_text_correlation(data_text):
@@ -602,8 +572,7 @@ def get_all_datasets_mat_files(list_datasetId):
    filepath_list = []
    for datasetId in list_datasetId:
        if datasetId in ['60', '64', '65']:
-            dataset_mat = get_dataset_mat_files(datasetId)
-            if dataset_mat:
+            if dataset_mat := get_dataset_mat_files(datasetId):
Comment on lines -605 to +575
Function get_all_datasets_mat_files refactored with the following changes:
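
Further down the same hunk, if len(full_datasetId_list) > 0: becomes if full_datasetId_list:. Empty sequences are falsy in Python, so the len() comparison adds nothing, and PEP 8 recommends the bare-truthiness form. Sketch:

full_datasetId_list = ["60", "64"]

# Before: explicit length check
if len(full_datasetId_list) > 0:
    print("have datasets")

# After: same test, idiomatic spelling
if full_datasetId_list:
    print("have datasets")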

datasetId_path = os.path.join(
matlab_data_folder, str(datasetId))
os.makedirs(datasetId_path)
@@ -626,7 +595,7 @@ def get_all_datasets_mat_files(list_datasetId):
f.write(response.content)
# with open(mat_file_path, 'w', encoding="utf-8") as f:
# f.write(response.text)
-    if len(full_datasetId_list) > 0:
+    if full_datasetId_list:
df["datasetId"] = full_datasetId_list
df["filepath"] = filepath_list
matlab_excel_file = os.path.join(
@@ -690,12 +659,6 @@ def get_all_datasets_mat_files(list_datasetId):
datasetIdsinput = json.load(open(input_file))
list_datasetId = datasetIdsinput['datasetIds']
list_datasetId = [str(x) for x in list_datasetId]
-#list_datasetId = ['60', '64', '65', '16', '61', '89', '97']
-#list_datasetId = ['60', '64', '65']
-
-# storage dict to be saved as a json and returned to front-end
-dataset_data = {}
-
Comment on lines -693 to -698
Lines 693-710 refactored with the following changes:

This removes the following comments (why?):

#list_datasetId = ['60', '64', '65', '16', '61', '89', '97']
# storage dict to be saved as a json and returned to front-end
#list_datasetId = ['60', '64', '65']
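
The loop below that fills dataset_data['summary table'] entry by entry becomes a dict comprehension nested in the literal. A toy equivalent (get_summary stands in for get_summary_table_data):

def get_summary(datasetId):
    return {'Dataset id': datasetId}

list_datasetId = ['60', '64']

# Before: create, then populate in a loop
dataset_data = {'summary table': {}}
for datasetId in list_datasetId:
    dataset_data['summary table'][datasetId] = get_summary(datasetId)

# After: the whole mapping as one expression
dataset_data = {
    'summary table': {
        datasetId: get_summary(datasetId) for datasetId in list_datasetId
    }
}
assert dataset_data['summary table']['60'] == {'Dataset id': '60'}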

# knowledge graph data
#dataset_data['knowledge_graph'] = {}
# for datasetId in list_datasetId:
@@ -704,10 +667,12 @@ def get_all_datasets_mat_files(list_datasetId):

# summary table
print("summary table")
dataset_data['summary table'] = {}
for datasetId in list_datasetId:
dataset_data['summary table'][datasetId] = get_summary_table_data(
datasetId)
dataset_data = {
'summary table': {
datasetId: get_summary_table_data(datasetId)
for datasetId in list_datasetId
}
}

# keywords
print("dataset text")