
Commit

'Refactored by Sourcery'
Sourcery AI committed May 25, 2022
1 parent 9ba2afb commit 26e3365
Showing 3 changed files with 38 additions and 78 deletions.
app/osparc/job_api.py (16 changes: 5 additions & 11 deletions)
@@ -58,9 +58,7 @@ def start_python_osparc_job(dataset_info):
"input_2": path_for_input_json,
}

payload = start_osparc_job("python", input_file_paths)

return payload
return start_osparc_job("python", input_file_paths)


def start_matlab_osparc_job(matlab_zip_filepath):
@@ -71,9 +69,7 @@ def start_matlab_osparc_job(matlab_zip_filepath):
"input_1": matlab_zip_filepath,
}

payload = start_osparc_job("matlab", input_file_paths)

return payload
return start_osparc_job("matlab", input_file_paths)

def start_osparc_job(job_type, input_file_paths):
"""
@@ -298,11 +294,11 @@ def check_job_status(job_type, job_id):
# output_2 = 4.0

# we're only taking the first one
print(f"Now downloading to disk path:")
print("Now downloading to disk path:")
results_file: File = outputs.results[output_result_to_use]
#print(f"file id: {results_file.id}")
download_path: str = files_api.download_file(file_id=results_file.id)

print(f"Download path: {download_path}")

payload = {
@@ -425,6 +421,4 @@ def get_static_dir_for_job(job_id):
"""
takes job_id and returns the static dir for that job, where frontend can access it
"""
-    dir_path_for_job_outputs = os.path.join(static_dir, "jobs-results", job_id)
-
-    return dir_path_for_job_outputs
+    return os.path.join(static_dir, "jobs-results", job_id)
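Every hunk in this file applies the same Sourcery rule: a local variable that is assigned and then immediately returned is inlined into the return statement. A minimal sketch of the pattern with hypothetical names, not this repository's code:

    import os

    # Before: the temporary variable exists only to be returned.
    def job_dir_before(job_id):
        path = os.path.join("jobs-results", job_id)
        return path

    # After: return the expression directly; behaviour is unchanged.
    def job_dir_after(job_id):
        return os.path.join("jobs-results", job_id)

The print() change in check_job_status is a separate fix: the string contains no placeholders, so the redundant f prefix is dropped.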
app/routes.py (13 changes: 7 additions & 6 deletions)
@@ -35,15 +35,17 @@ def create_job():

dataset_dict = request.json
if dataset_dict.get("datasetIds", False) == False:
-        error_message = make_response("Invalid data: need a json with key 'datasetIds' and value an array of integers", 400)
-        return error_message
+        return make_response(
+            "Invalid data: need a json with key 'datasetIds' and value an array of integers",
+            400,
+        )


print("json:", request.json)

payload = job_api.start_python_osparc_job(dataset_dict)

-    resp = make_response(json.dumps(payload), payload["status_code"])
-    return resp
+    return make_response(json.dumps(payload), payload["status_code"])


# letting cors get setup in settings.py instead
@@ -61,8 +63,7 @@ def check_job_status(job_type, job_id):
elif job_type == "matlab":
payload = job_api.check_matlab_job_status(job_id)

-    resp = make_response(json.dumps(payload), payload["status_code"])
-    return resp
+    return make_response(json.dumps(payload), payload["status_code"])

# e.g., http://localhost:5000/api/results-images/example-job-id/Plots-3.x.png
@app.route('/api/results-images/<string:job_id>/<string:image_name>', methods=['GET'])
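Both route handlers now return the make_response(...) call directly instead of binding it to resp first. A minimal sketch of the resulting guard-clause shape in a Flask view; the route name and payload below are made up for illustration:

    import json
    from flask import Flask, request, make_response

    app = Flask(__name__)

    @app.route("/api/example-job", methods=["POST"])  # hypothetical route, not the app's real endpoint
    def example_create_job():
        data = request.get_json(silent=True) or {}
        if not data.get("datasetIds"):
            # Guard clause: return the error response directly, no temporary variable.
            return make_response("Invalid data: need a json with key 'datasetIds'", 400)
        payload = {"status_code": 200, "job_id": "example"}
        # Same pattern for the success path.
        return make_response(json.dumps(payload), payload["status_code"])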
assets/INPUT_FOLDER/main.py (87 changes: 26 additions & 61 deletions)
@@ -54,8 +54,7 @@

def keywords_finder(text):
"""Return keywords after removing list of not required words."""
-    words = nlp(text).ents
-    return words
+    return nlp(text).ents


def NestedDictValues(d):
@@ -111,20 +110,14 @@ def build_similarity_matrix(sentences, stop_words):
def summariser(merged_text, top_n=5):
sentences = sent_tokenize(merged_text)
stop_words = stopwords.words('english')
-    summarize_text = []

sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
scores = nx.pagerank(sentence_similarity_graph)

ranked_sentence = sorted(
((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
# print("Indexes of top ranked_sentence order are ", ranked_sentence)

-    for i in range(top_n):
-        summarize_text.append(ranked_sentence[i][1])
-
+    summarize_text = [ranked_sentence[i][1] for i in range(top_n)]
return " ".join(summarize_text)


@@ -169,11 +162,7 @@ def get_dataset_latest_version(datasetId):
headers = {"Accept": "application/json"}
response = requests.request("GET", url, headers=headers)
response_json = json.loads(response.text)
-    if response.status_code == 200:
-        versionId = str(response_json['version'])
-    else:
-        versionId = ""
-    return versionId
+    return str(response_json['version']) if response.status_code == 200 else ""


def get_dataset_file_response(datasetId, filepath):
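get_dataset_latest_version collapses the if/else assignment into one conditional expression. A sketch of the pattern with a stubbed response object rather than the real HTTP call:

    import json

    class FakeResponse:
        # Stand-in for the requests.Response returned by the real API call.
        status_code = 200
        text = '{"version": 3}'

    response = FakeResponse()
    response_json = json.loads(response.text)

    # One conditional expression replaces the four-line if/else assignment.
    versionId = str(response_json['version']) if response.status_code == 200 else ""
    print(versionId)  # -> "3"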
@@ -188,10 +177,7 @@ def get_dataset_file_response(datasetId, filepath):
}}
headers = {"Content-Type": "application/json"}
response = requests.request("POST", url, json=payload, headers=headers)
-    if response.status_code == 200:
-        return response
-    else:
-        return response.reason
+    return response if response.status_code == 200 else response.reason


def get_dataset_file_download(datasetId, filepath):
@@ -206,8 +192,7 @@ def get_dataset_file_download(datasetId, filepath):
}}
headers = {"Content-Type": "application/json"}

response = requests.request("POST", url, json=payload, headers=headers)
return response
return requests.request("POST", url, json=payload, headers=headers)


def get_dataset_description_text(datasetId):
@@ -239,11 +224,10 @@ def get_dataset_protocolsio_link(datasetId):

def get_protocolsio_text(datasetId):
data_protocol = {}
-    protocol_url = get_dataset_protocolsio_link(datasetId)
-    if protocol_url:
+    if protocol_url := get_dataset_protocolsio_link(datasetId):
doi = protocol_url.rsplit('/', 1)[-1]

url = "https://www.protocols.io/api/v3/protocols/" + str(doi)
url = f"https://www.protocols.io/api/v3/protocols/{str(doi)}"
querystring = {
"Authorization": "76d6ca8285076f48fe611091fd97eab4bc1c65051da75d7dc70ce746bd64dbe6"}
headers = {
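get_protocolsio_text now binds and tests the protocol URL in a single step with an assignment expression (the := operator, Python 3.8+), and the URL concatenation becomes an f-string. A sketch with a hypothetical lookup function standing in for get_dataset_protocolsio_link:

    def lookup_protocol_url(dataset_id):
        # Hypothetical stand-in; returns None when no protocol link exists.
        return "https://www.protocols.io/view/example-doi" if dataset_id == "60" else None

    def protocol_api_url(dataset_id):
        # Bind and test in one expression; the body only runs for a truthy URL.
        if protocol_url := lookup_protocol_url(dataset_id):
            doi = protocol_url.rsplit('/', 1)[-1]
            return f"https://www.protocols.io/api/v3/protocols/{doi}"
        return ""

    print(protocol_api_url("60"))  # .../api/v3/protocols/example-doi
    print(protocol_api_url("99"))  # ""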
@@ -321,13 +305,11 @@ def get_image_files(datasetId):
# Create an in-memory stream of the content
sio = io.BytesIO(response.content)
img = Image.open(sio)
-            image_name = str(datasetId) + "-" + \
-                str(os.path.basename(filepath))
+            image_name = (f"{str(datasetId)}-" + str(os.path.basename(filepath)))
# img.save(image_name)
datafile_image[filepath] = img
except:
print("NOT SAVED")
-            pass
return datafile_image


@@ -338,8 +320,7 @@ def get_image_files_biolucida(datasetId):
'token': ''
}
response = requests.request("GET", url, headers=headers, data=payload)
-    datafile_image = json.loads(response.text)
-    return datafile_image
+    return json.loads(response.text)


def get_images_all_datasets(list_datasetId):
@@ -365,16 +346,12 @@ def get_images_all_datasets(list_datasetId):


def get_knowledge_graph_data(datasetId):
-    # get species information from subjects file
-    # get specimen type and specimen anatomical location from samples.xlsx
-    data_knowledge_graph = {}
filepath = "files/subjects.xlsx"
response = get_dataset_file_response(datasetId, filepath)
with io.BytesIO(response.content) as fh:
df = pd.io.excel.read_excel(fh, engine='openpyxl')
df.dropna(axis=0, how='all', inplace=True)
-        data_knowledge_graph['Species'] = df['species'].values[0]
-
+        data_knowledge_graph = {'Species': df['species'].values[0]}
filepath = "files/samples.xlsx"
response = get_dataset_file_response(datasetId, filepath)
with io.BytesIO(response.content) as fh:
@@ -395,15 +372,13 @@ def alphanum_key(key): return [convert(c)


def get_summary_table_data(datasetId):
-    # manifest.json: get dataset title, subtitle, publication date
-    # subjects.xlsx: species, n subjects, age range, sex
-    # samples.xlsx: n samples, specimen type, specimen anatomical location
-    data_table_summary = {}
manifest_json = get_dataset_main_manifest(datasetId)
-    data_table_summary['Dataset id'] = datasetId
-    data_table_summary['Title'] = manifest_json['name']
-    data_table_summary['Subtitle'] = manifest_json['description']
-    data_table_summary['Publication_date'] = manifest_json['datePublished']
+    data_table_summary = {
+        'Dataset id': datasetId,
+        'Title': manifest_json['name'],
+        'Subtitle': manifest_json['description'],
+        'Publication_date': manifest_json['datePublished'],
+    }

# subjects file
filepath = "files/subjects.xlsx"
@@ -462,10 +437,7 @@ def get_all_datasets_text(list_datasetId):
# protocol, and any text files in the datasets
data_text = {}
for datasetId in list_datasetId:
-        data_text[datasetId] = {}
-        # text from dataset description
-        data_text[datasetId]['description'] = get_dataset_description_text(
-            datasetId)
+        data_text[datasetId] = {'description': get_dataset_description_text(datasetId)}
# text from protocol all nice and clean, includes title, description
# and protocol steps
data_text[datasetId]['protocol'] = get_protocolsio_text(datasetId)
@@ -520,10 +492,8 @@ def get_abstract(data_text):
# text_to_summarise = " ".join(text_to_summarise)

text_to_summarise = " ".join(list(NestedDictValues(data_text)))
-    abstract = summariser(text_to_summarise, top_n=10)
-
# abstract = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
-    return abstract
+    return summariser(text_to_summarise, top_n=10)


def get_text_correlation(data_text):
@@ -602,8 +572,7 @@ def get_all_datasets_mat_files(list_datasetId):
filepath_list = []
for datasetId in list_datasetId:
if datasetId in ['60', '64', '65']:
-            dataset_mat = get_dataset_mat_files(datasetId)
-            if dataset_mat:
+            if dataset_mat := get_dataset_mat_files(datasetId):
datasetId_path = os.path.join(
matlab_data_folder, str(datasetId))
os.makedirs(datasetId_path)
@@ -626,7 +595,7 @@ def get_all_datasets_mat_files(list_datasetId):
f.write(response.content)
# with open(mat_file_path, 'w', encoding="utf-8") as f:
# f.write(response.text)
-    if len(full_datasetId_list) > 0:
+    if full_datasetId_list:
df["datasetId"] = full_datasetId_list
df["filepath"] = filepath_list
matlab_excel_file = os.path.join(
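The len(full_datasetId_list) > 0 test becomes a plain truthiness check; an empty list is already falsy in Python. A two-line sketch:

    full_datasetId_list = ["60", "64"]
    # Before: if len(full_datasetId_list) > 0:
    if full_datasetId_list:  # an empty list is falsy, a non-empty one is truthy
        print("write the matlab excel file")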
@@ -690,12 +659,6 @@ def get_all_datasets_mat_files(list_datasetId):
datasetIdsinput = json.load(open(input_file))
list_datasetId = datasetIdsinput['datasetIds']
list_datasetId = [str(x) for x in list_datasetId]
-#list_datasetId = ['60', '64', '65', '16', '61', '89', '97']
-#list_datasetId = ['60', '64', '65']
-
-# storage dict to be saved as a json and returned to front-end
-dataset_data = {}
-
# knowledge graph data
#dataset_data['knowledge_graph'] = {}
# for datasetId in list_datasetId:
@@ -704,10 +667,12 @@

# summary table
print("summary table")
-dataset_data['summary table'] = {}
-for datasetId in list_datasetId:
-    dataset_data['summary table'][datasetId] = get_summary_table_data(
-        datasetId)
+dataset_data = {
+    'summary table': {
+        datasetId: get_summary_table_data(datasetId)
+        for datasetId in list_datasetId
+    }
+}

# keywords
print("dataset text")
