Skip to content

Commit

Permalink
Merge pull request #48 from camicroscope/develop
Browse files Browse the repository at this point in the history
For 3.9.0
  • Loading branch information
birm authored Mar 4, 2021
2 parents 122437a + dd9bbc7 commit fef55f9
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 45 deletions.
165 changes: 126 additions & 39 deletions OmniLoad.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@

import openslide # to get required slide metadata
import csv # to read csv
import sys # for csv limit
import os # for os and filepath utils
import argparse # to read arguments
import time # for timestamp
import os # for os/fs systems
import json # for json in and out
import requests # for api and pathdb in and out
import hashlib

# for large csv fields, especially segmentations
csv.field_size_limit(sys.maxsize)

parser = argparse.ArgumentParser(description='Load slides or results to caMicroscope.')
# read in collection
parser.add_argument('-i', type=str, default="slide", choices=['slide', 'heatmap', 'mark', 'user'],
parser.add_argument('-i', type=str, default="slide", choices=['slide', 'heatmap', 'mark', 'user', 'segmentation'],
help='Input type')
# read in filepath
parser.add_argument('-f', type=str, default="manifest.csv",
Expand All @@ -21,25 +25,37 @@
# read in pathdb collection
parser.add_argument('-pc', type=str, help='Pathdb Collection Name')
# read in dest uri or equivalent
parser.add_argument('-d', type=str, default="http://localhost:4010/data/Slide/post",
parser.add_argument('-d', type=str, default="http://ca-back:4010/data/Slide/post",
help='Output destination')
# read in lookup type
parser.add_argument('-lt', type=str, help='Slide ID lookup type', default="camic", choices=['camic', 'pathdb'])
# read in lookup uri or equivalent
parser.add_argument('-ld', type=str, default="http://localhost:4010/data/Slide/find",
parser.add_argument('-ld', type=str, default="http://ca-back:4010/data/Slide/find",
help='Slide ID lookup source')

args = parser.parse_args()
print(args)

def file_md5(fileName, blocksize=2 ** 20):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so a large file never has to be
    held in memory all at once.

    Args:
        fileName: path of the file to hash.
        blocksize: number of bytes read per chunk (defaults to 1 MiB).

    Returns:
        The MD5 digest as a hex string.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    m = hashlib.md5()
    with open(fileName, "rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. end of file
        for buf in iter(lambda: f.read(blocksize), b""):
            m.update(buf)
    return m.hexdigest()

# get fields openslide expects
def openslidedata(manifest):
for img in manifest:
img['location'] = img['location'] or img['filename'] or img['file']
img['location'] = img.get("path", "") or img.get("location", "") or img.get("filename", "") or img.get("file", "")
slide = openslide.OpenSlide(img['location'])
slideData = slide.properties
img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_X, None)
img['mpp-y'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None)
img['mpp'] = img['mpp-x'] or img['mpp-y']
img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None) or slideData.get(
"openslide.level[0].height", None)
img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None) or slideData.get(
Expand All @@ -48,13 +64,83 @@ def openslidedata(manifest):
img['level_count'] = int(slideData.get('level_count', 1))
img['objective'] = float(slideData.get(openslide.PROPERTY_NAME_OBJECTIVE_POWER, 0) or
slideData.get("aperio.AppMag", -1.0))
img['md5sum'] = file_md5(filepath)
img['md5sum'] = file_md5(img['location'])
img['comment'] = slideData.get(openslide.PROPERTY_NAME_COMMENT, None)
# required values which are often unused
img['study'] = img.get('study', "")
img['specimen'] = img.get('specimen', "")
return manifest

def getWithAuth(url):
    """GET ``url``, prompting for an API token each time the server answers 401.

    The first request is sent without credentials. While the response status
    is 401 the user is asked for a token; an empty answer, "no" or "n" stops
    retrying and the last (401) response is returned as-is.

    Args:
        url: the address to request.

    Returns:
        The final ``requests`` response object (it may still be a 401).
    """
    # use the url argument, not a module-level variable, so any caller works
    x = requests.get(url)
    while x.status_code == 401:
        token = input("API returned 401, try a (different) token? : ")
        if not token or token in ("no", "n"):
            break
        # requests' `auth=` needs a tuple or a callable, so a bare token string
        # cannot be passed there; send it as a bearer header instead.
        # NOTE(review): confirm the API accepts "Authorization: Bearer <token>".
        x = requests.get(url, headers={"Authorization": "Bearer " + token})
    return x

def postWithAuth(url, data):
    """POST ``data`` as JSON to ``url``, prompting for a token on each 401.

    The first request is sent without credentials. While the response status
    is 401 the user is asked for a token; an empty answer, "no" or "n" stops
    retrying and the last (401) response is returned as-is.

    Args:
        url: the address to post to.
        data: JSON-serialisable payload sent as the request body.

    Returns:
        The final ``requests`` response object (it may still be a 401).
    """
    # post to the url argument, not the global args.d, so the parameter is honoured
    x = requests.post(url, json=data)
    while x.status_code == 401:
        token = input("API returned 401, try a (different) token? : ")
        if not token or token in ("no", "n"):
            break
        # requests' `auth=` needs a tuple or a callable, so a bare token string
        # cannot be passed there; send it as a bearer header instead.
        # NOTE(review): confirm the API accepts "Authorization: Bearer <token>".
        x = requests.post(url, json=data, headers={"Authorization": "Bearer " + token})
    return x

def convertSegmentations(poly, name, area):
    """Convert one colon-separated polygon string into a mark document.

    ``poly`` is a flat list of numbers of the form ``"[x0:y0:x1:y1:...]"``.
    The result contains the closed polygon, its axis-aligned bounding box and
    the provenance/properties sections built from ``name`` and ``area``.

    Args:
        poly: polygon string; square brackets are ignored and values are
            separated by ":" in x, y order.
        name: used as the analysis name, the execution id and the annotation
            name.
        area: stored unchanged as ``footprint``, ``AreaInPixels`` and
            ``PhysicalSize``.

    Returns:
        A dict with ``geometries``, ``provenance``, ``properties``,
        ``footprint``, ``x``, ``y``, ``object_type`` and ``parent_id`` keys.
        ``provenance['image']`` is left empty for the caller to fill in.

    Raises:
        ValueError: if a value is not a number or the number of values is odd.
    """
    # strip the brackets in one pass, then split the flat x:y:x:y... list
    values = [float(v) for v in poly.translate(str.maketrans("", "", "[]")).split(":")]
    if len(values) % 2:
        raise ValueError("polygon needs an even number of coordinates, got %d" % len(values))
    xs = values[0::2]
    ys = values[1::2]
    new_poly = [[x, y] for x, y in zip(xs, ys)]
    # real extrema instead of sentinel start values, so negative coordinates
    # also produce a correct bounding box
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    # complete loop
    new_poly.append(new_poly[0])

    geometry = {"type": "Polygon", "coordinates": [new_poly]}
    bound = {
        "type": "Polygon",
        "coordinates": [[[x_min, y_min], [x_min, y_max], [x_max, y_max], [x_max, y_min], [x_min, y_min]]],
    }
    feature = {"type": "Feature", "geometry": geometry, "bound": bound}
    geometries = {"type": "FeatureCollection", "features": [feature]}

    provenance = {
        "image": {},
        # may need better execution id
        "analysis": {"source": "computer", "coordinate": "image", "execution_id": name,
                     "name": name, "computation": "segmentation"},
    }
    properties = {"annotations": {"name": name, "AreaInPixels": area, "PhysicalSize": area}}

    return {
        "geometries": geometries,
        "provenance": provenance,
        "properties": properties,
        "footprint": area,
        "x": x_min,
        "y": y_min,
        "object_type": "unknown",
        "parent_id": "self",
    }

## START script

manifest = []

# context for file
Expand All @@ -77,62 +163,63 @@ def openslidedata(manifest):
if (args.lt == "camic"):
for x in manifest:
# TODO more flexible with manifest fields
lookup_url = args.ld + "?name=" + x.slide
r = requests.get(lookup_url)
lookup_url = args.ld + "?name=" + x['slide']
r = getWithAuth(lookup_url)
res = r.json()
if (len(res)) == 0:
print("[WARN] - no match for slide '" + x.slide + "', skipping")
try:
x['id'] = res[0]["_id"]["$oid"]
except:
print("[WARN] - no match for slide '" + str(x) + "', skipping")
del x
x.id = res[0]["_id"]["$oid"]
if (args.lt == "pathdb"):
raise NotImplementedError("pathdb lookup is broken now")
for x in manifest:
# TODO there's an error with the url construction when testing, something's up
lookup_url = args.ld + args.pc + "/"
lookup_url = args.ld + "/" + args.pc + "/"
lookup_url += x.get("studyid", "") or x.get("study")
lookup_url += x.get("clinicaltrialsubjectid", "") or x.get("subject")
lookup_url += x.get("imageid", "") or x.get("image", "") or x.get("slide", "")
lookup_url += "?_format=json"
r = requests.get(lookup_url)
r = getWithAuth(lookup_url)
res = r.json()
if (len(res)) == 0:
try:
x['id'] = res[0]["nid"][0].value
except:
print("[WARN] - no match for slide '" + str(x) + "', skipping")
del x
else:
x.id = res[0]["PathDBID"]


# TODO add validation (!!)
print("[WARNING] -- Validation not Implemented")

def postWithAuth(data, url):
x = requests.post(args.d, json=manifest)
retry = True
while (x.status_code == 401 and retry):
token = input("API returned 401, try a (different) token? : ")
if (token and token != "no" and token != "n"):
x = requests.post(args.d, json=manifest, auth=token)
else:
retry = False
return x

# take appropriate destination action
if (args.o == "jsonfile"):
with open(args.d, 'w') as f:
json.dump(manifest, f)
elif (args.o == "camic"):
if (args.i == "slide"):
x = postWithAuth(args.d, manifest)
x.raise_for_status()
r = postWithAuth(args.d, manifest)
print(r.json())
r.raise_for_status()
else:
with open(x.path) as f:
file = json.load(f)
for rec in file:
rec[slide] = x.id
x = postWithAuth(args.d, file)
x.raise_for_status()
for x in manifest:
with open(x['path']) as f:
if (args.i == "segmentation"):
reader = csv.DictReader(f)
segs = [row for row in reader]
fil = []
for rec in segs:
res = convertSegmentations(rec['Polygon'], x['segname'], rec['AreaInPixels'])
res['provenance']['image']['slide'] = x['id']
fil.append(res)
else:
fil = json.load(f)
for rec in fil:
# TODO safer version of this?
rec['provenance']['image']['slide'] = x['id']
r = postWithAuth(args.d, fil)
print(r.json())
r.raise_for_status()
elif (args.o == "pathdb"):
#! TODO
#! TODO - need the url and pattern for adding a slide to pathdb
if (args.i != "slide"):
raise AssertionError("Pathdb only holds slide data.")
raise NotImplementedError("Output type: " + args.o + " not yet implemented")
Expand Down
27 changes: 21 additions & 6 deletions SlideServer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
app.config['ROI_FOLDER'] = "/images/roiDownload"


ALLOWED_EXTENSIONS = set(['svs', 'tif', 'tiff', 'vms', 'vmu', 'ndpi', 'scn', 'mrxs', 'bif', 'svslide'])
ALLOWED_EXTENSIONS = set(['svs', 'tif', 'tiff', 'vms', 'vmu', 'ndpi', 'scn', 'mrxs', 'bif', 'svslide', 'png', 'jpg'])


def allowed_file(filename):
Expand All @@ -70,8 +70,11 @@ def makePyramid(filename, dest):
try:
filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
destpath = os.path.join(app.config['UPLOAD_FOLDER'], dest)
pyvips.Image.new_from_file(filepath, access='sequential').tiffsave(destpath, tile=True, compression="lzw", tile_width=256, tile_height=256, pyramid=True, bigtiff=True, xres=0.254, yres=0.254)
return flask.Response(json.dumps({"status": "OK"}), status=200)
savedImg = pyvips.Image.new_from_file(filepath, access='sequential').tiffsave(destpath, tile=True, compression="lzw", tile_width=256, tile_height=256, pyramid=True, bigtiff=True, xres=0.254, yres=0.254)
while not os.path.exists(filepath):
os.sync()
sleep(750)
return flask.Response(json.dumps({"status": "OK", "srcFile":filename, "destFile":dest, "details":savedImg}), status=200)
except BaseException as e:
return flask.Response(json.dumps({"type": "pyvips", "error": str(e)}), status=500)

Expand Down Expand Up @@ -177,18 +180,30 @@ def testRoute():

@app.route("/data/one/<filepath>", methods=['GET'])
def singleSlide(filepath):
    """Return the metadata for one slide as JSON.

    Responds with status 500 when the metadata lookup reports an error and
    with status 200 otherwise; the body is the JSON-encoded result either way.
    """
    res = dev_utils.getMetadata(filepath, app.config['UPLOAD_FOLDER'])
    # res is plain JSON-serialisable data, so hasattr(res, 'error') could never
    # be true; look for an 'error' key instead.
    # NOTE(review): assumes failures are reported as an 'error' key - confirm
    # against dev_utils.getMetadata.
    failed = isinstance(res, dict) and 'error' in res
    return flask.Response(json.dumps(res), status=500 if failed else 200)


@app.route("/data/thumbnail/<filepath>", methods=['GET'])
def singleThumb(filepath):
    """Return a thumbnail result for one slide as JSON.

    The ``size`` query argument (integer, default 50) is passed through to
    getThumbnail. Responds with status 500 when the result reports an error
    and with status 200 otherwise.
    """
    size = flask.request.args.get('size', default=50, type=int)
    res = getThumbnail(filepath, size)
    # res is plain JSON-serialisable data, so hasattr(res, 'error') could never
    # be true; look for an 'error' key instead.
    # NOTE(review): assumes failures are reported as an 'error' key - confirm
    # against getThumbnail.
    failed = isinstance(res, dict) and 'error' in res
    return flask.Response(json.dumps(res), status=500 if failed else 200)


@app.route("/data/many/<filepathlist>", methods=['GET'])
def multiSlide(filepathlist):
    """Return the metadata for several slides as JSON.

    ``filepathlist`` is a JSON-encoded value taken from the URL and handed to
    dev_utils.getMetadataList. Responds with status 500 when the result
    reports an error and with status 200 otherwise.
    """
    res = dev_utils.getMetadataList(json.loads(filepathlist), app.config['UPLOAD_FOLDER'])
    # res is plain JSON-serialisable data, so hasattr(res, 'error') could never
    # be true; look for an 'error' key instead.
    # NOTE(review): assumes a failure is reported as a dict with an 'error'
    # key - confirm against dev_utils.getMetadataList.
    failed = isinstance(res, dict) and 'error' in res
    return flask.Response(json.dumps(res), status=500 if failed else 200)


@app.route("/getSlide/<image_name>")
Expand Down

0 comments on commit fef55f9

Please sign in to comment.