Skip to content

Commit

Permalink
Merge pull request #59 from camicroscope/develop
Browse files Browse the repository at this point in the history
For 3.9.10
  • Loading branch information
birm authored Aug 5, 2022
2 parents 53ae352 + ab0dafc commit f61ebb7
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 31 deletions.
128 changes: 101 additions & 27 deletions NCISlideUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,68 @@
import subprocess
import time
from multiprocessing.pool import ThreadPool

import json
import openslide

import os
import requests
from dev_utils import file_md5
from dev_utils import postslide
from dev_utils import post_url

# GLOBALS (for now)
# Upper bound on worker threads used when processing the manifest.
config = {'thread_limit': 20}
manifest_path = 'manifest.csv'
# NCI DOE added flat file START
collections_path = 'specialties_list.json'
flat_file_path = 'flat_file.csv'
# SECURITY: the API token must never be committed to source control.
# It is read from the CAMIC_API_KEY environment variable instead; the empty
# default means "no token".
apiKey = os.environ.get('CAMIC_API_KEY', '')
# NCI DOE added flat file END

# process expects a single image metadata as dictionary


def process(img):
    """Register one slide and attach it to its subspecialty collection.

    ``img`` is a single image metadata dictionary; it must contain ``name``
    and ``token_id`` (and, when the slide has to be created, ``location``
    and ``filename``).

    The slide is looked up by name at ``slide_find_url``:

    * lookup fails (status != 200): ``img['_status']`` is set to the HTTP
      status code and nothing else is done;
    * no slide with that name: the metadata is enriched with
      ``openslidedata`` and posted with ``postslide``, then the slide is
      looked up again to obtain its id;
    * slide already present: ``img['_status']`` is set to ``'existed'``.

    If both a slide id and a collection id (from ``subspecialties_map``,
    keyed by the lower-cased token id) are known, the slide is added to that
    collection.

    Returns the (possibly updated) ``img`` dictionary. Errors raised while
    creating the slide are not propagated; the exception object is stored in
    ``img['_status']`` instead.
    """
    token_id = img['token_id']
    slide_name = img['name']

    # check slides
    res = requests.get(slide_find_url, params={'name': slide_name})
    if res.status_code != 200:
        img['_status'] = res.status_code
        return img

    sid = None
    rs = res.json()
    if len(rs) < 1:
        # the slide doesn't exist: create it, then fetch its id
        try:
            img = openslidedata(img)
            img['study'] = img.get('study', "")
            img['specimen'] = img.get('specimen', "")
            img['location'] = img['location'] or img['filename']
            img = postslide(img, post_url)
            res = requests.get(slide_find_url, params={'name': slide_name})
            sid = res.json()[0]['_id']['$oid']
            print('process img:')
            print(img)
        except BaseException as e:
            # Same best-effort convention as SlideUtil.process: record the
            # failure on the record instead of aborting the whole batch.
            img['_status'] = e
    else:
        sid = rs[0]['_id']['$oid']
        print(sid)
        img['_status'] = 'existed'

    # add slide to collection
    # A missing token id simply means there is no collection to attach to.
    cid = subspecialties_map.get(token_id.lower()) if token_id else None
    # Both ids are required: posting a None sid or cid would send a
    # meaningless request to the collection service.
    if sid is not None and cid is not None:
        requests.post(
            add_slide_to_collection_url,
            data=json.dumps({'cid': cid, 'sids': [sid]}),
            headers={'content-type': 'application/json'})
    return img


def gen_thumbnail(filename, slide, size, imgtype="png"):
    """Write a thumbnail of ``slide`` to ``<filename>.<imgtype>``.

    ``slide`` must provide ``get_thumbnail``; the thumbnail is requested with
    a ``[size, size]`` bound and saved using the upper-cased ``imgtype`` as
    the format name.
    """
    target_path = ".".join([filename, imgtype])
    thumbnail = slide.get_thumbnail([size, size])
    thumbnail.save(target_path, imgtype.upper())


Expand All @@ -45,14 +73,16 @@ def openslidedata(metadata):
metadata['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_X, None)
metadata['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None)
metadata['mpp'] = metadata['mpp-x'] or metadata['mpp-x'] or None
metadata['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None)
metadata['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None)
metadata['height'] = slideData.get(
openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None)
metadata['width'] = slideData.get(
openslide.PROPERTY_NAME_BOUNDS_WIDTH, None)
metadata['vendor'] = slideData.get(openslide.PROPERTY_NAME_VENDOR, None)
metadata['comment'] = slideData.get(openslide.PROPERTY_NAME_COMMENT, None)
metadata['level_count'] = int(slideData.get('level_count', 1))
metadata['objective'] = float(slideData.get("aperio.AppMag", 0.0))
metadata['md5sum'] = file_md5(metadata['location'])

# NCI DOE metadata START
if metadata['height'] is None:
metadata['height'] = slideData.get('aperio.OriginalHeight', None)
Expand All @@ -62,18 +92,20 @@ def openslidedata(metadata):
metadata['width'] = slideData.get('aperio.OriginalWidth', None)
if metadata['width'] is None:
metadata['width'] = slideData.get('openslide.level[0].width', None)
metadata['token_id'] = slideData.get('aperio.CustomField.TokenID', None)
metadata['token_id'] = slideData.get(
'aperio.CustomField.TokenID', metadata['token_id'])
metadata['proc_seq'] = slideData.get('aperio.CustomField.Proc_Seq', None)
metadata['spec_site'] = slideData.get('aperio.CustomField.Spec_Site', None)
metadata['image_id'] = slideData.get('aperio.CustomField.ImageID', None)
metadata['image_id'] = slideData.get('aperio.CustomField.ImageID', None)
flat_matedata = flat_map[metadata['token_id'].lower()]

metadata['registry_code'] = flat_matedata.get('registry',None)
metadata['primary_tumor_site_code'] = flat_matedata.get('primary_site',None)
metadata['primary_tumor_site_term'] = flat_matedata.get('site_text',None)
metadata['morphology_code'] = flat_matedata.get('histology_icdo3',None)
metadata['seer_coded_histology'] = flat_matedata.get('hist_text',None)
metadata['behavior_code'] = flat_matedata.get('behavior_icdo3',None)
metadata['registry_code'] = flat_matedata.get('registry', None)
metadata['primary_tumor_site_code'] = flat_matedata.get(
'primary_site', None)
metadata['primary_tumor_site_term'] = flat_matedata.get('site_text', None)
metadata['morphology_code'] = flat_matedata.get('histology_icdo3', None)
metadata['seer_coded_histology'] = flat_matedata.get('hist_text', None)
metadata['behavior_code'] = flat_matedata.get('behavior_icdo3', None)

metadata['timestamp'] = time.time()
# NCI DOE metadata END
Expand All @@ -85,11 +117,53 @@ def openslidedata(metadata):

# NCI DOE create a metadata dict START
# Lookup tables populated by the loading code further down in this module.
flat_map = {}
subspecialties_map = {}

# Slide endpoints of the ca-back data service.
slide_find_url = "http://ca-back:4010/data/Slide/find"
slide_post_url = "http://ca-back:4010/data/Slide/post"

# Collection endpoints of the ca-back data service.
collection_find_url = "http://ca-back:4010/data/Collection/find"
collection_post_url = "http://ca-back:4010/data/Collection/post"
add_slide_to_collection_url = (
    "http://ca-back:4010/data/Collection/addSlidesToCollection")


def addSpecialty(data):
    """Return the id of the collection matching ``data``, creating it if absent.

    ``data`` is used both as the query parameters of the lookup and, when no
    collection matches, as the JSON body of the create request.

    Returns ``None`` when the lookup does not answer with HTTP 200. For an
    existing collection the id is read from ``['_id']['$oid']`` of the first
    match; for a newly created one it is read from ``['ops'][0]['_id']`` of
    the create response.
    """
    lookup = requests.get(collection_find_url, params=data)
    if lookup.status_code != 200:
        return None

    matches = lookup.json()
    if len(matches) > 0:
        # collection already exists
        return matches[0]['_id']['$oid']

    # not found: create it and hand back the new id
    created = requests.post(
        collection_post_url,
        data=json.dumps(data),
        headers={'content-type': 'application/json'})
    return created.json()['ops'][0]['_id']


# Load the specialty list (when the file is present) and make sure every
# specialty and subspecialty exists as a collection.
if os.path.exists(collections_path):
    with open(collections_path, 'r', encoding='utf-8-sig') as j:
        collections = json.load(j)
    for collection in collections:
        # parent collection for the specialty itself
        pid = addSpecialty({'text': collection['specialty']})
        for sub in collection['subspecialties']:
            # child collection for the subspecialty
            cid = addSpecialty({'text': sub, 'pid': pid})
            if cid is None:
                continue
            # remember the collection id under the lower-cased subspecialty
            # text (process() looks this map up by lower-cased token id)
            subspecialties_map[sub.lower()] = cid


# Index the flat-file rows by lower-cased token id: [tokenId, data].
with open(flat_file_path, 'r', encoding='utf-8-sig') as f:
    reader = csv.DictReader(f)
    flat_map.update((row['tokenid'].lower(), row) for row in reader)

# NCI DOE create a metadata dict END

# get manifest
Expand Down
4 changes: 2 additions & 2 deletions SlideUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# GLOBALS (for now)
config = {'thumbnail_size': 100, 'thread_limit': 20}
manifest_path = 'manifest.csv'

apiKey = '<apiKey>'

# process expects a single image metadata as dictionary
def process(img):
Expand All @@ -21,7 +21,7 @@ def process(img):
img['study'] = img.get('study', "")
img['specimen'] = img.get('specimen', "")
img['location'] = img['location'] or img['filename']
img = postslide(img, post_url)
img = postslide(img, post_url, apiKey)
except BaseException as e:
img['_status'] = e
return img
Expand Down
3 changes: 2 additions & 1 deletion dev_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def getMetadata(filename, upload_folder, extended):


def postslide(img, url, token=''):
url = url + '?token='+ token
if token != '':
url = url + '?token='+token
payload = json.dumps(img)
res = requests.post(url, data=payload, headers={'content-type': 'application/json'})
if res.status_code < 300:
Expand Down
50 changes: 50 additions & 0 deletions make_thumbs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import requests
import openslide
import pycurl
from multiprocessing.pool import ThreadPool

# Back-end endpoints.
SLIDE_LIST_URL = 'http://ca-back:4010/data/Slide/find'
UPDATE_URL = 'http://ca-back:4010/data/Slide/update'
# Prefix of the image-server URL used as a fallback thumbnail source.
IIP_BASE = 'http://ca-back:4010/img/IIP/raw/?FIF='
# TODO -- token input?

# Thumbnail generation settings.
IM_SIZE = 256
THREADS = 5
SAVE_DIR = '/images/thumbnails/'
# When True, thumbnails are rebuilt even for records that already have one.
# NOTE: the name is misspelled (sic); it is kept because process() reads it.
REGNERATE = False

def setThumb(id, val):
    """POST ``val`` as the ``thumbnail`` field for the slide with ``_id`` ``id``.

    The id is appended to ``UPDATE_URL`` as the ``_id`` query parameter; the
    response is not inspected.
    """
    endpoint = "".join((UPDATE_URL, "?_id=", id))
    requests.post(endpoint, json={'thumbnail': val})

def gen_thumbnail(filename, slide, size, imgtype="png"):
    """Save a thumbnail of ``slide`` as ``SAVE_DIR + filename + "." + imgtype``.

    The destination path is printed before the thumbnail is produced. The
    thumbnail is requested with a ``[size, size]`` bound and written using
    the upper-cased ``imgtype`` as the format name.
    """
    target_path = "".join((SAVE_DIR, filename, ".", imgtype))
    print(target_path)
    thumbnail = slide.get_thumbnail([size, size])
    thumbnail.save(target_path, imgtype.upper())

def process(record):
    """Create the thumbnail for one slide record and register it.

    ``record`` is a slide document providing ``location``, ``name`` and
    ``_id['$oid']`` (and optionally ``thumbnail``).

    Records that already have a truthy ``thumbnail`` are skipped unless
    ``REGNERATE`` is set. Otherwise the slide is opened with OpenSlide and a
    thumbnail is written via ``gen_thumbnail``; if that fails for any reason,
    a PNG is downloaded from the image server (``IIP_BASE``) instead. In
    both cases the record is updated through ``setThumb``.

    Returns ``""`` when the OpenSlide path succeeds, ``None`` when the record
    is skipped or the fallback succeeds, and ``[name, exception]`` when the
    fallback fails too. Only that last (truthy) value is reported by the
    caller.
    """
    location = record["location"]
    name = record["name"]

    # skip ones which already have a thumbnail, unless otherwise specified
    if not REGNERATE and record.get("thumbnail", False):
        return None

    try:
        slide = openslide.OpenSlide(location)
        try:
            gen_thumbnail(name, slide, IM_SIZE, imgtype="png")
        finally:
            # Release the slide handle; one is opened per record.
            slide.close()
        setThumb(record['_id']["$oid"], name + ".png")
        return ""
    except BaseException:
        # OpenSlide could not handle the file: fall back to the image server.
        try:
            url = IIP_BASE + location + "&WID=200&CVT=png"
            c = pycurl.Curl()
            try:
                c.setopt(c.URL, url)
                with open(SAVE_DIR + name + ".png", "wb") as f:
                    c.setopt(c.WRITEFUNCTION, f.write)
                    c.perform()
            finally:
                # Always free the curl handle, even when the transfer fails.
                c.close()
            setThumb(record['_id']["$oid"], name + ".png")
        except BaseException as y:
            return [name, y]
    return None

# do it
# Fetch every slide record, then build the thumbnails on a thread pool.
manifest = requests.get(SLIDE_LIST_URL).json()
if manifest:
    # Show one sample record; guarded so an empty slide list does not crash.
    print(manifest[0])

# Leaving the ``with`` block terminates the pool, so all results are
# collected inside it.
with ThreadPool(THREADS) as pool:
    res = list(pool.imap_unordered(process, manifest))

# Only failures are truthy ([name, exception]); print those.
print([x for x in res if x])
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ numpy
Pillow
google-api-python-client
google-auth-httplib2
google-auth-oauthlib
google-auth-oauthlib
pycurl

0 comments on commit f61ebb7

Please sign in to comment.