Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add browse dataset option #216

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions cirrocumulus/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
CIRRO_LIBRARY,
CIRRO_MIXPANEL,
CIRRO_SERVE,
CIRRO_SERVER_DATA_DIR,
CIRRO_SPECIES,
CIRRO_STATIC_DIR,
CIRRO_UPLOAD,
Expand Down Expand Up @@ -167,6 +168,43 @@ def handle_server():
other=["Gallus gallus", "Macaca fascicularis", "Macaca mulatta", "Rattus norvegicus"],
)

# list server-side data files so the user can select a dataset instead of having to type its URL:
d["server_files"] = []
if CIRRO_SERVER_DATA_DIR in os.environ and os.environ[CIRRO_SERVER_DATA_DIR]:
VALID_EXTENSIONS = [
".h5ad",
".h5",
".zip",
".tar",
".tar.gz",
".loom",
".h5seurat",
".rds",
".zarr",
]
for root, dirs, files in os.walk(os.environ[CIRRO_SERVER_DATA_DIR]):
for file in dirs + files:
if any([file.endswith(ext) for ext in VALID_EXTENSIONS]):
d["server_files"] += [os.path.join(root, file)]
# if this is a directory, e.g. .zarr "files", prevent further recursion:
if file in dirs:
dirs.remove(file)
elif (
file in dirs
): # check if `file` is a MEX formatted directory (we must look at the subfiles)
# https://www.10xgenomics.com/support/software/xenium-panel-designer/latest/tutorials/create-single-cell-reference
count_tsvgz = 0 # MEX directories have two subfiles with .tsv.gz extension
count_mtxgz = 0 # MEX directories also have one subfile with .mtx.gz extension
for subfile in os.listdir(os.path.join(root, file)):
if subfile.endswith(".tsv.gz"):
count_tsvgz += 1
elif subfile.endswith(".mtx.gz"):
count_mtxgz += 1
if count_tsvgz == 2 and count_mtxgz == 1:
d["server_files"] += [os.path.join(root, file)]
dirs.remove(file)
break

# from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false
d["library"] = load_json(CIRRO_LIBRARY) or [
"10x 3' v1",
Expand Down
3 changes: 3 additions & 0 deletions cirrocumulus/envir.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
# comma separated list of paths to allow all logged in users to download from
CIRRO_STATIC_DIR = "CIRRO_STATIC_DIR"

# directory on the server whose files are made available to the user for direct selection
CIRRO_SERVER_DATA_DIR = "CIRRO_SERVER_DATA_DIR"

SERVER_CAPABILITY_RENAME_CATEGORIES = "SERVER_CAPABILITY_RENAME_CATEGORIES"
SERVER_CAPABILITY_JOBS = "SERVER_CAPABILITY_JOBS"
SERVER_CAPABILITY_FEATURE_SETS = "SERVER_CAPABILITY_FEATURE_SETS"
Expand Down
10 changes: 10 additions & 0 deletions cirrocumulus/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
CIRRO_JOB_RESULTS,
CIRRO_JOB_TYPE,
CIRRO_SERVE,
CIRRO_SERVER_DATA_DIR,
CIRRO_UPLOAD,
)
from cirrocumulus.launch import create_app
Expand Down Expand Up @@ -100,6 +101,10 @@ def create_parser(description=False):
"--results", help="URL to save user computed results (e.g. differential expression) to"
)
parser.add_argument("--ontology", help="Path to ontology in OBO format for annotation")
parser.add_argument(
"--datadir",
help='Path to directory in the server where the user can select its dataset from with no need to type the full URL in the "New Dataset" window.',
)
return parser


Expand Down Expand Up @@ -127,6 +132,11 @@ def main(argsv):
os.environ[CIRRO_JOB_RESULTS] = args.results
get_fs(os.environ[CIRRO_JOB_RESULTS]).makedirs(os.environ[CIRRO_JOB_RESULTS], exist_ok=True)

if args.datadir is not None:
if not os.path.isdir(args.datadir):
raise ValueError("--datadir is not a valid path to a directory")
os.environ[CIRRO_SERVER_DATA_DIR] = args.datadir

run_args = [
"gunicorn",
"-b",
Expand Down
21 changes: 20 additions & 1 deletion src/EditNewDatasetDialog.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,11 @@ function EditNewDatasetDialog(props) {
const otherSpecies = serverInfo.species.other;
const libraryOptions = serverInfo.library;

const dataFiles = serverInfo.server_files;

const canUpload = serverInfo.upload;
const isNew = dataset == null;
const mustBrowse = dataFiles.length > 0;
let saveEnabled = !loading && name.trim() !== '';
const isAuthEnabled = serverInfo.auth.clientId !== '';

Expand Down Expand Up @@ -340,7 +343,7 @@ function EditNewDatasetDialog(props) {
</div>
<TextField
size={'small'}
style={{display: isNew && canUpload ? 'none' : ''}}
style={{display: isNew && (canUpload || mustBrowse) ? 'none' : ''}}
required={true}
disabled={loading || !isNew}
autoComplete="off"
Expand All @@ -357,6 +360,22 @@ function EditNewDatasetDialog(props) {
label={'URL'}
fullWidth
/>
<Select
size={'small'}
style={{display: isNew && mustBrowse && !canUpload ? '' : 'none'}}
required={true}
disabled={loading || !isNew}
onChange={(event) => setUrl(event.target.value)}
margin="dense"
label={'Browse server'}
fullWidth
>
{dataFiles.map((file) => (
<MenuItem key={file} value={file}>
{file}
</MenuItem>
))}
</Select>
<TextField
size={'small'}
disabled={loading}
Expand Down
Loading