Skip to content

Commit

Permalink
As a Developer, provide a way to remove all automatic annotations fro…
Browse files Browse the repository at this point in the history
…m analyses, so they can be re-annotated with the correct information (#99)

* Adding the remove annotation and annotation queueing scripts

* Adding documentation and run feedback

* Updated the analysis annotation script to give relevant output

* Fixed the annotation config that broke annotation

* Linting should be passing

* Removed debugging print statements

* Added a note to the annotate-all-existing-analyses.sh that it may need to be run more than once

* Removed a space
  • Loading branch information
JmScherer authored and SeriousHorncat committed Jul 5, 2023
1 parent bfbe92e commit 068fffb
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 8 deletions.
77 changes: 77 additions & 0 deletions etc/api/annotate-all-existing-analyses.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#! /bin/bash
# ./annotate-all-existing-analyses.sh

# Prints the help text for this script and then terminates the script.
# The closing `exit` carries no explicit status, so the script ends with the
# status of the print command that precedes it.
usage() {
  printf '%s\n' \
    " " \
    "usage: $0" \
    " " \
    " -h Prints usage" \
    " " \
    "Kicks off annotation jobs for all the existing analyses in Rosalution" \
    " " \
    "To run the annotations, please log in to Rosalution to retrieve the" \
    "Client ID and Client Secret credentials. These can be found by" \
    "clicking on your username or going to: <rosalution url>/rosalution/account" \
    " " \
    "Note: This script may need to be run multiple times due to the Rosalution" \
    "annotation system not being built for large sets of annotations queued." \
    " " \
    "Please install jq for this script to work. https://stedolan.github.io/jq/" \
    " "
  exit
}

# Base URL of the Rosalution REST API targeted by every request below.
ROSALUTION_API="http://local.rosalution.cgds/rosalution/api"

# Parse command-line options; -h is the only supported flag.
while getopts ":h" opt; do
  case $opt in
    h) usage;;
    \?) echo "Invalid option -$OPTARG" && exit 127;;
  esac
done

# jq parses every API response below, so stop early when it is not installed.
if ! jq --version &> /dev/null
then
  echo "Error: jq could not be found. Exiting." >&2
  # usage ends with a bare `exit`, which would report success. Running it in a
  # subshell still prints the help text but lets this script exit with a
  # failure status.
  (usage)
  exit 1
fi

echo "Please enter your Client Id";
read -r CLIENT_ID;

echo "Please enter your Client Secret";
read -r -s CLIENT_SECRET;
# Silent mode swallows the newline typed by the user; emit one so that the
# following output starts on its own line.
echo

if [ -z "${CLIENT_ID}" ] || [ -z "${CLIENT_SECRET}" ]; then
  echo " "
  echo "Please enter required credentials." >&2
  (usage)
  exit 1
fi

echo "Fetching valid authentication token..."

# --data-urlencode keeps credentials intact when they contain characters that
# are significant in a form body (&, +, %, =). `// empty` turns a missing
# access_token into an empty string instead of the literal text "null".
AUTH_TOKEN=$(curl -s -X 'POST' \
  "$ROSALUTION_API/auth/token" \
  -H "accept: application/json" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  -d "grant_type=&scope=" \
  --data-urlencode "client_id=$CLIENT_ID" \
  --data-urlencode "client_secret=$CLIENT_SECRET" | jq -r '.access_token // empty')

if [ -z "$AUTH_TOKEN" ]; then
  echo "Error: could not retrieve an authentication token. Please check your credentials." >&2
  exit 1
fi

echo "Fetching existing analyses in Rosalution..."

RESPONSE=$(curl -s -X "GET" \
  "$ROSALUTION_API/analysis/" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $AUTH_TOKEN")

# One analysis name per array element, read straight from the response.
ANALYSES=()
while IFS='' read -r line; do ANALYSES+=("$line"); done < <(echo "$RESPONSE" | jq -r '.[].name')

if [ "${#ANALYSES[@]}" -eq 0 ]; then
  echo "No analyses were found in Rosalution; nothing to annotate."
fi

for ANALYSIS in "${ANALYSES[@]}"; do
  echo "Starting annotations for analysis $ANALYSIS..."
  # -f makes curl return a non-zero status on an HTTP error so that a rejected
  # request is reported instead of being silently discarded.
  curl -s -f -X "POST" \
    "$ROSALUTION_API/annotate/$ANALYSIS" \
    -H "accept: application/json" \
    -H "Authorization: Bearer $AUTH_TOKEN" \
    > /dev/null || echo "Warning: annotation request for analysis $ANALYSIS failed." >&2
  # Pause between requests to avoid queueing too many annotations at once.
  sleep 5
done

echo "Done. Exiting."
16 changes: 8 additions & 8 deletions etc/fixtures/initial-seed/annotations-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -325,9 +325,9 @@
{
"data_set": "Rat_Alliance_Genome_url",
"data_source": "Rosalution",
"annotation_source_type": "forge",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/gene/{Rat Gene Identifier}",
"annotation_source_type": "forge",
"base_string": "https://www.alliancegenome.org/gene/{Rat Gene Identifier}",
"attribute": "{ \"Rat_Alliance_Genome_url\": .Rat_Alliance_Genome_url }",
"dependencies": ["Rat Gene Identifier"]
},
Expand All @@ -343,9 +343,9 @@
{
"data_set": "Mouse_Alliance_Genome_url",
"data_source": "Rosalution",
"annotation_source_type": "forge",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/gene/{Mouse Gene Identifier}",
"annotation_source_type": "forge",
"base_string": "https://www.alliancegenome.org/gene/{Mouse Gene Identifier}",
"attribute": "{ \"Mouse_Alliance_Genome_url\": .Mouse_Alliance_Genome_url }",
"dependencies": ["Mouse Gene Identifier"]
},
Expand All @@ -361,9 +361,9 @@
{
"data_set": "Zebrafish_Alliance_Genome_url",
"data_source": "Rosalution",
"annotation_source_type": "forge",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/gene/{Zebrafish Gene Identifier}",
"annotation_source_type": "forge",
"base_string": "https://www.alliancegenome.org/gene/{Zebrafish Gene Identifier}",
"attribute": "{ \"Zebrafish_Alliance_Genome_url\": .Zebrafish_Alliance_Genome_url }",
"dependencies": ["Zebrafish Gene Identifier"]
},
Expand All @@ -379,9 +379,9 @@
{
"data_set": "C-Elegens_Alliance_Genome_url",
"data_source": "Rosalution",
"annotation_source_type": "forge",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/gene/{C-Elegens Gene Identifier}",
"annotation_source_type": "forge",
"base_string": "https://www.alliancegenome.org/gene/{C-Elegens Gene Identifier}",
"attribute": "{ \"C-Elegens_Alliance_Genome_url\": .C-Elegens_Alliance_Genome_url }",
"dependencies": ["C-Elegens Gene Identifier"]
},
Expand Down
60 changes: 60 additions & 0 deletions etc/fixtures/remove-automatic-annotations.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Help text for this mongosh script. It is printed when `help` is true and
// printed again after the stack trace when annotation removal fails.
const usage = `
Script usage for 'remove-automatic-annotations.js':
mongosh /tmp/fixtures/remove-automatic-annotations.js
Script Options:
help If true, prints this message.
databaseName Database name to use - default: rosalution_db
For mongosh connection and authentication usage, please run: mongosh help
Examples:
mongosh --host localhost --port 27017 /tmp/fixtures/remove-automatic-annotations.js
mongosh --host localhost --port 27017 --eval "help=true;databaseName='your_db_name'" /tmp/fixtures/remove-automatic-annotations.js
`

// Print the help text and stop when the caller passed `help=true` via --eval.
// The typeof guard keeps the check safe when `help` is not defined at all.
// Asking for help is not a failure, so the shell exits with status 0.
if (typeof help !== 'undefined' && help === true) {
  print(usage);
  quit(0);
}

// Fall back to the default database when no name was supplied via --eval.
// The assignment deliberately has no declaration: a `let`/`const` here would
// shadow the value that --eval places on the shell's global scope.
if (typeof databaseName === 'undefined') {
  databaseName = 'rosalution_db';
} else if (typeof databaseName !== 'string') {
  print("databaseName must be a string");
  quit(1);
}

// Re-point the shell's `db` handle at the requested database so that the
// collection calls made later in this script run against it.
db = db.getSiblingDB(databaseName);

// Progress banner naming the database that is about to be modified.
console.log(`Removing non-manual annotations from ${databaseName}...`);

/**
 * Builds a copy of one genomic unit's annotations that keeps only the entries
 * whose `data_source` is 'rosalution-manual'.
 *
 * Every annotation is treated as an object whose values are arrays of entries
 * (NOTE(review): shape inferred from how this script walks the data - confirm
 * against the genomic_units schema). Keys whose arrays end up empty are
 * dropped, and annotations left without any key are dropped as well.
 *
 * @param {Array<Object>} annotations - The `annotations` array of one genomic unit.
 * @returns {Array<Object>} A new array; the input is left unmodified.
 */
function keepManualAnnotations(annotations) {
  return annotations
    .map((annotation) => Object.fromEntries(
      Object.entries(annotation)
        .map(([key, entries]) => [
          key,
          entries.filter((entry) => entry['data_source'] === 'rosalution-manual'),
        ])
        .filter(([, entries]) => entries.length > 0),
    ))
    .filter((annotation) => Object.keys(annotation).length !== 0);
}

try {
  const genomicUnits = db.genomic_units.find().toArray();

  if (genomicUnits.length === 0) {
    throw new Error(`No genomic units found in ${databaseName}. Aborting.`);
  }

  // Compute every replacement before writing anything, so malformed data
  // aborts the run before the collection has been modified. Units without an
  // `annotations` array have nothing to remove and are skipped.
  const updates = genomicUnits
    .filter((unit) => Array.isArray(unit.annotations))
    .map((unit) => ({
      _id: unit._id,
      annotations: keepManualAnnotations(unit.annotations),
    }));

  for (const { _id, annotations } of updates) {
    // Write only the annotations field; the other fields of the document
    // are left exactly as they are stored.
    db.genomic_units.updateOne({ _id }, { $set: { annotations } });
  }
} catch (err) {
  console.log(err.stack);
  console.log(usage);
  quit(1);
}

console.log(`Annotation removal complete.`);

0 comments on commit 068fffb

Please sign in to comment.