Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement service providing frequency information for an ontology term according to some identified corpus #151

Merged
merged 2 commits into from
Jun 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/main/scala/org/phenoscape/kb/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,27 @@ object Main extends HttpApp with App {
}
}
}
} ~
path("frequency") {
  // Shared completion for both HTTP methods: turn the submitted IRI strings into a set of
  // IRIs and answer with the frequency table, rendered through the CSV marshaller.
  def frequencyResponse(termStrings: Seq[String], corpus: IRI) = {
    import Similarity.TermFrequencyTable.TermFrequencyTableCSV
    complete(Similarity.frequency(termStrings.map(IRI.create).toSet, corpus))
  }

  //FIXME not sure IRI for identifying corpus is best approach, particularly when scores are not stored ahead of time in a graph
  get {
    // GET reads `terms` and `corpus_graph` from the query string.
    parameters('terms.as[Seq[String]], 'corpus_graph.as[IRI]) { (termStrings, corpus) =>
      frequencyResponse(termStrings, corpus)
    }
  } ~
    post {
      // POST reads the same two fields from the form-encoded request body.
      formFields('terms.as[Seq[String]], 'corpus_graph.as[IRI]) { (termStrings, corpus) =>
        frequencyResponse(termStrings, corpus)
      }
    }
}
} ~
pathPrefix("characterstate") {
Expand Down
37 changes: 36 additions & 1 deletion src/main/scala/org/phenoscape/kb/Similarity.scala
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ object Similarity {
}



private def classSubsumers(iri: IRI): Future[Set[IRI]] = {
val query: QueryText =
sparql"""
Expand Down Expand Up @@ -270,6 +269,42 @@ object Similarity {
App.executeSPARQLQueryString(query.text, qs => IRI.create(qs.getResource("subsumer").getURI)).map(_.toSet)
}

/**
 * Looks up how often each of the given terms occurs in the identified corpus.
 *
 * Only `TaxaCorpus` is handled here; any other corpus IRI yields an empty table. For
 * `TaxaCorpus`, the count for a term is the number of distinct profiles that are linked
 * (via the inverse of `has_phenotypic_profile`) to something defined by `VTO` and that are
 * typed with the term in the closure graph.
 *
 * Terms for which the query returns no row are absent from the result rather than mapped
 * to zero.
 *
 * @param terms  term IRIs to look up; an empty set returns an empty table without querying
 * @param corpus IRI identifying the corpus
 * @return a future holding the term-to-count table
 */
def frequency(terms: Set[IRI], corpus: IRI): Future[TermFrequencyTable] = {
  import scalaz.Scalaz._
  corpus match {
    // The guard skips the round trip for an empty term set: an empty VALUES block can
    // only produce zero rows, so the result would be the empty table anyway.
    case TaxaCorpus if terms.nonEmpty =>
      val values = terms.map(t => sparql" $t ").reduce(_ |+| _)
      val query: QueryText =
        sparql"""
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
            SELECT ?term (COUNT(DISTINCT ?profile) AS ?count)
            FROM $KBMainGraph
            WHERE {
              VALUES ?term { $values }
              ?profile ^$has_phenotypic_profile/$rdfsIsDefinedBy $VTO .
              GRAPH $KBClosureGraph {
                ?profile $rdfType ?term .
              }
            }
            GROUP BY ?term
          """
      App.executeSPARQLQueryString(query.text, qs =>
        IRI.create(qs.getResource("term").getURI) ->
          qs.getLiteral("count").getInt).map(_.toMap)
    case _ => Future.successful(Map.empty[IRI, Int])
  }
}

/** Table of occurrence counts keyed by term IRI. */
type TermFrequencyTable = Map[IRI, Int]

object TermFrequencyTable {

  /**
   * Renders a frequency table as `text/csv`: one `term,count` line per entry, ordered by
   * the string form of the term IRI, joined with newlines (no header row, no trailing
   * newline).
   */
  implicit val TermFrequencyTableCSV: ToEntityMarshaller[TermFrequencyTable] =
    Marshaller.stringMarshaller(MediaTypes.`text/csv`).compose { table =>
      val rows = table.toSeq
        .sortBy { case (term, _) => term.toString }
        .map { case (term, count) => s"$term,$count" }
      rows.mkString("\n")
    }

}

}

case class SimilarityMatch(corpusProfile: MinimalTerm, medianScore: Double, expectScore: Double) extends JSONResultItem {
Expand Down
52 changes: 52 additions & 0 deletions swagger/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,58 @@ paths:
http://purl.obolibrary.org/obo/UBERON_0000061,1,1,1
http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FBFO_0000051%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0000475%3E,1,1,1
http://purl.obolibrary.org/obo/UBERON_0010314,0,0,1
/similarity/frequency:
get:
tags:
- Semantic similarity
summary: Get frequency score for ontology terms
description: Get frequency score (subsumed items) for ontology terms according to some identified corpus.
produces:
- text/csv
parameters:
- name: terms
in: query
description: JSON array of term IRIs, e.g. `["http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E"]`
required: true
type: string
- name: corpus_graph
in: query
description: IRI of corpus defined in KB, e.g. `http://kb.phenoscape.org/sim/taxa`
required: true
type: string
responses:
200:
description: 'Term frequency table: first column is term IRI, second column is occurrence count.'
examples:
text/csv: |
http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E,237
post:
tags:
- Semantic similarity
summary: Get frequency score for ontology terms
description: Get frequency score (subsumed items) for ontology terms according to some identified corpus.
consumes:
- application/x-www-form-urlencoded
produces:
- text/csv
parameters:
- name: terms
in: formData
description: JSON array of term IRIs, e.g. `["http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E"]`
required: true
type: string
- name: corpus_graph
in: formData
description: IRI of corpus defined in KB, e.g. `http://kb.phenoscape.org/sim/taxa`
required: true
type: string
format: IRI
responses:
200:
description: 'Term frequency table: first column is term IRI, second column is occurrence count.'
examples:
text/csv: |
http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E,237
/entity/search:
get:
tags:
Expand Down