phenoscape · balhoff · Jun 18, 2019 · Jun 18, 2019 · Jun 18, 2019
diff --git a/src/main/scala/org/phenoscape/kb/Main.scala b/src/main/scala/org/phenoscape/kb/Main.scala
@@ -328,6 +328,27 @@ object Main extends HttpApp with App {
                       }
                     }
                   }
+              } ~
+              path("frequency") { // term frequency lookup; the same handler logic is exposed via GET (query parameters) and POST (form fields)
+                get {
+                  // FIXME: not sure an IRI is the best way to identify the corpus, particularly when scores are not stored ahead of time in a graph
+                  parameters('terms.as[Seq[String]], 'corpus_graph.as[IRI]) { (iriStrings, corpusIRI) =>
+                    complete {
+                      import Similarity.TermFrequencyTable.TermFrequencyTableCSV // marshaller used to render the resulting table
+                      val iris = iriStrings.map(IRI.create).toSet // each string becomes an IRI; duplicates collapse in the Set
+                      Similarity.frequency(iris, corpusIRI)
+                    }
+                  }
+                } ~
+                  post { // mirrors the GET branch, but reads `terms` and `corpus_graph` from form fields
+                    formFields('terms.as[Seq[String]], 'corpus_graph.as[IRI]) { (iriStrings, corpusIRI) =>
+                      complete {
+                        import Similarity.TermFrequencyTable.TermFrequencyTableCSV // marshaller used to render the resulting table
+                        val iris = iriStrings.map(IRI.create).toSet // each string becomes an IRI; duplicates collapse in the Set
+                        Similarity.frequency(iris, corpusIRI)
+                      }
+                    }
+                  }
               }
           } ~
           pathPrefix("characterstate") {

diff --git a/src/main/scala/org/phenoscape/kb/Similarity.scala b/src/main/scala/org/phenoscape/kb/Similarity.scala
@@ -238,7 +238,6 @@ object Similarity {
     }
 
 
-
   private def classSubsumers(iri: IRI): Future[Set[IRI]] = {
     val query: QueryText =
       sparql"""
@@ -270,6 +269,42 @@ object Similarity {
     App.executeSPARQLQueryString(query.text, qs => IRI.create(qs.getResource("subsumer").getURI)).map(_.toSet)
   }
 
+  /** Counts, for each given term, the distinct `?profile` resources the SPARQL query below types with that term. */
+  def frequency(terms: Set[IRI], corpus: IRI): Future[TermFrequencyTable] = {
+    import scalaz.Scalaz._
+    // Only the taxa corpus is handled; any other corpus IRI produces an empty table.
+    if (TaxaCorpus == corpus) {
+      val termValues = terms.map(term => sparql" $term ").reduceOption(_ |+| _).getOrElse(sparql"")
+      val countQuery: QueryText =
+        sparql"""
+              PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+              SELECT ?term (COUNT(DISTINCT ?profile) AS ?count)
+              FROM $KBMainGraph
+              WHERE {
+                VALUES ?term { $termValues }
+                ?profile ^$has_phenotypic_profile/$rdfsIsDefinedBy $VTO .
+                GRAPH $KBClosureGraph {
+                  ?profile $rdfType ?term .
+                }
+              }
+              GROUP BY ?term
+            """
+      val rows = App.executeSPARQLQueryString(countQuery.text, solution =>
+        IRI.create(solution.getResource("term").getURI) -> solution.getLiteral("count").getInt)
+      rows.map(_.toMap)
+    } else Future.successful(Map.empty)
+  }
+
+  /** Table associating each term IRI with its count, as produced by `frequency`. */
+  type TermFrequencyTable = Map[IRI, Int]
+
+  object TermFrequencyTable {
+    /** Renders a table as `text/csv`: one `term,count` line per entry, ordered by the term's string form. */
+    implicit val TermFrequencyTableCSV: ToEntityMarshaller[TermFrequencyTable] = Marshaller.stringMarshaller(MediaTypes.`text/csv`).compose { frequencies =>
+      frequencies.toSeq.sortBy { case (term, _) => term.toString }.map { case (term, count) => s"$term,$count" }.mkString("\n")
+    }
+  }
+
 }
 
 case class SimilarityMatch(corpusProfile: MinimalTerm, medianScore: Double, expectScore: Double) extends JSONResultItem {

diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml
@@ -864,6 +864,58 @@ paths:
               http://purl.obolibrary.org/obo/UBERON_0000061,1,1,1
               http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FBFO_0000051%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0000475%3E,1,1,1
               http://purl.obolibrary.org/obo/UBERON_0010314,0,0,1
+  /similarity/frequency:
+    get:
+      tags:
+        - Semantic similarity
+      summary: Get frequency score for ontology terms
+      description: Get frequency score (subsumed items) for ontology terms according to some identified corpus.
+      produces:
+        - text/csv
+      parameters:
+        - name: terms
+          in: query
+          description: JSON array of term IRIs, e.g. `["http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E"]`
+          required: true
+          type: string
+        - name: corpus_graph
+          in: query
+          description: IRI of corpus defined in KB, e.g. `http://kb.phenoscape.org/sim/taxa`
+          required: true
+          type: string
+      responses:
+        200:
+          description: 'Term frequency table: first column is term IRI, second column is occurrence count.'
+          examples:
+            text/csv: |
+              http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E,237
+    post:
+      tags:
+        - Semantic similarity
+      summary: Get frequency score for ontology terms
+      description: Get frequency score (subsumed items) for ontology terms according to some identified corpus.
+      consumes:
+        - application/x-www-form-urlencoded
+      produces:
+        - text/csv
+      parameters:
+        - name: terms
+          in: formData
+          description: JSON array of term IRIs, e.g. `["http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E"]`
+          required: true
+          type: string
+        - name: corpus_graph
+          in: formData
+          description: IRI of corpus defined in KB, e.g. `http://kb.phenoscape.org/sim/taxa`
+          required: true
+          type: string
+          format: IRI
+      responses:
+        200:
+          description: 'Term frequency table: first column is term IRI, second column is occurrence count.'
+          examples:
+            text/csv: |
+              http://purl.org/phenoscape/expression?value=%3Chttp%3A%2F%2Fpurl.org%2Fphenoscape%2Fvocab.owl%23phenotype_of%3E+some+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0008897%3E,237
   /entity/search:
     get:
       tags: