Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PRIDE package v1.0.3 #62

Merged
merged 1 commit into from
Nov 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
349 changes: 349 additions & 0 deletions StagingArea/pride/pride@1.0.3.fsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,349 @@
let [<Literal>]PACKAGE_METADATA = """(*
---
Name: pride
Summary: Validates if the ARC contains the necessary metadata to be publishable via PRIDE.
Description: |
Validates if the ARC contains the necessary metadata to be publishable via PRIDE.
The following metadata is required:
- Investigation has title and description
- Investigation has Keywords comment in correct format
- All persons in Investigation Contacts must have a first name, last name, affiliation and valid email
- Study has protocol, tissue & species in correct format
- Assay has technology type, instrument model, and fixed and/or variable modification in correct format
MajorVersion: 1
MinorVersion: 0
PatchVersion: 3
Publish: true
Authors:
- FullName: Oliver Maus
Email: maus@nfdi4plants.org
Affiliation: DataPLANT
- FullName: Christopher Lux
Email: lux@csbiology.de
Affiliation: RPTU Kaiserslautern
AffiliationLink: http://rptu.de/startseite
Tags:
- Name: validation
- Name: pride
- Name: proteomics
ReleaseNotes: |
- process graph tokens correctly resolved
---
*)"""


#r "nuget: ARCExpect, 4.0.1"


open ControlledVocabulary
open Expecto
open ARCExpect
open ARCTokenization
open ARCTokenization.StructuralOntology
open System.IO


// Input:
let arcDir = Directory.GetCurrentDirectory()


// Values:
let absoluteDirectoryPaths = FileSystem.parseARCFileSystem arcDir

let investigationMetadata =
absoluteDirectoryPaths
|> Investigation.parseMetadataSheetsFromTokens() arcDir
|> List.concat

let studyMetadata =
absoluteDirectoryPaths
|> Study.parseMetadataSheetsFromTokens() arcDir
|> List.concat

let assayMetadata =
absoluteDirectoryPaths
|> Assay.parseMetadataSheetsFromTokens() arcDir
|> List.concat

let studyProcessGraphTokens =
try
absoluteDirectoryPaths
|> Study.parseProcessGraphColumnsFromTokens arcDir
|> Seq.collect Map.values
|> List.concat
with
| _ -> List.empty

let assayProcessGraphTokens =
try
absoluteDirectoryPaths
|> Assay.parseProcessGraphColumnsFromTokens arcDir
|> Seq.collect Map.values
|> List.concat
with
| _ -> List.empty

let organismTokens =
studyProcessGraphTokens
|> List.tryFind (fun cvpList -> cvpList.Head |> Param.getValueAsTerm = (CvTerm.create("OBI:0100026","organism","OBI")))
|> Option.defaultValue []

let tissueTokens =
studyProcessGraphTokens
|> List.tryFind (fun cvpList -> cvpList.Head |> Param.getValueAsTerm = (CvTerm.create("NCIT:C12801","Tissue","NCIT")))
|> Option.defaultValue []

let instrumentTokens =
assayProcessGraphTokens
|> List.tryFind (fun cvpList -> cvpList.Head |> Param.getValueAsTerm = (CvTerm.create("MS:1000031","instrument model","MS")))
|> Option.defaultValue []

let modTokens =
assayProcessGraphTokens
|> List.filter (
fun cvpList ->
cvpList.Head |> Param.getValueAsTerm = (CvTerm.create("MS:1003021","Fixed modification","MS")) ||
cvpList.Head |> Param.getValueAsTerm = (CvTerm.create("MS:1003022","Variable modification","MS"))
)
|> List.concat

let techTypeName = CvTerm.create("ASSMSO:00000011", "Assay Technology Type", "ASSMSO")
let techTypeTAN = CvTerm.create("ASSMSO:00000013", "Assay Technology Type Term Accession Number", "ASSMSO")
let techTypeTSR = CvTerm.create("ASSMSO:00000015", "Assay Technology Type Term Source REF", "ASSMSO")


// Helper functions (to deposit in ARCExpect later):

open System.Text


let characterLimit (lowerLimit : int option) (upperLimit : int option) =
match lowerLimit, upperLimit with
| None, None -> System.Text.RegularExpressions.Regex(@"^.{0,}$")
| Some ll, None -> System.Text.RegularExpressions.Regex($"^.{{{ll},}}$")
| None, Some ul -> System.Text.RegularExpressions.Regex($"^.{{0,{ul}}}$")
| Some ll, Some ul -> System.Text.RegularExpressions.Regex($"^.{{{ll},{ul}}}$")


type ErrorMessage with

static member ofIParamCollection error iParamCollection =

let iParam = Seq.head iParamCollection

let str = new StringBuilder()
str.AppendFormat("['{0}', ..] {1}\n", Param.getCvName iParam, error) |> ignore

match Param.tryGetValueOfCvParamAttr "FilePath" iParam with
| Some path ->
str.AppendFormat(" > filePath '{0}'\n", path) |> ignore
| None -> ()

match Param.tryGetValueOfCvParamAttr "Worksheet" iParam with
| Some sheet ->
str.AppendFormat(" > sheet '{0}'", sheet) |> ignore
| None -> ()

match Param.tryGetValueOfCvParamAttr "Row" iParam with
| Some row ->
str.AppendFormat(" > row '{0}'", row) |> ignore
| None -> ()

match Param.tryGetValueOfCvParamAttr "Column" iParam with
| Some column ->
str.AppendFormat(" > column '{0}'", column) |> ignore
| None -> ()

match Param.tryGetValueOfCvParamAttr "Line" iParam with
| Some line ->
str.AppendFormat(" > line '{0}'", line) |> ignore
| None -> ()

match Param.tryGetValueOfCvParamAttr "Position" iParam with
| Some position ->
str.AppendFormat(" > position '{0}'", position) |> ignore
| None -> ()
str.ToString()


type Validate.ParamCollection with

static member AllTermsSatisfyPredicate (projection : #IParam -> bool) (paramCollection : #seq<#IParam>) =
match Seq.forall projection paramCollection with
| true -> ()
| false ->
ErrorMessage.ofIParamCollection $"does not satisfy the requirements" paramCollection
|> Expecto.Tests.failtestNoStackf "%s"


// Validation Cases:
let investigationCases =
testList INVMSO.``Investigation Metadata``.INVESTIGATION.key.Name [
// Investigation has title
ARCExpect.validationCase (TestID.Name INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Title``.Name) {
investigationMetadata
|> Validate.ParamCollection.ContainsNonKeyParamWithTerm
INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Title``
}

// Investigation has description
ARCExpect.validationCase (TestID.Name INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Description``.Name) {
investigationMetadata
|> Validate.ParamCollection.ContainsNonKeyParamWithTerm
INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Description``
}

// Investigation has contacts with name, last name, affiliation and email
// Investigation Person First Name
ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``.Name} exists") {
investigationMetadata
|> Validate.ParamCollection.ContainsNonKeyParamWithTerm INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``
}
ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``.Name} is not empty") {
investigationMetadata
|> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``)
|> Seq.iter Validate.Param.ValueIsNotEmpty
}

// Investigation Person Last Name
ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``.Name} exists") {
investigationMetadata
|> Validate.ParamCollection.ContainsNonKeyParamWithTerm INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``
}
ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``.Name} is not empty") {
investigationMetadata
|> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``)
|> Seq.iter Validate.Param.ValueIsNotEmpty
}

// Investigation Person Affiliation
ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Affiliation``.Name} exists") {
investigationMetadata
|> Validate.ParamCollection.ContainsNonKeyParamWithTerm INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Affiliation``
}
ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Affiliation``.Name} is not empty") {
investigationMetadata
|> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Affiliation``)
|> Seq.filter (Param.getValueAsString >> (<>) "Metadata Section Key")
|> Seq.iter Validate.Param.ValueIsNotEmpty
}

// Investigation Person Email
ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Email``.Name} exists") {
investigationMetadata
|> Validate.ParamCollection.ContainsNonKeyParamWithTerm INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Email``
}
ARCExpect.validationCase (TestID.Name INVMSO.``Investigation Metadata``. ``INVESTIGATION CONTACTS``.``Investigation Person Email``.Name) {
investigationMetadata
|> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Email``)
|> Seq.filter (Param.getValueAsString >> (<>) "Metadata Section Key")
|> Seq.iter (Validate.Param.ValueMatchesRegex StringValidationPattern.email)
}
]

let studyCases =
testList STDMSO.``Study Metadata``.STUDY.key.Name [
// Study has protocol in correct format
ARCExpect.validationCase (TestID.Name STDMSO.``Study Metadata``.``STUDY PROTOCOLS``.key.Name) {
studyMetadata
|> Validate.ParamCollection.ContainsParamWithTerm STDMSO.``Study Metadata``.``STUDY PROTOCOLS``.key
}

// Study protocol is in correct format
ARCExpect.validationCase (TestID.Name "STUDY PROTOCOLS description") {
studyMetadata|> Seq.filter (fun iparam -> Param.getTerm iparam = STDMSO.``Study Metadata``.``STUDY PROTOCOLS``.``Study Protocol Description``)
|> Seq.filter (Param.getValueAsString >> (<>) "Metadata Section Key")
|> Seq.iter (Validate.Param.ValueMatchesRegex (characterLimit (Some 50) (Some 500)))
}

// Study has tissue header in process graph
ARCExpect.validationCase (TestID.Name "Tissue") {
tissueTokens
|> Validate.ParamCollection.SatisfiesPredicate (
fun iparams ->
iparams
|> List.exists (fun iparam -> iparam.Value = ParamValue.CvValue (CvTerm.create("NCIT:C12801","Tissue","NCIT")))
)
}

// Study has tissue values as terms (CvValues)
ARCExpect.validationCase (TestID.Name "Tissue terms") {
tissueTokens
|> Validate.ParamCollection.AllTermsSatisfyPredicate (fun ip -> match ip.Value with CvValue _ -> true | _ -> false)
}

// Study has species in correct format
ARCExpect.validationCase (TestID.Name "Organism") {
organismTokens
|> Validate.ParamCollection.SatisfiesPredicate (
fun iparams ->
iparams
|> List.exists (fun iparam -> iparam.Value = ParamValue.CvValue (CvTerm.create("OBI:0100026","organism","OBI")))
)
}

// Study has species values as terms (CvValues)
ARCExpect.validationCase (TestID.Name "Organism terms") {
tissueTokens
|> Validate.ParamCollection.AllTermsSatisfyPredicate (fun ip -> match ip.Value with CvValue _ -> true | _ -> false)
}
]

let assayCases =
testList ASSMSO.``Assay Metadata``.ASSAY.key.Name [

// Assay has technology type in correct format
ARCExpect.validationCase (TestID.Name techTypeTAN.Name) {
assayMetadata
|> Validate.ParamCollection.ContainsParamWithTerm techTypeTAN
}
ARCExpect.validationCase (TestID.Name techTypeName.Name) {
assayMetadata
|> Validate.ParamCollection.ContainsParamWithTerm techTypeName
}
ARCExpect.validationCase (TestID.Name techTypeTSR.Name) {
assayMetadata
|> Validate.ParamCollection.ContainsParamWithTerm techTypeTSR
}

// Assay has instrument model
ARCExpect.validationCase (TestID.Name "instrument model") {
instrumentTokens
|> Validate.ParamCollection.SatisfiesPredicate (
fun iparams ->
iparams
|> List.exists (fun iparam -> iparam.Value = ParamValue.CvValue (CvTerm.create("MS:1000031","instrument model","MS")))
)
}

// Assay has instrument model in correct format
ARCExpect.validationCase (TestID.Name "instrument model terms") {
instrumentTokens
|> Validate.ParamCollection.AllTermsSatisfyPredicate (fun ip -> match ip.Value with CvValue _ -> true | _ -> false)
}

// Assay has fixed or variable modification in correct format
ARCExpect.validationCase (TestID.Name "modification") {
modTokens
|> Validate.ParamCollection.SatisfiesPredicate (
fun iparams ->
iparams
|> List.exists (
fun iparam ->
iparam.Value = ParamValue.CvValue (CvTerm.create("MS:1003021","Fixed modification","MS")) ||
iparam.Value = ParamValue.CvValue (CvTerm.create("MS:1003022","Variable modification","MS"))
)
)
}
]


// Execution:

Setup.ValidationPackage(
metadata = Setup.Metadata(PACKAGE_METADATA),
CriticalValidationCases = [investigationCases; studyCases; assayCases]
)
|> Execute.ValidationPipeline(
basePath = arcDir
)