This repository has been archived by the owner on Jan 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcsr.py
157 lines (128 loc) · 5.31 KB
/
csr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from datetime import date
from typing import Sequence, Optional, Union, Dict, List, Any
from pydantic import BaseModel, validator, Field
from csr.entity_validation import validate_entity_data
from csr.exceptions import DataException
class Individual(BaseModel):
    """
    Individual entity.

    ``individual_id`` is the only required attribute and must be a non-empty
    string. Every other attribute is optional and defaults to ``None``.
    """
    # ``identity=True`` is an extra Field keyword; presumably it is read by the
    # entity validation code to locate the id field -- TODO confirm.
    individual_id: str = Field(..., min_length=1, identity=True)

    # The ``None`` defaults are spelled out instead of relying on the implicit
    # default pydantic v1 gives to annotation-only Optional fields, so these
    # fields stay optional whichever pydantic major version is installed.
    taxonomy: Optional[str] = None
    gender: Optional[str] = None
    birth_date: Optional[date] = None
    death_date: Optional[date] = None
    ic_type: Optional[str] = None
    ic_version: Optional[float] = None
    ic_given_date: Optional[date] = None
    ic_withdrawn_date: Optional[date] = None
    report_her_susc: Optional[str] = None
    report_inc_findings: Optional[str] = None
    diagnosis_count: Optional[int] = None
    age_first_diagnosis: Optional[int] = None
class Diagnosis(BaseModel):
    """
    Diagnosis entity.

    Requires a non-empty ``diagnosis_id`` and a non-empty ``individual_id``.
    The remaining attributes are optional and default to ``None``.
    """
    # ``identity=True`` / ``references=...`` are extra Field keywords;
    # presumably used by the entity validation code for id and reference
    # checks -- TODO confirm against csr.entity_validation.
    diagnosis_id: str = Field(..., min_length=1, identity=True)
    individual_id: str = Field(..., min_length=1, references='Individual')

    # Explicit ``None`` defaults keep these fields optional independent of the
    # pydantic version's handling of annotation-only Optional fields.
    tumor_type: Optional[str] = None
    topography: Optional[str] = None
    treatment_protocol: Optional[str] = None
    tumor_stage: Optional[str] = None
    diagnosis_date: Optional[date] = None
    diagnosis_center: Optional[str] = None
class Biosource(BaseModel):
    """
    Biosource entity.

    Requires a non-empty ``biosource_id`` and ``individual_id``. The optional
    ``src_biosource_id`` must not be equal to the biosource's own id.
    """
    # ``identity=True`` / ``references=...`` are extra Field keywords;
    # presumably used by the entity validation code -- TODO confirm.
    biosource_id: str = Field(..., min_length=1, identity=True)
    biosource_dedicated: Optional[str] = None
    individual_id: str = Field(..., min_length=1, references='Individual')
    diagnosis_id: Optional[str] = Field(None, min_length=1, references='Diagnosis')
    src_biosource_id: Optional[str] = Field(None, min_length=1, references='Biosource')
    # Explicit ``None`` defaults keep these fields optional independent of the
    # pydantic version's handling of annotation-only Optional fields.
    tissue: Optional[str] = None
    biosource_date: Optional[date] = None
    disease_status: Optional[str] = None
    tumor_percentage: Optional[int] = None
    biosource_status: Optional[str] = None

    @validator('src_biosource_id')
    def check_self_reference(cls, src_biosource_id, values):
        """
        Reject a biosource whose source biosource is itself.

        :param src_biosource_id: the value of the field being validated
        :param values: the previously validated fields of this model
        :return: the unchanged ``src_biosource_id``
        :raises DataException: if the value equals ``biosource_id``
        """
        # ``biosource_id`` is absent from ``values`` when its own validation
        # failed; ``.get`` avoids turning that into an unhandled KeyError.
        if src_biosource_id == values.get('biosource_id'):
            raise DataException('Biosource cannot be derived from itself')
        return src_biosource_id
class Biomaterial(BaseModel):
    """
    Biomaterial entity.

    Requires a non-empty ``biomaterial_id`` and ``src_biosource_id``. The
    optional ``src_biomaterial_id`` must not be equal to the biomaterial's own
    id, and ``library_strategy`` must be compatible with ``type``.
    """
    # ``identity=True`` / ``references=...`` are extra Field keywords;
    # presumably used by the entity validation code -- TODO confirm.
    biomaterial_id: str = Field(..., min_length=1, identity=True)
    src_biosource_id: str = Field(..., min_length=1, references='Biosource')
    src_biomaterial_id: Optional[str] = Field(None, min_length=1, references='Biomaterial')
    # Explicit ``None`` defaults keep these fields optional independent of the
    # pydantic version's handling of annotation-only Optional fields.
    biomaterial_date: Optional[date] = None
    type: Optional[str] = None
    library_strategy: Optional[List[str]] = None
    analysis_type: Optional[List[str]] = None

    @validator('src_biomaterial_id')
    def check_self_reference(cls, src_biomaterial_id, values):
        """
        Reject a biomaterial whose source biomaterial is itself.

        :param src_biomaterial_id: the value of the field being validated
        :param values: the previously validated fields of this model
        :return: the unchanged ``src_biomaterial_id``
        :raises DataException: if the value equals ``biomaterial_id``
        """
        # ``biomaterial_id`` is absent from ``values`` when its own validation
        # failed; ``.get`` avoids turning that into an unhandled KeyError.
        if src_biomaterial_id == values.get('biomaterial_id'):
            raise DataException('Biomaterial cannot be derived from itself')
        return src_biomaterial_id

    @validator('library_strategy')
    def validate_molecule_type_agrees_with_library_strategy(cls, library_strategy, values):
        """
        Check that the library strategies are allowed for the molecule type.

        :param library_strategy: the list of library strategies being validated
        :param values: the previously validated fields of this model
        :return: the unchanged ``library_strategy``
        :raises DataException: for RNA-Seq combined with type DNA, or for
            WXS, WGS or DNA-meth_array combined with type RNA
        """
        molecule_type = values.get('type')
        if library_strategy is None or molecule_type is None:
            return library_strategy
        # Strategies that are not allowed per molecule type, in the order in
        # which they are checked and reported.
        forbidden_strategies = {
            'DNA': ('RNA-Seq',),
            'RNA': ('WXS', 'WGS', 'DNA-meth_array'),
        }
        for strategy in forbidden_strategies.get(molecule_type, ()):
            if strategy in library_strategy:
                raise DataException(
                    f'Not allowed {strategy} library strategy for molecule type: {molecule_type}')
        return library_strategy
class Study(BaseModel):
    """
    Study.

    Requires a non-empty ``study_id``; the other attributes are optional and
    default to ``None``.
    """
    # ``identity=True`` is an extra Field keyword; presumably it is read by the
    # entity validation code to locate the id field -- TODO confirm.
    study_id: str = Field(..., min_length=1, identity=True)

    # Explicit ``None`` defaults keep these fields optional independent of the
    # pydantic version's handling of annotation-only Optional fields.
    acronym: Optional[str] = None
    title: Optional[str] = None
    datadictionary: Optional[str] = None
class IndividualStudy(BaseModel):
    """
    Mapping between a study and an individual.

    All four attributes are required. The identity field and both reference
    fields must be non-empty strings.
    """
    # Identity of the mapping record. NOTE(review): the name suggests it is
    # composed of study_id and individual_study_id -- confirm with the loader.
    study_id_individual_study_id: str = Field(..., min_length=1, identity=True)

    # Required, but declared without a minimum length constraint.
    individual_study_id: str

    # ``references=...`` is an extra Field keyword naming the referenced
    # entity class; presumably checked by the entity validation code.
    individual_id: str = Field(..., min_length=1, references='Individual')
    study_id: str = Field(..., min_length=1, references='Study')
class Radiology(BaseModel):
    """
    Radiology entity: contains metadata about the radiology images.

    ``radiology_id``, ``examination_date``, ``image_type``, ``individual_id``
    and ``body_part`` are required; ``field_strength`` is optional.
    """
    # ``identity=True`` / ``references=...`` are extra Field keywords;
    # presumably used by the entity validation code -- TODO confirm.
    radiology_id: str = Field(..., min_length=1, identity=True)
    examination_date: date
    image_type: str
    # Explicit ``None`` default keeps this field optional independent of the
    # pydantic version's handling of annotation-only Optional fields.
    field_strength: Optional[str] = None
    individual_id: str = Field(..., min_length=1, references='Individual')
    # NOTE(review): Optional type combined with ``...`` as default, whereas
    # Biosource.diagnosis_id uses ``None`` as default -- confirm that the
    # value is really meant to be mandatory input here.
    diagnosis_id: Optional[str] = Field(..., min_length=1, references='Diagnosis')
    body_part: str
# The entity types that may appear in the central subject registry.
SubjectEntity = Union[Individual, Diagnosis, Biosource, Biomaterial, Radiology]


class CentralSubjectRegistry(BaseModel):
    """
    Central subject registry.

    Holds sequences of entity records in a dictionary with string keys.
    """
    entity_data: Dict[str, Sequence[Any]]

    @staticmethod
    def create(entity_data: Dict[str, Sequence[Any]]) -> 'CentralSubjectRegistry':
        """
        Validate the entity data and wrap it in a registry object.

        :param entity_data: dictionary from string keys to sequences of records
        :return: a registry containing the given entity data
        """
        # The member types of the SubjectEntity union are passed to the
        # validation function together with the data.
        subject_entity_types = list(SubjectEntity.__args__)
        validate_entity_data(entity_data, subject_entity_types)
        registry = CentralSubjectRegistry(entity_data=entity_data)
        return registry
# The entity types that may appear in the study registry.
StudyEntity = Union[Study, IndividualStudy]


class StudyRegistry(BaseModel):
    """
    Study registry.

    Holds sequences of entity records in a dictionary with string keys.
    """
    entity_data: Dict[str, Sequence[Any]]

    @staticmethod
    def create(entity_data: Dict[str, Sequence[Any]]) -> 'StudyRegistry':
        """
        Validate the entity data and wrap it in a registry object.

        :param entity_data: dictionary from string keys to sequences of records
        :return: a registry containing the given entity data
        """
        # The member types of the StudyEntity union are passed to the
        # validation function together with the data.
        study_entity_types = list(StudyEntity.__args__)
        validate_entity_data(entity_data, study_entity_types)
        registry = StudyRegistry(entity_data=entity_data)
        return registry