Skip to content

Commit 8f79ec6

Browse files
committed
Add modality log2bf to study object
1 parent 77e19d0 commit 8f79ec6

File tree

1 file changed

+50
-2
lines changed

1 file changed

+50
-2
lines changed

flotilla/data_model/study.py

+50-2
Original file line numberDiff line numberDiff line change
@@ -1012,7 +1012,7 @@ def plot_classifier(self, trait, sample_subset=None,
10121012
**kwargs)
10131013

10141014
def modality_assignments(self, sample_subset=None, feature_subset=None,
1015-
expression_thresh=-np.inf, min_samples=10):
1015+
expression_thresh=-np.inf, min_samples=20):
10161016
"""Get modality assignments of splicing data
10171017
10181018
Parameters
@@ -1029,7 +1029,8 @@ def modality_assignments(self, sample_subset=None, feature_subset=None,
10291029
Minimum expression value, of the original input. E.g. if the
10301030
original input is already log-transformed, then this threshold is
10311031
on the log values.
1032-
1032+
min_samples : int, optional
1033+
Minimum number of samples per event to calculate a modality
10331034
Returns
10341035
-------
10351036
modalities : pandas.DataFrame
@@ -1053,6 +1054,51 @@ def modality_assignments(self, sample_subset=None, feature_subset=None,
10531054
sample_ids, feature_ids, data=data,
10541055
groupby=self.sample_id_to_phenotype, min_samples=min_samples)
10551056

1057+
def modality_log2bf(self, sample_subset=None, feature_subset=None,
                    expression_thresh=-np.inf, min_samples=20):
    """Get modality log2bf values of splicing data

    Selects the splicing data to use (optionally filtered on expression)
    and delegates to ``self.splicing.modality_log2bf``, grouping samples
    by ``self.sample_id_to_phenotype``.

    NOTE(review): "log2bf" presumably means log2 Bayes factors of the
    modality model fits -- confirm against ``splicing.modality_log2bf``.

    Parameters
    ----------
    sample_subset : str or None, optional
        Which subset of the samples to use, based on some phenotype
        column in the experiment design data. If None, all samples are
        used.
    feature_subset : str or None, optional
        Which subset of the features to use, based on some feature type
        in the expression data (e.g. "variant"). If None, all features
        are used.
        NOTE(review): only used when no expression filtering happens;
        in the thresholded branch it is not passed on -- confirm that
        this is intended.
    expression_thresh : float, optional
        Minimum expression value, of the original input. E.g. if the
        original input is already log-transformed, then this threshold is
        on the log values. Filtering is only performed when the threshold
        is greater than the smallest value in the expression data.
    min_samples : int, optional
        Minimum number of samples per event to calculate a modality

    Returns
    -------
    log2bf
        Whatever ``self.splicing.modality_log2bf`` returns for the
        selected data. NOTE(review): presumably a pandas.DataFrame of
        per-phenotype, per-event values -- confirm.
    """
    # ``and`` short-circuits, so the (potentially expensive) global minimum
    # of the expression matrix is only computed when a finite threshold was
    # actually requested.
    filter_on_expression = (
        expression_thresh > -np.inf
        and expression_thresh > self.expression.data.min().min())

    if filter_on_expression:
        # Pass pre-filtered splicing data; ids are left as None because the
        # data itself already reflects the sample subset.
        data = self.filter_splicing_on_expression(
            expression_thresh=expression_thresh,
            sample_subset=sample_subset)
        sample_ids = None
        feature_ids = None
    else:
        # No filtering needed: pass ids and let the splicing object select
        # from its own data.
        sample_ids = self.sample_subset_to_sample_ids(sample_subset)
        feature_ids = self.feature_subset_to_feature_ids(
            'splicing', feature_subset, rename=False)
        data = None

    return self.splicing.modality_log2bf(
        sample_ids, feature_ids, data=data,
        groupby=self.sample_id_to_phenotype, min_samples=min_samples)
1100+
1101+
10561102
def modality_counts(self, sample_subset=None, feature_subset=None,
10571103
expression_thresh=-np.inf, min_samples=10):
10581104
"""Get number of splicing events in modality categories
@@ -1071,6 +1117,8 @@ def modality_counts(self, sample_subset=None, feature_subset=None,
10711117
Minimum expression value, of the original input. E.g. if the
10721118
original input is already log-transformed, then this threshold is
10731119
on the log values.
1120+
min_samples : int, optional
1121+
Minimum number of samples per event to calculate a modality
10741122
10751123
Returns
10761124
-------

0 commit comments

Comments
 (0)