diff --git a/bean/annotate/translate_allele.py b/bean/annotate/translate_allele.py index 6310717..42953aa 100755 --- a/bean/annotate/translate_allele.py +++ b/bean/annotate/translate_allele.py @@ -137,7 +137,7 @@ def _parse_range(chr_range: str) -> Tuple[str, int, int]: return (chrom, start, end) -def _parse_description(desc_str: str): +def _parse_description(desc_str: str) -> Tuple[Tuple[str, int, int], str]: """Parse description line of fasta file to get range.""" sstr = desc_str.split(" ") strand = None @@ -153,21 +153,38 @@ def _parse_description(desc_str: str): return genome_range, strand -def get_cds_seq_pos_from_fasta(fasta_file_name: str) -> Tuple[List[str], List[int]]: +def get_cds_seq_pos_from_fasta( + fasta_file_name: str, +) -> Tuple[str, List[str], List[int], str]: """Obtain tuple of lists, first with nucleotide and second with genomic position of the nucleotide.""" exons = list(SeqIO.parse(fasta_file_name, "fasta")) translated_seq = [] genomic_pos = [] + (exon_chrom, exon_start, exon_end), strand = _parse_description( + exons[0].description + ) + if strand == "-": + exons = exons[::-1] for exon in exons: (exon_chrom, exon_start, exon_end), strand = _parse_description( exon.description ) + exon_seq = [] + exon_pos = [] for i, nt in enumerate(str(exon.seq)): if nt.islower(): continue - translated_seq.append(nt) - genomic_pos.append(exon_start + i) - return (exon_chrom, translated_seq, genomic_pos, strand) + exon_seq.append(nt) + if strand == "+": + exon_pos.append(exon_start + i) + else: + exon_pos.append(exon_end - i) + if strand == "-": + exon_seq = revcomp(exon_seq) + exon_pos = exon_pos[::-1] + translated_seq.extend(exon_seq) + genomic_pos.extend(exon_pos) + return exon_chrom, translated_seq, genomic_pos, {"+": 1, "-": -1}[strand] def _translate_single_codon( @@ -207,12 +224,10 @@ def __init__(self): self.genomic_pos: Sequence = [] @classmethod - def from_fasta(cls, fasta_file_name, gene_name, suppressMessage=True): + def from_fasta(cls, fasta_file_name, gene_name=None, suppressMessage=True): cds = cls() - # if fasta_file_name is not None: - # if not suppressMessage: - # raise ValueError("No fasta file provided as reference: using LDLR") - # fasta_file_name = os.path.dirname(be.__file__) + "/annotate/ldlr_exons.fa" + if gene_name is None: + gene_name = os.path.basename(fasta_file_name).upper().split(".FA")[0] if gene_name not in cls.gene_info_dict: chrom, translated_seq, genomic_pos, strand = get_cds_seq_pos_from_fasta( fasta_file_name @@ -252,18 +267,10 @@ def from_gene_name(cls, gene_name, ref_version: str = "GRCh38"): cds.translated_seq = deepcopy(cls.gene_info_dict[gene_name]["translated_seq"]) cds.genomic_pos = cls.gene_info_dict[gene_name]["genomic_pos"] cds.nt = cds.gene_info_dict[gene_name]["translated_seq"] # in sense strand - # print(cds.gene_name + ":" + "".join(cds.nt)) - # if cds.strand == -1: - # cds.nt = revcomp(cds.nt) cds.pos = np.array(cds.genomic_pos) cds.edited_nt = deepcopy(cds.nt) return cds - # def translate(self): - # if self.strand == -1: - # self.edited_nt = revcomp(self.edited_nt) - # self.aa = _translate(self.edited_nt, codon_map) - def _get_relative_nt_pos(self, absolute_pos): """0-based relative position""" nt_relative_pos = np.where(self.pos == absolute_pos)[0] @@ -665,16 +672,16 @@ def annotate_edit( ) edit_info["coding"] = "" edit_info.loc[edit_info.pos.map(lambda s: s.startswith("A")), "coding"] = "coding" - edit_info.loc[ - edit_info.pos.map(lambda s: not s.startswith("A")), "coding" - ] = "noncoding" + edit_info.loc[edit_info.pos.map(lambda s: not s.startswith("A")), "coding"] = ( + "noncoding" + ) if control_tag is not None: - edit_info.loc[ - edit_info.pos.map(lambda s: control_tag in s), "group" - ] = "negctrl" - edit_info.loc[ - edit_info.pos.map(lambda s: control_tag in s), "coding" - ] = "negctrl" + edit_info.loc[edit_info.pos.map(lambda s: control_tag in s), "group"] = ( + "negctrl" + ) + edit_info.loc[edit_info.pos.map(lambda s: control_tag in s), "coding"] = ( + "negctrl" + ) edit_info.loc[ (edit_info.coding == "noncoding") & (edit_info.group != "negctrl"), "int_pos" ] = edit_info.loc[ @@ -691,9 +698,9 @@ def annotate_edit( (edit_info.alt == edit_info.ref) & (edit_info.coding == "coding"), "group" ] = "syn" if splice_sites is not None: - edit_info.loc[ - edit_info.pos.isin(splice_sites.astype(str)), "group" - ] = "splicing" + edit_info.loc[edit_info.pos.isin(splice_sites.astype(str)), "group"] = ( + "splicing" + ) edit_info.loc[edit_info.int_pos < -100, "group"] = "negctrl" edit_info.loc[edit_info.int_pos < -100, "coding"] = "negctrl" return edit_info diff --git a/bean/cli/qc.py b/bean/cli/qc.py index 31ab475..8c0c736 100755 --- a/bean/cli/qc.py +++ b/bean/cli/qc.py @@ -41,8 +41,8 @@ def main(args): posctrl_col=args.posctrl_col, posctrl_val=args.posctrl_val, lfc_thres=args.lfc_thres, - replicate_label=args.replicate_label, - condition_label=args.condition_label, + replicate_label=args.replicate_col, + condition_label=args.condition_col, comp_cond1=args.lfc_cond1, comp_cond2=args.lfc_cond2, ctrl_cond=args.control_condition, @@ -50,6 +50,8 @@ def main(args): recalculate_edits=(not args.dont_recalculate_edits), base_edit_data=args.base_edit_data, remove_bad_replicates=args.remove_bad_replicates, + reporter_length=args.reporter_length, + reporter_right_flank_length=args.reporter_right_flank_length, ), kernel_name="bean_python3", ) diff --git a/bean/framework/ReporterScreen.py b/bean/framework/ReporterScreen.py index 3d4794c..f7e25fd 100755 --- a/bean/framework/ReporterScreen.py +++ b/bean/framework/ReporterScreen.py @@ -356,6 +356,8 @@ def get_edit_mat_from_uns( rel_pos_is_reporter=False, target_pos_col="target_pos", edit_count_key="edit_counts", + reporter_length: int = 32, + reporter_right_flank_length: int = 6, ): """ Get the edit matrix from `.uns[edit_count_key]` to store the result in `.layers["edits"]` @@ -374,6 +376,10 @@ def get_edit_mat_from_uns( target_base_edit = self.target_base_changes if match_target_position is None: match_target_position = not self.tiling + if "reporter_length" in self.uns: + reporter_length = self.uns["reporter_length"] + if "reproter_right_flank_length" in self.uns: + reporter_right_flank_length = self.uns["reporter_right_flank_length"] if edit_count_key not in self.uns or len(self.uns[edit_count_key]) == 0: raise ValueError( "Edit count isn't calculated. " @@ -400,7 +406,9 @@ def get_edit_mat_from_uns( drop=True ) edits["guide_start_pos"] = ( - 32 - 6 - guide_len[edits.guide_idx].reset_index(drop=True) + reporter_length + - reporter_right_flank_length + - guide_len[edits.guide_idx].reset_index(drop=True) ) if not match_target_position: edits["rel_pos"] = edits.edit.map(lambda e: e.rel_pos) diff --git a/bean/notebooks/sample_quality_report.ipynb b/bean/notebooks/sample_quality_report.ipynb index 03153c7..34c2577 100755 --- a/bean/notebooks/sample_quality_report.ipynb +++ b/bean/notebooks/sample_quality_report.ipynb @@ -60,7 +60,9 @@ "recalculate_edits = True\n", "tiling = None\n", "base_edit_data = True\n", - "remove_bad_replicates = False" + "remove_bad_replicates = False\n", + "reporter_length = 32\n", + "reporter_right_flank_length = 6" ] }, { @@ -84,6 +86,8 @@ " tiling = bdata.uns['tiling']\n", "else:\n", " raise ValueError(\"Ambiguous assignment if the screen is a tiling screen. Provide `--tiling=True` or `tiling=False`.\")\n", + "bdata.uns[\"reporter_length\"] = reporter_length\n", + "bdata.uns[\"reporter_right_flank_length\"] = reporter_right_flank_length\n", "if posctrl_col:\n", " if posctrl_col not in bdata.guides.columns:\n", " raise ValueError(f\"--posctrl-col argument '{posctrl_col}' is not present in the input ReporterScreen.guides.columns {bdata.guides.columns}. If you do not want to use positive control gRNA annotation for LFC calculation, feed --posctrl-col='' instead.\")\n", @@ -128,6 +132,18 @@ "bdata.samples" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for qc_col in [\"gini_X\", \"median_corr_X\", f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\",\"mean_editing_rate\", \"mask\"]:\n", + " if qc_col in bdata.samples:\n", + " del bdata.samples[qc_col]\n", + "n_cols_samples = len(bdata.samples.columns)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -384,25 +400,28 @@ "metadata": {}, "outputs": [], "source": [ - "bdata.samples[\"mask\"] = 1\n", + "mdata = bdata.samples.copy()\n", "# Data has positive control\n", - "bdata.samples.loc[\n", + "for col in mdata.columns.tolist():\n", + " mdata[col]=1.0\n", + "\n", + "mdata.loc[\n", " bdata.samples.median_corr_X.isnull() | (bdata.samples.median_corr_X < count_correlation_thres),\n", - " \"mask\",\n", - "] = 0\n", + " \"median_corr_X\",\n", + "] = 0.0\n", "if \"mean_editing_rate\" in bdata.samples.columns.tolist():\n", - " bdata.samples.loc[bdata.samples.mean_editing_rate < edit_rate_thres, \"mask\"] = 0\n", + " mdata.loc[bdata.samples.mean_editing_rate < edit_rate_thres, \"mean_editing_rate\"] = 0\n", "\n", - "bdata.samples.loc[\n", + "mdata.loc[\n", " bdata.samples[f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\"] < lfc_thres,\n", - " \"mask\",\n", - "] = 0\n", + " f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\",\n", + "] = 0.0\n", "if posctrl_col:\n", " print(\"filter with posctrl LFC\")\n", - " bdata.samples.loc[\n", + " mdata.loc[\n", " bdata.samples[f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\"].isnull(),\n", - " \"mask\",\n", - " ] = 0" + " f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\",\n", + " ] = 0.0\n" ] }, { @@ -411,7 +430,19 @@ "metadata": {}, "outputs": [], "source": [ - "bdata.samples.style.background_gradient(cmap=\"coolwarm_r\")" + "from matplotlib import colors\n", + "def b_g(s, cmap='coolwarm_r', low=0, high=1):\n", + " a = mdata.loc[:,s.name].copy()\n", + " if s.name not in mdata.columns.tolist()[n_cols_samples:]:\n", + " a[:] = 1.0\n", + " # rng = a.max() - a.min()\n", + " # norm = colors.Normalize(a.min() - (rng * low),\n", + " # a.max() + (rng * high))\n", + " # normed = norm(a.values)\n", + " c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(a.values)]\n", + " return ['background-color: %s' % color for color in c]\n", + "print(\"Failing QC is shown as red:\")\n", + "bdata.samples.style.apply(b_g)" ] }, { @@ -421,6 +452,10 @@ "outputs": [], "source": [ "# leave replicate with more than 1 sorting bin data\n", + "print(mdata)\n", + "print(n_cols_samples)\n", + "\n", + "bdata.samples[\"mask\"] = mdata.iloc[:,n_cols_samples:].astype(int).all(axis=1).astype(int).tolist()\n", "if remove_bad_replicates:\n", " rep_n_samples = bdata.samples.groupby(replicate_label)[\"mask\"].sum()\n", " print(rep_n_samples)\n", diff --git a/bean/preprocessing/data_class.py b/bean/preprocessing/data_class.py index 5c1f430..ee101b1 100755 --- a/bean/preprocessing/data_class.py +++ b/bean/preprocessing/data_class.py @@ -232,6 +232,8 @@ def get_size_factor(self, X: np.array): assert geom_mean_x.shape == (n_guides,) norm_count = X / geom_mean_x[:, None] size_factor = np.median(norm_count, axis=0) + if any(size_factor == 0): + size_factor = np.mean(norm_count, axis=0) assert size_factor.shape == (n_samples,) return size_factor diff --git a/bean/preprocessing/get_alpha0.py b/bean/preprocessing/get_alpha0.py index f73f454..e5fe5fb 100755 --- a/bean/preprocessing/get_alpha0.py +++ b/bean/preprocessing/get_alpha0.py @@ -100,11 +100,11 @@ def get_fitted_alpha0( a0 = ((n - 1) / (r - 1 + 1 / (1 - p)) - 1).mean(axis=0) x, y = get_valid_vals(n.log(), a0.log()) - if len(y) < 10: + if len(y) < 5: if popt is None: popt = (-1.510, 0.7861) print( - f"Cannot fit log(a0) ~ log(q): data too sparse! Using pre-fitted values [b0, b1]={popt}" + f"Cannot fit log(a0) ~ log(q): data too sparse ({len(y)} valid values)! Using pre-fitted values [b0, b1]={popt}" ) else: popt, pcov = curve_fit(linear, x, y) diff --git a/bean/preprocessing/utils.py b/bean/preprocessing/utils.py index 42fc101..4e3d290 100755 --- a/bean/preprocessing/utils.py +++ b/bean/preprocessing/utils.py @@ -26,17 +26,31 @@ def prepare_bdata(bdata: be.ReporterScreen, args, warn, prefix: str): bdata = bdata.copy() bdata.samples["replicate"] = bdata.samples[args.replicate_col].astype("category") bdata.guides = bdata.guides.loc[:, ~bdata.guides.columns.duplicated()].copy() + + # filter out 0-count gRNAs & samples + if args.selection == "sorting" or args.exclude_control_condition_for_inference: + bdata_test = bdata[ + :, bdata.samples[args.condition_col] != args.control_condition + ] + else: + bdata_test = bdata + if any(bdata_test.X.sum(axis=1) == 0): + warn( + f"Filtering out {sum(bdata_test.X.sum(axis=1) == 0)} gRNAs without any counts over all samples." + ) + bdata = bdata[bdata_test.X.sum(axis=1) > 0, :] + if any(bdata[:, bdata.samples.mask == 1].X.sum(axis=0) == 0): + raise ValueError( + f"Sample {bdata.samples.index[(bdata.samples.mask == 1) & (bdata[:,bdata.samples.mask == 1].X.sum(axis=0) == 0)]} has 0 counts. Make sure you mask that sample." + ) + if args.library_design == "variant": if bdata.guides[args.target_col].isnull().any(): raise ValueError( f"Some target column (bdata.guides[{args.target_col}]) value is null. Check your input file." ) bdata = bdata[bdata.guides[args.target_col].argsort(), :] - if any(bdata.X.sum(axis=1) > 0): - warn( - f"Filtering out {sum(bdata.X.sum(axis=1) > 0)} gRNAs without any counts over all samples." - ) - bdata = bdata[bdata.X.sum(axis=1) > 0, :] + n_no_support_targets, bdata = filter_no_info_target( bdata, condit_col=args.condition_col, diff --git a/bean/qc/parser.py b/bean/qc/parser.py index dd41f66..69c4c8e 100755 --- a/bean/qc/parser.py +++ b/bean/qc/parser.py @@ -84,7 +84,7 @@ def parse_args(parser=None): help="Specify that the guide library is tiling library without 'n guides per target' design", ) input_parser.add_argument( - "--replicate-label", + "--replicate-col", help="Label of column in `bdata.samples` that describes replicate ID.", type=str, default="replicate", @@ -96,7 +96,7 @@ def parse_args(parser=None): default=None, ) input_parser.add_argument( - "--condition-label", + "--condition-col", help="Label of column in `bdata.samples` that describes experimental condition. (sorting bin, time, etc.)", type=str, default="condition", @@ -117,11 +117,13 @@ def parse_args(parser=None): "--edit-start-pos", help="Edit start position to quantify editing rate on, 0-based inclusive.", default=2, + type=int, ) input_parser.add_argument( "--edit-end-pos", help="Edit end position to quantify editing rate on, 0-based exclusive.", default=7, + type=int, ) input_parser.add_argument( @@ -149,5 +151,16 @@ def parse_args(parser=None): type=str, default="bulk", ) - + parser.add_argument( + "--reporter-length", + type=int, + default=32, + help="Length of reporter sequence in the construct.", + ) + parser.add_argument( + "--reporter-right-flank-length", + type=int, + default=6, + help="Length of the right-flanking nucleotides of protospacer in the reporter.", + ) return parser diff --git a/bean/qc/utils.py b/bean/qc/utils.py index b56738a..7ca4b90 100755 --- a/bean/qc/utils.py +++ b/bean/qc/utils.py @@ -3,9 +3,39 @@ import pandas as pd from copy import deepcopy from bean.framework.ReporterScreen import ReporterScreen, concat +import bean as be def check_args(args): + bdata = be.read_h5ad(args.bdata_path) + if args.replicate_col not in bdata.samples.columns: + raise ValueError( + f"Specified --replicate-col `{args.replicate_col}` does not exist in ReporterScreen.samples.columns ({bdata.samples.columns}). Please check your input." + ) + if args.condition_col not in bdata.samples.columns: + raise ValueError( + f"Specified --condition-col `{args.condition_col}` does not exist in ReporterScreen.samples.columns ({bdata.samples.columns}). Please check your input." + ) + if not bdata.tiling and args.target_pos_col not in bdata.guides.columns: + raise ValueError( + f"Specified --target-pos-col `{args.target_pos_col}` does not exist in ReporterScreen.guides.columns ({bdata.guides.columns}). Please check your input." + ) + if args.posctrl_col and args.posctrl_col not in bdata.guides.columns: + raise ValueError( + f"Specified --posctrl-col `{args.posctrl_col}` does not exist in ReporterScreen.guides.columns ({bdata.guides.columns}). Please check your input." + ) + if ( + args.posctrl_col + and args.posctrl_val not in bdata.guides[args.posctrl_col].tolist() + ): + raise ValueError( + f"Specified --control-condition `{args.posctrl_val}` does not exist in ReporterScreen.guides[{args.posctrl_col}] ({bdata.guides[args.posctrl_col]}). Please check your input." + ) + if args.control_condition not in bdata.samples[args.condition_col].tolist(): + raise ValueError( + f"Specified --control-condition `{args.control_condition}` does not exist in ReporterScreen.samples[{args.condition_col}] :\n{bdata.samples[args.condition_col]}.\n Please check your input. \nFeed the condition where the editing rate would be quantified as the --control-condition argument, usually the condition with the least selection. (Closest to T0 for survival, pre-sort or bulk for sorting screens)." + ) + lfc_conds = args.lfc_conds.split(",") if not len(lfc_conds) == 2: raise ValueError( @@ -13,6 +43,14 @@ def check_args(args): ) args.lfc_cond1 = lfc_conds[0] args.lfc_cond2 = lfc_conds[1] + if args.lfc_cond1 not in bdata.samples[args.condition_col].tolist(): + raise ValueError( + f"Specified --lfc-conds `{args.lfc_cond1}` does not exist in ReporterScreen.samples[{args.condition_col}]:\n{bdata.samples[args.condition_col]}. Please check your input." + ) + if args.lfc_cond2 not in bdata.samples[args.condition_col].tolist(): + raise ValueError( + f"Specified --lfc-conds `{args.lfc_cond2}` does not exist in ReporterScreen.samples[{args.condition_col}]:\n{bdata.samples[args.condition_col]}. Please check your input." + ) if args.sample_covariates is not None: if "," in args.sample_covariates: args.sample_covariates = args.sample_covariates.split(",") diff --git a/docs/exon_fa_format.md b/docs/exon_fa_format.md index 2498b39..4902a41 100755 --- a/docs/exon_fa_format.md +++ b/docs/exon_fa_format.md @@ -1,5 +1,5 @@ # Input .fa file format for `bean-filter` -You can provide custom FASTA file with exon sequence entries. Currently only supports positive strand genes. +You can provide custom FASTA file with exon sequence entries. * Exon FASTA files can be downloaded from UCSC Genomic sequences / Table Browser: [see the instruction video](https://www.youtube.com/watch?v=T4E0Ez5Vjz8) * You can manually format as: diff --git a/setup.py b/setup.py index a747c45..5b83055 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="crispr-bean", - version="1.2.2", + version="1.2.4", python_requires=">=3.8.0", author="Jayoung Ryu", author_email="jayoung_ryu@g.harvard.edu", diff --git a/tests/data/abca1.fa b/tests/data/abca1.fa new file mode 100644 index 0000000..dbf6829 --- /dev/null +++ b/tests/data/abca1.fa @@ -0,0 +1,280 @@ +>hg38_knownGene_ENST00000374736.8_0 range=chr9:104927935-104928155 5'pad=0 3'pad=0 strand=- repeatMasking=none +gtaattgcgagcgagagtgagtggggccgggacccgcagagccgagccga +cccttctctcccgggctgcggcagggcagggcggggagctccgcgcacca +acagagccggttctcagggcgctttgctccttgttttttccccggttctg +ttttctccccttctccggaaggcttgtcaaggggtaggagaaagagacgc +aaacacaaaagtggaaaacag +>hg38_knownGene_ENST00000374736.8_1 range=chr9:104903614-104903771 5'pad=0 3'pad=0 strand=- repeatMasking=none +ttaatgaccagccacggcgtccctgctgtgagctctggccgctgccttcc +agggctcccgagccacacgctgggggtgctggctgagggaacATGGCTTG +TTGGCCTCAGCTGAGGTTGCTGCTGTGGAAGAACCTCACTTTCAGAAGAA +GACAAACA +>hg38_knownGene_ENST00000374736.8_2 range=chr9:104889102-104889195 5'pad=0 3'pad=0 strand=- repeatMasking=none +TGTCAGCTGCTGCTGGAAGTGGCCTGGCCTCTATTTATCTTCCTGATCCT +GATCTCTGTTCGGCTGAGCTACCCACCCTATGAACAACATGAAT +>hg38_knownGene_ENST00000374736.8_3 range=chr9:104884427-104884568 5'pad=0 3'pad=0 strand=- repeatMasking=none +GCCATTTTCCAAATAAAGCCATGCCCTCTGCAGGAACACTTCCTTGGGTT +CAGGGGATTATCTGTAATGCCAACAACCCCTGTTTCCGTTACCCGACTCC +TGGGGAGGCTCCCGGAGTTGTTGGAAACTTTAACAAATCCAT +>hg38_knownGene_ENST00000374736.8_4 range=chr9:104883039-104883157 5'pad=0 3'pad=0 strand=- repeatMasking=none +TGTGGCTCGCCTGTTCTCAGATGCTCGGAGGCTTCTTTTATACAGCCAGA +AAGACACCAGCATGAAGGACATGCGCAAAGTTCTGAGAACATTACAGCAG +ATCAAGAAATCCAGCTCAA +>hg38_knownGene_ENST00000374736.8_5 range=chr9:104861679-104861800 5'pad=0 3'pad=0 strand=- repeatMasking=none +ACTTGAAGCTTCAAGATTTCCTGGTGGACAATGAAACCTTCTCTGGGTTC +CTGTATCACAACCTCTCTCTCCCAAAGTCTACTGTGGACAAGATGCTGAG +GGCTGATGTCATTCTCCACAAG +>hg38_knownGene_ENST00000374736.8_6 range=chr9:104858522-104858698 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTATTTTTGCAAGGCTACCAGTTACATTTGACAAGTCTGTGCAATGGATC +AAAATCAGAAGAGATGATTCAACTTGGTGACCAAGAAGTTTCTGAGCTTT +GTGGCCTACCAAGGGAGAAACTGGCTGCAGCAGAGCGAGTACTTCGTTCC +AACATGGACATCCTGAAGCCAATCCTG +>hg38_knownGene_ENST00000374736.8_7 range=chr9:104845477-104845569 5'pad=0 3'pad=0 strand=- repeatMasking=none +AGAACACTAAACTCTACATCTCCCTTCCCGAGCAAGGAGCTGGCTGAAGC +CACAAAAACATTGCTGCATAGTCTTGGGACTCTGGCCCAGGAG +>hg38_knownGene_ENST00000374736.8_8 range=chr9:104840279-104840519 5'pad=0 3'pad=0 strand=- repeatMasking=none +CTGTTCAGCATGAGAAGCTGGAGTGACATGCGACAGGAGGTGATGTTTCT +GACCAATGTGAACAGCTCCAGCTCCTCCACCCAAATCTACCAGGCTGTGT +CTCGTATTGTCTGCGGGCATCCCGAGGGAGGGGGGCTGAAGATCAAGTCT +CTCAACTGGTATGAGGACAACAACTACAAAGCCCTCTTTGGAGGCAATGG +CACTGAGGAAGATGCTGAAACCTTCTATGACAACTCTACAA +>hg38_knownGene_ENST00000374736.8_9 range=chr9:104837428-104837567 5'pad=0 3'pad=0 strand=- repeatMasking=none +CTCCTTACTGCAATGATTTGATGAAGAATTTGGAGTCTAGTCCTCTTTCC +CGCATTATCTGGAAAGCTCTGAAGCCGCTGCTCGTTGGGAAGATCCTGTA +TACACCTGACACTCCAGCCACAAGGCAGGTCATGGCTGAG +>hg38_knownGene_ENST00000374736.8_10 range=chr9:104836980-104837096 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTGAACAAGACCTTCCAGGAACTGGCTGTGTTCCATGATCTGGAAGGCAT +GTGGGAGGAACTCAGCCCCAAGATCTGGACCTTCATGGAGAACAGCCAAG +AAATGGACCTTGTCCGG +>hg38_knownGene_ENST00000374736.8_11 range=chr9:104832574-104832771 5'pad=0 3'pad=0 strand=- repeatMasking=none +ATGCTGTTGGACAGCAGGGACAATGACCACTTTTGGGAACAGCAGTTGGA +TGGCTTAGATTGGACAGCCCAAGACATCGTGGCGTTTTTGGCCAAGCACC +CAGAGGATGTCCAGTCCAGTAATGGTTCTGTGTACACCTGGAGAGAAGCT +TTCAACGAGACTAACCAGGCAATCCGGACCATATCTCGCTTCATGGAG +>hg38_knownGene_ENST00000374736.8_12 range=chr9:104831622-104831827 5'pad=0 3'pad=0 strand=- repeatMasking=none +TGTGTCAACCTGAACAAGCTAGAACCCATAGCAACAGAAGTCTGGCTCAT +CAACAAGTCCATGGAGCTGCTGGATGAGAGGAAGTTCTGGGCTGGTATTG +TGTTCACTGGAATTACTCCAGGCAGCATTGAGCTGCCCCATCATGTCAAG +TACAAGATCCGAATGGACATTGACAATGTGGAGAGGACAAATAAAATCAA +GGATGG +>hg38_knownGene_ENST00000374736.8_13 range=chr9:104830925-104831101 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTACTGGGACCCTGGTCCTCGAGCTGACCCCTTTGAGGACATGCGGTACG +TCTGGGGGGGCTTCGCCTACTTGCAGGATGTGGTGGAGCAGGCAATCATC +AGGGTGCTGACGGGCACCGAGAAGAAAACTGGTGTCTATATGCAACAGAT +GCCCTATCCCTGTTACGTTGATGACAT +>hg38_knownGene_ENST00000374736.8_14 range=chr9:104828916-104829138 5'pad=0 3'pad=0 strand=- repeatMasking=none +CTTTCTGCGGGTGATGAGCCGGTCAATGCCCCTCTTCATGACGCTGGCCT +GGATTTACTCAGTGGCTGTGATCATCAAGGGCATCGTGTATGAGAAGGAG +GCACGGCTGAAAGAGACCATGCGGATCATGGGCCTGGACAACAGCATCCT +CTGGTTTAGCTGGTTCATTAGTAGCCTCATTCCTCTTCTTGTGAGCGCTG +GCCTGCTAGTGGTCATCCTGAAG +>hg38_knownGene_ENST00000374736.8_15 range=chr9:104826948-104827169 5'pad=0 3'pad=0 strand=- repeatMasking=none +TTAGGAAACCTGCTGCCCTACAGTGATCCCAGCGTGGTGTTTGTCTTCCT +GTCCGTGTTTGCTGTGGTGACAATCCTGCAGTGCTTCCTGATTAGCACAC +TCTTCTCCAGAGCCAACCTGGCAGCAGCCTGTGGGGGCATCATCTACTTC +ACGCTGTACCTGCCCTACGTCCTGTGTGTGGCATGGCAGGACTACGTGGG +CTTCACACTCAAGATCTTCGCT +>hg38_knownGene_ENST00000374736.8_16 range=chr9:104825683-104825887 5'pad=0 3'pad=0 strand=- repeatMasking=none +AGCCTGCTGTCTCCTGTGGCTTTTGGGTTTGGCTGTGAGTACTTTGCCCT +TTTTGAGGAGCAGGGCATTGGAGTGCAGTGGGACAACCTGTTTGAGAGTC +CTGTGGAGGAAGATGGCTTCAATCTCACCACTTCGGTCTCCATGATGCTG +TTTGACACCTTCCTCTATGGGGTGATGACCTGGTACATTGAGGCTGTCTT +TCCAG +>hg38_knownGene_ENST00000374736.8_17 range=chr9:104824465-104824578 5'pad=0 3'pad=0 strand=- repeatMasking=none +GCCAGTACGGAATTCCCAGGCCCTGGTATTTTCCTTGCACCAAGTCCTAC +TGGTTTGGCGAGGAAAGTGATGAGAAGAGCCACCCTGGTTCCAACCAGAA +GAGAATATCAGAAA +>hg38_knownGene_ENST00000374736.8_18 range=chr9:104822496-104822667 5'pad=0 3'pad=0 strand=- repeatMasking=none +TCTGCATGGAGGAGGAACCCACCCACTTGAAGCTGGGCGTGTCCATTCAG +AACCTGGTAAAAGTCTACCGAGATGGGATGAAGGTGGCTGTCGATGGCCT +GGCACTGAATTTTTATGAGGGCCAGATCACCTCCTTCCTGGGCCACAATG +GAGCGGGGAAGACGACCACCAT +>hg38_knownGene_ENST00000374736.8_19 range=chr9:104821375-104821506 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTCAATCCTGACCGGGTTGTTCCCCCCGACCTCGGGCACCGCCTACATCC +TGGGAAAAGACATTCGCTCTGAGATGAGCACCATCCGGCAGAACCTGGGG +GTCTGTCCCCAGCATAACGTGCTGTTTGACAT +>hg38_knownGene_ENST00000374736.8_20 range=chr9:104819927-104820069 5'pad=0 3'pad=0 strand=- repeatMasking=none +GCTGACTGTCGAAGAACACATCTGGTTCTATGCCCGCTTGAAAGGGCTCT +CTGAGAAGCACGTGAAGGCGGAGATGGAGCAGATGGCCCTGGATGTTGGT +TTGCCATCAAGCAAGCTGAAAAGCAAAACAAGCCAGCTGTCAG +>hg38_knownGene_ENST00000374736.8_21 range=chr9:104819586-104819723 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTGGAATGCAGAGAAAGCTATCTGTGGCCTTGGCCTTTGTCGGGGGATCT +AAGGTTGTCATTCTGGATGAACCCACAGCTGGTGTGGACCCTTACTCCCG +CAGGGGAATATGGGAGCTGCTGCTGAAATACCGACAAG +>hg38_knownGene_ENST00000374736.8_22 range=chr9:104818663-104818883 5'pad=0 3'pad=0 strand=- repeatMasking=none +GCCGCACCATTATTCTCTCTACACACCACATGGATGAAGCGGACGTCCTG +GGGGACAGGATTGCCATCATCTCCCATGGGAAGCTGTGCTGTGTGGGCTC +CTCCCTGTTTCTGAAGAACCAGCTGGGAACAGGCTACTACCTGACCTTGG +TCAAGAAAGATGTGGAATCCTCCCTCAGTTCCTGCAGAAACAGTAGTAGC +ACTGTGTCATACCTGAAAAAG +>hg38_knownGene_ENST00000374736.8_23 range=chr9:104817332-104817404 5'pad=0 3'pad=0 strand=- repeatMasking=none +GAGGACAGTGTTTCTCAGAGCAGTTCTGATGCTGGCCTGGGCAGCGACCA +TGAGAGTGACACGCTGACCATCG +>hg38_knownGene_ENST00000374736.8_24 range=chr9:104816143-104816345 5'pad=0 3'pad=0 strand=- repeatMasking=none +ATGTCTCTGCTATCTCCAACCTCATCAGGAAGCATGTGTCTGAAGCCCGG +CTGGTGGAAGACATAGGGCATGAGCTGACCTATGTGCTGCCATATGAAGC +TGCTAAGGAGGGAGCCTTTGTGGAACTCTTTCATGAGATTGATGACCGGC +TCTCAGACCTGGGCATTTCTAGTTATGGCATCTCAGAGACGACCCTGGAA +GAA +>hg38_knownGene_ENST00000374736.8_25 range=chr9:104814427-104814475 5'pad=0 3'pad=0 strand=- repeatMasking=none +ATATTCCTCAAGGTGGCCGAAGAGAGTGGGGTGGATGCTGAGACCTCAG +>hg38_knownGene_ENST00000374736.8_26 range=chr9:104814118-104814231 5'pad=0 3'pad=0 strand=- repeatMasking=none +ATGGTACCTTGCCAGCAAGACGAAACAGGCGGGCCTTCGGGGACAAGCAG +AGCTGTCTTCGCCCGTTCACTGAAGATGATGCTGCTGATCCAAATGATTC +TGACATAGACCCAG +>hg38_knownGene_ENST00000374736.8_27 range=chr9:104812574-104812722 5'pad=0 3'pad=0 strand=- repeatMasking=none +AATCCAGAGAGACAGACTTGCTCAGTGGGATGGATGGCAAAGGGTCCTAC +CAGGTGAAAGGCTGGAAACTTACACAGCAACAGTTTGTGGCCCTTTTGTG +GAAGAGACTGCTAATTGCCAGACGGAGTCGGAAAGGATTTTTTGCTCAG +>hg38_knownGene_ENST00000374736.8_28 range=chr9:104810800-104810924 5'pad=0 3'pad=0 strand=- repeatMasking=none +ATTGTCTTGCCAGCTGTGTTTGTCTGCATTGCCCTTGTGTTCAGCCTGAT +CGTGCCACCCTTTGGCAAGTACCCCAGCCTGGAACTTCAGCCCTGGATGT +ACAACGAACAGTACACATTTGTCAG +>hg38_knownGene_ENST00000374736.8_29 range=chr9:104809466-104809564 5'pad=0 3'pad=0 strand=- repeatMasking=none +CAATGATGCTCCTGAGGACACGGGAACCCTGGAACTCTTAAACGCCCTCA +CCAAAGACCCTGGCTTCGGGACCCGCTGTATGGAAGGAAACCCAATCCC +>hg38_knownGene_ENST00000374736.8_30 range=chr9:104806241-104806430 5'pad=0 3'pad=0 strand=- repeatMasking=none +AGACACGCCCTGCCAGGCAGGGGAGGAAGAGTGGACCACTGCCCCAGTTC +CCCAGACCATCATGGACCTCTTCCAGAATGGGAACTGGACAATGCAGAAC +CCTTCACCTGCATGCCAGTGTAGCAGCGACAAAATCAAGAAGATGCTGCC +TGTGTGTCCCCCAGGGGCAGGGGGGCTGCCTCCTCCACAA +>hg38_knownGene_ENST00000374736.8_31 range=chr9:104804626-104804720 5'pad=0 3'pad=0 strand=- repeatMasking=none +AGAAAACAAAACACTGCAGATATCCTTCAGGACCTGACAGGAAGAAACAT +TTCGGATTATCTGGTGAAGACGTATGTGCAGATCATAGCCAAAAG +>hg38_knownGene_ENST00000374736.8_32 range=chr9:104803284-104803316 5'pad=0 3'pad=0 strand=- repeatMasking=none +CTTAAAGAACAAGATCTGGGTGAATGAGTTTAG +>hg38_knownGene_ENST00000374736.8_33 range=chr9:104802054-104802159 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTATGGCGGCTTTTCCCTGGGTGTCAGTAATACTCAAGCACTTCCTCCGA +GTCAAGAAGTTAATGATGCCATCAAACAAATGAAGAAACACCTAAAGCTG +GCCAAG +>hg38_knownGene_ENST00000374736.8_34 range=chr9:104800510-104800584 5'pad=0 3'pad=0 strand=- repeatMasking=none +GACAGTTCTGCAGATCGATTTCTCAACAGCTTGGGAAGATTTATGACAGG +ACTGGACACCAAAAATAATGTCAAG +>hg38_knownGene_ENST00000374736.8_35 range=chr9:104799819-104799988 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTGTGGTTCAATAACAAGGGCTGGCATGCAATCAGCTCTTTCCTGAATGT +CATCAACAATGCCATTCTCCGGGCCAACCTGCAAAAGGGAGAGAACCCTA +GCCATTATGGAATTACTGCTTTCAATCATCCCCTGAATCTCACCAAGCAG +CAGCTCTCAGAGGTGGCTCT +>hg38_knownGene_ENST00000374736.8_36 range=chr9:104798421-104798598 5'pad=0 3'pad=0 strand=- repeatMasking=none +GATGACCACATCAGTGGATGTCCTTGTGTCCATCTGTGTCATCTTTGCAA +TGTCCTTCGTCCCAGCCAGCTTTGTCGTATTCCTGATCCAGGAGCGGGTC +AGCAAAGCAAAACACCTGCAGTTCATCAGTGGAGTGAAGCCTGTCATCTA +CTGGCTCTCTAATTTTGTCTGGGATATG +>hg38_knownGene_ENST00000374736.8_37 range=chr9:104796309-104796424 5'pad=0 3'pad=0 strand=- repeatMasking=none +TGCAATTACGTTGTCCCTGCCACACTGGTCATTATCATCTTCATCTGCTT +CCAGCAGAAGTCCTATGTGTCCTCCACCAATCTGCCTGTGCTAGCCCTTC +TACTTTTGCTGTATGG +>hg38_knownGene_ENST00000374736.8_38 range=chr9:104796053-104796197 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTGGTCAATCACACCTCTCATGTACCCAGCCTCCTTTGTGTTCAAGATCC +CCAGCACAGCCTATGTGGTGCTCACCAGCGTGAACCTCTTCATTGGCATT +AATGGCAGCGTGGCCACCTTTGTGCTGGAGCTGTTCACCGACAAT +>hg38_knownGene_ENST00000374736.8_39 range=chr9:104794387-104794510 5'pad=0 3'pad=0 strand=- repeatMasking=none +AAGCTGAATAATATCAATGATATCCTGAAGTCCGTGTTCTTGATCTTCCC +ACATTTTTGCCTGGGACGAGGGCTCATCGACATGGTGAAAAACCAGGCAA +TGGCTGATGCCCTGGAAAGGTTTG +>hg38_knownGene_ENST00000374736.8_40 range=chr9:104793171-104793300 5'pad=0 3'pad=0 strand=- repeatMasking=none +GGGAGAATCGCTTTGTGTCACCATTATCTTGGGACTTGGTGGGACGAAAC +CTCTTCGCCATGGCCGTGGAAGGGGTGGTGTTCTTCCTCATTACTGTTCT +GATCCAGTACAGATTCTTCATCAGGCCCAG +>hg38_knownGene_ENST00000374736.8_41 range=chr9:104792786-104792906 5'pad=0 3'pad=0 strand=- repeatMasking=none +ACCTGTAAATGCAAAGCTATCTCCTCTGAATGATGAAGATGAAGATGTGA +GGCGGGAAAGACAGAGAATTCTTGATGGTGGAGGCCAGAATGACATCTTA +GAAATCAAGGAGTTGACGAAG +>hg38_knownGene_ENST00000374736.8_42 range=chr9:104791936-104791998 5'pad=0 3'pad=0 strand=- repeatMasking=none +ATATATAGAAGGAAGCGGAAGCCTGCTGTTGACAGGATTTGCGTGGGCAT +TCCTCCTGGTGAG +>hg38_knownGene_ENST00000374736.8_43 range=chr9:104790922-104791028 5'pad=0 3'pad=0 strand=- repeatMasking=none +TGCTTTGGGCTCCTGGGAGTTAATGGGGCTGGAAAATCATCAACTTTCAA +GATGTTAACAGGAGATACCACTGTTACCAGAGGAGATGCTTTCCTTAACA +AAAATAG +>hg38_knownGene_ENST00000374736.8_44 range=chr9:104788426-104788567 5'pad=0 3'pad=0 strand=- repeatMasking=none +TATCTTATCAAACATCCATGAAGTACATCAGAACATGGGCTACTGCCCTC +AGTTTGATGCCATCACAGAGCTGTTGACTGGGAGAGAACACGTGGAGTTC +TTTGCCCTTTTGAGAGGAGTCCCAGAGAAAGAAGTTGGCAAG +>hg38_knownGene_ENST00000374736.8_45 range=chr9:104787920-104788054 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTTGGTGAGTGGGCGATTCGGAAACTGGGCCTCGTGAAGTATGGAGAAAA +ATATGCTGGTAACTATAGTGGAGGCAACAAACGCAAGCTCTCTACAGCCA +TGGCTTTGATCGGCGGGCCTCCTGTGGTGTTTCTG +>hg38_knownGene_ENST00000374736.8_46 range=chr9:104786873-104786976 5'pad=0 3'pad=0 strand=- repeatMasking=none +GATGAACCCACCACAGGCATGGATCCCAAAGCCCGGCGGTTCTTGTGGAA +TTGTGCCCTAAGTGTTGTCAAGGAGGGGAGATCAGTAGTGCTTACATCTC +ATAG +>hg38_knownGene_ENST00000374736.8_47 range=chr9:104786298-104786390 5'pad=0 3'pad=0 strand=- repeatMasking=none +TATGGAAGAATGTGAAGCTCTTTGCACTAGGATGGCAATCATGGTCAATG +GAAGGTTCAGGTGCCTTGGCAGTGTCCAGCATCTAAAAAATAG +>hg38_knownGene_ENST00000374736.8_48 range=chr9:104785396-104785639 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTTTGGAGATGGTTATACAATAGTTGTACGAATAGCAGGGTCCAACCCGG +ACCTGAAGCCTGTCCAGGATTTCTTTGGACTTGCATTTCCTGGAAGTGTT +CTAAAAGAGAAACACCGGAACATGCTACAATACCAGCTTCCATCTTCATT +ATCTTCTCTGGCCAGGATATTCAGCATCCTCTCCCAGAGCAAAAAGCGAC +TCCACATAGAAGACTACTCTGTTTCTCAGACAACACTTGACCAA +>hg38_knownGene_ENST00000374736.8_49 range=chr9:104781006-104784455 5'pad=0 3'pad=0 strand=- repeatMasking=none +GTATTTGTGAACTTTGCCAAGGACCAAAGTGATGATGACCACTTAAAAGA +CCTCTCATTACACAAAAACCAGACAGTAGTGGACGTTGCAGTTCTCACAT +CTTTTCTACAGGATGAGAAAGTGAAAGAAAGCTATGTATGAagaatcctg +ttcatacggggtggctgaaagtaaagaggaactagactttcctttgcacc +atgtgaagtgttgtggagaaaagagccagaagttgatgtgggaagaagta +aactggatactgtactgatactattcaatgcaatgcaattcaatgcaatg +aaaacaaaattccattacaggggcagtgcctttgtagcctatgtcttgta +tggctctcaagtgaaagacttgaatttagttttttacctatacctatgtg +aaactctattatggaacccaatggacatatgggtttgaactcacactttt +ttttttttttttgttcctgtgtattctcattggggttgcaacaataattc +atcaagtaatcatggccagcgattattgatcaaaatcaaaaggtaatgca +catcctcattcactaagccatgccatgcccaggagactggtttcccggtg +acacatccattgctggcaatgagtgtgccagagttattagtgccaagttt +ttcagaaagtttgaagcaccatggtgtgtcatgctcacttttgtgaaagc +tgctctgctcagagtctatcaacattgaatatcagttgacagaatggtgc +catgcgtggctaacatcctgctttgattccctctgataagctgttctggt +ggcagtaacatgcaacaaaaatgtgggtgtctccaggcacgggaaacttg +gttccattgttatattgtcctatgcttcgagccatgggtctacagggtca +tccttatgagactcttaaatatacttagatcctggtaagaggcaaagaat +caacagccaaactgctggggctgcaagctgctgaagccagggcatgggat +taaagagattgtgcgttcaaacctagggaagcctgtgcccatttgtcctg +actgtctgctaacatggtacactgcatctcaagatgtttatctgacacaa +gtgtattatttctggctttttgaattaatctagaaaatgaaaagatggag +ttgtattttgacaaaaatgtttgtactttttaatgttatttggaatttta +agttctatcagtgacttctgaatccttagaatggcctctttgtagaaccc +tgtggtatagaggagtatggccactgccccactatttttattttcttatg +taagtttgcatatcagtcatgactagtgcctagaaagcaatgtgatggtc +aggatctcatgacattatatttgagtttctttcagatcatttaggatact +cttaatctcacttcatcaatcaaatattttttgagtgtatgctgtagctg +aaagagtatgtacgtacgtataagactagagagatattaagtctcagtac +acttcctgtgccatgttattcagctcactggtttacaaatataggttgtc +ttgtggttgtaggagcccactgtaacaatactgggcagcctttttttttt +tttttttaattgcaacaatgcaaaagccaagaaagtataagggtcacaag +tctaaacaatgaattcttcaacagggaaaacagctagcttgaaaacttgc +tgaaaaacacaacttgtgtttatggcatttagtaccttcaaataattggc +tttgcagatattggataccccattaaatctgacagtctcaaatttttcat +ctcttcaatcactagtcaagaaaaatataaaaacaacaaatacttccata +tggagcatttttcagagttttctaacccagtcttatttttctagtcagta +aacatttgtaaaaatactgtttcactaatacttactgttaactgtcttga +gagaaaagaaaaatatgagagaactattgtttggggaagttcaagtgatc +tttcaatatcattactaacttcttccactttttccagaatttgaatatta +acgctaaaggtgtaagacttcagatttcaaattaatctttctatattttt +taaatttacagaatattatataacccactgctgaaaaagaaaaaaatgat +tgttttagaagttaaagtcaatattgattttaaatataagtaatgaaggc +atatttccaataactagtgatatggcatcgttgcattttacagtatcttc +aaaaatacagaatttatagaataatttctcctcatttaatatttttcaaa +atcaaagttatggtttcctcattttactaaaatcgtattctaattcttca +ttatagtaaatctatgagcaactccttacttcggttcctctgatttcaag +gccatattttaaaaaatcaaaaggcactgtgaactattttgaagaaaaca +caacattttaatacagattgaaaggacctcttctgaagctagaaacaatc +tatagttatacatcttcattaatactgtgttaccttttaaaatagtaatt +ttttacattttcctgtgtaaacctaattgtggtagaaatttttaccaact +ctatactcaatcaagcaaaatttctgtatattccctgtggaatgtaccta +tgtgagtttcagaaattctcaaaatacgtgttcaaaaatttctgcttttg +catctttgggacacctcagaaaacttattaacaactgtgaatatgagaaa +tacagaagaaaataataagccctctatacataaatgcccagcacaattca +ttgttaaaaaacaaccaaacctcacactactgtatttcattatctgtact +gaaagcaaatgctttgtgactattaaatgttgcacatcattcattcactg +tatagtaatcattgactaaagccatttgtctgtgttttcttcttgtggtt +gtatatatcaggtaaaatattttccaaagagccatgtgtcatgtaatact +gaaccactttgatattgagacattaatttgtacccttgttattatctact +agtaataatgtaatactgtagaaatattgctctaattcttttcaaaattg +ttgcatcccccttagaatgtttctatttccataaggatttaggtatgcta +ttatcccttcttataccctaagatgaagctgtttttgtgctctttgttca +tcattggccctcattccaagcactttacgctgtctgtaatgggatctatt +tttgcactggaatatctgagaattgcaaaactagacaaaagtttcacaac +agatttctaagttaaatcattttcattaaaaggaaaaaagaaaaaaaatt +ttgtatgtcaataactttatatgaagtattaaaatgcatatttctatgtt +gtaatataatgagtcacaaaataaagctgtgacagttctgttggtctaca \ No newline at end of file diff --git a/tests/test_filter.py b/tests/test_filter.py index a57ce81..6d3d6d2 100755 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,6 +1,10 @@ import pytest import subprocess from bean.annotate.translate_allele import CDS +from bean.annotate.translate_allele import ( + get_cds_seq_pos_from_gene_name, + get_cds_seq_pos_from_fasta, +) @pytest.mark.order(311) @@ -76,3 +80,26 @@ def test_translate_aa(): abca1 = CDS.from_gene_name("ABCA1") allele_str = "chr9:104903664:0:-:C>T" assert str(abca1.get_aa_change(allele_str)) == "ABCA1:6:Q>*|", allele_str + + +def test_pos_fasta(): + chrom, translated_seq, genomic_pos, strand = get_cds_seq_pos_from_fasta( + "tests/data/abca1.fa" + ) + chrom2, translated_seq2, genomic_pos2, strand2 = get_cds_seq_pos_from_gene_name( + "ABCA1" + ) + assert chrom == chrom2 + assert translated_seq == translated_seq2 + assert genomic_pos == genomic_pos2, genomic_pos[:10] + assert strand == strand2 + + +def test_translate_fasta(): + abca1 = CDS.from_fasta("tests/data/abca1.fa", "ABCA1") + allele_str = "chr9:104903664:0:+:G>A" + assert str(abca1.get_aa_change(allele_str)) == "ABCA1:6:Q>*|", allele_str + + abca1 = CDS.from_fasta("tests/data/abca1.fa", "ABCA1") + allele_str = "chr9:104903664:0:-:C>T" + assert str(abca1.get_aa_change(allele_str)) == "ABCA1:6:Q>*|", allele_str