From 9efb7c51104c987940d981988d58016fb02bda67 Mon Sep 17 00:00:00 2001
From: antonylebechec <antony.lebechec@gmail.com>
Date: Fri, 20 Sep 2024 19:45:45 +0200
Subject: [PATCH] Add options to control transcripts view struct #256

---
 howard/functions/commons.py                   |  28 +
 howard/objects/variants.py                    | 270 +++++++--
 ...n_transcripts_profiles_fields_renamed.json |  54 ++
 tests/test_commons.py                         |  28 +
 tests/test_variants_transcripts.py            | 557 +++++++++++++++++-
 5 files changed, 852 insertions(+), 85 deletions(-)
 create mode 100644 tests/data/prioritization_transcripts_profiles_fields_renamed.json

diff --git a/howard/functions/commons.py b/howard/functions/commons.py
index 3d8a13a..94f9d8c 100644
--- a/howard/functions/commons.py
+++ b/howard/functions/commons.py
@@ -3987,3 +3987,31 @@ def determine_column_number(values_list: list) -> str:
             return "."
 
     return "1"
+
+
+def clean_annotation_field(name: str = "", char_allowed: list = None) -> str:
+    """
+    The `clean_annotation_field` function removes characters from a string that are not alphanumeric or
+    in a specified list.
+
+    :param name: The `name` parameter is a string that represents the input text that you want to clean.
+    It typically contains annotations or other text that you want to process
+    :type name: str
+    :param char_allowed: The `char_allowed` parameter is a list that contains characters that are
+    allowed to remain in the `name` string after cleaning. Any character in the `name` string that is
+    not alphanumeric and not in the `char_allowed` list will be removed during the cleaning process
+    :type char_allowed: list
+    :return: The function `clean_annotation_field` returns a cleaned version of the `name` string, where
+    only alphanumeric characters and characters from the `char_allowed` list are kept.
+    """
+
+    # Init
+    if char_allowed is None:
+        char_allowed = []
+
+    # Convert char_allowed to a set for faster membership testing
+    char_allowed_set = set(char_allowed)
+
+    return "".join(
+        char for char in name if (char.isalnum() or char in char_allowed_set)
+    )
diff --git a/howard/objects/variants.py b/howard/objects/variants.py
index 6d2ca6a..cc85ccf 100644
--- a/howard/objects/variants.py
+++ b/howard/objects/variants.py
@@ -9725,7 +9725,6 @@ def transcripts_prioritization(
                     pz_param.get("pzprefix", "PTZ") + pz_field
                 )
             else:
-                # pz_param_pzfields.append(pz_field)
                 pz_field_new = pz_param.get("pzprefix", "PTZ") + pz_field
                 pz_param_pzfields[pz_field] = pz_field_new
 
@@ -9798,9 +9797,17 @@ def transcripts_prioritization(
         if "transcript" in fields_to_explode:
             fields_to_explode.remove("transcript")
 
+        # Fields in transcripts table
+        query_transcripts_table = f"""
+            DESCRIBE SELECT * FROM {transcripts_table}
+        """
+        query_transcripts_table = self.get_query_to_df(query=query_transcripts_table)
+
         # Check fields to explode
         for field_to_explode in fields_to_explode:
-            if field_to_explode not in self.get_header_infos_list():
+            if field_to_explode not in self.get_header_infos_list() + list(
+                query_transcripts_table.column_name
+            ):
                 msg_err = f"INFO/{field_to_explode} NOT IN header"
                 log.error(msg_err)
                 raise ValueError(msg_err)
@@ -9890,11 +9897,6 @@ def transcripts_prioritization(
                 FROM {transcripts_table}
             """
 
-        # DEBUG
-        # log.debug(f""" query_update_ranking={query_update_ranking} """)
-        # df_devel = self.get_query_to_df(query=query_update_ranking)
-        # log.debug(df_devel)
-
         # Export Transcripts prioritization infos to variants table
         query_update = f"""
             WITH RankedTranscripts AS (
@@ -9932,37 +9934,59 @@ def create_transcript_view_from_columns_map(
         added_columns: list = [],
         temporary_tables: list = None,
         annotation_fields: list = None,
+        column_rename: dict = {},
+        column_clean: bool = False,
+        column_case: str = None,
     ) -> tuple[list, list, list]:
         """
         The `create_transcript_view_from_columns_map` function generates a temporary table view based on
         specified columns mapping for transcripts data.
 
-        :param transcripts_table: The `transcripts_table` parameter is a string that specifies the name of
-        the table where the transcripts data is stored or will be stored in the database. This table
-        typically contains information about transcripts such as Ensembl transcript IDs, gene names, scores,
-        predictions, etc. It defaults to "transcripts, defaults to transcripts
+        :param transcripts_table: The `transcripts_table` parameter is a string that specifies the name
+        of the table where the transcripts data is stored or will be stored in the database. This table
+        typically contains information about transcripts such as Ensembl transcript IDs, gene names,
+        scores, predictions, etc., defaults to transcripts
         :type transcripts_table: str (optional)
-        :param columns_maps: The `columns_maps` parameter is a dictionary that contains information about
-        how to map columns from a transcripts table to create a view. Each entry in the `columns_maps` list
-        represents a mapping configuration for a specific set of columns. It typically includes details such
-        as the main transcript column and additional information columns
+        :param columns_maps: The `columns_maps` parameter is a dictionary that contains information
+        about how to map columns from a transcripts table to create a view. Each entry in the
+        `columns_maps` list represents a mapping configuration for a specific set of columns. It
+        typically includes details such as the main transcript column and additional information columns
         :type columns_maps: dict
-        :param added_columns: The `added_columns` parameter in the `create_transcript_view_from_columns_map`
-        function is a list that stores the additional columns that will be added to the view being created
-        based on the columns map provided. These columns are generated by exploding the transcript
-        information columns along with the main transcript column
+        :param added_columns: The `added_columns` parameter in the
+        `create_transcript_view_from_columns_map` function is a list that stores the additional columns
+        that will be added to the view being created based on the columns map provided. These columns
+        are generated by exploding the transcript information columns along with the main transcript
+        column
         :type added_columns: list
         :param temporary_tables: The `temporary_tables` parameter in the
         `create_transcript_view_from_columns_map` function is a list that stores the names of temporary
-        tables created during the process of creating a transcript view from a columns map. These temporary
-        tables are used to store intermediate results or transformations before the final view is generated
+        tables created during the process of creating a transcript view from a columns map. These
+        temporary tables are used to store intermediate results or transformations before the final view
+        is generated
         :type temporary_tables: list
         :param annotation_fields: The `annotation_fields` parameter in the
-        `create_transcript_view_from_columns_map` function is a list that stores the fields that are used
-        for annotation in the query view creation process. These fields are extracted from the
+        `create_transcript_view_from_columns_map` function is a list that stores the fields that are
+        used for annotation in the query view creation process. These fields are extracted from the
         `transcripts_column` and `transcripts_infos_columns` specified in the `columns
         :type annotation_fields: list
-        :return: The function `create_transcript_view_from_columns_map` returns a tuple containing three
+        :param column_rename: The `column_rename` parameter in the
+        `create_transcript_view_from_columns_map` function is a dictionary that allows you to specify
+        custom renaming for columns during the creation of the temporary table view. This parameter
+        provides a mapping of original column names to the desired renamed column names. A
+        `column_rename` entry in a columns map overrides this value
+        :type column_rename: dict
+        :param column_clean: The `column_clean` parameter in the
+        `create_transcript_view_from_columns_map` function is a boolean flag that determines whether the
+        output column names should be cleaned or not. If set to `True`, non-alphanumeric characters
+        are removed from the column names (after renaming) with `clean_annotation_field`, defaults to
+        False
+        :type column_clean: bool (optional)
+        :param column_case: The `column_case` parameter in the `create_transcript_view_from_columns_map`
+        function is used to specify the case transformation to be applied to the output column names
+        during the view creation process: "lower" or "upper" (case-insensitive) converts the names,
+        any other value leaves them unchanged
+        :type column_case: str
+        :return: The `create_transcript_view_from_columns_map` function returns a tuple containing three
         lists: `added_columns`, `temporary_tables`, and `annotation_fields`.
         """
 
@@ -10005,6 +10029,15 @@ def create_transcript_view_from_columns_map(
             # Transcripts infos columns
             transcripts_infos_columns = columns_map.get("transcripts_infos_columns", [])
 
+            # Transcripts infos columns rename
+            column_rename = columns_map.get("column_rename", column_rename)
+
+            # Transcripts infos columns clean
+            column_clean = columns_map.get("column_clean", column_clean)
+
+            # Transcripts infos columns case
+            column_case = columns_map.get("column_case", column_case)
+
             if transcripts_column is not None:
 
                 # Explode
@@ -10013,24 +10046,53 @@ def create_transcript_view_from_columns_map(
                 )
 
                 # View clauses
-                clause_select = []
+                clause_select_variants = []
+                clause_select_tanscripts = []
                 for field in [transcripts_column] + transcripts_infos_columns:
-                    clause_select.append(
+
+                    # AS field
+                    as_field = field
+
+                    # Rename
+                    if column_rename:
+                        as_field = column_rename.get(as_field, as_field)
+
+                    # Clean
+                    if column_clean:
+                        as_field = clean_annotation_field(as_field)
+
+                    # Case
+                    if column_case:
+                        if column_case.lower() in ["lower"]:
+                            as_field = as_field.lower()
+                        elif column_case.lower() in ["upper"]:
+                            as_field = as_field.upper()
+
+                    # Clause select Variants
+                    clause_select_variants.append(
                         f""" regexp_split_to_table("{field}", ',') AS '{field}' """
                     )
-                    if field not in [transcripts_column]:
-                        annotation_fields.append(field)
+
+                    if field in [transcripts_column]:
+                        clause_select_tanscripts.append(
+                            f""" regexp_split_to_table("{field}", ',') AS '{field}' """
+                        )
+                    else:
+                        clause_select_tanscripts.append(
+                            f""" regexp_split_to_table("{field}", ',') AS '{as_field}' """
+                        )
+                        annotation_fields.append(as_field)
 
                 # Querey View
                 query = f""" 
                     SELECT
                         "#CHROM", POS, REF, ALT, INFO,
                         "{transcripts_column}" AS 'transcript',
-                        {", ".join(clause_select)}
+                        {", ".join(clause_select_tanscripts)}
                     FROM (
                         SELECT 
                             "#CHROM", POS, REF, ALT, INFO,
-                            {", ".join(clause_select)}
+                            {", ".join(clause_select_variants)}
                         FROM {table_variants}
                         )
                     WHERE "{transcripts_column}" IS NOT NULL
@@ -10057,33 +10119,55 @@ def create_transcript_view_from_column_format(
         column_formats: dict = {},
         temporary_tables: list = None,
         annotation_fields: list = None,
+        column_rename: dict = {},
+        column_clean: bool = False,
+        column_case: str = None,
     ) -> tuple[list, list, list]:
         """
         The `create_transcript_view_from_column_format` function generates a transcript view based on
         specified column formats, adds additional columns and annotation fields, and returns the list of
         temporary tables and annotation fields.
 
-        :param transcripts_table: The `transcripts_table` parameter is a string that specifies the name of
-        the table containing the transcripts data. This table will be used as the base table for creating
-        the transcript view. The default value for this parameter is "transcripts", but you can provide a
-        different table name if needed, defaults to transcripts
+        :param transcripts_table: The `transcripts_table` parameter is a string that specifies the name
+        of the table containing the transcripts data. This table will be used as the base table for
+        creating the transcript view. The default value for this parameter is "transcripts", but you can
+        provide a different table name if needed, defaults to transcripts
         :type transcripts_table: str (optional)
         :param column_formats: The `column_formats` parameter is a dictionary that contains information
         about the columns to be used for creating the transcript view. Each entry in the dictionary
-        specifies the mapping between a transcripts column and a transcripts infos column. For example, in
-        the provided code snippet:
+        specifies the mapping between a transcripts column and a transcripts infos column. This
+        parameter allows you to define how the columns from the transcripts table should be transformed
+        or mapped
         :type column_formats: dict
         :param temporary_tables: The `temporary_tables` parameter in the
-        `create_transcript_view_from_column_format` function is a list that stores the names of temporary
-        views created during the process of creating a transcript view from a column format. These temporary
-        views are used to manipulate and extract data before generating the final transcript view. It
+        `create_transcript_view_from_column_format` function is a list that stores the names of
+        temporary views created during the process of creating a transcript view from a column format.
+        These temporary views are used to manipulate and extract data before generating the final
+        transcript view
         :type temporary_tables: list
         :param annotation_fields: The `annotation_fields` parameter in the
         `create_transcript_view_from_column_format` function is a list that stores the annotation fields
-        that are extracted from the temporary views created during the process. These annotation fields are
-        obtained by querying the temporary views and extracting the column names excluding specific columns
-        like `#CH
+        that are extracted from the temporary views created during the process. These annotation fields
+        are obtained by querying the temporary views and extracting the column names excluding specific
+        columns such as `#CHROM`
         :type annotation_fields: list
+        :param column_rename: The `column_rename` parameter in the
+        `create_transcript_view_from_column_format` function is a dictionary that allows you to specify
+        custom renaming of columns in the transcripts infos table. By providing a mapping of original
+        column names to new column names in this dictionary, you can rename specific columns during the
+        process
+        :type column_rename: dict
+        :param column_clean: The `column_clean` parameter in the
+        `create_transcript_view_from_column_format` function is a boolean flag that determines whether
+        the transcripts infos columns should undergo a cleaning process. If set to `True`, the columns
+        will be cleaned during the creation of the transcript view based on the specified column format,
+        defaults to False
+        :type column_clean: bool (optional)
+        :param column_case: The `column_case` parameter in the
+        `create_transcript_view_from_column_format` function is used to specify the case transformation
+        to be applied to the columns in the transcript view. It can be set to either "upper" or "lower"
+        to convert the column names to uppercase or lowercase, respectively
+        :type column_case: str
         :return: The `create_transcript_view_from_column_format` function returns two lists:
         `temporary_tables` and `annotation_fields`.
         """
@@ -10111,6 +10195,15 @@ def create_transcript_view_from_column_format(
                 "transcripts_infos_column", "Feature_ID"
             )
 
+            # Transcripts infos columns rename
+            column_rename = column_format.get("column_rename", column_rename)
+
+            # Transcripts infos columns clean
+            column_clean = column_format.get("column_clean", column_clean)
+
+            # Transcripts infos columns case
+            column_case = column_format.get("column_case", column_case)
+
             # Temporary View name
             temporary_view_name = transcripts_table + "".join(
                 random.choices(string.ascii_uppercase + string.digits, k=10)
@@ -10122,6 +10215,9 @@ def create_transcript_view_from_column_format(
                 annotation_field=annotation_field,
                 view_name=temporary_view_name,
                 annotation_id=transcript_annotation,
+                column_rename=column_rename,
+                column_clean=column_clean,
+                column_case=column_case,
             )
 
             # Annotation fields
@@ -10230,9 +10326,15 @@ def create_transcript_view(
             temporary_tables += temporary_tables_tmp
             annotation_fields += annotation_fields_tmp
 
+            # Remove some specific fields/column
+            annotation_fields = list(set(annotation_fields))
+            for field in ["#CHROM", "POS", "REF", "ALT", "INFO", "transcript"]:
+                if field in annotation_fields:
+                    annotation_fields.remove(field)
+
             # Merge temporary tables query
             query_merge = ""
-            for temporary_table in temporary_tables:
+            for temporary_table in list(set(temporary_tables)):
 
                 # First temporary table
                 if not query_merge:
@@ -10289,38 +10391,63 @@ def annotation_format_to_table(
         annotation_field: str = "ANN",
         annotation_id: str = "Feature_ID",
         view_name: str = "transcripts",
+        column_rename: dict = {},
+        column_clean: bool = False,
+        column_case: str = None,
     ) -> str:
         """
-        The function `annotation_format_to_table` converts annotation data from a VCF file into a structured
-        table format.
+        The `annotation_format_to_table` function converts annotation data from a VCF file into a
+        structured table format, ensuring unique values and creating a temporary table for further
+        processing or analysis.
 
-        :param uniquify: The `uniquify` parameter is a boolean flag that determines whether to ensure unique
-        values in the output or not. If set to `True`, the function will make sure that the output values
-        are unique, defaults to True
+        :param uniquify: The `uniquify` parameter is a boolean flag that determines whether to ensure
+        unique values in the output or not. If set to `True`, the function will make sure that the
+        output values are unique, defaults to True
         :type uniquify: bool (optional)
-        :param annotation_field: The `annotation_field` parameter refers to the field in the VCF file that
-        contains the annotation information for each variant. This field is used to extract the annotation
-        details for further processing in the function, defaults to ANN
+        :param annotation_field: The `annotation_field` parameter refers to the field in the VCF file
+        that contains the annotation information for each variant. This field is used to extract the
+        annotation details for further processing in the function,
+        defaults to ANN
         :type annotation_field: str (optional)
-        :param annotation_id: The `annotation_id` parameter in the `annotation_format_to_table` method is
-        used to specify the identifier for the annotation feature. This identifier will be used as a column
-        name in the resulting table or view that is created based on the annotation data. It helps in
-        uniquely identifying each annotation entry in the, defaults to Feature_ID
+        :param annotation_id: The `annotation_id` parameter in the `annotation_format_to_table` method
+        is used to specify the identifier for the annotation feature. This identifier will be used as a
+        column name in the resulting table or view that is created based on the annotation data. It
+        helps in uniquely identifying each annotation entry in the resulting table, defaults to Feature_ID
         :type annotation_id: str (optional)
-        :param view_name: The `view_name` parameter in the `annotation_format_to_table` method is used to
-        specify the name of the temporary table that will be created to store the transformed annotation
-        data. This table will hold the extracted information from the annotation field in a structured
-        format for further processing or analysis, defaults to transcripts
+        :param view_name: The `view_name` parameter in the `annotation_format_to_table` method is used
+        to specify the name of the temporary table that will be created to store the transformed
+        annotation data. This table will hold the extracted information from the annotation field in a
+        structured format for further processing or analysis, defaults to transcripts
         :type view_name: str (optional)
-        :return: The function `annotation_format_to_table` is returning the name of the view created, which
-        is stored in the variable `view_name`.
+        :param column_rename: The `column_rename` parameter in the `annotation_format_to_table` method
+        is a dictionary that allows you to specify custom renaming for columns. By providing key-value
+        pairs in this dictionary, you can rename specific columns in the resulting table or view that is
+        created based on the annotation data. Renaming is applied before cleaning and case conversion
+        :type column_rename: dict
+        :param column_clean: The `column_clean` parameter in the `annotation_format_to_table` method is
+        a boolean flag that determines whether column names should undergo a cleaning process. If set
+        to `True`, the function passes the `annotation_id` and each annotation key through
+        `clean_annotation_field`, which keeps only alphanumeric characters in the name, defaults
+        to False
+        :type column_clean: bool (optional)
+        :param column_case: The `column_case` parameter in the `annotation_format_to_table` method is
+        used to specify the case transformation to be applied to the column names extracted from the
+        annotation data. It allows you to set the case of the column names to either lowercase or
+        uppercase for consistency or other specific requirements during the conversion
+        :type column_case: str
+        :return: The function `annotation_format_to_table` is returning the name of the view created,
+        which is stored in the variable `view_name`.
         """
 
         # Annotation field
         annotation_format = "annotation_explode"
 
         # Transcript annotation
-        annotation_id = "".join(char for char in annotation_id if char.isalnum())
+        if column_rename:
+            annotation_id = column_rename.get(annotation_id, annotation_id)
+
+        if column_clean:
+            annotation_id = clean_annotation_field(annotation_id)
 
         # Prefix
         prefix = self.get_explode_infos_prefix()
@@ -10396,9 +10523,22 @@ def annotation_format_to_table(
 
                 # Key
                 key = row.iloc[0]
-
-                # key_clean
-                key_clean = "".join(char for char in key if char.isalnum())
+                key_clean = key
+
+                # key rename
+                if column_rename:
+                    key_clean = column_rename.get(key_clean, key_clean)
+
+                # key clean
+                if column_clean:
+                    key_clean = clean_annotation_field(key_clean)
+
+                # Key case
+                if column_case:
+                    if column_case.lower() in ["lower"]:
+                        key_clean = key_clean.lower()
+                    elif column_case.lower() in ["upper"]:
+                        key_clean = key_clean.upper()
 
                 # Type
                 query_json_type = f"""SELECT unnest(json_extract_string({annotation_format}, '$.*."{key}"')) AS '{key_clean}' FROM dataframe_annotation_format WHERE trim('{key}') NOT IN ('');"""
diff --git a/tests/data/prioritization_transcripts_profiles_fields_renamed.json b/tests/data/prioritization_transcripts_profiles_fields_renamed.json
new file mode 100644
index 0000000..e33bf59
--- /dev/null
+++ b/tests/data/prioritization_transcripts_profiles_fields_renamed.json
@@ -0,0 +1,54 @@
+{
+  "transcripts": {
+    "LISTScore": [
+      {
+        "type": "gt",
+        "value": "0.75",
+        "score": 10,
+        "flag": "PASS",
+        "comment": ["Very Good LIST Score"]
+      },
+      {
+        "type": "gt",
+        "value": "0.50",
+        "score": 10,
+        "flag": "PASS",
+        "comment": ["Good LIST Score"]
+      }
+    ],
+    "CLNSIG": [
+      {
+        "type": "eq",
+        "value": "pathogenic",
+        "score": 100,
+        "flag": "PASS",
+        "comment": ["Pathogenic"]
+      }
+    ],
+    "AnnotationImpact": [
+      {
+        "type": "eq",
+        "value": "MODIFIER",
+        "score": 100,
+        "flag": "PASS",
+        "comment": ["MODIFIER"]
+      }
+    ],
+    "transcript": [
+      {
+        "type": "eq",
+        "value": "NM_005228.5",
+        "score": 100,
+        "flag": "PASS",
+        "comment": ["NM_005228.5"]
+      },
+      {
+        "type": "eq",
+        "value": "NM_001346941.2",
+        "score": 100,
+        "flag": "PASS",
+        "comment": ["NM_001346941.2"]
+      }
+    ]
+  }
+}
diff --git a/tests/test_commons.py b/tests/test_commons.py
index d149fa0..ac415d1 100644
--- a/tests/test_commons.py
+++ b/tests/test_commons.py
@@ -1531,3 +1531,31 @@ def test_get_duckdb_extension_file():
     conn = duckdb.connect()
 
     assert get_duckdb_extension_file("sqlite_scanner", conn=conn)
+
+
+def test_clean_annotation_field_basic_alphanumeric():
+    assert clean_annotation_field("HelloWorld") == "HelloWorld"
+
+
+def test_clean_annotation_field_with_special_characters():
+    assert clean_annotation_field("Hello, World!") == "HelloWorld"
+
+
+def test_clean_annotation_field_with_allowed_characters():
+    assert clean_annotation_field("Hello-World", char_allowed=["-"]) == "Hello-World"
+
+
+def test_clean_annotation_field_empty_string():
+    assert clean_annotation_field("") == ""
+
+
+def test_clean_annotation_field_no_allowed_characters():
+    assert clean_annotation_field("Hello@World#2023") == "HelloWorld2023"
+
+
+def test_clean_annotation_field_all_characters_removed():
+    assert clean_annotation_field("!!!") == ""
+
+
+def test_clean_annotation_field_non_alphanumeric_with_allowed_chars():
+    assert clean_annotation_field("Test123!@#", char_allowed=["!"]) == "Test123!"
diff --git a/tests/test_variants_transcripts.py b/tests/test_variants_transcripts.py
index 3882b2e..bf4cf64 100644
--- a/tests/test_variants_transcripts.py
+++ b/tests/test_variants_transcripts.py
@@ -23,10 +23,10 @@
 @pytest.mark.parametrize(
     "input_vcf",
     [
-        "tests/data/example.ann.transcripts.vcf.gz",
-        "tests/data/example.ann.vcf.gz",
-        "tests/data/example.dbnsfp.transcripts.vcf.gz",
-        "tests/data/example.dbnsfp.no_transcripts.vcf.gz",
+        f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
+        f"{tests_data_folder}/example.ann.vcf.gz",
+        f"{tests_data_folder}/example.dbnsfp.transcripts.vcf.gz",
+        f"{tests_data_folder}/example.dbnsfp.no_transcripts.vcf.gz",
     ],
 )
 def test_create_transcript_view(input_vcf):
@@ -53,6 +53,9 @@ def test_create_transcript_view(input_vcf):
                         {
                             "transcripts_column": "ANN",
                             "transcripts_infos_column": "Feature_ID",
+                            "column_rename": None,
+                            "column_clean": True,
+                            "column_case": None,
                         }
                     ],
                     "from_columns_map": [  # format List, e.g. dbNSFP columns
@@ -64,6 +67,9 @@ def test_create_transcript_view(input_vcf):
                                 "LIST_S2_score",
                                 "LIST_S2_pred",
                             ],
+                            "column_rename": None,
+                            "column_clean": False,
+                            "column_case": None,
                         },
                         {
                             "transcripts_column": "Ensembl_transcriptid",
@@ -72,6 +78,9 @@ def test_create_transcript_view(input_vcf):
                                 "VARITY_R_score",
                                 "Aloft_pred",
                             ],
+                            "column_rename": None,
+                            "column_clean": False,
+                            "column_case": None,
                         },
                     ],
                 },
@@ -101,10 +110,10 @@ def test_create_transcript_view(input_vcf):
 @pytest.mark.parametrize(
     "input_vcf",
     [
-        "tests/data/example.ann.transcripts.vcf.gz",
-        "tests/data/example.ann.vcf.gz",
-        "tests/data/example.dbnsfp.transcripts.vcf.gz",
-        "tests/data/example.dbnsfp.no_transcripts.vcf.gz",
+        f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
+        f"{tests_data_folder}/example.ann.vcf.gz",
+        f"{tests_data_folder}/example.dbnsfp.transcripts.vcf.gz",
+        f"{tests_data_folder}/example.dbnsfp.no_transcripts.vcf.gz",
     ],
 )
 def test_create_transcript_view_to_variants(input_vcf):
@@ -126,6 +135,9 @@ def test_create_transcript_view_to_variants(input_vcf):
                         {
                             "transcripts_column": "ANN",
                             "transcripts_infos_column": "Feature_ID",
+                            "column_rename": None,
+                            "column_clean": True,
+                            "column_case": None,
                         }
                     ],
                     "from_columns_map": [  # format List, e.g. dbNSFP columns
@@ -137,6 +149,9 @@ def test_create_transcript_view_to_variants(input_vcf):
                                 "LIST_S2_score",
                                 "LIST_S2_pred",
                             ],
+                            "column_rename": None,
+                            "column_clean": False,
+                            "column_case": None,
                         },
                         {
                             "transcripts_column": "Ensembl_transcriptid",
@@ -145,6 +160,9 @@ def test_create_transcript_view_to_variants(input_vcf):
                                 "VARITY_R_score",
                                 "Aloft_pred",
                             ],
+                            "column_rename": None,
+                            "column_clean": False,
+                            "column_case": None,
                         },
                     ],
                 },
@@ -283,10 +301,10 @@ def test_create_transcript_view_to_variants(input_vcf):
 @pytest.mark.parametrize(
     "input_vcf",
     [
-        "tests/data/example.ann.transcripts.vcf.gz",
-        "tests/data/example.ann.vcf.gz",
-        "tests/data/example.dbnsfp.transcripts.vcf.gz",
-        "tests/data/example.dbnsfp.no_transcripts.vcf.gz",
+        f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
+        f"{tests_data_folder}/example.ann.vcf.gz",
+        f"{tests_data_folder}/example.dbnsfp.transcripts.vcf.gz",
+        f"{tests_data_folder}/example.dbnsfp.no_transcripts.vcf.gz",
     ],
 )
 def test_transcripts_prioritization(input_vcf):
@@ -309,10 +327,16 @@ def test_transcripts_prioritization(input_vcf):
                     {
                         "transcripts_column": "ANN",
                         "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
                     },
                     {
                         "transcripts_column": "ANN",
                         "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
                     },
                 ],
                 "from_columns_map": [
@@ -324,6 +348,9 @@ def test_transcripts_prioritization(input_vcf):
                             "LIST_S2_score",
                             "LIST_S2_pred",
                         ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": None,
                     },
                     {
                         "transcripts_column": "Ensembl_transcriptid",
@@ -332,13 +359,16 @@ def test_transcripts_prioritization(input_vcf):
                             "VARITY_R_score",
                             "Aloft_pred",
                         ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": None,
                     },
                 ],
             },
         }
         param_prioritization = {
             "profiles": ["transcripts"],
-            "prioritization_config": "config/prioritization_transcripts_profiles.json",
+            "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
             "pzprefix": "PZT",
             "prioritization_score_mode": "HOWARD",
         }
@@ -484,7 +514,7 @@ def test_transcripts_prioritization(input_vcf):
             f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
             {
                 "profiles": ["transcripts"],
-                "prioritization_config": "config/prioritization_transcripts_profiles.json",
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
                 "pzprefix": "PZT",
                 "prioritization_score_mode": "HOWARD",
             },
@@ -500,7 +530,7 @@ def test_transcripts_prioritization(input_vcf):
             f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
             {
                 "profiles": ["transcripts"],
-                "prioritization_config": "config/prioritization_transcripts_profiles.json",
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
                 "pzprefix": "PZT",
                 "pzfields": ["Score", "Flag"],
                 "prioritization_score_mode": "HOWARD",
@@ -518,7 +548,7 @@ def test_transcripts_prioritization(input_vcf):
             f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
             {
                 "profiles": ["transcripts"],
-                "prioritization_config": "config/prioritization_transcripts_profiles.json",
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
                 "pzprefix": "PZT",
                 "pzfields": ["Score", "Flag", "LIST_S2_score", "LIST_S2_pred"],
                 "prioritization_score_mode": "HOWARD",
@@ -538,7 +568,7 @@ def test_transcripts_prioritization(input_vcf):
             f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
             {
                 "profiles": ["transcripts"],
-                "prioritization_config": "config/prioritization_transcripts_profiles.json",
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
                 "pzprefix": "PZT",
                 "pzfields": [
                     "Score",
@@ -555,7 +585,7 @@ def test_transcripts_prioritization(input_vcf):
             f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
             {
                 "profiles": ["transcripts"],
-                "prioritization_config": "config/prioritization_transcripts_profiles.json",
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
                 "pzprefix": "PZT",
                 "prioritization_transcripts_order": {
                     "LIST_S2_score": "ASC",
@@ -574,7 +604,7 @@ def test_transcripts_prioritization(input_vcf):
             f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
             {
                 "profiles": ["transcripts"],
-                "prioritization_config": "config/prioritization_transcripts_profiles.json",
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
                 "pzprefix": "PZT",
                 "prioritization_transcripts_order": {
                     "CADD_raw": "ASC",
@@ -593,7 +623,7 @@ def test_transcripts_prioritization(input_vcf):
             f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
             {
                 "profiles": ["transcripts"],
-                "prioritization_config": "config/prioritization_transcripts_profiles.json",
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles.json",
                 "pzprefix": "PZT",
                 "prioritization_transcripts_order": {
                     "field_not_present_in_header": "ASC",
@@ -647,12 +677,491 @@ def test_transcripts_prioritization_multiple_param(
                     {
                         "transcripts_column": "ANN",
                         "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
+                    }
+                ],
+                "from_columns_map": [
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "Ensembl_geneid",
+                            "LIST_S2_score",
+                            "LIST_S2_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": None,
                     },
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "VARITY_R_score",
+                            "Aloft_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": None,
+                    },
+                ],
+            },
+        }
+
+        # Param without prioritization
+        param_without_prioritization = {"transcripts": dict(param_struct)}
+
+        # Param with prioritization
+        param_with_prioritization = {"transcripts": dict(param_struct)}
+        param_with_prioritization["transcripts"]["prioritization"] = dict(
+            param_prioritization
+        )
+
+        # Create object
+        variants = Variants(
+            conn=None, input=input_vcf, output=output_vcf, param=param, load=True
+        )
+
+        # Create transcript view
+        transcripts_table = variants.create_transcript_view(
+            param=param_without_prioritization
+        )
+
+        # Check table exists
+        assert transcripts_table is not None
+
+        # If Raise with Value Error
+        if raise_value_error:
+
+            # Catch ValueError
+            with pytest.raises(ValueError) as excinfo:
+
+                # Prioritization
+                variants.transcripts_prioritization(param=param_with_prioritization)
+
+            assert str(excinfo.value) == raise_value_error
+
+        # If expected results
+        if where_clause:
+
+            # Prioritization
+            assert variants.transcripts_prioritization(param=param_with_prioritization)
+
+            # Check transcript prioritization result
+            # Check table content
+            query_check = f"""
+                SELECT * FROM variants
+                WHERE {where_clause}
+            """
+            check = variants.get_query_to_df(query=query_check)
+            assert len(check) > 0
+
+            # Export
+            ########
+
+            # Check if VCF is in correct format with pyVCF
+            remove_if_exists([output_vcf])
+            variants.export_output(output_file=output_vcf)
+            try:
+                vcf.Reader(filename=output_vcf)
+            except:
+                assert False
+
+
+@pytest.mark.parametrize(
+    "struct, fields_list",
+    [
+        (  # By default, no rename, no clean, no case (except clean for snpEff because mandatory)
+            {
+                "from_column_format": [  # format List, e.g. snpEff
+                    {
+                        "transcripts_column": "ANN",
+                        "transcripts_infos_column": "Feature_ID",
+                        "column_clean": True,
+                    }
+                ],
+                "from_columns_map": [  # format List, e.g. dbNSFP columns
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "Ensembl_geneid",
+                            "LIST_S2_score",
+                            "LIST_S2_pred",
+                        ],
+                    },
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "VARITY_R_score",
+                            "Aloft_pred",
+                        ],
+                    },
+                ],
+            },
+            [
+                "FeatureID",
+                "Ensembl_geneid",
+                "LIST_S2_score",
+                "LIST_S2_pred",
+                "VARITY_R_score",
+                "Aloft_pred",
+            ],
+        ),
+        (  # No rename, no clean, nor case (except clean for snpEff because mandatory)
+            {
+                "from_column_format": [  # format List, e.g. snpEff
+                    {
+                        "transcripts_column": "ANN",
+                        "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
+                    }
+                ],
+                "from_columns_map": [  # format List, e.g. dbNSFP columns
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "Ensembl_geneid",
+                            "LIST_S2_score",
+                            "LIST_S2_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": None,
+                    },
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "VARITY_R_score",
+                            "Aloft_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": None,
+                    },
+                ],
+            },
+            [
+                "FeatureID",
+                "Ensembl_geneid",
+                "LIST_S2_score",
+                "LIST_S2_pred",
+                "VARITY_R_score",
+                "Aloft_pred",
+            ],
+        ),
+        (  # No rename, clean all and nocase (except clean for snpEff because mandatory)
+            {
+                "from_column_format": [  # format List, e.g. snpEff
                     {
                         "transcripts_column": "ANN",
                         "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
+                    }
+                ],
+                "from_columns_map": [  # format List, e.g. dbNSFP columns
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "Ensembl_geneid",
+                            "LIST_S2_score",
+                            "LIST_S2_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
+                    },
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "VARITY_R_score",
+                            "Aloft_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
                     },
                 ],
+            },
+            [
+                "FeatureID",
+                "Ensemblgeneid",
+                "LISTS2score",
+                "LISTS2pred",
+                "VARITYRscore",
+                "Aloftpred",
+            ],
+        ),
+        (  # No rename, no clean, and case all (except clean for snpEff because mandatory)
+            {
+                "from_column_format": [  # format List, e.g. snpEff
+                    {
+                        "transcripts_column": "ANN",
+                        "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": "lower",
+                    }
+                ],
+                "from_columns_map": [  # format List, e.g. dbNSFP columns
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "Ensembl_geneid",
+                            "LIST_S2_score",
+                            "LIST_S2_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": "lower",
+                    },
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "VARITY_R_score",
+                            "Aloft_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": False,
+                        "column_case": "lower",
+                    },
+                ],
+            },
+            [
+                "featureid",
+                "ensembl_geneid",
+                "list_s2_score",
+                "list_s2_pred",
+                "varity_r_score",
+                "aloft_pred",
+            ],
+        ),
+        (  # No rename, clean all and case all (except clean for snpEff because mandatory)
+            {
+                "from_column_format": [  # format List, e.g. snpEff
+                    {
+                        "transcripts_column": "ANN",
+                        "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": "lower",
+                    }
+                ],
+                "from_columns_map": [  # format List, e.g. dbNSFP columns
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "Ensembl_geneid",
+                            "LIST_S2_score",
+                            "LIST_S2_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": "lower",
+                    },
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "VARITY_R_score",
+                            "Aloft_pred",
+                        ],
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": "lower",
+                    },
+                ],
+            },
+            [
+                "featureid",
+                "ensemblgeneid",
+                "lists2score",
+                "lists2pred",
+                "varityrscore",
+                "aloftpred",
+            ],
+        ),
+        (  # Rename "genename" columns to merge, transcript ANN id, extra columns on struct_map
+            {
+                "from_column_format": [  # format List, e.g. snpEff
+                    {
+                        "transcripts_column": "ANN",
+                        "transcripts_infos_column": "Feature_ID",
+                        "column_rename": {
+                            "Gene_Name": "genename",
+                            "Feature_ID": "THETRANSCRIPTOFSNPEFF",
+                        },
+                        "column_clean": True,
+                        "column_case": None,
+                    }
+                ],
+                "from_columns_map": [  # format List, e.g. dbNSFP columns
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "Ensembl_geneid",
+                            "LIST_S2_score",
+                            "LIST_S2_pred",
+                        ],
+                        "column_clean": False,
+                        "column_case": None,
+                        "column_rename": {
+                            "LIST_S2_score": "LISTScore",
+                            "LIST_S2_pred": "LISTPred",
+                        },
+                    },
+                    {
+                        "transcripts_column": "Ensembl_transcriptid",
+                        "transcripts_infos_columns": [
+                            "genename",
+                            "VARITY_R_score",
+                            "Aloft_pred",
+                        ],
+                        "column_clean": False,
+                        "column_case": None,
+                    },
+                ],
+            },
+            [
+                "genename",
+                "THETRANSCRIPTOFSNPEFF",
+                "LISTScore",
+                "LISTPred",
+                "VARITY_R_score",
+                "Aloft_pred",
+            ],
+        ),
+    ],
+)
+def test_create_transcript_view_rename_clean_case(struct, fields_list):
+    """
+    Check that the transcript view exposes the column names expected for a given `struct`.
+
+    :param struct: value of `param["transcripts"]["struct"]` used to build the transcript view
+    :type struct: dict
+    :param fields_list: column names expected in the view (duplicates are counted once)
+    :type fields_list: list
+    """
+
+    with TemporaryDirectory(dir=tests_folder) as tmp_dir:
+
+        # Init files
+        input_vcf = f"{tests_data_folder}/example.ann.transcripts.vcf.gz"
+        output_vcf = f"{tmp_dir}/output.vcf"
+
+        # Construct param dict
+        param = {"transcripts": {"table": "transcripts", "struct": struct}}
+
+        # Create object
+        variants = Variants(
+            conn=None, input=input_vcf, output=output_vcf, param=param, load=True
+        )
+
+        # Create transcript view
+        transcripts_table = variants.create_transcript_view()
+
+        # Check table exists
+        assert transcripts_table is not None
+
+        # Keep only the view columns whose name is one of the expected fields
+        query_check = f"""
+            SELECT column_name
+            FROM (
+                DESCRIBE SELECT * FROM {transcripts_table}
+            )
+            WHERE column_name in ('{"', '".join(fields_list)}')
+        """
+        check = variants.get_query_to_df(query=query_check)
+
+        assert len(check) == len(set(fields_list))
+
+
+@pytest.mark.parametrize(
+    "input_vcf, param_prioritization, where_clause, raise_value_error",
+    [
+        (  # Add PZfields plus
+            f"{tests_data_folder}/example.ann.transcripts.vcf.gz",
+            {
+                "profiles": ["transcripts"],
+                "prioritization_config": f"{tests_data_folder}/prioritization_transcripts_profiles_fields_renamed.json",
+                "pzprefix": "PZT",
+                "pzfields": ["Score", "Flag", "LISTScore", "LISTPred"],
+                "prioritization_score_mode": "HOWARD",
+            },
+            """
+                "#CHROM" = 'chr1'
+                AND POS = 69101
+                AND contains(INFO, 'PZTTranscript=ENST00000641515')
+                AND contains(INFO, 'PZTScore')
+                AND contains(INFO, 'PZTFlag')
+                AND contains(INFO, 'PZTLISTScore')
+                AND contains(INFO, 'PZTLISTPred')
+            """,
+            None,
+        ),
+    ],
+)
+def test_transcripts_prioritization_multiple_param_fields_renamed(
+    input_vcf, param_prioritization, where_clause, raise_value_error
+):
+    """
+    The `test_transcripts_prioritization_multiple_param_fields_renamed` function tests transcript
+    prioritization in a genetic variant analysis pipeline when annotation fields have been renamed.
+
+    :param input_vcf: It seems like the `input_vcf` parameter is the path or reference to the VCF
+    (Variant Call Format) file that contains genetic variant data. This file is likely used as input for
+    the genetic variant analysis pipeline where the transcript prioritization functionality is being
+    tested
+    :param param_prioritization: The `param_prioritization` parameter is a dictionary that contains
+    information about the prioritization configuration for transcripts in a genetic variant analysis
+    pipeline. It includes details such as profiles, prioritization configuration file path, prefix, and
+    score mode. This parameter is used to customize how transcripts are prioritized during the
+    :param where_clause: The `where_clause` parameter in the `test_transcripts_prioritization` function
+    is a SQL WHERE clause that is used to filter the results of a query. It specifies a condition that
+    must be met for a row to be included in the result set
+    :param raise_value_error: The `raise_value_error` parameter in the `test_transcripts_prioritization`
+    function is a boolean flag that determines whether the test should raise a `ValueError` and check if
+    the raised error message matches a specific value. If `raise_value_error` is `True`, the test will
+    raise
+    """
+
+    with TemporaryDirectory(dir=tests_folder) as tmp_dir:
+
+        # Init files
+        output_vcf = f"{tmp_dir}/output.vcf"
+
+        # Construct param dict
+        param = {}
+        param_struct = {
+            "table": "transcripts",
+            "column_id": "transcript",
+            "transcripts_info_json": "transcripts_json",
+            "transcripts_info_field": "transcripts_json",
+            "struct": {
+                "from_column_format": [
+                    {
+                        "transcripts_column": "ANN",
+                        "transcripts_infos_column": "Feature_ID",
+                        "column_rename": None,
+                        "column_clean": True,
+                        "column_case": None,
+                    }
+                ],
                 "from_columns_map": [
                     {
                         "transcripts_column": "Ensembl_transcriptid",
@@ -662,6 +1171,12 @@ def test_transcripts_prioritization_multiple_param(
                             "LIST_S2_score",
                             "LIST_S2_pred",
                         ],
+                        "column_rename": {
+                            "LIST_S2_score": "LISTScore",
+                            "LIST_S2_pred": "LISTPred",
+                        },
+                        "column_clean": False,
+                        "column_case": None,
                     },
                     {
                         "transcripts_column": "Ensembl_transcriptid",
@@ -670,6 +1185,8 @@ def test_transcripts_prioritization_multiple_param(
                             "VARITY_R_score",
                             "Aloft_pred",
                         ],
+                        "column_clean": False,
+                        "column_case": None,
                     },
                 ],
             },