diff --git a/pyxform/aliases.py b/pyxform/aliases.py index 17cfa5c6..7497c720 100644 --- a/pyxform/aliases.py +++ b/pyxform/aliases.py @@ -44,77 +44,57 @@ } settings_header = { "form_title": constants.TITLE, - "set form title": constants.TITLE, + "set_form_title": constants.TITLE, "form_id": constants.ID_STRING, - "sms_keyword": constants.SMS_KEYWORD, - "sms_separator": constants.SMS_SEPARATOR, - "sms_allow_media": constants.SMS_ALLOW_MEDIA, - "sms_date_format": constants.SMS_DATE_FORMAT, - "sms_datetime_format": constants.SMS_DATETIME_FORMAT, + "set_form_id": constants.ID_STRING, "prefix": constants.COMPACT_PREFIX, - "delimiter": constants.COMPACT_DELIMITER, - "set form id": constants.ID_STRING, - "public_key": constants.PUBLIC_KEY, - "submission_url": constants.SUBMISSION_URL, - "auto_send": constants.AUTO_SEND, - "auto_delete": constants.AUTO_DELETE, - "allow_choice_duplicates": constants.ALLOW_CHOICE_DUPLICATES, } # TODO: Check on bind prefix approach in json. # Conversion dictionary from user friendly column names to meaningful values survey_header = { - "Label": "label", - "Name": "name", - "SMS Field": constants.SMS_FIELD, - "SMS Option": constants.SMS_OPTION, - "SMS Separator": constants.SMS_SEPARATOR, - "SMS Allow Media": constants.SMS_ALLOW_MEDIA, - "SMS Date Format": constants.SMS_DATE_FORMAT, - "SMS DateTime Format": constants.SMS_DATETIME_FORMAT, - "SMS Response": constants.SMS_RESPONSE, - "compact_tag": "instance::odk:tag", # used for compact representation - "Type": "type", - "List_name": constants.LIST_NAME_U, - # u"repeat_count": u"jr:count", duplicate key - "read_only": "bind::readonly", - "readonly": "bind::readonly", - "relevant": "bind::relevant", + "sms_field": constants.SMS_FIELD, + "sms_option": constants.SMS_OPTION, + "sms_separator": constants.SMS_SEPARATOR, + "sms_allow_media": constants.SMS_ALLOW_MEDIA, + "sms_date_format": constants.SMS_DATE_FORMAT, + "sms_datetime_format": constants.SMS_DATETIME_FORMAT, + "sms_response": constants.SMS_RESPONSE, + "compact_tag": ("instance", "odk:tag"), # used for compact representation + "read_only": ("bind", "readonly"), + "readonly": ("bind", "readonly"), + "relevant": ("bind", "relevant"), "caption": constants.LABEL, - "appearance": "control::appearance", # TODO: this is also an issue - "relevance": "bind::relevant", - "required": "bind::required", - "constraint": "bind::constraint", - "constraining message": "bind::jr:constraintMsg", - "constraint message": "bind::jr:constraintMsg", - "constraint_message": "bind::jr:constraintMsg", - "calculation": "bind::calculate", - "calculate": "bind::calculate", + "appearance": ("control", "appearance"), + "relevance": ("bind", "relevant"), + "required": ("bind", "required"), + "constraint": ("bind", "constraint"), + "constraining_message": ("bind", "jr:constraintMsg"), + "constraint_message": ("bind", "jr:constraintMsg"), + "calculation": ("bind", "calculate"), + "calculate": ("bind", "calculate"), "command": constants.TYPE, "tag": constants.NAME, "value": constants.NAME, - "image": "media::image", - "big-image": "media::big-image", - "audio": "media::audio", - "video": "media::video", - "count": "control::jr:count", - "repeat_count": "control::jr:count", - "jr:count": "control::jr:count", - "autoplay": "control::autoplay", - "rows": "control::rows", + "image": ("media", "image"), + "big-image": ("media", "big-image"), + "audio": ("media", "audio"), + "video": ("media", "video"), + "count": ("control", "jr:count"), + "repeat_count": ("control", "jr:count"), + "jr:count": ("control", 
"jr:count"), + "autoplay": ("control", "autoplay"), + "rows": ("control", "rows"), # New elements that have to go into itext elements: - "noAppErrorString": "bind::jr:noAppErrorString", - "no_app_error_string": "bind::jr:noAppErrorString", - "requiredMsg": "bind::jr:requiredMsg", - "required_message": "bind::jr:requiredMsg", - "required message": "bind::jr:requiredMsg", + "noapperrorstring": ("bind", "jr:noAppErrorString"), + "no_app_error_string": ("bind", "jr:noAppErrorString"), + "requiredmsg": ("bind", "jr:requiredMsg"), + "required_message": ("bind", "jr:requiredMsg"), "body": "control", - "parameters": "parameters", - constants.ENTITIES_SAVETO: "bind::entities:saveto", + constants.ENTITIES_SAVETO: ("bind", "entities:saveto"), } entities_header = {constants.LIST_NAME_U: "dataset"} -# Key is the pyxform internal name, Value is the name used in error/warning messages. TRANSLATABLE_SURVEY_COLUMNS = { constants.LABEL: constants.LABEL, # Per ODK Spec, could include "short" once pyxform supports it. @@ -129,19 +109,19 @@ } TRANSLATABLE_CHOICES_COLUMNS = { "label": constants.LABEL, - "image": "media::image", - "big-image": "media::big-image", - "audio": "media::audio", - "video": "media::video", + "image": survey_header["image"], + "big-image": survey_header["big-image"], + "audio": survey_header["audio"], + "video": survey_header["video"], } list_header = { "caption": constants.LABEL, constants.LIST_NAME_U: constants.LIST_NAME_S, "value": constants.NAME, - "image": "media::image", - "big-image": "media::big-image", - "audio": "media::audio", - "video": "media::video", + "image": survey_header["image"], + "big-image": survey_header["big-image"], + "audio": survey_header["audio"], + "video": survey_header["video"], } # Note that most of the type aliasing happens in all.xls _type_alias_map = { diff --git a/pyxform/builder.py b/pyxform/builder.py index 86675f14..2b699de9 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -146,16 +146,20 @@ def _create_question_from_dict( ) if question_class: - if const.CHOICES in d and choices: - return question_class( - question_type_dictionary=question_type_dictionary, - choices=choices.get(d[const.ITEMSET], d[const.CHOICES]), - **{k: v for k, v in d.items() if k != const.CHOICES}, - ) - else: - return question_class( - question_type_dictionary=question_type_dictionary, **d - ) + if choices: + d_choices = d.get(const.CHOICES, d.get(const.CHILDREN)) + if d_choices: + return question_class( + question_type_dictionary=question_type_dictionary, + **{ + k: v + for k, v in d.items() + if k not in {const.CHOICES, const.CHILDREN} + }, + choices=choices.get(d[const.ITEMSET], d_choices), + ) + + return question_class(question_type_dictionary=question_type_dictionary, **d) return () @@ -259,16 +263,16 @@ def _name_and_label_substitutions(question_template, column_headers): const.NAME: column_headers[const.NAME], const.LABEL: column_headers[const.LABEL][lang], } - for lang in column_headers[const.LABEL].keys() + for lang in column_headers[const.LABEL] } result = question_template.copy() - for key in result.keys(): + for key in result: if isinstance(result[key], str): result[key] %= column_headers elif isinstance(result[key], dict): result[key] = result[key].copy() - for key2 in result[key].keys(): + for key2 in result[key]: if info_by_lang and isinstance(column_headers[const.LABEL], dict): result[key][key2] %= info_by_lang.get(key2, column_headers) else: diff --git a/pyxform/entities/entities_parsing.py b/pyxform/entities/entities_parsing.py index 
39f06dce..f67263e1 100644 --- a/pyxform/entities/entities_parsing.py +++ b/pyxform/entities/entities_parsing.py @@ -1,22 +1,17 @@ +from collections.abc import Sequence from typing import Any from pyxform import constants as const from pyxform.errors import PyXFormError from pyxform.parsing.expression import is_xml_tag -from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings EC = const.EntityColumns def get_entity_declaration( -    entities_sheet: list[dict], workbook_dict: dict[str, list[dict]], warnings: list[str] +    entities_sheet: Sequence[dict], ) -> dict[str, Any]: -    if len(entities_sheet) == 0: -        similar = find_sheet_misspellings(key=const.ENTITIES, keys=workbook_dict.keys()) -        if similar is not None: -            warnings.append(similar + const._MSG_SUPPRESS_SPELLING) -        return {} -    elif len(entities_sheet) > 1: +    if len(entities_sheet) > 1: raise PyXFormError( "Currently, you can only declare a single entity per form. Please make sure your entities sheet only declares one entity." ) @@ -49,11 +44,11 @@ def get_entity_declaration( const.NAME: const.ENTITY, const.TYPE: const.ENTITY, const.PARAMETERS: { -            EC.DATASET: dataset_name, -            EC.ENTITY_ID: entity_id, -            EC.CREATE_IF: create_condition, -            EC.UPDATE_IF: update_condition, -            EC.LABEL: entity_label, +            EC.DATASET.value: dataset_name, +            EC.ENTITY_ID.value: entity_id, +            EC.CREATE_IF.value: create_condition, +            EC.UPDATE_IF.value: update_condition, +            EC.LABEL.value: entity_label, }, } @@ -83,13 +78,16 @@ def get_validated_dataset_name(entity): def validate_entity_saveto( -    row: dict, row_number: int, entity_declaration: dict[str, Any], in_repeat: bool +    row: dict, +    row_number: int, +    in_repeat: bool, +    entity_declaration: dict[str, Any] | None = None, ): save_to = row.get(const.BIND, {}).get("entities:saveto", "") if not save_to: return -    if len(entity_declaration) == 0: +    if not entity_declaration: raise PyXFormError( "To save entity properties using the save_to column, you must add an entities sheet and declare an entity." ) @@ -126,9 +124,9 @@ def validate_entity_saveto( def validate_entities_columns(row: dict): -    extra = {k: None for k in row.keys() if k not in EC.value_list()} +    extra = {k: None for k in row if k not in EC.value_list()} if 0 < len(extra): -        fmt_extra = ", ".join(f"'{k}'" for k in extra.keys()) +        fmt_extra = ", ".join(f"'{k}'" for k in extra) msg = ( f"The entities sheet included the following unexpected column(s): {fmt_extra}. " f"These columns are not supported by this version of pyxform. Please either: " diff --git a/pyxform/parsing/sheet_headers.py b/pyxform/parsing/sheet_headers.py new file mode 100644 index 00000000..9c69e242 --- /dev/null +++ b/pyxform/parsing/sheet_headers.py @@ -0,0 +1,262 @@ +from collections.abc import Container, Sequence +from itertools import chain, islice +from typing import Any + +from pyxform import constants +from pyxform.errors import PyXFormError + +INVALID_HEADER = ( +    "Invalid headers provided for sheet: '{sheet_name}'. For XLSForms, this may be due to " +    "a missing header row, in which case add a header row as per the reference template " +    "https://xlsform.org/en/ref-table/. For internal API usage, may be due to a missing " +    "mapping for '{header}', in which case ensure that the full set of headers appear " +    "within the first 100 rows, or specify the header row in '{sheet_name}_header'." +) +INVALID_DUPLICATE = ( +    "Invalid headers provided for sheet: '{sheet_name}'. Headers that are different " +    "names for the same column were found: '{other}', '{header}'. 
Rename or remove one " +    "of these columns." +) +INVALID_MISSING_REQUIRED = ( +    "Invalid headers provided for sheet: '{sheet_name}'. One or more required column " +    "headers were not found: {missing}. " +    "Learn more: https://xlsform.org/en/#setting-up-your-worksheets" +) + + +def merge_dicts( +    dict_a: dict, dict_b: dict, default_key: str = constants.DEFAULT_LANGUAGE_VALUE +) -> dict: +    """ +    Recursively merge two nested dicts into a single dict. + +    When keys match, their values are merged using a recursive call to this function, +    otherwise they are just added to the output dict. +    """ +    if not dict_a: +        return dict_b +    if not dict_b: +        return dict_a + +    if not isinstance(dict_a, dict): +        if default_key in dict_b: +            return dict_b +        dict_a = {default_key: dict_a} +    if not isinstance(dict_b, dict): +        if default_key in dict_a: +            return dict_a +        dict_b = {default_key: dict_b} + +    # Union keys but retain order (as opposed to set()), preferencing dict_a then dict_b. +    # E.g. {"a": 1, "b": 2} + {"c": 3, "a": 4} -> {"a": None, "b": None, "c": None} +    out_dict = dict_a +    for key in {k: None for k in (chain(dict_a, dict_b))}: +        out_dict[key] = merge_dicts(dict_a.get(key), dict_b.get(key), default_key) +    return out_dict + + +def list_to_nested_dict(lst: Sequence) -> dict: +    """ +    [1,2,3,4] -> {1:{2:{3:4}}} +    """ +    if len(lst) > 1: +        return {lst[0]: list_to_nested_dict(lst[1:])} +    else: +        return lst[0] + + +class DealiasAndGroupHeadersResult: +    __slots__ = ("headers", "data") + +    def __init__(self, headers: tuple[tuple[str, ...], ...], data: Sequence[dict]): +        """ +        :param headers: Distinct headers seen in the sheet, parsed / split if applicable. +        :param data: Sheet data rows, in grouped dict format. +        """ +        self.headers: tuple[tuple[str, ...], ...] = headers +        self.data: Sequence[dict] = data + + +def to_snake_case(value: str) -> str: +    """ +    Convert a name (e.g. column name or question type) to snake case. + +    Removes duplicate, leading, trailing spaces. +    """ +    return "_".join(value.split()).lower() + + +def process_header( +    header: str, +    use_double_colon: bool, +    header_aliases: dict[str, str | tuple[str, ...]], +    header_columns: Container[str], +) -> tuple[str, tuple[str, ...]]: +    """ +    Look up the header in the provided expected columns or aliases, or split the header. + +    :param header: Original XLSForm data header. +    :param use_double_colon: If True, split the header on "::" rather than ":" (deprecated). +    :param header_aliases: Mapping of original headers to aliased (possibly split) headers. +    :param header_columns: The expected headers for the sheet. +    :return: e.g. tuple[original, tuple[new,]] | tuple[original, tuple[new1, new2]] +    """ +    # If the header is already recognised then nothing further needed. +    if header in header_columns and header not in header_aliases: +        return header, (header,) + +    # Also try normalising to snake_case. +    header_normalised = to_snake_case(value=header) +    if header_normalised in header_columns and header_normalised not in header_aliases: +        return header_normalised, (header_normalised,) + +    # Check for double colons to determine whether to use them or single colons to +    # delimit grouped headers. Single colons are bad because they conflict with the +    # xform namespace syntax (i.e. jr:constraintMsg), so they are only used if necessary +    # for backwards compatibility. 
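For review, a rough sketch of the splitting and dealiasing behaviour implemented below, using a trimmed-down alias map and column set (illustrative values only; the real mappings come from aliases.py and the calling sheet definition, and these lines are an annotation, not part of the patch):

    # Hypothetical, minimal inputs for illustration only.
    example_aliases = {"required_message": ("bind", "jr:requiredMsg")}
    example_columns = {"type", "name", "label", "bind"}

    # A recognised column passes through untouched.
    process_header("label", False, example_aliases, example_columns)
    # -> ("label", ("label",))

    # A translated column is split on "::" into (column, language); only the first
    # token is snake_cased, so the language part keeps its original spelling.
    process_header("label::English (en)", True, example_aliases, example_columns)
    # -> ("label", ("label", "English (en)"))

    # An aliased column such as "Required Message" normalises to "required_message"
    # and then dealiases to the grouped tokens ("bind", "jr:requiredMsg").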
+ group_delimiter = "::" + if use_double_colon or group_delimiter in header: + tokens = tuple(t.strip() for t in header.split(group_delimiter)) + else: + tokens = tuple(t.strip() for t in header.split(":")) + # Handle "jr:count" or similar when used with single colon delimiters. + if "jr" in tokens: + jr_idx = tokens.index("jr") + tokens = ( + *tokens[0:jr_idx], + f"jr:{tokens[jr_idx + 1]}", + *tokens[jr_idx + 2 :], + ) + + new_header = to_snake_case(tokens[0]) + dealiased_first_token = header_aliases.get(new_header) + if dealiased_first_token: + new_header = dealiased_first_token + if isinstance(new_header, tuple): + tokens = (*new_header, *tokens[1:]) + else: + tokens = (new_header, *tokens[1:]) + elif new_header in header_columns: + tokens = (new_header, *tokens[1:]) + # Avoid changing unknown columns, since it could break choice_filter expressions. + else: + new_header = header + tokens = tuple(tokens) + return new_header, tokens + + +def process_row( + sheet_name: str, + row: dict[str, str], + header_key: dict[str, tuple[str, ...]], + default_language: str = constants.DEFAULT_LANGUAGE_VALUE, +) -> dict[str, str]: + """ + Convert original headers and values to a possibly nested structure. + + :param sheet_name: Name of the sheet data being processed. + :param row: Original XLSForm data row. + :param header_key: Mapping from original headers to headers split on a delimiter. + :param default_language: Default translation language for the form, used to group + used to group labels/hints/etc without a language specified with localized versions. + """ + out_row = {} + for header, val in row.items(): + tokens = header_key.get(header, None) + if header == "__row": + out_row[header] = val + elif not tokens: + raise PyXFormError( + INVALID_HEADER.format(sheet_name=sheet_name, header=header) + ) + elif len(tokens) == 1: + out_row[tokens[0]] = val + else: + new_value = list_to_nested_dict((*tokens[1:], val)) + out_row = merge_dicts(out_row, {tokens[0]: new_value}, default_language) + + return out_row + + +def dealias_and_group_headers( + sheet_name: str, + sheet_data: Sequence[dict[str, str]], + sheet_header: Sequence[dict[str, Any]], + header_aliases: dict[str, str], + header_columns: set[str], + headers_required: set[str] | None = None, + default_language: str = constants.DEFAULT_LANGUAGE_VALUE, +) -> DealiasAndGroupHeadersResult: + """ + Normalise headers and group keys that contain a delimiter. + + For example a row: + {"text::english": "hello", "text::french" : "bonjour"} + Becomes + {"text": {"english": "hello", "french" : "bonjour"}. + + Dealiasing is done to the first token (the first term separated by the delimiter). + + :param sheet_name: Name of the sheet data being processed. + :param sheet_data: The sheet data. + :param sheet_header: The sheet column names (headers). + :param header_aliases: Mapping of allowed column aliases (backwards compatibility). + :param header_columns: Expected columns for the sheet. + :param headers_required: Required columns for the sheet. + :param default_language: Default translation language for the form, used to group + used to group labels/hints/etc without a language specified with localized versions. + """ + + header_key: dict[str, tuple[str, ...]] = {} + tokens_key: dict[tuple[str, ...], str] = {} + + # If not specified, try to guess the headers from the first 100 rows of data. + # Should only happen if the XLSForm is provided as a dict with no "_headers" keys. 
+ if not sheet_header and sheet_data: + sheet_header = {} + for row in islice(sheet_data, 0, 100): + for k in row: + sheet_header[k] = None + sheet_header = [sheet_header] + + if sheet_header: + use_double_colon = any("::" in k for k in sheet_header[0]) + for header in sheet_header[0]: + tokens = header_key.get(header, None) + if tokens is None: + new_header, tokens = process_header( + header=header, + use_double_colon=use_double_colon, + header_aliases=header_aliases, + header_columns=header_columns, + ) + other_header = tokens_key.get(tokens) + if other_header and new_header != header: + raise PyXFormError( + INVALID_DUPLICATE.format( + sheet_name=sheet_name, + other=other_header, + header=header, + ) + ) + header_key[header] = tokens + tokens_key[tokens] = header + + data = tuple( + process_row( + sheet_name=sheet_name, + row=row, + header_key=header_key, + default_language=default_language, + ) + for row in sheet_data + ) + if headers_required and (data or sheet_name == constants.SURVEY): + missing = {h for h in headers_required if h not in {h[0] for h in tokens_key}} + if missing: + raise PyXFormError( + INVALID_MISSING_REQUIRED.format( + sheet_name=sheet_name, missing=", ".join(f"'{h}'" for h in missing) + ) + ) + return DealiasAndGroupHeadersResult(headers=tuple(tokens_key), data=data) diff --git a/pyxform/question.py b/pyxform/question.py index 4c7f2329..6ebac4c4 100644 --- a/pyxform/question.py +++ b/pyxform/question.py @@ -3,6 +3,7 @@ """ import os.path +import re from collections.abc import Callable, Generator, Iterable from itertools import chain from typing import TYPE_CHECKING @@ -16,12 +17,12 @@ EXTERNAL_INSTANCE_EXTENSIONS, ) from pyxform.errors import PyXFormError +from pyxform.parsing.expression import RE_ANY_PYXFORM_REF from pyxform.question_type_dictionary import QUESTION_TYPE_DICT from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( PYXFORM_REFERENCE_REGEX, DetachableElement, - coalesce, combine_lists, default_is_dynamic, node, @@ -32,9 +33,6 @@ QUESTION_EXTRA_FIELDS = ( - "_itemset_dyn_label", - "_itemset_has_media", - "_itemset_multi_language", "_qtd_defaults", "_qtd_kwargs", "action", @@ -55,7 +53,7 @@ QUESTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *QUESTION_EXTRA_FIELDS) SELECT_QUESTION_EXTRA_FIELDS = ( - constants.CHILDREN, + constants.CHOICES, constants.ITEMSET, constants.LIST_NAME_U, ) @@ -65,15 +63,12 @@ OSM_QUESTION_FIELDS = (*QUESTION_FIELDS, *SELECT_QUESTION_EXTRA_FIELDS) OPTION_EXTRA_FIELDS = ( - "_choice_itext_id", + "_choice_itext_ref", constants.MEDIA, "sms_option", ) OPTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *OPTION_EXTRA_FIELDS) -TAG_EXTRA_FIELDS = (constants.CHILDREN,) -TAG_FIELDS = (*SURVEY_ELEMENT_FIELDS, *TAG_EXTRA_FIELDS) - class Question(SurveyElement): __slots__ = QUESTION_EXTRA_FIELDS @@ -110,8 +105,7 @@ def __init__(self, fields: tuple[str, ...] | None = None, **kwargs): qtd = kwargs.pop("question_type_dictionary", QUESTION_TYPE_DICT) type_arg = kwargs.get("type") - default_type = qtd.get(type_arg) - if default_type is None: + if type_arg not in qtd: raise PyXFormError(f"Unknown question type '{type_arg}'.") # Keeping original qtd_kwargs is only needed if output of QTD data is not @@ -139,25 +133,16 @@ def __init__(self, fields: tuple[str, ...] | None = None, **kwargs): fields = chain(QUESTION_EXTRA_FIELDS, fields) super().__init__(fields=fields, **kwargs) - def validate(self): - SurveyElement.validate(self) - - # make sure that the type of this question exists in the - # question type dictionary. 
- if self.type not in QUESTION_TYPE_DICT: - raise PyXFormError(f"Unknown question type '{self.type}'.") - def xml_instance(self, survey: "Survey", **kwargs): - attributes = self.instance - if attributes is None: - attributes = {} - else: - for key, value in attributes.items(): - attributes[key] = survey.insert_xpaths(value, self) - if self.default and not default_is_dynamic(self.default, self.type): - return node(self.name, str(self.default), **attributes) - return node(self.name, **attributes) + result = node(self.name, str(self.default)) + else: + result = node(self.name) + attributes = self.instance + if attributes: + for k, v in attributes.items(): + result.setAttribute(k, survey.insert_xpaths(v, self)) + return result def xml_control(self, survey: "Survey"): if self.type == "calculate" or ( @@ -198,18 +183,16 @@ def xml_control(self, survey: "Survey"): return xml_node - def xml_action(self): + def xml_action(self) -> DetachableElement | None: """ Return the action for this survey element. """ if self.action: - return node( - self.action["name"], - ref=self.get_xpath(), - **{k: v for k, v in self.action.items() if k != "name"}, - ) - - return None + result = node(self.action["name"], ref=self.get_xpath()) + for k, v in self.action.items(): + if k != "name": + result.setAttribute(k, v) + return result def nest_set_nodes(self, survey, xml_node, tag, nested_items): for item in nested_items: @@ -222,6 +205,24 @@ def nest_set_nodes(self, survey, xml_node, tag, nested_items): set_node = node(tag, **node_attrs) xml_node.appendChild(set_node) + def _build_xml(self, survey: "Survey") -> DetachableElement | None: + """ + Initial control node result for further processing depending on Question type. + """ + control_dict = self.control + result = node( + control_dict["tag"], + *self.xml_label_and_hint(survey=survey), + ref=self.get_xpath(), + ) + # Resolve field references in attributes + for k, v in control_dict.items(): + # "tag" is from the question type dict so it can't include references. Also, + # if it did include references, then the node element name would be invalid. + if k != "tag": + result.setAttribute(k, survey.insert_xpaths(v, self)) + return result + def build_xml(self, survey: "Survey") -> DetachableElement | None: return None @@ -246,23 +247,12 @@ class InputQuestion(Question): """ def build_xml(self, survey: "Survey"): - control_dict = self.control - label_and_hint = self.xml_label_and_hint(survey=survey) - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() - - result = node(**control_dict) - if label_and_hint: - for element in self.xml_label_and_hint(survey=survey): - if element: - result.appendChild(element) + result = self._build_xml(survey=survey) # Input types are used for selects with external choices sheets. 
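As a reviewer aid, roughly what the shared `_build_xml` helper above produces for a simple question (the question name and label are illustrative; the tag and any extra attributes come from the question type dictionary's `control` entry):

    from pyxform.utils import node

    # A text question named "age" with label "How old are you?" yields approximately:
    #   <input ref="/data/age"><label>How old are you?</label></input>
    # which could be sketched by hand as:
    sketch = node("input", node("label", "How old are you?"), ref="/data/age")
    # Any ${field} references in the remaining control attributes (appearance,
    # jr:count, ...) are resolved via survey.insert_xpaths() before being set.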
if self.query: choice_filter = self.choice_filter - if choice_filter is not None: + if choice_filter: pred = survey.insert_xpaths(choice_filter, self, True) query = f"""instance('{self.query}')/root/item[{pred}]""" else: @@ -273,26 +263,12 @@ def build_xml(self, survey: "Survey"): class TriggerQuestion(Question): def build_xml(self, survey: "Survey"): - control_dict = self.control - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() - return node("trigger", *self.xml_label_and_hint(survey=survey), **control_dict) + return self._build_xml(survey=survey) class UploadQuestion(Question): - def _get_media_type(self): - return self.control["mediatype"] - def build_xml(self, survey: "Survey"): - control_dict = self.control - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() - control_dict["mediatype"] = self._get_media_type() - return node("upload", *self.xml_label_and_hint(survey=survey), **control_dict) + return self._build_xml(survey=survey) class Option(SurveyElement): @@ -310,33 +286,18 @@ def __init__( sms_option: str | None = None, **kwargs, ): - self._choice_itext_id: str | None = None + self._choice_itext_ref: str | None = None self.media: dict | None = media self.sms_option: str | None = sms_option super().__init__(name=name, label=label, **kwargs) - def xml_value(self): - return node("value", self.name) - - def xml(self, survey: "Survey"): - item = node("item") - item.appendChild(self.xml_label(survey=survey)) - item.appendChild(self.xml_value()) - - return item - def validate(self): pass def xml_control(self, survey: "Survey"): raise NotImplementedError() - def _translation_path(self, display_element): - if self._choice_itext_id is not None: - return self._choice_itext_id - return super()._translation_path(display_element=display_element) - def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: to_delete = (k for k in self.get_slot_names() if k.startswith("_")) if delete_keys is not None: @@ -344,6 +305,41 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: return super().to_json_dict(delete_keys=to_delete) +class Itemset: + """Itemset details and metadata detection.""" + + __slots__ = ("name", "options", "requires_itext", "used_by_search") + + def __init__(self, name: str, choices: Iterable[dict]): + self.requires_itext: bool = False + self.used_by_search: bool = False + self.name: str = name + self.options: tuple[Option, ...] = tuple(o for o in self.get_options(choices)) + + def get_options(self, choices: Iterable[dict]) -> Generator[Option, None, None]: + requires_itext = False + for c in choices: + option = Option(**c) + if not requires_itext: + # Media: dict of image, audio, etc. Defaults to None. + if option.media: + requires_itext = True + else: + choice_label = option.label + label_is_dict = isinstance(choice_label, dict) + # Multi-language: dict of labels etc per language. Can be just a string. + if label_is_dict: + requires_itext = True + # Dynamic label: string contains a pyxform reference. 
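Rough examples of what trips `requires_itext` in the `Itemset` metadata detection above (choice rows are illustrative; annotation only):

    # Plain string labels only: no itext needed.
    # Itemset(name="yn", choices=[{"name": "y", "label": "Yes"}, {"name": "n", "label": "No"}])
    #     .requires_itext  -> False
    # Multi-language labels (a dict of labels per language): itext needed.
    # Itemset(name="yn", choices=[{"name": "y", "label": {"en": "Yes", "fr": "Oui"}}])
    #     .requires_itext  -> True
    # Any media entry, or a label containing a ${reference} (the check just below),
    # likewise forces itext so the choice label can be rendered per context.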
+ elif ( + choice_label + and re.search(RE_ANY_PYXFORM_REF, choice_label) is not None + ): + requires_itext = True + yield option + self.requires_itext = requires_itext + + class MultipleChoiceQuestion(Question): __slots__ = SELECT_QUESTION_EXTRA_FIELDS @@ -354,67 +350,26 @@ def get_slot_names() -> tuple[str, ...]: def __init__( self, itemset: str | None = None, list_name: str | None = None, **kwargs ): - # Internals - self._itemset_dyn_label: bool = False - self._itemset_has_media: bool = False - self._itemset_multi_language: bool = False + if not itemset and not list_name: + raise PyXFormError( + "Arguments 'itemset' and 'list_name' must not both be None or empty." + ) # Structure - self.children: tuple[Option, ...] | None = None + self.choices: Itemset | None = None self.itemset: str | None = itemset self.list_name: str | None = list_name - # Notice that choices can be specified under choices or children. - # I'm going to try to stick to just choices. - # Aliases in the json format will make it more difficult - # to use going forward. - kw_choices = kwargs.pop(constants.CHOICES, None) - kw_children = kwargs.pop(constants.CHILDREN, None) - choices = coalesce(kw_choices, kw_children) - if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option): - self.children = choices - elif choices: - self.children = tuple( - Option(**c) for c in combine_lists(kw_choices, kw_children) - ) + choices = kwargs.pop(constants.CHOICES, None) + if isinstance(choices, Itemset): + self.choices = choices super().__init__(**kwargs) - def validate(self): - Question.validate(self) - if self.children: - for child in self.children: - child.validate() - - def iter_descendants( - self, - condition: Callable[["SurveyElement"], bool] | None = None, - iter_into_section_items: bool = False, - ) -> Generator["SurveyElement", None, None]: - if condition is None: - yield self - elif condition(self): - yield self - if iter_into_section_items and self.children: - for e in self.children: - yield from e.iter_descendants( - condition=condition, - iter_into_section_items=iter_into_section_items, - ) - def build_xml(self, survey: "Survey"): if self.bind["type"] not in {"string", "odk:rank"}: raise PyXFormError("""Invalid value for `self.bind["type"]`.""") - # Resolve field references in attributes - control_dict = { - key: survey.insert_xpaths(value, self) for key, value in self.control.items() - } - control_dict["ref"] = self.get_xpath() - - result = node(**control_dict) - for element in self.xml_label_and_hint(survey=survey): - if element: - result.appendChild(element) + result = self._build_xml(survey=survey) # itemset are only supposed to be strings, # check to prevent the rare dicts that show up @@ -431,21 +386,18 @@ def build_xml(self, survey: "Survey"): itemset_value_ref = self.parameters.get("value", itemset_value_ref) itemset_label_ref = self.parameters.get("label", itemset_label_ref) - multi_language = self._itemset_multi_language - has_media = self._itemset_has_media - has_dyn_label = self._itemset_dyn_label is_previous_question = bool(PYXFORM_REFERENCE_REGEX.search(self.itemset)) if file_extension in EXTERNAL_INSTANCE_EXTENSIONS: pass - elif not multi_language and not has_media and not has_dyn_label: + elif self.choices and self.choices.requires_itext: itemset = self.itemset + itemset_label_ref = "jr:itext(itextId)" else: itemset = self.itemset - itemset_label_ref = "jr:itext(itextId)" choice_filter = self.choice_filter - if choice_filter is not None: + if choice_filter: choice_filter = 
survey.insert_xpaths( choice_filter, self, True, is_previous_question ) @@ -488,63 +440,43 @@ def build_xml(self, survey: "Survey"): nodeset += ")" - itemset_children = [ - node("value", ref=itemset_value_ref), - node("label", ref=itemset_label_ref), - ] - result.appendChild(node("itemset", *itemset_children, nodeset=nodeset)) - elif self.children: - for child in self.children: - result.appendChild(child.xml(survey=survey)) + result.appendChild( + node( + "itemset", + node("value", ref=itemset_value_ref), + node("label", ref=itemset_label_ref), + nodeset=nodeset, + ) + ) + elif self.choices: + # Options processing specific to XLSForms using the "search()" function. + # The _choice_itext_ref is prepared by Survey._redirect_is_search_itext. + itemset = self.choices + if itemset.used_by_search: + for option in itemset.options: + if itemset.requires_itext: + label_node = node("label", ref=option._choice_itext_ref) + elif self.label: + label, output_inserted = survey.insert_output_values( + option.label, option + ) + label_node = node("label", label, toParseString=output_inserted) + else: + label_node = node("label") + result.appendChild( + node("item", label_node, node("value", option.name)) + ) return result class Tag(SurveyElement): - __slots__ = TAG_EXTRA_FIELDS - @staticmethod def get_slot_names() -> tuple[str, ...]: - return TAG_FIELDS - - def __init__(self, name: str, label: str | dict | None = None, **kwargs): - self.children: tuple[Option, ...] | None = None - - kw_choices = kwargs.pop(constants.CHOICES, None) - kw_children = kwargs.pop(constants.CHILDREN, None) - choices = coalesce(kw_choices, kw_children) - if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option): - self.children = choices - elif choices: - self.children = tuple( - Option(**c) for c in combine_lists(kw_choices, kw_children) - ) - super().__init__(name=name, label=label, **kwargs) - - def iter_descendants( - self, - condition: Callable[["SurveyElement"], bool] | None = None, - iter_into_section_items: bool = False, - ) -> Generator["SurveyElement", None, None]: - if condition is None: - yield self - elif condition(self): - yield self - if iter_into_section_items and self.children: - for e in self.children: - yield from e.iter_descendants( - condition=condition, - iter_into_section_items=iter_into_section_items, - ) + return SURVEY_ELEMENT_FIELDS def xml(self, survey: "Survey"): - result = node("tag", key=self.name) - result.appendChild(self.xml_label(survey=survey)) - if self.children: - for choice in self.children: - result.appendChild(choice.xml(survey=survey)) - - return result + return node("tag", self.xml_label(survey=survey), key=self.name) def validate(self): pass @@ -588,32 +520,18 @@ def iter_descendants( ) def build_xml(self, survey: "Survey"): - control_dict = self.control - control_dict["ref"] = self.get_xpath() - control_dict["mediatype"] = self._get_media_type() - result = node("upload", *self.xml_label_and_hint(survey=survey), **control_dict) - + result = self._build_xml(survey=survey) if self.children: for osm_tag in self.children: result.appendChild(osm_tag.xml(survey=survey)) - return result class RangeQuestion(Question): def build_xml(self, survey: "Survey"): - control_dict = self.control - label_and_hint = self.xml_label_and_hint(survey=survey) - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() + result = self._build_xml(survey=survey) params = 
self.parameters if params: - control_dict.update(params) - result = node(**control_dict) - if label_and_hint: - for element in self.xml_label_and_hint(survey=survey): - result.appendChild(element) - + for k, v in params.items(): + result.setAttribute(k, v) return result diff --git a/pyxform/survey.py b/pyxform/survey.py index 50ecafe9..aa6ca193 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -20,7 +20,7 @@ from pyxform.instance import SurveyInstance from pyxform.parsing.expression import has_last_saved from pyxform.parsing.instance_expression import replace_with_output -from pyxform.question import MultipleChoiceQuestion, Option, Question, Tag +from pyxform.question import Itemset, MultipleChoiceQuestion, Option, Question, Tag from pyxform.section import SECTION_EXTRA_FIELDS, Section from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( @@ -28,7 +28,6 @@ LAST_SAVED_INSTANCE_NAME, DetachableElement, escape_text_for_xml, - has_dynamic_label, node, ) from pyxform.validators import enketo_validate, odk_validate @@ -43,6 +42,7 @@ ) RE_PULLDATA = re.compile(r"(pulldata\s*\(\s*)(.*?),") SEARCH_FUNCTION_REGEX = re.compile(r"search\(.*?\)") +SELECT_TYPES = set(aliases.select) class InstanceInfo: @@ -170,22 +170,12 @@ def _get_steps_and_target_xpath(context_parent, xpath_parent, include_parent=Fal return (None, None) -@lru_cache(maxsize=128) -def is_label_dynamic(label: str) -> bool: - return ( - label is not None - and isinstance(label, str) - and re.search(BRACKETED_TAG_REGEX, label) is not None - ) - - def recursive_dict(): return defaultdict(recursive_dict) SURVEY_EXTRA_FIELDS = ( "_created", - "_search_lists", "_translations", "_xpath", "add_none_option", @@ -236,14 +226,13 @@ def get_slot_names() -> tuple[str, ...]: def __init__(self, **kwargs): # Internals self._created: datetime.now = datetime.now() - self._search_lists: set = set() self._translations: recursive_dict = recursive_dict() - self._xpath: dict[str, Section | Question | None] = {} + self._xpath: dict[str, Section | Question | None] | None = None # Structure # attribute is for custom instance attrs from settings e.g. 
attribute::abc:xyz self.attribute: dict | None = None - self.choices: dict[str, tuple[Option, ...]] | None = None + self.choices: dict[str, Itemset] | None = None self.entity_features: list[str] | None = None self.setgeopoint_by_triggering_ref: dict[str, list[str]] = {} self.setvalues_by_triggering_ref: dict[str, list[str]] = {} @@ -279,11 +268,9 @@ def __init__(self, **kwargs): self.sms_separator: str | None = None choices = kwargs.pop("choices", None) - if choices is not None: + if choices and isinstance(choices, dict): self.choices = { - list_name: tuple( - c if isinstance(c, Option) else Option(**c) for c in values - ) + list_name: Itemset(name=list_name, choices=values) for list_name, values in choices.items() } kwargs[constants.TYPE] = constants.SURVEY @@ -296,7 +283,7 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: return super().to_json_dict(delete_keys=to_delete) def validate(self): - if self.id_string in [None, "None"]: + if self.id_string in {None, "None"}: raise PyXFormError("Survey cannot have an empty id_string") super().validate() self._validate_uniqueness_of_section_names() @@ -353,7 +340,7 @@ def xml(self): self.validate() self._setup_xpath_dictionary() - for triggering_reference in self.setvalues_by_triggering_ref.keys(): + for triggering_reference in self.setvalues_by_triggering_ref: if not re.search(BRACKETED_TAG_REGEX, triggering_reference): raise PyXFormError( "Only references to other fields are allowed in the 'trigger' column." @@ -380,80 +367,60 @@ def get_trigger_values_for_question_name(self, question_name: str, trigger_type: elif trigger_type == "setgeopoint": return self.setgeopoint_by_triggering_ref.get(f"${{{question_name}}}") - def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo: + def _generate_static_instances( + self, list_name: str, itemset: Itemset + ) -> InstanceInfo: """ Generate elements for static data (e.g. 
choices for selects) """ - instance_element_list = [] - has_media = bool(choice_list[0].get("media")) - has_dyn_label = has_dynamic_label(choice_list) - multi_language = False - if isinstance(self._translations, dict): - choices = ( - True - for items in self._translations.values() - for k, v in items.items() - if v.get(constants.TYPE, "") == constants.CHOICE - and "-".join(k.split("-")[:-1]) == list_name - ) - try: - if next(choices): - multi_language = True - except StopIteration: - pass - for idx, choice in enumerate(choice_list): - choice_element_list = [] + def choice_nodes(idx, choice): # Add a unique id to the choice element in case there are itext references - if multi_language or has_media or has_dyn_label: - itext_id = f"{list_name}-{idx}" - choice_element_list.append(node("itextId", itext_id)) - - for name, value in choice.items(): - if not value: - continue - elif name != "label" and isinstance(value, str): - choice_element_list.append(node(name, value)) - elif name == "extra_data" and isinstance(value, dict): - for k, v in value.items(): - choice_element_list.append(node(k, v)) - elif ( - not multi_language - and not has_media - and not has_dyn_label - and isinstance(value, str) - and name == "label" - ): - choice_element_list.append(node(name, value)) - - instance_element_list.append(node("item", *choice_element_list)) + if itemset.requires_itext: + yield node("itextId", f"{list_name}-{idx}") + yield node(constants.NAME, choice.name) + choice_label = choice.label + if not itemset.requires_itext and isinstance(choice_label, str): + yield node(constants.LABEL, choice_label) + choice_extra_data = choice.extra_data + if choice_extra_data and isinstance(choice_extra_data, dict): + for k, v in choice_extra_data.items(): + yield node(k, v) + choice_sms_option = choice.sms_option + if choice_sms_option and isinstance(choice_sms_option, str): + yield node("sms_option", choice_sms_option) + + def instance_nodes(choices): + for idx, choice in enumerate(choices): + yield node("item", choice_nodes(idx, choice)) return InstanceInfo( type="choice", context="survey", name=list_name, src=None, - instance=node("instance", node("root", *instance_element_list), id=list_name), + instance=node( + "instance", + node("root", instance_nodes(itemset.options)), + id=list_name, + ), ) @staticmethod - def _generate_external_instances(element: SurveyElement) -> InstanceInfo | None: - if isinstance(element, ExternalInstance): - name = element["name"] - extension = element["type"].split("-")[0] - prefix = "file-csv" if extension == "csv" else "file" - src = f"jr://{prefix}/{name}.{extension}" - return InstanceInfo( - type="external", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), - name=name, - src=src, - instance=node("instance", id=name, src=src), - ) - - return None + def _generate_external_instances(element: ExternalInstance) -> InstanceInfo: + name = element["name"] + extension = element["type"].split("-")[0] + prefix = "file-csv" if extension == "csv" else "file" + src = f"jr://{prefix}/{name}.{extension}" + return InstanceInfo( + type="external", + context="[type: {t}, name: {n}]".format( + t=element["parent"]["type"], n=element["parent"]["name"] + ), + name=name, + src=src, + instance=node("instance", id=name, src=src), + ) @staticmethod def _validate_external_instances(instances) -> None: @@ -483,14 +450,14 @@ def _validate_external_instances(instances) -> None: raise ValidationError("\n".join(errors)) @staticmethod - def 
_generate_pulldata_instances(element: SurveyElement) -> list[InstanceInfo] | None: + def _generate_pulldata_instances( + element: Question | Section, + ) -> Generator[InstanceInfo, None, None]: def get_pulldata_functions(element): """ Returns a list of different pulldata(... function strings if pulldata function is defined at least once for any of: calculate, constraint, readonly, required, relevant - - :param: element (pyxform.survey.Survey): """ functions_present = [] for formula_name in constants.EXTERNAL_INSTANCES: @@ -515,24 +482,20 @@ def get_pulldata_functions(element): return functions_present - def get_instance_info(element, file_id): + def get_instance_info(elem, file_id): uri = f"jr://file-csv/{file_id}.csv" + parent = elem.parent return InstanceInfo( type="pulldata", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), + context=f"[type: {parent.type}, name: {parent.name}]", name=file_id, src=uri, instance=node("instance", id=file_id, src=uri), ) - if isinstance(element, Option | ExternalInstance | Tag | Survey): - return None pulldata_usages = get_pulldata_functions(element) if len(pulldata_usages) > 0: - pulldata_instances = [] for usage in pulldata_usages: for call_match in re.finditer(RE_PULLDATA, usage): groups = call_match.groups() @@ -540,40 +503,32 @@ def get_instance_info(element, file_id): first_argument = ( # first argument to pulldata() groups[1].replace("'", "").replace('"', "").strip() ) - pulldata_instances.append( - get_instance_info(element, first_argument) - ) - return pulldata_instances - return None + yield get_instance_info(element, first_argument) @staticmethod - def _generate_from_file_instances(element: SurveyElement) -> InstanceInfo | None: - if not isinstance(element, MultipleChoiceQuestion) or element.itemset is None: + def _generate_from_file_instances( + element: MultipleChoiceQuestion, + ) -> InstanceInfo | None: + itemset = element.itemset + if not itemset: return None - itemset = element.get("itemset") file_id, ext = os.path.splitext(itemset) if itemset and ext in EXTERNAL_INSTANCE_EXTENSIONS: file_ext = "file" if ext in {".xml", ".geojson"} else f"file-{ext[1:]}" uri = f"jr://{file_ext}/{itemset}" return InstanceInfo( type="file", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), + context=f"[type: {element.parent.type}, name: {element.parent.name}]", name=file_id, src=uri, instance=node("instance", id=file_id, src=uri), ) - return None - @staticmethod - def _generate_last_saved_instance(element: SurveyElement) -> bool: + def _generate_last_saved_instance(element: Question) -> bool: """ True if a last-saved instance should be generated, false otherwise. """ - if not isinstance(element, Question): - return False if has_last_saved(element.default): return True if has_last_saved(element.choice_filter): @@ -633,49 +588,59 @@ def _generate_instances(self) -> Generator[DetachableElement, None, None]: - `select_one_external`: implicitly relies on a `itemsets.csv` file and uses XPath-like expressions for querying. 
""" - instances = [] - generate_last_saved = False - for i in self.iter_descendants(): - i_ext = self._generate_external_instances(element=i) - i_pull = self._generate_pulldata_instances(element=i) - i_file = self._generate_from_file_instances(element=i) - if not generate_last_saved: - generate_last_saved = self._generate_last_saved_instance(element=i) - for x in [i_ext, i_pull, i_file]: - if x is not None: - instances += x if isinstance(x, list) else [x] - - if generate_last_saved: - instances += [self._get_last_saved_instance()] - - # Append last so the choice instance is excluded on a name clash. - if self.choices: - for name, value in self.choices.items(): - if name not in self._search_lists: - instances += [ - self._generate_static_instances(list_name=name, choice_list=value) - ] + + def get_element_instances(): + generate_last_saved = False + for i in self.iter_descendants(): + if isinstance(i, Question): + yield from self._generate_pulldata_instances(element=i) + if isinstance(i, MultipleChoiceQuestion): + i_file = self._generate_from_file_instances(element=i) + if i_file: + yield i_file + if not generate_last_saved: + generate_last_saved = self._generate_last_saved_instance( + element=i + ) + elif isinstance(i, Section): + yield from self._generate_pulldata_instances(element=i) + elif isinstance(i, ExternalInstance): + yield self._generate_external_instances(element=i) + + if generate_last_saved: + yield self._get_last_saved_instance() + + # Append last so the choice instance is excluded on a name clash. + if self.choices: + for k, v in self.choices.items(): + if not v.used_by_search: + yield self._generate_static_instances(list_name=k, itemset=v) + + instances = tuple(get_element_instances()) # Check that external instances have unique names. if instances: - ext_only = [x for x in instances if x.type == "external"] - self._validate_external_instances(instances=ext_only) + self._validate_external_instances( + instances=(x for x in instances if x.type == "external") + ) seen = {} for i in instances: - if i.name in seen.keys() and seen[i.name].src != i.src: - # Instance id exists with different src URI -> error. - msg = ( - "The same instance id will be generated for different " - "external instance source URIs. Please check the form." - f" Instance name: '{i.name}', Existing type: '{seen[i.name].type}', " - f"Existing URI: '{seen[i.name].src}', Duplicate type: '{i.type}', " - f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'." - ) - raise PyXFormError(msg) - elif i.name in seen.keys() and seen[i.name].src == i.src: - # Instance id exists with same src URI -> ok, don't duplicate. - continue + prior = seen.get(i.name) + if prior: + if prior.src != i.src: + # Instance id exists with different src URI -> error. + msg = ( + "The same instance id will be generated for different " + "external instance source URIs. Please check the form." + f" Instance name: '{i.name}', Existing type: '{prior.type}', " + f"Existing URI: '{prior.src}', Duplicate type: '{i.type}', " + f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'." + ) + raise PyXFormError(msg) + else: + # Instance id exists with same src URI -> ok, don't duplicate. + continue else: # Instance doesn't exist yet -> add it. 
yield i.instance @@ -786,7 +751,7 @@ def _add_to_nested_dict(self, dicty, path, value): dicty[path[0]] = {} self._add_to_nested_dict(dicty[path[0]], path[1:], value) - def _redirect_is_search_itext(self, element: Question) -> bool: + def _redirect_is_search_itext(self, element: MultipleChoiceQuestion) -> bool: """ For selects using the "search()" function, redirect itext for in-line items. @@ -801,29 +766,29 @@ def _redirect_is_search_itext(self, element: Question) -> bool: :param element: A select type question. :return: If True, the element uses the search function. """ + is_search = False try: - is_search = bool( - SEARCH_FUNCTION_REGEX.search( - element[constants.CONTROL][constants.APPEARANCE] - ) - ) + appearance = element.control[constants.APPEARANCE] + if appearance and len(appearance) > 7: + is_search = bool(SEARCH_FUNCTION_REGEX.search(appearance)) except (KeyError, TypeError): - is_search = False + pass if is_search: - file_id, ext = os.path.splitext(element[constants.ITEMSET]) - if ext in EXTERNAL_INSTANCE_EXTENSIONS: + ext = os.path.splitext(element.itemset)[1] + if ext and ext in EXTERNAL_INSTANCE_EXTENSIONS: msg = ( - f"Question '{element[constants.NAME]}' is a select from file type, " + f"Question '{element.name}' is a select from file type, " "using 'search()'. This combination is not supported. " "Remove the 'search()' usage, or change the select type." ) raise PyXFormError(msg) - if self.choices: - element.children = self.choices.get(element[constants.ITEMSET], None) - element[constants.ITEMSET] = "" - if element.children is not None: - for i, opt in enumerate(element.children): - opt["_choice_itext_id"] = f"{element[constants.LIST_NAME_U]}-{i}" + + element.itemset = "" + itemset = element.choices + if not itemset.used_by_search: + itemset.used_by_search = True + for i, opt in enumerate(itemset.options): + opt._choice_itext_ref = f"jr:itext('{itemset.name}-{i}')" return is_search def _setup_translations(self): @@ -832,58 +797,35 @@ def _setup_translations(self): setup media and itext functions """ - def _setup_choice_translations( - name, choice_value, itext_id - ) -> Generator[tuple[list[str], str], None, None]: - for media_or_lang, value in choice_value.items(): - if isinstance(value, dict): - for language, val in value.items(): - yield ([language, itext_id, media_or_lang], val) - elif name == constants.MEDIA: - yield ([self.default_language, itext_id, media_or_lang], value) - else: - yield ([media_or_lang, itext_id, "long"], value) + def get_choice_content(name, idx, choice): + itext_id = f"{name}-{idx}" - itemsets_multi_language = set() - itemsets_has_media = set() - itemsets_has_dyn_label = set() + choice_label = choice.label + if choice_label: + if isinstance(choice_label, dict): + for lang, value in choice_label.items(): + if isinstance(value, dict): + for language, val in value.items(): + yield ([language, itext_id, lang], val) + else: + yield ([lang, itext_id, "long"], value) + else: + yield ([self.default_language, itext_id, "long"], choice_label) + + choice_media = choice.media + if choice_media: + for media, value in choice_media.items(): + if isinstance(value, dict): + for language, val in value.items(): + yield ([language, itext_id, media], val) + else: + yield ([self.default_language, itext_id, media], value) def get_choices(): - for list_name, choice_list in self.choices.items(): - multi_language = False - has_media = False - dyn_label = False - choices = [] - for idx, choice in enumerate(choice_list): - for col_name, choice_value in choice.items(): - 
lang_choice = None - if not choice_value: - continue - if col_name == constants.MEDIA: - has_media = True - lang_choice = choice_value - elif col_name == constants.LABEL: - if isinstance(choice_value, dict): - lang_choice = choice_value - multi_language = True - else: - lang_choice = {self.default_language: choice_value} - if is_label_dynamic(choice_value): - dyn_label = True - if lang_choice is not None: - # e.g. (label, {"default": "Yes"}, "consent", 0) - choices.append((col_name, lang_choice, list_name, idx)) - if multi_language or has_media or dyn_label: - if multi_language: - itemsets_multi_language.add(list_name) - if has_media: - itemsets_has_media.add(list_name) - if dyn_label: - itemsets_has_dyn_label.add(list_name) - for c in choices: - yield from _setup_choice_translations( - c[0], c[1], f"{c[2]}-{c[3]}" - ) + for name, itemset in self.choices.items(): + if itemset.requires_itext: + for idx, choice in enumerate(itemset.options): + yield from get_choice_content(name, idx, choice) if self.choices: for path, value in get_choices(): @@ -891,53 +833,40 @@ def get_choices(): leaf_value = {last_path: value, constants.TYPE: constants.CHOICE} self._add_to_nested_dict(self._translations, path, leaf_value) - select_types = set(aliases.select.keys()) search_lists = set() non_search_lists = set() for element in self.iter_descendants( condition=lambda i: isinstance(i, Question | Section) ): if isinstance(element, MultipleChoiceQuestion): - if element.itemset is not None: - element._itemset_multi_language = ( - element.itemset in itemsets_multi_language - ) - element._itemset_has_media = element.itemset in itemsets_has_media - element._itemset_dyn_label = element.itemset in itemsets_has_dyn_label - - if element.type in select_types: - select_ref = (element[constants.NAME], element[constants.LIST_NAME_U]) - if self._redirect_is_search_itext(element=element): - search_lists.add(select_ref) - self._search_lists.add(element[constants.LIST_NAME_U]) - else: - non_search_lists.add(select_ref) - - # Skip creation of translations for choices in selects. The creation of these - # translations is done above in this function. - parent = element.get("parent") - if parent is not None and parent[constants.TYPE] not in select_types: - for d in element.get_translations(self.default_language): - translation_path = d["path"] - form = "long" - - if "guidance_hint" in d["path"]: - translation_path = d["path"].replace("guidance_hint", "hint") - form = "guidance" - - self._translations[d["lang"]][translation_path] = self._translations[ - d["lang"] - ].get(translation_path, {}) - - self._translations[d["lang"]][translation_path].update( - { - form: { - "text": d["text"], - "output_context": d["output_context"], - }, - constants.TYPE: constants.QUESTION, - } - ) + select_ref = (element.name, element.list_name) + if self._redirect_is_search_itext(element=element): + search_lists.add(select_ref) + else: + non_search_lists.add(select_ref) + + # Create translations questions. 
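For orientation, the approximate shape of the `_translations` entries built by the loop below (the language key, path, and text are illustrative; annotation only):

    # self._translations["english (en)"]["/data/age:label"] = {
    #     "long": {"text": "How old are you?", "output_context": ...},
    #     "type": "question",
    # }
    # where the "long" form switches to "guidance" for guidance_hint content,
    # as handled just below.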
+ for d in element.get_translations(self.default_language): + translation_path = d["path"] + form = "long" + + if "guidance_hint" in d["path"]: + translation_path = d["path"].replace("guidance_hint", "hint") + form = "guidance" + + self._translations[d["lang"]][translation_path] = self._translations[ + d["lang"] + ].get(translation_path, {}) + + self._translations[d["lang"]][translation_path].update( + { + form: { + "text": d["text"], + "output_context": d["output_context"], + }, + constants.TYPE: constants.QUESTION, + } + ) for q_name, list_name in search_lists: choice_refs = [f"'{q}'" for q, c in non_search_lists if c == list_name] @@ -962,7 +891,7 @@ def _add_empty_translations(self): paths = {} for translation in self._translations.values(): for path, content in translation.items(): - paths[path] = paths.get(path, set()).union(content.keys()) + paths[path] = paths.get(path, set()).union(content) for lang in self._translations: for path, content_types in paths.items(): @@ -1128,12 +1057,16 @@ def __unicode__(self): return f"" def _setup_xpath_dictionary(self): + if self._xpath: + return + xpaths = {} for element in self.iter_descendants(lambda i: isinstance(i, Question | Section)): element_name = element.name - if element_name in self._xpath: - self._xpath[element_name] = None + if element_name in xpaths: + xpaths[element_name] = None else: - self._xpath[element_name] = element + xpaths[element_name] = element + self._xpath = xpaths def _var_repl_function( self, matchobj, context, use_current=False, reference_parent=False @@ -1353,7 +1286,7 @@ def print_xform_to_file( warnings.extend(enketo_validate.check_xform(path)) # Warn if one or more translation is missing a valid IANA subtag - translations = self._translations.keys() + translations = self._translations if translations: bad_languages = get_languages_with_bad_tags(translations) if bad_languages: diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index b6c851b1..509228f8 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -35,6 +35,7 @@ ) SURVEY_ELEMENT_EXTRA_FIELDS = ("_survey_element_xpath",) SURVEY_ELEMENT_SLOTS = (*SURVEY_ELEMENT_FIELDS, *SURVEY_ELEMENT_EXTRA_FIELDS) +_SURVEY_ELEMENT_FIELDS_SET = set(SURVEY_ELEMENT_FIELDS) class SurveyElement(Mapping): @@ -58,6 +59,9 @@ def get_slot_names() -> tuple[str, ...]: """Each subclass must provide a list of slots from itself and all parents.""" return SURVEY_ELEMENT_SLOTS + def __bool__(self): + return True + def __len__(self): return len(self.get_slot_names()) @@ -93,7 +97,7 @@ def __init__( if fields is not None: for key in fields: - if key not in SURVEY_ELEMENT_FIELDS: + if key not in _SURVEY_ELEMENT_FIELDS_SET: value = kwargs.pop(key, None) if value or not hasattr(self, key): self[key] = value @@ -295,10 +299,18 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: ] choices = result.pop("choices", None) if choices: - result["choices"] = { - list_name: [o.to_json_dict(delete_keys=("parent",)) for o in options] - for list_name, options in choices.items() - } + if isinstance(choices, dict): + result["choices"] = { + list_name: [ + o.to_json_dict(delete_keys=("parent",)) for o in itemset.options + ] + for list_name, itemset in choices.items() + } + else: + result["children"] = [ + o.to_json_dict(delete_keys=("parent",)) for o in choices.options + ] + # Translation items with "output_context" have circular references. 
if "_translations" in result: for lang in result["_translations"].values(): diff --git a/pyxform/utils.py b/pyxform/utils.py index e42445a6..f5562ba5 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -11,7 +11,6 @@ from io import StringIO from itertools import chain from json.decoder import JSONDecodeError -from typing import Any from xml.dom import Node from xml.dom.minidom import Element, Text, _write_data @@ -20,15 +19,15 @@ from pyxform import constants as const from pyxform.errors import PyXFormError from pyxform.parsing.expression import parse_expression +from pyxform.xls2json_backends import DefinitionData -SEP = "_" +BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}") INVALID_XFORM_TAG_REGEXP = re.compile(r"[^a-zA-Z:_][^a-zA-Z:_0-9\-.]*") LAST_SAVED_INSTANCE_NAME = "__last-saved" -BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}") -PYXFORM_REFERENCE_REGEX = re.compile(r"\$\{(.*?)\}") NODE_TYPE_TEXT = {Node.TEXT_NODE, Node.CDATA_SECTION_NODE} +PYXFORM_REFERENCE_REGEX = re.compile(r"\$\{(.*?)\}") +SPACE_TRANS_TABLE = str.maketrans({" ": "_"}) XML_TEXT_SUBS = {"&": "&", "<": "<", ">": ">"} -XML_TEXT_SUBS_KEYS = set(XML_TEXT_SUBS) XML_TEXT_TABLE = str.maketrans(XML_TEXT_SUBS) @@ -44,7 +43,7 @@ class DetachableElement(Element): """ def __init__(self, *args, **kwargs): - Element.__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) self.ownerDocument = None def writexml(self, writer, indent="", addindent="", newl=""): @@ -82,7 +81,7 @@ def writexml(self, writer, indent="", addindent="", newl=""): @lru_cache(maxsize=64) def escape_text_for_xml(text: str) -> str: - if any(c in set(text) for c in XML_TEXT_SUBS_KEYS): + if any(c in set(text) for c in XML_TEXT_SUBS): return text.translate(XML_TEXT_TABLE) else: return text @@ -171,7 +170,7 @@ def flatten(li): def external_choices_to_csv( - workbook_dict: dict[str, Any], warnings: list | None = None + workbook_dict: DefinitionData, warnings: list | None = None ) -> str | None: """ Convert the 'external_choices' sheet data to CSV. @@ -180,7 +179,7 @@ def external_choices_to_csv( :param warnings: The conversions warnings list. """ warnings = coalesce(warnings, []) - if const.EXTERNAL_CHOICES not in workbook_dict: + if not workbook_dict.external_choices: warnings.append( f"Could not export itemsets.csv, the '{const.EXTERNAL_CHOICES}' sheet is missing." ) @@ -189,11 +188,11 @@ def external_choices_to_csv( itemsets = StringIO(newline="") csv_writer = csv.writer(itemsets, quoting=csv.QUOTE_ALL) try: - header = workbook_dict["external_choices_header"][0] + header = workbook_dict.external_choices_header[0] except (IndexError, KeyError, TypeError): - header = {k for d in workbook_dict[const.EXTERNAL_CHOICES] for k in d} + header = {k for d in workbook_dict.external_choices for k in d} csv_writer.writerow(header) - for row in workbook_dict[const.EXTERNAL_CHOICES]: + for row in workbook_dict.external_choices: csv_writer.writerow(row.values()) return itemsets.getvalue() @@ -227,13 +226,13 @@ def default_is_dynamic(element_default, element_type=None): * Contains arithmetic operator, including 'div' and 'mod' (except '-' for 'date' type). * Contains brackets, parentheses or braces. """ - if not isinstance(element_default, str): + if not element_default or not isinstance(element_default, str): return False tokens, _ = parse_expression(element_default) for t in tokens: # Data types which are likely to have non-dynamic defaults containing a hyphen. 
- if element_type in ("date", "dateTime", "geopoint", "geotrace", "geoshape"): + if element_type in {"date", "dateTime", "geopoint", "geotrace", "geoshape"}: # Nested to avoid extra string comparisons if not a relevant data type. if t.name == "OPS_MATH" and t.value == "-": return False @@ -252,23 +251,6 @@ def default_is_dynamic(element_default, element_type=None): return False -def has_dynamic_label(choice_list: "list[dict[str, str]]") -> bool: - """ - If the first or second choice label includes a reference, we must use itext. - - Check the first two choices in case first is something like "Other". - """ - for c in choice_list[:2]: - choice_label = c.get("label") - if ( - choice_label is not None - and isinstance(choice_label, str) - and re.search(BRACKETED_TAG_REGEX, choice_label) is not None - ): - return True - return False - - def levenshtein_distance(a: str, b: str) -> int: """ Calculate Levenshtein distance between two strings. diff --git a/pyxform/validators/pyxform/choices.py b/pyxform/validators/pyxform/choices.py index 3b347c35..3e2ab55c 100644 --- a/pyxform/validators/pyxform/choices.py +++ b/pyxform/validators/pyxform/choices.py @@ -26,7 +26,7 @@ def validate_headers( headers: tuple[tuple[str, ...], ...], warnings: list[str] -) -> list[str]: +) -> tuple[str, ...]: def check(): for header in headers: header = header[0] @@ -34,7 +34,7 @@ def check(): warnings.append(INVALID_HEADER.format(column=header)) yield header - return list(check()) + return tuple(check()) def validate_choice_list( @@ -43,28 +43,38 @@ def validate_choice_list( seen_options = set() duplicate_errors = [] for option in options: - if "name" not in option: + if constants.NAME not in option: raise PyXFormError(INVALID_NAME.format(row=option["__row"])) - elif "label" not in option: + elif constants.LABEL not in option: warnings.append(INVALID_LABEL.format(row=option["__row"])) if not allow_duplicates: - name = option["name"] + name = option[constants.NAME] if name in seen_options: duplicate_errors.append(INVALID_DUPLICATE.format(row=option["__row"])) else: seen_options.add(name) - if 0 < len(duplicate_errors): + if duplicate_errors: raise PyXFormError("\n".join(duplicate_errors)) -def validate_choices( +def validate_and_clean_choices( choices: dict[str, list[dict]], warnings: list[str], headers: tuple[tuple[str, ...], ...], allow_duplicates: bool = False, -): +) -> dict[str, list[dict]]: + """ + Warn about invalid or duplicate choices, and remove choices with invalid headers. + + Choices columns are output as XML elements so they must be valid XML tags. + + :param choices: Choices data from the XLSForm. + :param warnings: Warnings list. + :param headers: choices data headers i.e. unique dict keys. + :param allow_duplicates: If True, duplicate choice names are allowed in the XLSForm. + """ invalid_headers = validate_headers(headers, warnings) for options in choices.values(): validate_choice_list( @@ -75,4 +85,5 @@ def validate_choices( for option in options: for invalid_header in invalid_headers: option.pop(invalid_header, None) - del option["__row"] + option.pop("__row", None) + return choices diff --git a/pyxform/validators/pyxform/parameters_generic.py b/pyxform/validators/pyxform/parameters_generic.py index cd95843d..a0524d9a 100644 --- a/pyxform/validators/pyxform/parameters_generic.py +++ b/pyxform/validators/pyxform/parameters_generic.py @@ -37,7 +37,7 @@ def validate( """ Raise an error if 'parameters' includes any keys not named in 'allowed'. 
""" - extras = set(parameters.keys()) - (set(allowed)) + extras = set(parameters) - (set(allowed)) if 0 < len(extras): msg = ( "Accepted parameters are '{a}'. " diff --git a/pyxform/validators/pyxform/pyxform_reference.py b/pyxform/validators/pyxform/pyxform_reference.py index a1b02783..45c6c60a 100644 --- a/pyxform/validators/pyxform/pyxform_reference.py +++ b/pyxform/validators/pyxform/pyxform_reference.py @@ -38,7 +38,7 @@ def validate_pyxform_reference_syntax( continue elif t.name == "PYXFORM_REF_END": start_token = None - elif t.name in ("PYXFORM_REF_START", "PYXFORM_REF"): + elif t.name in {"PYXFORM_REF_START", "PYXFORM_REF"}: msg = PYXFORM_REFERENCE_INVALID.format( sheet=sheet_name, row_number=row_number, column=key ) diff --git a/pyxform/validators/pyxform/sheet_misspellings.py b/pyxform/validators/pyxform/sheet_misspellings.py index c83fef31..06e3851b 100644 --- a/pyxform/validators/pyxform/sheet_misspellings.py +++ b/pyxform/validators/pyxform/sheet_misspellings.py @@ -1,10 +1,10 @@ -from collections.abc import KeysView +from collections.abc import Iterable from pyxform import constants from pyxform.utils import levenshtein_distance -def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None": +def find_sheet_misspellings(key: str, keys: Iterable) -> "str | None": """ Find possible sheet name misspellings to warn the user about. @@ -15,6 +15,8 @@ def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None": :param key: The sheet name to look for. :param keys: The workbook sheet names. """ + if not keys: + return None candidates = tuple( _k # thanks to black for _k in keys diff --git a/pyxform/validators/pyxform/translations_checks.py b/pyxform/validators/pyxform/translations_checks.py index b74d2b36..043b6742 100644 --- a/pyxform/validators/pyxform/translations_checks.py +++ b/pyxform/validators/pyxform/translations_checks.py @@ -33,7 +33,7 @@ def format_missing_translations_msg( def get_sheet_msg(name, sheet): if sheet is not None: - langs = sorted(sheet.keys()) + langs = sorted(sheet) if 0 < len(langs): lang_msgs = [] for lang in langs: @@ -94,8 +94,10 @@ def _find_translations( self, sheet_data: "SheetData", translatable_columns: dict[str, str] ): def process_header(head): - if head[0] in translatable_columns.keys(): + if head[0] in translatable_columns: name = translatable_columns[head[0]] + if isinstance(name, tuple): + name = head[0] if len(head) == 1: self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name) elif len(head) == 2: @@ -103,7 +105,7 @@ def process_header(head): self.columns_seen.add(name) for header in sheet_data: - if 1 < len(header) and header[0] in (const.MEDIA, const.BIND): + if 1 < len(header) and header[0] in {const.MEDIA, const.BIND}: process_header(head=header[1:]) else: process_header(head=header) diff --git a/pyxform/validators/updater.py b/pyxform/validators/updater.py index e47cbb2c..bd5df1b3 100644 --- a/pyxform/validators/updater.py +++ b/pyxform/validators/updater.py @@ -302,9 +302,9 @@ def _unzip_find_jobs(open_zip_file, bin_paths, out_path): if maybe_existing_match.CRC == zip_item.CRC: continue zip_jobs[file_out_path] = zip_item - if len(bin_paths) != len(zip_jobs.keys()): + if len(bin_paths) != len(zip_jobs): raise PyXFormError( - f"Expected {len(bin_paths)} zip job files, found: {len(zip_jobs.keys())}" + f"Expected {len(bin_paths)} zip job files, found: {len(zip_jobs)}" ) return zip_jobs diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 6d64ded3..f5cc62b3 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py 
@@ -6,7 +6,7 @@
 import os
 import re
 import sys
-from itertools import chain
+from collections.abc import Sequence
 from typing import IO, Any
 
 from pyxform import aliases, constants
@@ -22,19 +22,44 @@
 )
 from pyxform.errors import PyXFormError
 from pyxform.parsing.expression import is_pyxform_reference, is_xml_tag
-from pyxform.utils import PYXFORM_REFERENCE_REGEX, coalesce, default_is_dynamic
-from pyxform.validators.pyxform import choices as vc
+from pyxform.parsing.sheet_headers import dealias_and_group_headers
+from pyxform.utils import (
+    PYXFORM_REFERENCE_REGEX,
+    coalesce,
+    default_is_dynamic,
+)
 from pyxform.validators.pyxform import parameters_generic, select_from_file
 from pyxform.validators.pyxform import question_types as qt
 from pyxform.validators.pyxform.android_package_name import validate_android_package_name
+from pyxform.validators.pyxform.choices import validate_and_clean_choices
 from pyxform.validators.pyxform.pyxform_reference import validate_pyxform_reference_syntax
 from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings
 from pyxform.validators.pyxform.translations_checks import SheetTranslations
-from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict
+from pyxform.xls2json_backends import (
+    RE_WHITESPACE,
+    DefinitionData,
+    get_xlsform,
+)
 
 SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
 RE_SMART_QUOTES = re.compile(r"|".join(re.escape(old) for old in SMART_QUOTES))
-RE_WHITESPACE = re.compile(r"( )+")
+RE_BEGIN_CONTROL = re.compile(
+    r"^(?P<begin>begin)(\s|_)(?P<type>("
+    + "|".join(aliases.control)
+    + r"))( (over )?(?P<list_name>\S+))?$"
+)
+RE_END_CONTROL = re.compile(
+    r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(aliases.control) + r"))$"
+)
+RE_SELECT = re.compile(
+    r"^(?P<select_command>("
+    + "|".join(aliases.select)
+    + r")) (?P<list_name>\S+)"
+    + "( (?P<specify_other>(or specify other|or_other|or other)))?$"
+)
+RE_OSM = re.compile(
+    r"(?P<osm_command>(" + "|".join(aliases.osm) + r")) (?P<list_name>\S+)"
+)
 
 
 def print_pyobj_to_json(pyobj, path=None):
@@ -49,120 +74,6 @@
     sys.stdout.write(json.dumps(pyobj, ensure_ascii=False, indent=4))
 
 
-def merge_dicts(dict_a, dict_b, default_key="default"):
-    """
-    Recursively merge two nested dicts into a single dict.
-    When keys match their values are merged using
-    a recursive call to this function,
-    otherwise they are just added to the output dict.
-    """
-    if not dict_a:
-        return dict_b
-    if not dict_b:
-        return dict_a
-
-    if not isinstance(dict_a, dict):
-        if default_key in dict_b:
-            return dict_b
-        dict_a = {default_key: dict_a}
-    if not isinstance(dict_b, dict):
-        if default_key in dict_a:
-            return dict_a
-        dict_b = {default_key: dict_b}
-
-    # Union keys but retain order (as opposed to set()), preferencing dict_a then dict_b.
-    # E.g. {"a": 1, "b": 2} + {"c": 3, "a": 4} -> {"a": None, "b": None, "c": None}
-    all_keys = {k: None for k in (chain(dict_a.keys(), dict_b.keys()))}
-
-    out_dict = {}
-    for key in all_keys.keys():
-        out_dict[key] = merge_dicts(dict_a.get(key), dict_b.get(key), default_key)
-    return out_dict
-
-
-def list_to_nested_dict(lst):
-    """
-    [1,2,3,4] -> {1:{2:{3:4}}}
-    """
-    if len(lst) > 1:
-        return {lst[0]: list_to_nested_dict(lst[1:])}
-    else:
-        return lst[0]
-
-
-class DealiasAndGroupHeadersResult:
-    __slots__ = ("headers", "data")
-
-    def __init__(self, headers: tuple[tuple[str, ...], ...], data: list[dict]):
-        """
-        :param headers: Distinct headers seen in the sheet, parsed / split if applicable.
-        :param data: Sheet data rows, in grouped dict format.
- """ - self.headers: tuple[tuple[str, ...], ...] = headers - self.data: list[dict] = data - - -def dealias_and_group_headers( - dict_array: list[dict], - header_aliases: dict[str, str], - use_double_colons: bool, - default_language: str = constants.DEFAULT_LANGUAGE_VALUE, - ignore_case: bool = False, -) -> DealiasAndGroupHeadersResult: - """ - For each row in the worksheet, group all keys that contain a double colon. - So - {"text::english": "hello", "text::french" : "bonjour"} - becomes - {"text": {"english": "hello", "french" : "bonjour"}. - Dealiasing is done to the first token - (the first term separated by the delimiter). - default_language -- used to group labels/hints/etc - without a language specified with localized versions. - """ - group_delimiter = "::" - out_dict_array = [] - seen_headers = {} - for row in dict_array: - out_row = {} - for header, val in row.items(): - if ignore_case: - header = header.lower() - - if use_double_colons: - tokens = [t.strip() for t in header.split(group_delimiter)] - - # else: - # We do the initial parse using single colons - # for backwards compatibility and - # only the first single is used - # in order to avoid nesting jr:something tokens. - # if len(tokens) > 1: - # tokens[1:] = [u":".join(tokens[1:])] - else: - # I think the commented out section above - # break if there is something like media:image:english - # so maybe a better backwards compatibility hack - # is to join any jr token with the next token - tokens = [t.strip() for t in header.split(":")] - if "jr" in tokens: - jr_idx = tokens.index("jr") - tokens[jr_idx] = ":".join(tokens[jr_idx : jr_idx + 2]) - tokens.pop(jr_idx + 1) - - dealiased_first_token = header_aliases.get(tokens[0], tokens[0]) - tokens = dealiased_first_token.split(group_delimiter) + tokens[1:] - new_key = tokens[0] - new_value = list_to_nested_dict(tokens[1:] + [val]) - out_row = merge_dicts(out_row, {new_key: new_value}, default_language) - seen_headers[tuple(tokens)] = None - - out_dict_array.append(out_row) - return DealiasAndGroupHeadersResult( - headers=tuple(seen_headers.keys()), data=out_dict_array - ) - - def dealias_types(dict_array): """ Look at all the type values in a dict array and if any aliases are found, @@ -170,24 +81,24 @@ def dealias_types(dict_array): """ for row in dict_array: found_type = row.get(constants.TYPE) - if found_type in aliases._type_alias_map.keys(): + if found_type in aliases._type_alias_map: row[constants.TYPE] = aliases._type_alias_map[found_type] return dict_array def clean_text_values( sheet_name: str, - data: list[dict], + data: Sequence[dict], strip_whitespace: bool = False, add_row_number: bool = False, -) -> list[dict]: +) -> Sequence[dict]: """ Go though the dict array and strips all text values. Also replaces multiple spaces with single spaces. """ for row_number, row in enumerate(data, start=2): for key, value in row.items(): - if isinstance(value, str): + if isinstance(value, str) and value: # Remove extraneous whitespace characters. if strip_whitespace: value = RE_WHITESPACE.sub(" ", value.strip()) @@ -223,21 +134,6 @@ def group_dictionaries_by_key(list_of_dicts, key, remove_key=True): return dict_of_lists -def has_double_colon(workbook_dict) -> bool: - """ - Look for a column header with a doublecolon (::) and - return true if one is found. 
- """ - for sheet in workbook_dict.values(): - for row in sheet: - for column_header in row.keys(): - if not isinstance(column_header, str): - continue - if "::" in column_header: - return True - return False - - def add_flat_annotations(prompt_list, parent_relevant="", name_prefix=""): """ This is a helper function for generating flat instances @@ -373,7 +269,7 @@ def add_choices_info_to_question( def workbook_to_json( - workbook_dict, + workbook_dict: DefinitionData, form_name: str | None = None, fallback_form_name: str | None = None, default_language: str | None = None, @@ -398,88 +294,75 @@ def workbook_to_json( json form spec. """ warnings = coalesce(warnings, []) - is_valid = False - # Sheet names should be case-insensitive - workbook_dict = {x.lower(): y for x, y in workbook_dict.items()} - workbook_keys = workbook_dict.keys() - if constants.SURVEY not in workbook_dict: + sheet_names = workbook_dict.sheet_names + if not workbook_dict.survey and not workbook_dict.survey_header: msg = f"You must have a sheet named '{constants.SURVEY}'. " - similar = find_sheet_misspellings(key=constants.SURVEY, keys=workbook_keys) + similar = find_sheet_misspellings(key=constants.SURVEY, keys=sheet_names) if similar is not None: msg += similar raise PyXFormError(msg) - # ensure required headers are present - for row in workbook_dict.get(constants.SURVEY, []): - is_valid = "type" in [z.lower() for z in row] - if is_valid: - break - if not is_valid: - # TODO - could we state what headers are missing? - raise PyXFormError( - "The survey sheet is either empty or missing important column headers." - ) - # Make sure the passed in vars are unicode form_name = str(coalesce(form_name, constants.DEFAULT_FORM_NAME)) default_language = str(coalesce(default_language, constants.DEFAULT_LANGUAGE_VALUE)) - # We check for double columns to determine whether to use them - # or single colons to delimit grouped headers. - # Single colons are bad because they conflict with with the xform namespace - # syntax (i.e. jr:constraintMsg), - # so we only use them if we have to for backwards compatibility. - use_double_colons = has_double_colon(workbook_dict) - # Break the spreadsheet dict into easier to access objects # (settings, choices, survey_sheet): + # ########## Settings sheet ########## - k = constants.SETTINGS - if k not in workbook_dict: - similar = find_sheet_misspellings(key=k, keys=workbook_keys) + settings = {} + if workbook_dict.settings: + settings_sheet_headers = workbook_dict.settings_header or [] + settings_sheet = workbook_dict.settings or [] + try: + if ( + sum( + [ + element in {constants.ID_STRING, "form_id"} + for element in settings_sheet_headers[0] + ] + ) + == 2 + ): + settings_sheet_headers[0].pop(constants.ID_STRING, None) + settings_sheet[0].pop(constants.ID_STRING, None) + warnings.append( + "The form_id and id_string column headers are both" + " specified in the settings sheet provided." + " This may cause errors during conversion." + " In future, its best to avoid specifying both" + " column headers in the settings sheet." 
+ ) + except IndexError: # In case there is no settings sheet + pass + + from pyxform.survey import Survey + + settings_sheet = dealias_and_group_headers( + sheet_name=constants.SETTINGS, + sheet_data=settings_sheet, + sheet_header=settings_sheet_headers, + header_aliases=aliases.settings_header, + header_columns=set(Survey.get_slot_names()), + ) + settings = clean_text_values( + sheet_name=constants.SETTINGS, data=[settings_sheet.data[0]] + )[0] + else: + similar = find_sheet_misspellings(key=constants.SETTINGS, keys=sheet_names) if similar is not None: warnings.append(similar + _MSG_SUPPRESS_SPELLING) - settings_sheet_headers = workbook_dict.get(constants.SETTINGS, []) - try: - if ( - sum( - [ - element in [constants.ID_STRING, "form_id"] - for element in settings_sheet_headers[0].keys() - ] - ) - == 2 - ): - settings_sheet_headers[0].pop(constants.ID_STRING, None) - warnings.append( - "The form_id and id_sting column headers are both" - " specified in the settings sheet provided." - " This may cause errors during conversion." - " In future, its best to avoid specifying both" - " column headers in the settings sheet." - ) - except IndexError: # In case there is no settings sheet - settings_sheet_headers = [] - settings_sheet = dealias_and_group_headers( - dict_array=settings_sheet_headers, - header_aliases=aliases.settings_header, - use_double_colons=use_double_colons, - ) - settings = settings_sheet.data[0] if len(settings_sheet.data) > 0 else {} - settings = clean_text_values(sheet_name=constants.SETTINGS, data=[settings])[0] clean_text_values_enabled = aliases.yes_no.get( - settings.get("clean_text_values", "true()") + settings.get("clean_text_values", "yes"), True ) - default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language) - # add_none_option is a boolean that when true, # indicates a none option should automatically be added to selects. # It should probably be deprecated but I haven't checked yet. 
if "add_none_option" in settings: settings["add_none_option"] = aliases.yes_no.get( - settings["add_none_option"], False + settings.get("add_none_option", "no"), False ) # Here we create our json dict root with default settings: @@ -502,104 +385,144 @@ def workbook_to_json( } # Here the default settings are overridden by those in the settings sheet json_dict.update(settings) + from pyxform.question import Option + + option_fields = set(Option.get_slot_names()) # ########## External Choices sheet ########## - external_choices_sheet = workbook_dict.get(constants.EXTERNAL_CHOICES, []) - external_choices_sheet = clean_text_values( - sheet_name=constants.EXTERNAL_CHOICES, data=external_choices_sheet - ) - external_choices_sheet = dealias_and_group_headers( - dict_array=external_choices_sheet, - header_aliases=aliases.list_header, - use_double_colons=use_double_colons, - default_language=default_language, - ) - external_choices = group_dictionaries_by_key( - list_of_dicts=external_choices_sheet.data, key=constants.LIST_NAME_S - ) + external_choices = workbook_dict.external_choices + if external_choices: + external_choices = clean_text_values( + sheet_name=constants.EXTERNAL_CHOICES, data=external_choices + ) + external_choices = dealias_and_group_headers( + sheet_name=constants.EXTERNAL_CHOICES, + sheet_data=external_choices, + sheet_header=workbook_dict.external_choices_header, + header_aliases=aliases.list_header, + header_columns=option_fields, + default_language=default_language, + ) + external_choices = group_dictionaries_by_key( + list_of_dicts=external_choices.data, key=constants.LIST_NAME_S + ) # ########## Choices sheet ########## - choices_sheet = workbook_dict.get(constants.CHOICES, []) - choices_sheet = clean_text_values( - sheet_name=constants.CHOICES, - data=choices_sheet, - add_row_number=True, - ) - choices_sheet = dealias_and_group_headers( - dict_array=choices_sheet, - header_aliases=aliases.list_header, - use_double_colons=use_double_colons, - default_language=default_language, - ) - choices = group_dictionaries_by_key( - list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S - ) - # To combine the warning into one message, the check for missing choices translation - # columns is run with Survey sheet below. - - # Warn and remove invalid headers in case the form uses headers for notes. - allow_duplicates = aliases.yes_no.get( - settings.get("allow_choice_duplicates", False), False - ) - vc.validate_choices( - choices=choices, - warnings=warnings, - headers=choices_sheet.headers, - allow_duplicates=allow_duplicates, - ) - - if 0 < len(choices): - json_dict[constants.CHOICES] = choices + choices_sheet = workbook_dict.choices + choices = {} + if choices_sheet: + if clean_text_values_enabled: + choices_sheet = clean_text_values( + sheet_name=constants.CHOICES, + data=choices_sheet, + add_row_number=True, + ) + choices_sheet = dealias_and_group_headers( + sheet_name=constants.CHOICES, + sheet_data=choices_sheet, + sheet_header=workbook_dict.choices_header, + header_aliases=aliases.list_header, + header_columns=option_fields, + headers_required={constants.NAME}, + default_language=default_language, + ) + choices = group_dictionaries_by_key( + list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S + ) + # To combine the warning into one message, the check for missing choices translation + # columns is run with Survey sheet below. + + # Warn and remove invalid headers in case the form uses headers for notes. 
+ choices = validate_and_clean_choices( + choices=choices, + warnings=warnings, + headers=choices_sheet.headers, + allow_duplicates=aliases.yes_no.get( + settings.get("allow_choice_duplicates", "no"), False + ), + ) + if choices: + json_dict[constants.CHOICES] = choices # ########## Entities sheet ########### - entities_sheet = workbook_dict.get(constants.ENTITIES, []) - entities_sheet = clean_text_values(sheet_name=constants.ENTITIES, data=entities_sheet) - entities_sheet = dealias_and_group_headers( - dict_array=entities_sheet, - header_aliases=aliases.entities_header, - use_double_colons=False, - ) - entity_declaration = get_entity_declaration( - entities_sheet=entities_sheet.data, workbook_dict=workbook_dict, warnings=warnings - ) + entity_declaration = None + if workbook_dict.entities: + entities_sheet = clean_text_values( + sheet_name=constants.ENTITIES, data=workbook_dict.entities + ) + from pyxform.entities.entity_declaration import EntityDeclaration + + entities_sheet = dealias_and_group_headers( + sheet_name=constants.ENTITIES, + sheet_data=entities_sheet, + sheet_header=workbook_dict.entities_header, + header_aliases=aliases.entities_header, + # Entities treat some actual columns as if they are parameters. + header_columns={ + *EntityDeclaration.get_slot_names(), + *(i.value for i in constants.EntityColumns.value_list()), + }, + ) + entity_declaration = get_entity_declaration(entities_sheet=entities_sheet.data) + else: + similar = find_sheet_misspellings(key=constants.ENTITIES, keys=sheet_names) + if similar is not None: + warnings.append(similar + constants._MSG_SUPPRESS_SPELLING) # ########## Survey sheet ########### - survey_sheet = workbook_dict[constants.SURVEY] + survey_sheet = workbook_dict.survey # Process the headers: if clean_text_values_enabled: survey_sheet = clean_text_values( - sheet_name=constants.SURVEY, data=survey_sheet, strip_whitespace=True + sheet_name=constants.SURVEY, data=workbook_dict.survey, strip_whitespace=True ) + from pyxform.question import MultipleChoiceQuestion + survey_sheet = dealias_and_group_headers( - dict_array=survey_sheet, + sheet_name=constants.SURVEY, + sheet_data=survey_sheet, + sheet_header=workbook_dict.survey_header, header_aliases=aliases.survey_header, - use_double_colons=use_double_colons, + header_columns=set(MultipleChoiceQuestion.get_slot_names()), + headers_required={constants.TYPE}, default_language=default_language, ) survey_sheet.data = dealias_types(dict_array=survey_sheet.data) # Check for missing translations. The choices sheet is checked here so that the # warning can be combined into one message. + if not choices_sheet: + choices_headers = () + else: + choices_headers = choices_sheet.headers sheet_translations = SheetTranslations( survey_sheet=survey_sheet.headers, - choices_sheet=choices_sheet.headers, + choices_sheet=choices_headers, ) sheet_translations.missing_check(warnings=warnings) + # ########## OSM sheet ########### # No spell check for OSM sheet (infrequently used, many spurious matches). 
-    osm_sheet = dealias_and_group_headers(
-        dict_array=workbook_dict.get(constants.OSM, []),
-        header_aliases=aliases.list_header,
-        use_double_colons=True,
-    )
-    osm_tags = group_dictionaries_by_key(
-        list_of_dicts=osm_sheet.data, key=constants.LIST_NAME_S
-    )
+    osm_tags = None
+    if workbook_dict.osm:
+        osm_sheet = dealias_and_group_headers(
+            sheet_data=workbook_dict.osm,
+            sheet_name=constants.OSM,
+            sheet_header=workbook_dict.osm_header,
+            header_aliases=aliases.list_header,
+            header_columns=option_fields,
+        )
+        osm_tags = group_dictionaries_by_key(
+            list_of_dicts=osm_sheet.data, key=constants.LIST_NAME_S
+        )
+
+    # Clear references to original data for garbage collection.
+    del workbook_dict
 
     # #################################
     # Parse the survey sheet while generating a survey in our json format:
     # A stack is used to keep track of begin/end expressions
-    stack = [
+    stack: list[dict[str, Any]] = [
         {
             "control_type": None,
             "control_name": None,
@@ -610,28 +533,8 @@
     # this will be set to the name of the list
     table_list = None
 
-    # For efficiency we compile all the regular expressions
-    # that will be used to parse types:
-    end_control_regex = re.compile(
-        r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(aliases.control.keys()) + r"))$"
-    )
-    begin_control_regex = re.compile(
-        r"^(?P<begin>begin)(\s|_)(?P<type>("
-        + "|".join(aliases.control.keys())
-        + r"))( (over )?(?P<list_name>\S+))?$"
-    )
-    select_regexp = re.compile(
-        r"^(?P<select_command>("
-        + "|".join(aliases.select.keys())
-        + r")) (?P<list_name>\S+)"
-        + "( (?P<specify_other>(or specify other|or_other|or other)))?$"
-    )
-    osm_regexp = re.compile(
-        r"(?P<osm_command>(" + "|".join(aliases.osm.keys()) + r")) (?P<list_name>\S+)"
-    )
-    # Rows from the survey sheet that should be nested in meta
-    survey_meta = []
+    meta_children = []
 
     # To check that questions with triggers refer to other questions that exist.
question_names = set() trigger_references = [] @@ -657,7 +560,7 @@ def workbook_to_json( continue # skip empty rows - if len(row) == 0: + if not row: continue # Get question type @@ -683,7 +586,7 @@ def workbook_to_json( # Pull out questions that will go in meta block if question_type == "audit": # Force audit name to always be "audit" to follow XForms spec - if "name" in row and row["name"] not in [None, "", "audit"]: + if "name" in row and row["name"] not in {None, "", "audit"}: raise PyXFormError( ROW_FORMAT_STRING % row_number + " Audits must always be named 'audit.'" @@ -704,7 +607,7 @@ def workbook_to_json( ), ) - if constants.TRACK_CHANGES in parameters.keys(): + if constants.TRACK_CHANGES in parameters: if ( parameters[constants.TRACK_CHANGES] != "true" and parameters[constants.TRACK_CHANGES] != "false" @@ -724,7 +627,7 @@ def workbook_to_json( } ) - if constants.TRACK_CHANGES_REASONS in parameters.keys(): + if constants.TRACK_CHANGES_REASONS in parameters: if parameters[constants.TRACK_CHANGES_REASONS] != "on-form-edit": raise PyXFormError( constants.TRACK_CHANGES_REASONS + " must be set to on-form-edit" @@ -735,7 +638,7 @@ def workbook_to_json( {"odk:" + constants.TRACK_CHANGES_REASONS: "on-form-edit"} ) - if constants.IDENTIFY_USER in parameters.keys(): + if constants.IDENTIFY_USER in parameters: if ( parameters[constants.IDENTIFY_USER] != "true" and parameters[constants.IDENTIFY_USER] != "false" @@ -755,19 +658,19 @@ def workbook_to_json( } ) - location_parameters = ( + location_parameters = { constants.LOCATION_PRIORITY, constants.LOCATION_MIN_INTERVAL, constants.LOCATION_MAX_AGE, - ) - if any(k in parameters.keys() for k in location_parameters): - if all(k in parameters.keys() for k in location_parameters): - if parameters[constants.LOCATION_PRIORITY] not in [ + } + if any(k in parameters for k in location_parameters): + if all(k in parameters for k in location_parameters): + if parameters[constants.LOCATION_PRIORITY] not in { "no-power", "low-power", "balanced", "high-accuracy", - ]: + }: msg = ( f"Parameter {constants.LOCATION_PRIORITY} must be set to " "no-power, low-power, balanced, or high-accuracy:" @@ -843,7 +746,7 @@ def workbook_to_json( + " parameters." ) - survey_meta.append(new_dict) + meta_children.append(new_dict) continue if question_type == "calculate": @@ -872,7 +775,7 @@ def workbook_to_json( # Try to parse question as a end control statement # (i.e. end loop/repeat/group): - end_control_parse = end_control_regex.search(question_type) + end_control_parse = RE_END_CONTROL.search(question_type) if end_control_parse: parse_dict = end_control_parse.groupdict() if parse_dict.get("end") and "type" in parse_dict: @@ -914,11 +817,11 @@ def workbook_to_json( ) in_repeat = any(ancestor["control_type"] == "repeat" for ancestor in stack) - validate_entity_saveto(row, row_number, entity_declaration, in_repeat) + validate_entity_saveto(row, row_number, in_repeat, entity_declaration) # Try to parse question as begin control statement # (i.e. begin loop/repeat/group): - begin_control_parse = begin_control_regex.search(question_type) + begin_control_parse = RE_BEGIN_CONTROL.search(question_type) if begin_control_parse: parse_dict = begin_control_parse.groupdict() if parse_dict.get("begin") and "type" in parse_dict: @@ -983,7 +886,7 @@ def workbook_to_json( if repeat_count_expression: # Simple expressions don't require a new node, they can reference directly. 
if not is_pyxform_reference(value=repeat_count_expression): - generated_node_name = new_json_dict["name"] + "_count" + generated_node_name = f"""{new_json_dict["name"]}_count""" parent_children_array.append( { "name": generated_node_name, @@ -996,7 +899,7 @@ def workbook_to_json( ) # This re-directs the body/repeat ref to the above generated node. new_json_dict["control"]["jr:count"] = ( - "${" + generated_node_name + "}" + f"${{{generated_node_name}}}" ) # Code to deal with table_list appearance flags @@ -1050,7 +953,7 @@ def workbook_to_json( question_names.add(question_name) # Try to parse question as a select: - select_parse = select_regexp.search(question_type) + select_parse = RE_SELECT.search(question_type) if select_parse: parse_dict = select_parse.groupdict() if parse_dict.get("select_command"): @@ -1065,14 +968,11 @@ def workbook_to_json( ) list_name = parse_dict[constants.LIST_NAME_U] file_extension = os.path.splitext(list_name)[1] - if ( - select_type == constants.SELECT_ONE_EXTERNAL - and list_name not in external_choices - ): + if select_type == constants.SELECT_ONE_EXTERNAL: if not external_choices: k = constants.EXTERNAL_CHOICES msg = "There should be an external_choices sheet in this xlsform." - similar = find_sheet_misspellings(key=k, keys=workbook_keys) + similar = find_sheet_misspellings(key=k, keys=sheet_names) if similar is not None: msg = msg + " " + similar raise PyXFormError( @@ -1080,11 +980,12 @@ def workbook_to_json( + " Please ensure that the external_choices sheet has columns" " 'list name', and 'name'." ) - raise PyXFormError( - ROW_FORMAT_STRING % row_number - + "List name not in external choices sheet: " - + list_name - ) + if list_name not in external_choices: + raise PyXFormError( + ROW_FORMAT_STRING % row_number + + "List name not in external choices sheet: " + + list_name + ) select_from_file.validate_list_name_extension( select_command=parse_dict["select_command"], list_name=list_name, @@ -1099,11 +1000,11 @@ def workbook_to_json( if not choices: k = constants.CHOICES msg = "There should be a choices sheet in this xlsform." - similar = find_sheet_misspellings(key=k, keys=workbook_keys) + similar = find_sheet_misspellings(key=k, keys=sheet_names) if similar is not None: - msg = msg + " " + similar + msg = f"{msg} {similar}" raise PyXFormError( - msg + " Please ensure that the choices sheet has the" + f"{msg} Please ensure that the choices sheet has the" " mandatory columns 'list_name', 'name', and 'label'." ) raise PyXFormError( @@ -1188,10 +1089,10 @@ def workbook_to_json( new_json_dict[constants.TYPE] = select_type select_params_allowed = ["randomize", "seed"] - if parse_dict["select_command"] in ( + if parse_dict["select_command"] in { "select_one_from_file", "select_multiple_from_file", - ): + }: select_params_allowed += ["value", "label"] # Look at parameters column for select parameters @@ -1199,7 +1100,7 @@ def workbook_to_json( parameters=parameters, allowed=select_params_allowed ) - if "randomize" in parameters.keys(): + if "randomize" in parameters: if ( parameters["randomize"] != "true" and parameters["randomize"] != "false" @@ -1209,7 +1110,7 @@ def workbook_to_json( f"""'{parameters["randomize"]}' is an invalid value""" ) - if "seed" in parameters.keys(): + if "seed" in parameters: if not parameters["seed"].startswith("${"): try: float(parameters["seed"]) @@ -1217,18 +1118,18 @@ def workbook_to_json( raise PyXFormError( "seed value must be a number or a reference to another field." 
) from seed_err - elif "seed" in parameters.keys(): + elif "seed" in parameters: raise PyXFormError( "Parameters must include randomize=true to use a seed." ) - if "value" in parameters.keys(): + if "value" in parameters: select_from_file.value_or_label_check( name="value", value=parameters["value"], row_number=row_number, ) - if "label" in parameters.keys(): + if "label" in parameters: select_from_file.value_or_label_check( name="label", value=parameters["label"], @@ -1287,13 +1188,13 @@ def workbook_to_json( continue # Try to parse question as osm: - osm_parse = osm_regexp.search(question_type) + osm_parse = RE_OSM.search(question_type) if osm_parse: parse_dict = osm_parse.groupdict() new_dict = row.copy() new_dict["type"] = constants.OSM - if parse_dict.get(constants.LIST_NAME_U) is not None: + if osm_tags and parse_dict.get(constants.LIST_NAME_U) is not None: tags = osm_tags.get(parse_dict.get(constants.LIST_NAME_U)) for tag in tags: if osm_tags.get(tag.get("name")): @@ -1314,7 +1215,7 @@ def workbook_to_json( new_dict = row.copy() parameters_generic.validate(parameters=parameters, allowed=("rows",)) - if "rows" in parameters.keys(): + if "rows" in parameters: try: int(parameters["rows"]) except ValueError as rows_err: @@ -1341,7 +1242,7 @@ def workbook_to_json( "app", ), ) - if "max-pixels" in parameters.keys(): + if "max-pixels" in parameters: try: int(parameters["max-pixels"]) except ValueError as mp_err: @@ -1357,7 +1258,7 @@ def workbook_to_json( + " Use the max-pixels parameter to speed up submission sending and save storage space. Learn more: https://xlsform.org/#image" ) - if "app" in parameters.keys(): + if "app" in parameters: appearance = row.get("control", {}).get("appearance") if appearance is None or appearance == "annotate": app_package_name = str(parameters["app"]) @@ -1377,13 +1278,13 @@ def workbook_to_json( new_dict = row.copy() parameters_generic.validate(parameters=parameters, allowed=("quality",)) - if "quality" in parameters.keys(): - if parameters["quality"] not in [ + if "quality" in parameters: + if parameters["quality"] not in { constants.AUDIO_QUALITY_VOICE_ONLY, constants.AUDIO_QUALITY_LOW, constants.AUDIO_QUALITY_NORMAL, constants.AUDIO_QUALITY_EXTERNAL, - ]: + }: raise PyXFormError("Invalid value for quality.") new_dict["bind"] = new_dict.get("bind", {}) @@ -1396,12 +1297,12 @@ def workbook_to_json( new_dict = row.copy() parameters_generic.validate(parameters=parameters, allowed=("quality",)) - if "quality" in parameters.keys(): - if parameters["quality"] not in [ + if "quality" in parameters: + if parameters["quality"] not in { constants.AUDIO_QUALITY_VOICE_ONLY, constants.AUDIO_QUALITY_LOW, constants.AUDIO_QUALITY_NORMAL, - ]: + }: raise PyXFormError("Invalid value for quality.") new_dict["action"] = new_dict.get("action", {}) @@ -1410,7 +1311,7 @@ def workbook_to_json( parent_children_array.append(new_dict) continue - if question_type in ["geopoint", "geoshape", "geotrace"]: + if question_type in {"geopoint", "geoshape", "geotrace"}: new_dict = row.copy() if question_type == "geopoint": @@ -1427,8 +1328,8 @@ def workbook_to_json( parameters=parameters, allowed=("allow-mock-accuracy",) ) - if "allow-mock-accuracy" in parameters.keys(): - if parameters["allow-mock-accuracy"] not in ["true", "false"]: + if "allow-mock-accuracy" in parameters: + if parameters["allow-mock-accuracy"] not in {"true", "false"}: raise PyXFormError("Invalid value for allow-mock-accuracy.") new_dict["bind"] = new_dict.get("bind", {}) @@ -1437,7 +1338,7 @@ def workbook_to_json( ) 
new_dict["control"] = new_dict.get("control", {}) - if "capture-accuracy" in parameters.keys(): + if "capture-accuracy" in parameters: try: float(parameters["capture-accuracy"]) new_dict["control"].update( @@ -1448,7 +1349,7 @@ def workbook_to_json( "Parameter capture-accuracy must have a numeric value" ) from ca_err - if "warning-accuracy" in parameters.keys(): + if "warning-accuracy" in parameters: try: float(parameters["warning-accuracy"]) new_dict["control"].update( @@ -1485,8 +1386,6 @@ def workbook_to_json( # print "Generating flattened instance..." add_flat_annotations(stack[0]["parent_children"]) - meta_children = [*survey_meta] - if aliases.yes_no.get(settings.get("omit_instanceID")): if settings.get("public_key"): raise PyXFormError("Cannot omit instanceID, it is required for encryption.") @@ -1513,7 +1412,7 @@ def workbook_to_json( } ) - if len(entity_declaration) > 0: + if entity_declaration: json_dict[constants.ENTITY_FEATURES] = ["create", "update", "offline"] meta_children.append(entity_declaration) @@ -1531,38 +1430,6 @@ def workbook_to_json( return json_dict -def parse_file_to_workbook_dict(path, file_object=None): - """ - Given a xls or csv workbook file use xls2json_backends to create - a python workbook_dict. - workbook_dicts are organized as follows: - {sheetname : [{column_header : column_value_in_array_indexed_row}]} - """ - - (filepath, filename) = os.path.split(path) - if not filename: - raise PyXFormError("No filename.") - (shortname, extension) = os.path.splitext(filename) - if not extension: - raise PyXFormError("No extension.") - - if extension in constants.XLS_EXTENSIONS: - return xls_to_dict(file_object if file_object is not None else path) - elif extension in constants.XLSX_EXTENSIONS: - return xlsx_to_dict(file_object if file_object is not None else path) - elif extension == ".csv": - return csv_to_dict(file_object if file_object is not None else path) - else: - raise PyXFormError("File was not recognized") - - -def get_filename(path): - """ - Get the extensionless filename from a path - """ - return os.path.splitext(os.path.basename(path))[0] - - def parse_file_to_json( path: str, default_name: str = constants.DEFAULT_FORM_NAME, @@ -1575,12 +1442,11 @@ def parse_file_to_json( """ if warnings is None: warnings = [] - workbook_dict = parse_file_to_workbook_dict(path, file_object) - fallback_form_name = str(get_filename(path)) + workbook_dict = get_xlsform(xlsform=coalesce(path, file_object)) return workbook_to_json( workbook_dict=workbook_dict, form_name=default_name, - fallback_form_name=fallback_form_name, + fallback_form_name=workbook_dict.fallback_form_name, default_language=default_language, warnings=warnings, ) @@ -1613,9 +1479,9 @@ def __init__(self, path_or_file): path = path.name except AttributeError: pass - self._dict = parse_file_to_workbook_dict(path) + self._dict = get_xlsform(xlsform=path) self._path = path - self._id = str(get_filename(path)) + self._id = str(os.path.splitext(os.path.basename(path))[0]) self._name = self._print_name = self._title = self._id def to_json_dict(self): @@ -1670,13 +1536,13 @@ def __init__(self, path): self._setup_question_types_dictionary() def _setup_question_types_dictionary(self): - use_double_colons = has_double_colon(self._dict) types_sheet = "question types" self._dict = self._dict[types_sheet] self._dict = dealias_and_group_headers( - dict_array=self._dict, + sheet_name=types_sheet, + sheet_data=self._dict, header_aliases={}, - use_double_colons=use_double_colons, + header_columns=set(), 
default_language=constants.DEFAULT_LANGUAGE_VALUE, ).data self._dict = organize_by_values(self._dict, "name") diff --git a/pyxform/xls2json_backends.py b/pyxform/xls2json_backends.py index 27652915..306d53e0 100644 --- a/pyxform/xls2json_backends.py +++ b/pyxform/xls2json_backends.py @@ -5,14 +5,13 @@ import csv import datetime import re -from collections.abc import Callable, Iterator +from collections.abc import Callable, Iterable, Sequence from dataclasses import dataclass from enum import Enum -from functools import reduce from io import BytesIO, IOBase, StringIO from os import PathLike from pathlib import Path -from typing import Any +from typing import Any, BinaryIO from zipfile import BadZipFile from openpyxl import open as pyxl_open @@ -35,6 +34,29 @@ "The xls file provided has an invalid date on the %s sheet, under" " the %s column on row number %s" ) +RE_WHITESPACE = re.compile(r"( )+") + + +@dataclass(slots=True) +class DefinitionData: + # XLSForm definition sheets. + # survey is optional to allow processing to proceed to warnings / spell checks. + survey: Sequence[dict[str, str]] | None = None + survey_header: Sequence[dict[str, Any]] | None = None + choices: Sequence[dict[str, str]] | None = None + choices_header: Sequence[dict[str, Any]] | None = None + settings: Sequence[dict[str, str]] | None = None + settings_header: Sequence[dict[str, Any]] | None = None + external_choices: Sequence[dict[str, str]] | None = None + external_choices_header: Sequence[dict[str, Any]] | None = None + entities: Sequence[dict[str, str]] | None = None + entities_header: Sequence[dict[str, Any]] | None = None + osm: Sequence[dict[str, str]] | None = None + osm_header: Sequence[dict[str, Any]] | None = None + + # Extra metadata. + sheet_names: Sequence[str] | None = None + fallback_form_name: str | None = None def _list_to_dict_list(list_items): @@ -43,7 +65,7 @@ def _list_to_dict_list(list_items): Returns a list of the created dict or an empty list """ if list_items: - return [{str(i): "" for i in list_items}] + return [{str(i): None for i in list_items}] return [] @@ -57,7 +79,7 @@ def trim_trailing_empty(a_list: list, n_empty: int) -> list: return a_list -def get_excel_column_headers(first_row: Iterator[str | None]) -> list[str | None]: +def get_excel_column_headers(first_row: Iterable[str | None]) -> list[str | None]: """Get column headers from the first row; stop if there's a run of empty columns.""" max_adjacent_empty_columns = 20 column_header_list = [] @@ -76,15 +98,15 @@ def get_excel_column_headers(first_row: Iterator[str | None]) -> list[str | None if column_header in column_header_list: raise PyXFormError(f"Duplicate column header: {column_header}") # Strip whitespaces from the header. 
- clean_header = re.sub(r"( )+", " ", column_header.strip()) + clean_header = RE_WHITESPACE.sub(" ", column_header.strip()) column_header_list.append(clean_header) return trim_trailing_empty(column_header_list, adjacent_empty_cols) def get_excel_rows( - headers: Iterator[str | None], - rows: Iterator[tuple[aCell, ...]], + headers: Iterable[str | None], + rows: Iterable[tuple[aCell, ...]], cell_func: Callable[[aCell, int, str], Any], ) -> list[dict[str, Any]]: """Get rows of cleaned data; stop if there's a run of empty rows.""" @@ -165,12 +187,13 @@ def clean_func(cell: xlrdCell, row_n: int, col_key: str) -> str | None: return rows, _list_to_dict_list(column_header_list) def process_workbook(wb: xlrdBook): - result_book = {} + result_book = {"sheet_names": []} for wb_sheet in wb.sheets(): - # Note that the sheet exists but do no further processing here. - result_book[wb_sheet.name] = [] + # Note original in sheet_names for spelling check. + result_book["sheet_names"].append(wb_sheet.name) + sheet_name = wb_sheet.name.lower() # Do not process sheets that have nothing to do with XLSForm. - if wb_sheet.name not in constants.SUPPORTED_SHEET_NAMES: + if sheet_name not in constants.SUPPORTED_SHEET_NAMES: if len(wb.sheets()) == 1: ( result_book[constants.SURVEY], @@ -180,8 +203,8 @@ def process_workbook(wb: xlrdBook): continue else: ( - result_book[wb_sheet.name], - result_book[f"{wb_sheet.name}_header"], + result_book[sheet_name], + result_book[f"{sheet_name}_header"], ) = xls_to_dict_normal_sheet(wb=wb, wb_sheet=wb_sheet) return result_book @@ -255,25 +278,25 @@ def xlsx_to_dict_normal_sheet(sheet: pyxlWorksheet): return rows, _list_to_dict_list(column_header_list) def process_workbook(wb: pyxlWorkbook): - result_book = {} + result_book = {"sheet_names": []} for sheetname in wb.sheetnames: - wb_sheet = wb[sheetname] - # Note that the sheet exists but do no further processing here. - result_book[sheetname] = [] + # Note original in sheet_names for spelling check. + result_book["sheet_names"].append(sheetname) + sheet_name = sheetname.lower() # Do not process sheets that have nothing to do with XLSForm. 
- if sheetname not in constants.SUPPORTED_SHEET_NAMES: + if sheet_name not in constants.SUPPORTED_SHEET_NAMES: if len(wb.sheetnames) == 1: ( result_book[constants.SURVEY], result_book[f"{constants.SURVEY}_header"], - ) = xlsx_to_dict_normal_sheet(wb_sheet) + ) = xlsx_to_dict_normal_sheet(wb[sheetname]) else: continue else: ( - result_book[sheetname], - result_book[f"{sheetname}_header"], - ) = xlsx_to_dict_normal_sheet(wb_sheet) + result_book[sheet_name], + result_book[f"{sheet_name}_header"], + ) = xlsx_to_dict_normal_sheet(wb[sheetname]) return result_book try: @@ -306,14 +329,18 @@ def xlsx_value_to_str(value) -> str: # ensure unicode and replace nbsp spaces with normal ones # to avoid this issue: # https://github.com/modilabs/pyxform/issues/83 - return str(value).replace(chr(160), " ") + value = str(value) + if chr(160) in value: + return value.replace(chr(160), " ") + else: + return value def is_empty(value): if value is None: return True elif isinstance(value, str): - if value.strip() == "": + if not value or value.isspace(): return True return False @@ -358,25 +385,26 @@ def first_column_as_sheet_name(row): elif len(row) == 1: return row[0], None else: - s_or_c = row[0] - content = row[1:] - if s_or_c == "": - s_or_c = None - # concatenate all the strings in content - if reduce(lambda x, y: x + y, content) == "": + sheet_name = row[0].strip() + content = [str(v).strip() for v in row[1:]] + if sheet_name == "": + sheet_name = None + if not any(c != "" for c in content): # content is a list of empty strings content = None - return s_or_c, content + return sheet_name, content def process_csv_data(rd): - _dict = {} + _dict = {"sheet_names": []} sheet_name = None current_headers = None for row in rd: - survey_or_choices, content = first_column_as_sheet_name(row) - if survey_or_choices is not None: - sheet_name = survey_or_choices - if sheet_name not in _dict: + maybe_sheet_name, content = first_column_as_sheet_name(row) + if maybe_sheet_name is not None: + sheet_name = maybe_sheet_name + if sheet_name and sheet_name not in _dict: + _dict["sheet_names"].append(sheet_name) + sheet_name = sheet_name.lower() _dict[str(sheet_name)] = [] current_headers = None if content is not None: @@ -384,13 +412,11 @@ def process_csv_data(rd): current_headers = content _dict[f"{sheet_name}_header"] = _list_to_dict_list(current_headers) else: - _d = {} - for key, val in zip(current_headers, content, strict=False): - if val != "": - # Slight modification so values are striped - # this is because csvs often spaces following commas - # (but the csv reader might already handle that.) - _d[str(key)] = str(val.strip()) + _d = { + k: v + for k, v in zip(current_headers, content, strict=False) + if v != "" + } _dict[sheet_name].append(_d) return _dict @@ -440,6 +466,8 @@ def convert_file_to_csv_string(path): foo = StringIO(newline="") writer = csv.writer(foo, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL) for sheet_name, rows in imported_sheets.items(): + if sheet_name == "sheet_names": + continue writer.writerow([sheet_name]) out_keys = [] out_rows = [] @@ -526,85 +554,71 @@ def xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name): MD_PIPE_OR_ESCAPE = re.compile(r"(? 
str: + if not cell or cell.isspace(): return None - val = val.replace(r"\|", "|") - return val - + return cell.strip().replace(r"\|", "|") -def _md_extract_array(mdtablerow): - match = re.match(MD_CELL, mdtablerow) - if match: - mtchstr = match.groups()[0] - if re.match(MD_SEPARATOR, mtchstr): - return False - else: - return [_md_strp_cell(c) for c in re.split(MD_PIPE_OR_ESCAPE, mtchstr)] - - return False - -def _md_is_null_row(r_arr): - for cell in r_arr: - if cell is not None: - return False - - return True - - -def _md_table_to_ss_structure(mdstr: str) -> list[tuple[str, list[list[str]]]]: - ss_arr = [] - for item in mdstr.split("\n"): - arr = _md_extract_array(item) - if arr: - ss_arr.append(arr) +def _md_table_to_ss_structure(mdstr: str) -> dict[str, list[tuple[str, ...]]]: sheet_name = False sheet_arr = False - sheets = [] - for row in ss_arr: - if row[0] is not None: - if sheet_arr: - sheets.append((sheet_name, sheet_arr)) - sheet_arr = [] - sheet_name = row[0] - excluding_first_col = row[1:] - if sheet_name and not _md_is_null_row(excluding_first_col): - sheet_arr.append(excluding_first_col) - sheets.append((sheet_name, sheet_arr)) + sheets = {} + for line in mdstr.split("\n"): + if re.match(MD_COMMENT, line): + # ignore lines which start with pound sign + continue + elif re.match(MD_COMMENT_INLINE, line): + # keep everything before the # outside of the last occurrence of | + line = re.match(MD_COMMENT_INLINE, line).groups()[0] + match = re.match(MD_CELL, line) + if match: + mtchstr = match.groups()[0] + if not re.match(MD_SEPARATOR, mtchstr): + row_split = re.split(MD_PIPE_OR_ESCAPE, mtchstr) + first_col = _md_strp_cell(row_split[0]) + row = tuple(_md_strp_cell(c) for c in row_split[1:]) + if first_col is None and row is None: + continue + if first_col is not None: + if sheet_arr: + sheets[sheet_name] = sheet_arr + sheet_arr = [] + sheet_name = first_col + if sheet_name and any(c is not None for c in row): + sheet_arr.append(row) + sheets[sheet_name] = sheet_arr return sheets def md_to_dict(md: str | BytesIO): - def _row_to_dict(row, headers): - out_dict = {} - for i in range(len(row)): - col = row[i] - if col not in [None, ""]: - out_dict[headers[i]] = col - return out_dict - def list_to_dicts(arr): - return [_row_to_dict(r, arr[0]) for r in arr[1:]] + return [ + {arr[0][i]: v for i, v in enumerate(row) if v not in {None, ""}} + for row in arr[1:] + ] def process_md_data(md_: str): - _md = [] - for line in md_.split("\n"): - if re.match(MD_COMMENT, line): - # ignore lines which start with pound sign - continue - elif re.match(MD_COMMENT_INLINE, line): - # keep everything before the # outside of the last occurrence of | - _md.append(re.match(MD_COMMENT_INLINE, line).groups()[0].strip()) + result_book = {"sheet_names": []} + ss_structure = _md_table_to_ss_structure(md_) + for sheet, contents in ss_structure.items(): + # Note original in sheet_names for spelling check. + result_book["sheet_names"].append(sheet) + sheet_name = sheet.lower() + # Do not process sheets that have nothing to do with XLSForm. 
+ if sheet_name not in constants.SUPPORTED_SHEET_NAMES: + if len(ss_structure) == 1: + result_book[constants.SURVEY] = list_to_dicts(contents) + result_book[f"{constants.SURVEY}_header"] = _list_to_dict_list( + contents[0] + ) + else: + continue else: - _md.append(line.strip()) - md_ = "\n".join(_md) - sheets = {} - for sheet, contents in _md_table_to_ss_structure(md_): - sheets[sheet] = list_to_dicts(contents) - return sheets + result_book[sheet_name] = list_to_dicts(contents) + result_book[f"{sheet_name}_header"] = _list_to_dict_list(contents[0]) + return result_book try: md_data = get_definition_data(definition=md) @@ -622,7 +636,7 @@ def md_table_to_workbook(mdstr: str) -> pyxlWorkbook: """ md_data = _md_table_to_ss_structure(mdstr=mdstr) wb = pyxlWorkbook(write_only=True) - for key, rows in md_data: + for key, rows in md_data.items(): sheet = wb.create_sheet(title=key) for r in rows: sheet.append(r) @@ -685,13 +699,14 @@ class SupportedFileTypes(Enum): def get_processors(): return { SupportedFileTypes.xlsx: xlsx_to_dict, + SupportedFileTypes.xlsm: xlsx_to_dict, SupportedFileTypes.xls: xls_to_dict, SupportedFileTypes.md: md_to_dict, SupportedFileTypes.csv: csv_to_dict, } -@dataclass +@dataclass(slots=True) class Definition: data: BytesIO file_type: SupportedFileTypes | None @@ -701,7 +716,7 @@ class Definition: def definition_to_dict( definition: str | PathLike[str] | bytes | BytesIO | IOBase | Definition, file_type: str | None = None, -) -> dict: +) -> DefinitionData: """ Convert raw definition data to a dict ready for conversion to a XForm. @@ -724,7 +739,9 @@ def definition_to_dict( for func in processors.values(): try: - return func(definition) + return DefinitionData( + fallback_form_name=definition.file_path_stem, **func(definition) + ) except PyXFormReadError: # noqa: PERF203 continue @@ -790,3 +807,17 @@ def get_definition_data( file_type=file_type, file_path_stem=file_path_stem, ) + + +def get_xlsform( + xlsform: str | PathLike[str] | bytes | BytesIO | BinaryIO | dict, + file_type: str | None = None, +) -> DefinitionData: + if isinstance(xlsform, dict): + workbook_dict = DefinitionData(**xlsform) + else: + definition = get_definition_data(definition=xlsform) + if file_type is None: + file_type = definition.file_type + workbook_dict = definition_to_dict(definition=definition, file_type=file_type) + return workbook_dict diff --git a/pyxform/xls2xform.py b/pyxform/xls2xform.py index f9af81a3..a768764a 100644 --- a/pyxform/xls2xform.py +++ b/pyxform/xls2xform.py @@ -11,15 +11,17 @@ from os import PathLike from os.path import splitext from pathlib import Path -from typing import TYPE_CHECKING, BinaryIO +from typing import TYPE_CHECKING, BinaryIO, Optional -from pyxform import builder, xls2json -from pyxform.utils import coalesce, external_choices_to_csv, has_external_choices -from pyxform.validators.odk_validate import ODKValidateError -from pyxform.xls2json_backends import ( - definition_to_dict, - get_definition_data, +from pyxform.builder import create_survey_element_from_dict +from pyxform.utils import ( + coalesce, + external_choices_to_csv, + has_external_choices, ) +from pyxform.validators.odk_validate import ODKValidateError +from pyxform.xls2json import workbook_to_json +from pyxform.xls2json_backends import get_xlsform if TYPE_CHECKING: from pyxform.survey import Survey @@ -39,7 +41,7 @@ def get_xml_path(path): return splitext(path)[0] + ".xml" -@dataclass +@dataclass(slots=True) class ConvertResult: """ Result data from the XLSForm to XForm conversion. 
@@ -54,8 +56,8 @@ class ConvertResult:
     xform: str
     warnings: list[str]
     itemsets: str | None
-    _pyxform: dict
-    _survey: "Survey"
+    _pyxform: dict | None
+    _survey: Optional["Survey"]
 
 
 def convert(
@@ -93,32 +95,26 @@ def convert(
         xlsform is provided as a dict, then it is used directly and this argument is ignored.
     """
     warnings = coalesce(warnings, [])
-    if isinstance(xlsform, dict):
-        workbook_dict = xlsform
-        fallback_form_name = None
-    else:
-        definition = get_definition_data(definition=xlsform)
-        if file_type is None:
-            file_type = definition.file_type
-        workbook_dict = definition_to_dict(definition=definition, file_type=file_type)
-        fallback_form_name = definition.file_path_stem
-    pyxform_data = xls2json.workbook_to_json(
+    workbook_dict = get_xlsform(xlsform=xlsform, file_type=file_type)
+    pyxform_data = workbook_to_json(
         workbook_dict=workbook_dict,
         form_name=form_name,
-        fallback_form_name=fallback_form_name,
+        fallback_form_name=workbook_dict.fallback_form_name,
         default_language=default_language,
         warnings=warnings,
     )
-    survey = builder.create_survey_element_from_dict(pyxform_data)
+    itemsets = None
+    if has_external_choices(json_struct=pyxform_data):
+        itemsets = external_choices_to_csv(workbook_dict=workbook_dict)
+    del workbook_dict
+
+    survey = create_survey_element_from_dict(pyxform_data)
     xform = survey.to_xml(
         validate=validate,
         pretty_print=pretty_print,
         warnings=warnings,
         enketo=enketo,
     )
-    itemsets = None
-    if has_external_choices(json_struct=pyxform_data):
-        itemsets = external_choices_to_csv(workbook_dict=workbook_dict)
     return ConvertResult(
         xform=xform,
         warnings=warnings,
diff --git a/tests/example_xls/case_insensitivity.csv b/tests/example_xls/case_insensitivity.csv
new file mode 100644
index 00000000..0d267179
--- /dev/null
+++ b/tests/example_xls/case_insensitivity.csv
@@ -0,0 +1,23 @@
+SURVEY ,
+ , TYPE , NAME , LABEL::EN , CHOICE_FILTER
+ , select_one c1 , q1 , Are you good? ,
+ , select_one_external c1 , q2 , Where are you? , YES_NO=${q1}
+ , osm c1 , q3 , Where exactly? ,
+CHOICES ,
+ , LIST_NAME , NAME , LABEL::EN
+ , c1 , n1-c , l1-c
+ , c1 , n2-c , l2-c
+SETTINGS ,
+ , FORM_TITLE , FORM_ID , DEFAULT_LANGUAGE
+ , Yes or no , YesNo , EN
+EXTERNAL_CHOICES ,
+ , LIST_NAME , NAME , LABEL , YES_NO
+ , c1 , n1-e , l1-e , yes
+ , c1 , n2-e , l2-e , yes
+ENTITIES ,
+ , DATASET , LABEL
+ , e1 , l1
+OSM ,
+ , LIST_NAME , NAME , LABEL
+ , c1 , n1-o , l1-o
+ , c1 , n2-o , l2-o
diff --git a/tests/example_xls/case_insensitivity.md b/tests/example_xls/case_insensitivity.md
new file mode 100644
index 00000000..b4751f6b
--- /dev/null
+++ b/tests/example_xls/case_insensitivity.md
@@ -0,0 +1,23 @@
+| SURVEY |
+| | TYPE | NAME | LABEL::EN | CHOICE_FILTER |
+| | select_one c1 | q1 | Are you good? | |
+| | select_one_external c1 | q2 | Where are you? | YES_NO=${q1} |
+| | osm c1 | q3 | Where exactly? | |
+| CHOICES |
+| | LIST_NAME | NAME | LABEL::EN |
+| | c1 | n1-c | l1-c |
+| | c1 | n2-c | l2-c |
+| SETTINGS |
+| | FORM_TITLE | FORM_ID | DEFAULT_LANGUAGE |
+| | Yes or no | YesNo | EN |
+| EXTERNAL_CHOICES |
+| | LIST_NAME | NAME | LABEL | YES_NO |
+| | c1 | n1-e | l1-e | yes |
+| | c1 | n2-e | l2-e | yes |
+| ENTITIES |
+| | DATASET | LABEL |
+| | e1 | l1 |
+| OSM |
+| | LIST_NAME | NAME | LABEL |
+| | c1 | n1-o | l1-o |
+| | c1 | n2-o | l2-o |
diff --git a/tests/example_xls/case_insensitivity.xls b/tests/example_xls/case_insensitivity.xls
new file mode 100644
index 00000000..853b759d
Binary files /dev/null and b/tests/example_xls/case_insensitivity.xls differ
diff --git a/tests/example_xls/case_insensitivity.xlsx b/tests/example_xls/case_insensitivity.xlsx
new file mode 100644
index 00000000..852e8ba2
Binary files /dev/null and b/tests/example_xls/case_insensitivity.xlsx differ
diff --git a/tests/example_xls/include.md b/tests/example_xls/include.md
new file mode 100644
index 00000000..b83d34d7
--- /dev/null
+++ b/tests/example_xls/include.md
@@ -0,0 +1,6 @@
+| survey |
+| | type | name | label:English |
+| | text | name | What's your name? |
+| | include | yes_or_no_question | Yes or no question section |
+| choices |
+| | list name | name | label:english |
diff --git a/tests/example_xls/include_json.md b/tests/example_xls/include_json.md
new file mode 100644
index 00000000..5adc5396
--- /dev/null
+++ b/tests/example_xls/include_json.md
@@ -0,0 +1,5 @@
+| survey |
+| | type | name | label:English |
+| | include | how_old_are_you | |
+| choices |
+| | list name | name | label:english |
diff --git a/tests/example_xls/loop.md b/tests/example_xls/loop.md
new file mode 100644
index 00000000..0a6ad124
--- /dev/null
+++ b/tests/example_xls/loop.md
@@ -0,0 +1,11 @@
+| survey |
+| | type | name | label:english |
+| | select all that apply from toilet_type or specify other | available_toilet_types | What type of toilets are on the premises? |
+| | begin loop over toilet_type | loop_toilet_types |
+| | integer | number | How many %(label)s are on the premises? |
+| | end loop |
+| choices |
+| | list name | name | label:english |
+| | toilet_type | pit_latrine_with_slab | Pit latrine with slab |
+| | toilet_type | open_pit_latrine | Pit latrine without slab/open pit |
+| | toilet_type | bucket_system | Bucket system |
diff --git a/tests/example_xls/specify_other.md b/tests/example_xls/specify_other.md
new file mode 100644
index 00000000..0d888ff9
--- /dev/null
+++ b/tests/example_xls/specify_other.md
@@ -0,0 +1,7 @@
+| survey |
+| | type | name | label:English |
+| | select one from sexes or specify other | sex | What sex are you? |
+| choices |
+| | list name | name | label:English |
+| | sexes | male | Male |
+| | sexes | female | Female |
diff --git a/tests/example_xls/text_and_integer.md b/tests/example_xls/text_and_integer.md
new file mode 100644
index 00000000..4f9e33b5
--- /dev/null
+++ b/tests/example_xls/text_and_integer.md
@@ -0,0 +1,6 @@
+| survey |
+| | type | name | label:english |
+| | text | your_name | What is your name? |
+| | integer | your_age | How many years old are you? |
+| choices |
+| | list name | name | label:english |
diff --git a/tests/example_xls/text_and_integer.xlsx b/tests/example_xls/text_and_integer.xlsx
index b26558d3..2611c70d 100644
Binary files a/tests/example_xls/text_and_integer.xlsx and b/tests/example_xls/text_and_integer.xlsx differ
diff --git a/tests/example_xls/yes_or_no_question.md b/tests/example_xls/yes_or_no_question.md
new file mode 100644
index 00000000..07f3be72
--- /dev/null
+++ b/tests/example_xls/yes_or_no_question.md
@@ -0,0 +1,7 @@
+| survey |
+| | type | name | label:english |
+| | select one from yes_or_no | good_day | have you had a good day today? |
+| choices |
+| | list name | name | label:english |
+| | yes_or_no | yes | yes |
+| | yes_or_no | no | no |
diff --git a/tests/fixtures/strings.ini b/tests/fixtures/strings.ini
index 90dfedd3..5ca67d8a 100644
--- a/tests/fixtures/strings.ini
+++ b/tests/fixtures/strings.ini
@@ -5,14 +5,10 @@ test_answers_can_be_imported_from_xml =