diff --git a/pyxform/aliases.py b/pyxform/aliases.py index 17cfa5c6..7497c720 100644 --- a/pyxform/aliases.py +++ b/pyxform/aliases.py @@ -44,77 +44,57 @@ } settings_header = { "form_title": constants.TITLE, - "set form title": constants.TITLE, + "set_form_title": constants.TITLE, "form_id": constants.ID_STRING, - "sms_keyword": constants.SMS_KEYWORD, - "sms_separator": constants.SMS_SEPARATOR, - "sms_allow_media": constants.SMS_ALLOW_MEDIA, - "sms_date_format": constants.SMS_DATE_FORMAT, - "sms_datetime_format": constants.SMS_DATETIME_FORMAT, + "set_form_id": constants.ID_STRING, "prefix": constants.COMPACT_PREFIX, - "delimiter": constants.COMPACT_DELIMITER, - "set form id": constants.ID_STRING, - "public_key": constants.PUBLIC_KEY, - "submission_url": constants.SUBMISSION_URL, - "auto_send": constants.AUTO_SEND, - "auto_delete": constants.AUTO_DELETE, - "allow_choice_duplicates": constants.ALLOW_CHOICE_DUPLICATES, } # TODO: Check on bind prefix approach in json. # Conversion dictionary from user friendly column names to meaningful values survey_header = { - "Label": "label", - "Name": "name", - "SMS Field": constants.SMS_FIELD, - "SMS Option": constants.SMS_OPTION, - "SMS Separator": constants.SMS_SEPARATOR, - "SMS Allow Media": constants.SMS_ALLOW_MEDIA, - "SMS Date Format": constants.SMS_DATE_FORMAT, - "SMS DateTime Format": constants.SMS_DATETIME_FORMAT, - "SMS Response": constants.SMS_RESPONSE, - "compact_tag": "instance::odk:tag", # used for compact representation - "Type": "type", - "List_name": constants.LIST_NAME_U, - # u"repeat_count": u"jr:count", duplicate key - "read_only": "bind::readonly", - "readonly": "bind::readonly", - "relevant": "bind::relevant", + "sms_field": constants.SMS_FIELD, + "sms_option": constants.SMS_OPTION, + "sms_separator": constants.SMS_SEPARATOR, + "sms_allow_media": constants.SMS_ALLOW_MEDIA, + "sms_date_format": constants.SMS_DATE_FORMAT, + "sms_datetime_format": constants.SMS_DATETIME_FORMAT, + "sms_response": constants.SMS_RESPONSE, + "compact_tag": ("instance", "odk:tag"), # used for compact representation + "read_only": ("bind", "readonly"), + "readonly": ("bind", "readonly"), + "relevant": ("bind", "relevant"), "caption": constants.LABEL, - "appearance": "control::appearance", # TODO: this is also an issue - "relevance": "bind::relevant", - "required": "bind::required", - "constraint": "bind::constraint", - "constraining message": "bind::jr:constraintMsg", - "constraint message": "bind::jr:constraintMsg", - "constraint_message": "bind::jr:constraintMsg", - "calculation": "bind::calculate", - "calculate": "bind::calculate", + "appearance": ("control", "appearance"), + "relevance": ("bind", "relevant"), + "required": ("bind", "required"), + "constraint": ("bind", "constraint"), + "constraining_message": ("bind", "jr:constraintMsg"), + "constraint_message": ("bind", "jr:constraintMsg"), + "calculation": ("bind", "calculate"), + "calculate": ("bind", "calculate"), "command": constants.TYPE, "tag": constants.NAME, "value": constants.NAME, - "image": "media::image", - "big-image": "media::big-image", - "audio": "media::audio", - "video": "media::video", - "count": "control::jr:count", - "repeat_count": "control::jr:count", - "jr:count": "control::jr:count", - "autoplay": "control::autoplay", - "rows": "control::rows", + "image": ("media", "image"), + "big-image": ("media", "big-image"), + "audio": ("media", "audio"), + "video": ("media", "video"), + "count": ("control", "jr:count"), + "repeat_count": ("control", "jr:count"), + "jr:count": ("control", 
"jr:count"), + "autoplay": ("control", "autoplay"), + "rows": ("control", "rows"), # New elements that have to go into itext elements: - "noAppErrorString": "bind::jr:noAppErrorString", - "no_app_error_string": "bind::jr:noAppErrorString", - "requiredMsg": "bind::jr:requiredMsg", - "required_message": "bind::jr:requiredMsg", - "required message": "bind::jr:requiredMsg", + "noapperrorstring": ("bind", "jr:noAppErrorString"), + "no_app_error_string": ("bind", "jr:noAppErrorString"), + "requiredmsg": ("bind", "jr:requiredMsg"), + "required_message": ("bind", "jr:requiredMsg"), "body": "control", - "parameters": "parameters", - constants.ENTITIES_SAVETO: "bind::entities:saveto", + constants.ENTITIES_SAVETO: ("bind", "entities:saveto"), } entities_header = {constants.LIST_NAME_U: "dataset"} -# Key is the pyxform internal name, Value is the name used in error/warning messages. TRANSLATABLE_SURVEY_COLUMNS = { constants.LABEL: constants.LABEL, # Per ODK Spec, could include "short" once pyxform supports it. @@ -129,19 +109,19 @@ } TRANSLATABLE_CHOICES_COLUMNS = { "label": constants.LABEL, - "image": "media::image", - "big-image": "media::big-image", - "audio": "media::audio", - "video": "media::video", + "image": survey_header["image"], + "big-image": survey_header["big-image"], + "audio": survey_header["audio"], + "video": survey_header["video"], } list_header = { "caption": constants.LABEL, constants.LIST_NAME_U: constants.LIST_NAME_S, "value": constants.NAME, - "image": "media::image", - "big-image": "media::big-image", - "audio": "media::audio", - "video": "media::video", + "image": survey_header["image"], + "big-image": survey_header["big-image"], + "audio": survey_header["audio"], + "video": survey_header["video"], } # Note that most of the type aliasing happens in all.xls _type_alias_map = { diff --git a/pyxform/builder.py b/pyxform/builder.py index 86675f14..2b699de9 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -146,16 +146,20 @@ def _create_question_from_dict( ) if question_class: - if const.CHOICES in d and choices: - return question_class( - question_type_dictionary=question_type_dictionary, - choices=choices.get(d[const.ITEMSET], d[const.CHOICES]), - **{k: v for k, v in d.items() if k != const.CHOICES}, - ) - else: - return question_class( - question_type_dictionary=question_type_dictionary, **d - ) + if choices: + d_choices = d.get(const.CHOICES, d.get(const.CHILDREN)) + if d_choices: + return question_class( + question_type_dictionary=question_type_dictionary, + **{ + k: v + for k, v in d.items() + if k not in {const.CHOICES, const.CHILDREN} + }, + choices=choices.get(d[const.ITEMSET], d_choices), + ) + + return question_class(question_type_dictionary=question_type_dictionary, **d) return () @@ -259,16 +263,16 @@ def _name_and_label_substitutions(question_template, column_headers): const.NAME: column_headers[const.NAME], const.LABEL: column_headers[const.LABEL][lang], } - for lang in column_headers[const.LABEL].keys() + for lang in column_headers[const.LABEL] } result = question_template.copy() - for key in result.keys(): + for key in result: if isinstance(result[key], str): result[key] %= column_headers elif isinstance(result[key], dict): result[key] = result[key].copy() - for key2 in result[key].keys(): + for key2 in result[key]: if info_by_lang and isinstance(column_headers[const.LABEL], dict): result[key][key2] %= info_by_lang.get(key2, column_headers) else: diff --git a/pyxform/entities/entities_parsing.py b/pyxform/entities/entities_parsing.py index 
39f06dce..f67263e1 100644 --- a/pyxform/entities/entities_parsing.py +++ b/pyxform/entities/entities_parsing.py @@ -1,22 +1,17 @@ +from collections.abc import Sequence from typing import Any from pyxform import constants as const from pyxform.errors import PyXFormError from pyxform.parsing.expression import is_xml_tag -from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings EC = const.EntityColumns def get_entity_declaration( -    entities_sheet: list[dict], workbook_dict: dict[str, list[dict]], warnings: list[str] +    entities_sheet: Sequence[dict], ) -> dict[str, Any]: -    if len(entities_sheet) == 0: -        similar = find_sheet_misspellings(key=const.ENTITIES, keys=workbook_dict.keys()) -        if similar is not None: -            warnings.append(similar + const._MSG_SUPPRESS_SPELLING) -        return {} -    elif len(entities_sheet) > 1: +    if len(entities_sheet) > 1: raise PyXFormError( "Currently, you can only declare a single entity per form. Please make sure your entities sheet only declares one entity." ) @@ -49,11 +44,11 @@ def get_entity_declaration( const.NAME: const.ENTITY, const.TYPE: const.ENTITY, const.PARAMETERS: { -            EC.DATASET: dataset_name, -            EC.ENTITY_ID: entity_id, -            EC.CREATE_IF: create_condition, -            EC.UPDATE_IF: update_condition, -            EC.LABEL: entity_label, +            EC.DATASET.value: dataset_name, +            EC.ENTITY_ID.value: entity_id, +            EC.CREATE_IF.value: create_condition, +            EC.UPDATE_IF.value: update_condition, +            EC.LABEL.value: entity_label, }, } @@ -83,13 +78,16 @@ def get_validated_dataset_name(entity): def validate_entity_saveto( -    row: dict, row_number: int, entity_declaration: dict[str, Any], in_repeat: bool +    row: dict, +    row_number: int, +    in_repeat: bool, +    entity_declaration: dict[str, Any] | None = None, ): save_to = row.get(const.BIND, {}).get("entities:saveto", "") if not save_to: return -    if len(entity_declaration) == 0: +    if not entity_declaration: raise PyXFormError( "To save entity properties using the save_to column, you must add an entities sheet and declare an entity." ) @@ -126,9 +124,9 @@ def validate_entity_saveto( def validate_entities_columns(row: dict): -    extra = {k: None for k in row.keys() if k not in EC.value_list()} +    extra = {k: None for k in row if k not in EC.value_list()} if 0 < len(extra): -        fmt_extra = ", ".join(f"'{k}'" for k in extra.keys()) +        fmt_extra = ", ".join(f"'{k}'" for k in extra) msg = ( f"The entities sheet included the following unexpected column(s): {fmt_extra}. " f"These columns are not supported by this version of pyxform. Please either: " diff --git a/pyxform/parsing/sheet_headers.py b/pyxform/parsing/sheet_headers.py new file mode 100644 index 00000000..9c69e242 --- /dev/null +++ b/pyxform/parsing/sheet_headers.py @@ -0,0 +1,262 @@ +from collections.abc import Container, Sequence +from itertools import chain, islice +from typing import Any + +from pyxform import constants +from pyxform.errors import PyXFormError + +INVALID_HEADER = ( +    "Invalid headers provided for sheet: '{sheet_name}'. For XLSForms, this may be due to " +    "a missing header row, in which case add a header row as per the reference template " +    "https://xlsform.org/en/ref-table/. For internal API usage, may be due to a missing " +    "mapping for '{header}', in which case ensure that the full set of headers appear " +    "within the first 100 rows, or specify the header row in '{sheet_name}_header'." +) +INVALID_DUPLICATE = ( +    "Invalid headers provided for sheet: '{sheet_name}'. Headers that are different " +    "names for the same column were found: '{other}', '{header}'. 
Rename or remove one " +    "of these columns." +) +INVALID_MISSING_REQUIRED = ( +    "Invalid headers provided for sheet: '{sheet_name}'. One or more required column " +    "headers were not found: {missing}. " +    "Learn more: https://xlsform.org/en/#setting-up-your-worksheets" +) + + +def merge_dicts( +    dict_a: dict, dict_b: dict, default_key: str = constants.DEFAULT_LANGUAGE_VALUE +) -> dict: +    """ +    Recursively merge two nested dicts into a single dict. + +    When keys match, their values are merged using a recursive call to this function, +    otherwise they are just added to the output dict. +    """ +    if not dict_a: +        return dict_b +    if not dict_b: +        return dict_a + +    if not isinstance(dict_a, dict): +        if default_key in dict_b: +            return dict_b +        dict_a = {default_key: dict_a} +    if not isinstance(dict_b, dict): +        if default_key in dict_a: +            return dict_a +        dict_b = {default_key: dict_b} + +    # Union keys but retain order (as opposed to set()), preferencing dict_a then dict_b. +    # E.g. {"a": 1, "b": 2} + {"c": 3, "a": 4} -> {"a": None, "b": None, "c": None} +    out_dict = dict_a +    for key in {k: None for k in (chain(dict_a, dict_b))}: +        out_dict[key] = merge_dicts(dict_a.get(key), dict_b.get(key), default_key) +    return out_dict + + +def list_to_nested_dict(lst: Sequence) -> dict: +    """ +    [1,2,3,4] -> {1:{2:{3:4}}} +    """ +    if len(lst) > 1: +        return {lst[0]: list_to_nested_dict(lst[1:])} +    else: +        return lst[0] + + +class DealiasAndGroupHeadersResult: +    __slots__ = ("headers", "data") + +    def __init__(self, headers: tuple[tuple[str, ...], ...], data: Sequence[dict]): +        """ +        :param headers: Distinct headers seen in the sheet, parsed / split if applicable. +        :param data: Sheet data rows, in grouped dict format. +        """ +        self.headers: tuple[tuple[str, ...], ...] = headers +        self.data: Sequence[dict] = data + + +def to_snake_case(value: str) -> str: +    """ +    Convert a name (e.g. column name or question type) to snake case. + +    Removes duplicate, leading, trailing spaces. +    """ +    return "_".join(value.split()).lower() + + +def process_header( +    header: str, +    use_double_colon: bool, +    header_aliases: dict[str, str | tuple[str, ...]], +    header_columns: Container[str], +) -> tuple[str, tuple[str, ...]]: +    """ +    Look up the header in the provided expected columns or aliases, or split the header. + +    :param header: Original XLSForm data header. +    :param use_double_colon: If True, split the header on "::" rather than ":" (deprecated). +    :param header_aliases: Mapping of original headers to aliased (possibly split) headers. +    :param header_columns: The expected headers for the sheet. +    :return: e.g. tuple[original, tuple[new,]] | tuple[original, tuple[new1, new2]] +    """ +    # If the header is already recognised then nothing further needed. +    if header in header_columns and header not in header_aliases: +        return header, (header,) + +    # Also try normalising to snake_case. +    header_normalised = to_snake_case(value=header) +    if header_normalised in header_columns and header_normalised not in header_aliases: +        return header_normalised, (header_normalised,) + +    # Check for double colons to determine whether to use them or single colons to +    # delimit grouped headers. Single colons are bad because they conflict with the +    # xform namespace syntax (i.e. jr:constraintMsg), so they are only used if necessary +    # for backwards compatibility. 
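For review, a rough sketch of the splitting and dealiasing behaviour implemented below, using a trimmed-down alias map and column set (illustrative values only; the real mappings come from aliases.py and the calling sheet definition, and these lines are an annotation, not part of the patch):

    # Hypothetical, minimal inputs for illustration only.
    example_aliases = {"required_message": ("bind", "jr:requiredMsg")}
    example_columns = {"type", "name", "label", "bind"}

    # A recognised column passes through untouched.
    process_header("label", False, example_aliases, example_columns)
    # -> ("label", ("label",))

    # A translated column is split on "::" into (column, language); only the first
    # token is snake_cased, so the language part keeps its original spelling.
    process_header("label::English (en)", True, example_aliases, example_columns)
    # -> ("label", ("label", "English (en)"))

    # An aliased column such as "Required Message" normalises to "required_message"
    # and then dealiases to the grouped tokens ("bind", "jr:requiredMsg").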
+ group_delimiter = "::" + if use_double_colon or group_delimiter in header: + tokens = tuple(t.strip() for t in header.split(group_delimiter)) + else: + tokens = tuple(t.strip() for t in header.split(":")) + # Handle "jr:count" or similar when used with single colon delimiters. + if "jr" in tokens: + jr_idx = tokens.index("jr") + tokens = ( + *tokens[0:jr_idx], + f"jr:{tokens[jr_idx + 1]}", + *tokens[jr_idx + 2 :], + ) + + new_header = to_snake_case(tokens[0]) + dealiased_first_token = header_aliases.get(new_header) + if dealiased_first_token: + new_header = dealiased_first_token + if isinstance(new_header, tuple): + tokens = (*new_header, *tokens[1:]) + else: + tokens = (new_header, *tokens[1:]) + elif new_header in header_columns: + tokens = (new_header, *tokens[1:]) + # Avoid changing unknown columns, since it could break choice_filter expressions. + else: + new_header = header + tokens = tuple(tokens) + return new_header, tokens + + +def process_row( + sheet_name: str, + row: dict[str, str], + header_key: dict[str, tuple[str, ...]], + default_language: str = constants.DEFAULT_LANGUAGE_VALUE, +) -> dict[str, str]: + """ + Convert original headers and values to a possibly nested structure. + + :param sheet_name: Name of the sheet data being processed. + :param row: Original XLSForm data row. + :param header_key: Mapping from original headers to headers split on a delimiter. + :param default_language: Default translation language for the form, used to group + used to group labels/hints/etc without a language specified with localized versions. + """ + out_row = {} + for header, val in row.items(): + tokens = header_key.get(header, None) + if header == "__row": + out_row[header] = val + elif not tokens: + raise PyXFormError( + INVALID_HEADER.format(sheet_name=sheet_name, header=header) + ) + elif len(tokens) == 1: + out_row[tokens[0]] = val + else: + new_value = list_to_nested_dict((*tokens[1:], val)) + out_row = merge_dicts(out_row, {tokens[0]: new_value}, default_language) + + return out_row + + +def dealias_and_group_headers( + sheet_name: str, + sheet_data: Sequence[dict[str, str]], + sheet_header: Sequence[dict[str, Any]], + header_aliases: dict[str, str], + header_columns: set[str], + headers_required: set[str] | None = None, + default_language: str = constants.DEFAULT_LANGUAGE_VALUE, +) -> DealiasAndGroupHeadersResult: + """ + Normalise headers and group keys that contain a delimiter. + + For example a row: + {"text::english": "hello", "text::french" : "bonjour"} + Becomes + {"text": {"english": "hello", "french" : "bonjour"}. + + Dealiasing is done to the first token (the first term separated by the delimiter). + + :param sheet_name: Name of the sheet data being processed. + :param sheet_data: The sheet data. + :param sheet_header: The sheet column names (headers). + :param header_aliases: Mapping of allowed column aliases (backwards compatibility). + :param header_columns: Expected columns for the sheet. + :param headers_required: Required columns for the sheet. + :param default_language: Default translation language for the form, used to group + used to group labels/hints/etc without a language specified with localized versions. + """ + + header_key: dict[str, tuple[str, ...]] = {} + tokens_key: dict[tuple[str, ...], str] = {} + + # If not specified, try to guess the headers from the first 100 rows of data. + # Should only happen if the XLSForm is provided as a dict with no "_headers" keys. 
+ if not sheet_header and sheet_data: + sheet_header = {} + for row in islice(sheet_data, 0, 100): + for k in row: + sheet_header[k] = None + sheet_header = [sheet_header] + + if sheet_header: + use_double_colon = any("::" in k for k in sheet_header[0]) + for header in sheet_header[0]: + tokens = header_key.get(header, None) + if tokens is None: + new_header, tokens = process_header( + header=header, + use_double_colon=use_double_colon, + header_aliases=header_aliases, + header_columns=header_columns, + ) + other_header = tokens_key.get(tokens) + if other_header and new_header != header: + raise PyXFormError( + INVALID_DUPLICATE.format( + sheet_name=sheet_name, + other=other_header, + header=header, + ) + ) + header_key[header] = tokens + tokens_key[tokens] = header + + data = tuple( + process_row( + sheet_name=sheet_name, + row=row, + header_key=header_key, + default_language=default_language, + ) + for row in sheet_data + ) + if headers_required and (data or sheet_name == constants.SURVEY): + missing = {h for h in headers_required if h not in {h[0] for h in tokens_key}} + if missing: + raise PyXFormError( + INVALID_MISSING_REQUIRED.format( + sheet_name=sheet_name, missing=", ".join(f"'{h}'" for h in missing) + ) + ) + return DealiasAndGroupHeadersResult(headers=tuple(tokens_key), data=data) diff --git a/pyxform/question.py b/pyxform/question.py index 4c7f2329..6ebac4c4 100644 --- a/pyxform/question.py +++ b/pyxform/question.py @@ -3,6 +3,7 @@ """ import os.path +import re from collections.abc import Callable, Generator, Iterable from itertools import chain from typing import TYPE_CHECKING @@ -16,12 +17,12 @@ EXTERNAL_INSTANCE_EXTENSIONS, ) from pyxform.errors import PyXFormError +from pyxform.parsing.expression import RE_ANY_PYXFORM_REF from pyxform.question_type_dictionary import QUESTION_TYPE_DICT from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( PYXFORM_REFERENCE_REGEX, DetachableElement, - coalesce, combine_lists, default_is_dynamic, node, @@ -32,9 +33,6 @@ QUESTION_EXTRA_FIELDS = ( - "_itemset_dyn_label", - "_itemset_has_media", - "_itemset_multi_language", "_qtd_defaults", "_qtd_kwargs", "action", @@ -55,7 +53,7 @@ QUESTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *QUESTION_EXTRA_FIELDS) SELECT_QUESTION_EXTRA_FIELDS = ( - constants.CHILDREN, + constants.CHOICES, constants.ITEMSET, constants.LIST_NAME_U, ) @@ -65,15 +63,12 @@ OSM_QUESTION_FIELDS = (*QUESTION_FIELDS, *SELECT_QUESTION_EXTRA_FIELDS) OPTION_EXTRA_FIELDS = ( - "_choice_itext_id", + "_choice_itext_ref", constants.MEDIA, "sms_option", ) OPTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *OPTION_EXTRA_FIELDS) -TAG_EXTRA_FIELDS = (constants.CHILDREN,) -TAG_FIELDS = (*SURVEY_ELEMENT_FIELDS, *TAG_EXTRA_FIELDS) - class Question(SurveyElement): __slots__ = QUESTION_EXTRA_FIELDS @@ -110,8 +105,7 @@ def __init__(self, fields: tuple[str, ...] | None = None, **kwargs): qtd = kwargs.pop("question_type_dictionary", QUESTION_TYPE_DICT) type_arg = kwargs.get("type") - default_type = qtd.get(type_arg) - if default_type is None: + if type_arg not in qtd: raise PyXFormError(f"Unknown question type '{type_arg}'.") # Keeping original qtd_kwargs is only needed if output of QTD data is not @@ -139,25 +133,16 @@ def __init__(self, fields: tuple[str, ...] | None = None, **kwargs): fields = chain(QUESTION_EXTRA_FIELDS, fields) super().__init__(fields=fields, **kwargs) - def validate(self): - SurveyElement.validate(self) - - # make sure that the type of this question exists in the - # question type dictionary. 
- if self.type not in QUESTION_TYPE_DICT: - raise PyXFormError(f"Unknown question type '{self.type}'.") - def xml_instance(self, survey: "Survey", **kwargs): - attributes = self.instance - if attributes is None: - attributes = {} - else: - for key, value in attributes.items(): - attributes[key] = survey.insert_xpaths(value, self) - if self.default and not default_is_dynamic(self.default, self.type): - return node(self.name, str(self.default), **attributes) - return node(self.name, **attributes) + result = node(self.name, str(self.default)) + else: + result = node(self.name) + attributes = self.instance + if attributes: + for k, v in attributes.items(): + result.setAttribute(k, survey.insert_xpaths(v, self)) + return result def xml_control(self, survey: "Survey"): if self.type == "calculate" or ( @@ -198,18 +183,16 @@ def xml_control(self, survey: "Survey"): return xml_node - def xml_action(self): + def xml_action(self) -> DetachableElement | None: """ Return the action for this survey element. """ if self.action: - return node( - self.action["name"], - ref=self.get_xpath(), - **{k: v for k, v in self.action.items() if k != "name"}, - ) - - return None + result = node(self.action["name"], ref=self.get_xpath()) + for k, v in self.action.items(): + if k != "name": + result.setAttribute(k, v) + return result def nest_set_nodes(self, survey, xml_node, tag, nested_items): for item in nested_items: @@ -222,6 +205,24 @@ def nest_set_nodes(self, survey, xml_node, tag, nested_items): set_node = node(tag, **node_attrs) xml_node.appendChild(set_node) + def _build_xml(self, survey: "Survey") -> DetachableElement | None: + """ + Initial control node result for further processing depending on Question type. + """ + control_dict = self.control + result = node( + control_dict["tag"], + *self.xml_label_and_hint(survey=survey), + ref=self.get_xpath(), + ) + # Resolve field references in attributes + for k, v in control_dict.items(): + # "tag" is from the question type dict so it can't include references. Also, + # if it did include references, then the node element name would be invalid. + if k != "tag": + result.setAttribute(k, survey.insert_xpaths(v, self)) + return result + def build_xml(self, survey: "Survey") -> DetachableElement | None: return None @@ -246,23 +247,12 @@ class InputQuestion(Question): """ def build_xml(self, survey: "Survey"): - control_dict = self.control - label_and_hint = self.xml_label_and_hint(survey=survey) - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() - - result = node(**control_dict) - if label_and_hint: - for element in self.xml_label_and_hint(survey=survey): - if element: - result.appendChild(element) + result = self._build_xml(survey=survey) # Input types are used for selects with external choices sheets. 
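As a reviewer aid, roughly what the shared `_build_xml` helper above produces for a simple question (the question name and label are illustrative; the tag and any extra attributes come from the question type dictionary's `control` entry):

    from pyxform.utils import node

    # A text question named "age" with label "How old are you?" yields approximately:
    #   <input ref="/data/age"><label>How old are you?</label></input>
    # which could be sketched by hand as:
    sketch = node("input", node("label", "How old are you?"), ref="/data/age")
    # Any ${field} references in the remaining control attributes (appearance,
    # jr:count, ...) are resolved via survey.insert_xpaths() before being set.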
if self.query: choice_filter = self.choice_filter - if choice_filter is not None: + if choice_filter: pred = survey.insert_xpaths(choice_filter, self, True) query = f"""instance('{self.query}')/root/item[{pred}]""" else: @@ -273,26 +263,12 @@ def build_xml(self, survey: "Survey"): class TriggerQuestion(Question): def build_xml(self, survey: "Survey"): - control_dict = self.control - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() - return node("trigger", *self.xml_label_and_hint(survey=survey), **control_dict) + return self._build_xml(survey=survey) class UploadQuestion(Question): - def _get_media_type(self): - return self.control["mediatype"] - def build_xml(self, survey: "Survey"): - control_dict = self.control - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() - control_dict["mediatype"] = self._get_media_type() - return node("upload", *self.xml_label_and_hint(survey=survey), **control_dict) + return self._build_xml(survey=survey) class Option(SurveyElement): @@ -310,33 +286,18 @@ def __init__( sms_option: str | None = None, **kwargs, ): - self._choice_itext_id: str | None = None + self._choice_itext_ref: str | None = None self.media: dict | None = media self.sms_option: str | None = sms_option super().__init__(name=name, label=label, **kwargs) - def xml_value(self): - return node("value", self.name) - - def xml(self, survey: "Survey"): - item = node("item") - item.appendChild(self.xml_label(survey=survey)) - item.appendChild(self.xml_value()) - - return item - def validate(self): pass def xml_control(self, survey: "Survey"): raise NotImplementedError() - def _translation_path(self, display_element): - if self._choice_itext_id is not None: - return self._choice_itext_id - return super()._translation_path(display_element=display_element) - def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: to_delete = (k for k in self.get_slot_names() if k.startswith("_")) if delete_keys is not None: @@ -344,6 +305,41 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: return super().to_json_dict(delete_keys=to_delete) +class Itemset: + """Itemset details and metadata detection.""" + + __slots__ = ("name", "options", "requires_itext", "used_by_search") + + def __init__(self, name: str, choices: Iterable[dict]): + self.requires_itext: bool = False + self.used_by_search: bool = False + self.name: str = name + self.options: tuple[Option, ...] = tuple(o for o in self.get_options(choices)) + + def get_options(self, choices: Iterable[dict]) -> Generator[Option, None, None]: + requires_itext = False + for c in choices: + option = Option(**c) + if not requires_itext: + # Media: dict of image, audio, etc. Defaults to None. + if option.media: + requires_itext = True + else: + choice_label = option.label + label_is_dict = isinstance(choice_label, dict) + # Multi-language: dict of labels etc per language. Can be just a string. + if label_is_dict: + requires_itext = True + # Dynamic label: string contains a pyxform reference. 
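Rough examples of what trips `requires_itext` in the `Itemset` metadata detection above (choice rows are illustrative; annotation only):

    # Plain string labels only: no itext needed.
    # Itemset(name="yn", choices=[{"name": "y", "label": "Yes"}, {"name": "n", "label": "No"}])
    #     .requires_itext  -> False
    # Multi-language labels (a dict of labels per language): itext needed.
    # Itemset(name="yn", choices=[{"name": "y", "label": {"en": "Yes", "fr": "Oui"}}])
    #     .requires_itext  -> True
    # Any media entry, or a label containing a ${reference} (the check just below),
    # likewise forces itext so the choice label can be rendered per context.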
+ elif ( + choice_label + and re.search(RE_ANY_PYXFORM_REF, choice_label) is not None + ): + requires_itext = True + yield option + self.requires_itext = requires_itext + + class MultipleChoiceQuestion(Question): __slots__ = SELECT_QUESTION_EXTRA_FIELDS @@ -354,67 +350,26 @@ def get_slot_names() -> tuple[str, ...]: def __init__( self, itemset: str | None = None, list_name: str | None = None, **kwargs ): - # Internals - self._itemset_dyn_label: bool = False - self._itemset_has_media: bool = False - self._itemset_multi_language: bool = False + if not itemset and not list_name: + raise PyXFormError( + "Arguments 'itemset' and 'list_name' must not both be None or empty." + ) # Structure - self.children: tuple[Option, ...] | None = None + self.choices: Itemset | None = None self.itemset: str | None = itemset self.list_name: str | None = list_name - # Notice that choices can be specified under choices or children. - # I'm going to try to stick to just choices. - # Aliases in the json format will make it more difficult - # to use going forward. - kw_choices = kwargs.pop(constants.CHOICES, None) - kw_children = kwargs.pop(constants.CHILDREN, None) - choices = coalesce(kw_choices, kw_children) - if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option): - self.children = choices - elif choices: - self.children = tuple( - Option(**c) for c in combine_lists(kw_choices, kw_children) - ) + choices = kwargs.pop(constants.CHOICES, None) + if isinstance(choices, Itemset): + self.choices = choices super().__init__(**kwargs) - def validate(self): - Question.validate(self) - if self.children: - for child in self.children: - child.validate() - - def iter_descendants( - self, - condition: Callable[["SurveyElement"], bool] | None = None, - iter_into_section_items: bool = False, - ) -> Generator["SurveyElement", None, None]: - if condition is None: - yield self - elif condition(self): - yield self - if iter_into_section_items and self.children: - for e in self.children: - yield from e.iter_descendants( - condition=condition, - iter_into_section_items=iter_into_section_items, - ) - def build_xml(self, survey: "Survey"): if self.bind["type"] not in {"string", "odk:rank"}: raise PyXFormError("""Invalid value for `self.bind["type"]`.""") - # Resolve field references in attributes - control_dict = { - key: survey.insert_xpaths(value, self) for key, value in self.control.items() - } - control_dict["ref"] = self.get_xpath() - - result = node(**control_dict) - for element in self.xml_label_and_hint(survey=survey): - if element: - result.appendChild(element) + result = self._build_xml(survey=survey) # itemset are only supposed to be strings, # check to prevent the rare dicts that show up @@ -431,21 +386,18 @@ def build_xml(self, survey: "Survey"): itemset_value_ref = self.parameters.get("value", itemset_value_ref) itemset_label_ref = self.parameters.get("label", itemset_label_ref) - multi_language = self._itemset_multi_language - has_media = self._itemset_has_media - has_dyn_label = self._itemset_dyn_label is_previous_question = bool(PYXFORM_REFERENCE_REGEX.search(self.itemset)) if file_extension in EXTERNAL_INSTANCE_EXTENSIONS: pass - elif not multi_language and not has_media and not has_dyn_label: + elif self.choices and self.choices.requires_itext: itemset = self.itemset + itemset_label_ref = "jr:itext(itextId)" else: itemset = self.itemset - itemset_label_ref = "jr:itext(itextId)" choice_filter = self.choice_filter - if choice_filter is not None: + if choice_filter: choice_filter = 
survey.insert_xpaths( choice_filter, self, True, is_previous_question ) @@ -488,63 +440,43 @@ def build_xml(self, survey: "Survey"): nodeset += ")" - itemset_children = [ - node("value", ref=itemset_value_ref), - node("label", ref=itemset_label_ref), - ] - result.appendChild(node("itemset", *itemset_children, nodeset=nodeset)) - elif self.children: - for child in self.children: - result.appendChild(child.xml(survey=survey)) + result.appendChild( + node( + "itemset", + node("value", ref=itemset_value_ref), + node("label", ref=itemset_label_ref), + nodeset=nodeset, + ) + ) + elif self.choices: + # Options processing specific to XLSForms using the "search()" function. + # The _choice_itext_ref is prepared by Survey._redirect_is_search_itext. + itemset = self.choices + if itemset.used_by_search: + for option in itemset.options: + if itemset.requires_itext: + label_node = node("label", ref=option._choice_itext_ref) + elif self.label: + label, output_inserted = survey.insert_output_values( + option.label, option + ) + label_node = node("label", label, toParseString=output_inserted) + else: + label_node = node("label") + result.appendChild( + node("item", label_node, node("value", option.name)) + ) return result class Tag(SurveyElement): - __slots__ = TAG_EXTRA_FIELDS - @staticmethod def get_slot_names() -> tuple[str, ...]: - return TAG_FIELDS - - def __init__(self, name: str, label: str | dict | None = None, **kwargs): - self.children: tuple[Option, ...] | None = None - - kw_choices = kwargs.pop(constants.CHOICES, None) - kw_children = kwargs.pop(constants.CHILDREN, None) - choices = coalesce(kw_choices, kw_children) - if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option): - self.children = choices - elif choices: - self.children = tuple( - Option(**c) for c in combine_lists(kw_choices, kw_children) - ) - super().__init__(name=name, label=label, **kwargs) - - def iter_descendants( - self, - condition: Callable[["SurveyElement"], bool] | None = None, - iter_into_section_items: bool = False, - ) -> Generator["SurveyElement", None, None]: - if condition is None: - yield self - elif condition(self): - yield self - if iter_into_section_items and self.children: - for e in self.children: - yield from e.iter_descendants( - condition=condition, - iter_into_section_items=iter_into_section_items, - ) + return SURVEY_ELEMENT_FIELDS def xml(self, survey: "Survey"): - result = node("tag", key=self.name) - result.appendChild(self.xml_label(survey=survey)) - if self.children: - for choice in self.children: - result.appendChild(choice.xml(survey=survey)) - - return result + return node("tag", self.xml_label(survey=survey), key=self.name) def validate(self): pass @@ -588,32 +520,18 @@ def iter_descendants( ) def build_xml(self, survey: "Survey"): - control_dict = self.control - control_dict["ref"] = self.get_xpath() - control_dict["mediatype"] = self._get_media_type() - result = node("upload", *self.xml_label_and_hint(survey=survey), **control_dict) - + result = self._build_xml(survey=survey) if self.children: for osm_tag in self.children: result.appendChild(osm_tag.xml(survey=survey)) - return result class RangeQuestion(Question): def build_xml(self, survey: "Survey"): - control_dict = self.control - label_and_hint = self.xml_label_and_hint(survey=survey) - # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - control_dict["ref"] = self.get_xpath() + result = self._build_xml(survey=survey) params = 
self.parameters if params: - control_dict.update(params) - result = node(**control_dict) - if label_and_hint: - for element in self.xml_label_and_hint(survey=survey): - result.appendChild(element) - + for k, v in params.items(): + result.setAttribute(k, v) return result diff --git a/pyxform/survey.py b/pyxform/survey.py index 50ecafe9..aa6ca193 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -20,7 +20,7 @@ from pyxform.instance import SurveyInstance from pyxform.parsing.expression import has_last_saved from pyxform.parsing.instance_expression import replace_with_output -from pyxform.question import MultipleChoiceQuestion, Option, Question, Tag +from pyxform.question import Itemset, MultipleChoiceQuestion, Option, Question, Tag from pyxform.section import SECTION_EXTRA_FIELDS, Section from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( @@ -28,7 +28,6 @@ LAST_SAVED_INSTANCE_NAME, DetachableElement, escape_text_for_xml, - has_dynamic_label, node, ) from pyxform.validators import enketo_validate, odk_validate @@ -43,6 +42,7 @@ ) RE_PULLDATA = re.compile(r"(pulldata\s*\(\s*)(.*?),") SEARCH_FUNCTION_REGEX = re.compile(r"search\(.*?\)") +SELECT_TYPES = set(aliases.select) class InstanceInfo: @@ -170,22 +170,12 @@ def _get_steps_and_target_xpath(context_parent, xpath_parent, include_parent=Fal return (None, None) -@lru_cache(maxsize=128) -def is_label_dynamic(label: str) -> bool: - return ( - label is not None - and isinstance(label, str) - and re.search(BRACKETED_TAG_REGEX, label) is not None - ) - - def recursive_dict(): return defaultdict(recursive_dict) SURVEY_EXTRA_FIELDS = ( "_created", - "_search_lists", "_translations", "_xpath", "add_none_option", @@ -236,14 +226,13 @@ def get_slot_names() -> tuple[str, ...]: def __init__(self, **kwargs): # Internals self._created: datetime.now = datetime.now() - self._search_lists: set = set() self._translations: recursive_dict = recursive_dict() - self._xpath: dict[str, Section | Question | None] = {} + self._xpath: dict[str, Section | Question | None] | None = None # Structure # attribute is for custom instance attrs from settings e.g. 
attribute::abc:xyz self.attribute: dict | None = None - self.choices: dict[str, tuple[Option, ...]] | None = None + self.choices: dict[str, Itemset] | None = None self.entity_features: list[str] | None = None self.setgeopoint_by_triggering_ref: dict[str, list[str]] = {} self.setvalues_by_triggering_ref: dict[str, list[str]] = {} @@ -279,11 +268,9 @@ def __init__(self, **kwargs): self.sms_separator: str | None = None choices = kwargs.pop("choices", None) - if choices is not None: + if choices and isinstance(choices, dict): self.choices = { - list_name: tuple( - c if isinstance(c, Option) else Option(**c) for c in values - ) + list_name: Itemset(name=list_name, choices=values) for list_name, values in choices.items() } kwargs[constants.TYPE] = constants.SURVEY @@ -296,7 +283,7 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: return super().to_json_dict(delete_keys=to_delete) def validate(self): - if self.id_string in [None, "None"]: + if self.id_string in {None, "None"}: raise PyXFormError("Survey cannot have an empty id_string") super().validate() self._validate_uniqueness_of_section_names() @@ -353,7 +340,7 @@ def xml(self): self.validate() self._setup_xpath_dictionary() - for triggering_reference in self.setvalues_by_triggering_ref.keys(): + for triggering_reference in self.setvalues_by_triggering_ref: if not re.search(BRACKETED_TAG_REGEX, triggering_reference): raise PyXFormError( "Only references to other fields are allowed in the 'trigger' column." @@ -380,80 +367,60 @@ def get_trigger_values_for_question_name(self, question_name: str, trigger_type: elif trigger_type == "setgeopoint": return self.setgeopoint_by_triggering_ref.get(f"${{{question_name}}}") - def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo: + def _generate_static_instances( + self, list_name: str, itemset: Itemset + ) -> InstanceInfo: """ Generate elements for static data (e.g. 
choices for selects) """ - instance_element_list = [] - has_media = bool(choice_list[0].get("media")) - has_dyn_label = has_dynamic_label(choice_list) - multi_language = False - if isinstance(self._translations, dict): - choices = ( - True - for items in self._translations.values() - for k, v in items.items() - if v.get(constants.TYPE, "") == constants.CHOICE - and "-".join(k.split("-")[:-1]) == list_name - ) - try: - if next(choices): - multi_language = True - except StopIteration: - pass - for idx, choice in enumerate(choice_list): - choice_element_list = [] + def choice_nodes(idx, choice): # Add a unique id to the choice element in case there are itext references - if multi_language or has_media or has_dyn_label: - itext_id = f"{list_name}-{idx}" - choice_element_list.append(node("itextId", itext_id)) - - for name, value in choice.items(): - if not value: - continue - elif name != "label" and isinstance(value, str): - choice_element_list.append(node(name, value)) - elif name == "extra_data" and isinstance(value, dict): - for k, v in value.items(): - choice_element_list.append(node(k, v)) - elif ( - not multi_language - and not has_media - and not has_dyn_label - and isinstance(value, str) - and name == "label" - ): - choice_element_list.append(node(name, value)) - - instance_element_list.append(node("item", *choice_element_list)) + if itemset.requires_itext: + yield node("itextId", f"{list_name}-{idx}") + yield node(constants.NAME, choice.name) + choice_label = choice.label + if not itemset.requires_itext and isinstance(choice_label, str): + yield node(constants.LABEL, choice_label) + choice_extra_data = choice.extra_data + if choice_extra_data and isinstance(choice_extra_data, dict): + for k, v in choice_extra_data.items(): + yield node(k, v) + choice_sms_option = choice.sms_option + if choice_sms_option and isinstance(choice_sms_option, str): + yield node("sms_option", choice_sms_option) + + def instance_nodes(choices): + for idx, choice in enumerate(choices): + yield node("item", choice_nodes(idx, choice)) return InstanceInfo( type="choice", context="survey", name=list_name, src=None, - instance=node("instance", node("root", *instance_element_list), id=list_name), + instance=node( + "instance", + node("root", instance_nodes(itemset.options)), + id=list_name, + ), ) @staticmethod - def _generate_external_instances(element: SurveyElement) -> InstanceInfo | None: - if isinstance(element, ExternalInstance): - name = element["name"] - extension = element["type"].split("-")[0] - prefix = "file-csv" if extension == "csv" else "file" - src = f"jr://{prefix}/{name}.{extension}" - return InstanceInfo( - type="external", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), - name=name, - src=src, - instance=node("instance", id=name, src=src), - ) - - return None + def _generate_external_instances(element: ExternalInstance) -> InstanceInfo: + name = element["name"] + extension = element["type"].split("-")[0] + prefix = "file-csv" if extension == "csv" else "file" + src = f"jr://{prefix}/{name}.{extension}" + return InstanceInfo( + type="external", + context="[type: {t}, name: {n}]".format( + t=element["parent"]["type"], n=element["parent"]["name"] + ), + name=name, + src=src, + instance=node("instance", id=name, src=src), + ) @staticmethod def _validate_external_instances(instances) -> None: @@ -483,14 +450,14 @@ def _validate_external_instances(instances) -> None: raise ValidationError("\n".join(errors)) @staticmethod - def 
_generate_pulldata_instances(element: SurveyElement) -> list[InstanceInfo] | None: + def _generate_pulldata_instances( + element: Question | Section, + ) -> Generator[InstanceInfo, None, None]: def get_pulldata_functions(element): """ Returns a list of different pulldata(... function strings if pulldata function is defined at least once for any of: calculate, constraint, readonly, required, relevant - - :param: element (pyxform.survey.Survey): """ functions_present = [] for formula_name in constants.EXTERNAL_INSTANCES: @@ -515,24 +482,20 @@ def get_pulldata_functions(element): return functions_present - def get_instance_info(element, file_id): + def get_instance_info(elem, file_id): uri = f"jr://file-csv/{file_id}.csv" + parent = elem.parent return InstanceInfo( type="pulldata", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), + context=f"[type: {parent.type}, name: {parent.name}]", name=file_id, src=uri, instance=node("instance", id=file_id, src=uri), ) - if isinstance(element, Option | ExternalInstance | Tag | Survey): - return None pulldata_usages = get_pulldata_functions(element) if len(pulldata_usages) > 0: - pulldata_instances = [] for usage in pulldata_usages: for call_match in re.finditer(RE_PULLDATA, usage): groups = call_match.groups() @@ -540,40 +503,32 @@ def get_instance_info(element, file_id): first_argument = ( # first argument to pulldata() groups[1].replace("'", "").replace('"', "").strip() ) - pulldata_instances.append( - get_instance_info(element, first_argument) - ) - return pulldata_instances - return None + yield get_instance_info(element, first_argument) @staticmethod - def _generate_from_file_instances(element: SurveyElement) -> InstanceInfo | None: - if not isinstance(element, MultipleChoiceQuestion) or element.itemset is None: + def _generate_from_file_instances( + element: MultipleChoiceQuestion, + ) -> InstanceInfo | None: + itemset = element.itemset + if not itemset: return None - itemset = element.get("itemset") file_id, ext = os.path.splitext(itemset) if itemset and ext in EXTERNAL_INSTANCE_EXTENSIONS: file_ext = "file" if ext in {".xml", ".geojson"} else f"file-{ext[1:]}" uri = f"jr://{file_ext}/{itemset}" return InstanceInfo( type="file", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), + context=f"[type: {element.parent.type}, name: {element.parent.name}]", name=file_id, src=uri, instance=node("instance", id=file_id, src=uri), ) - return None - @staticmethod - def _generate_last_saved_instance(element: SurveyElement) -> bool: + def _generate_last_saved_instance(element: Question) -> bool: """ True if a last-saved instance should be generated, false otherwise. """ - if not isinstance(element, Question): - return False if has_last_saved(element.default): return True if has_last_saved(element.choice_filter): @@ -633,49 +588,59 @@ def _generate_instances(self) -> Generator[DetachableElement, None, None]: - `select_one_external`: implicitly relies on a `itemsets.csv` file and uses XPath-like expressions for querying. 
""" - instances = [] - generate_last_saved = False - for i in self.iter_descendants(): - i_ext = self._generate_external_instances(element=i) - i_pull = self._generate_pulldata_instances(element=i) - i_file = self._generate_from_file_instances(element=i) - if not generate_last_saved: - generate_last_saved = self._generate_last_saved_instance(element=i) - for x in [i_ext, i_pull, i_file]: - if x is not None: - instances += x if isinstance(x, list) else [x] - - if generate_last_saved: - instances += [self._get_last_saved_instance()] - - # Append last so the choice instance is excluded on a name clash. - if self.choices: - for name, value in self.choices.items(): - if name not in self._search_lists: - instances += [ - self._generate_static_instances(list_name=name, choice_list=value) - ] + + def get_element_instances(): + generate_last_saved = False + for i in self.iter_descendants(): + if isinstance(i, Question): + yield from self._generate_pulldata_instances(element=i) + if isinstance(i, MultipleChoiceQuestion): + i_file = self._generate_from_file_instances(element=i) + if i_file: + yield i_file + if not generate_last_saved: + generate_last_saved = self._generate_last_saved_instance( + element=i + ) + elif isinstance(i, Section): + yield from self._generate_pulldata_instances(element=i) + elif isinstance(i, ExternalInstance): + yield self._generate_external_instances(element=i) + + if generate_last_saved: + yield self._get_last_saved_instance() + + # Append last so the choice instance is excluded on a name clash. + if self.choices: + for k, v in self.choices.items(): + if not v.used_by_search: + yield self._generate_static_instances(list_name=k, itemset=v) + + instances = tuple(get_element_instances()) # Check that external instances have unique names. if instances: - ext_only = [x for x in instances if x.type == "external"] - self._validate_external_instances(instances=ext_only) + self._validate_external_instances( + instances=(x for x in instances if x.type == "external") + ) seen = {} for i in instances: - if i.name in seen.keys() and seen[i.name].src != i.src: - # Instance id exists with different src URI -> error. - msg = ( - "The same instance id will be generated for different " - "external instance source URIs. Please check the form." - f" Instance name: '{i.name}', Existing type: '{seen[i.name].type}', " - f"Existing URI: '{seen[i.name].src}', Duplicate type: '{i.type}', " - f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'." - ) - raise PyXFormError(msg) - elif i.name in seen.keys() and seen[i.name].src == i.src: - # Instance id exists with same src URI -> ok, don't duplicate. - continue + prior = seen.get(i.name) + if prior: + if prior.src != i.src: + # Instance id exists with different src URI -> error. + msg = ( + "The same instance id will be generated for different " + "external instance source URIs. Please check the form." + f" Instance name: '{i.name}', Existing type: '{prior.type}', " + f"Existing URI: '{prior.src}', Duplicate type: '{i.type}', " + f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'." + ) + raise PyXFormError(msg) + else: + # Instance id exists with same src URI -> ok, don't duplicate. + continue else: # Instance doesn't exist yet -> add it. 
yield i.instance @@ -786,7 +751,7 @@ def _add_to_nested_dict(self, dicty, path, value): dicty[path[0]] = {} self._add_to_nested_dict(dicty[path[0]], path[1:], value) - def _redirect_is_search_itext(self, element: Question) -> bool: + def _redirect_is_search_itext(self, element: MultipleChoiceQuestion) -> bool: """ For selects using the "search()" function, redirect itext for in-line items. @@ -801,29 +766,29 @@ def _redirect_is_search_itext(self, element: Question) -> bool: :param element: A select type question. :return: If True, the element uses the search function. """ + is_search = False try: - is_search = bool( - SEARCH_FUNCTION_REGEX.search( - element[constants.CONTROL][constants.APPEARANCE] - ) - ) + appearance = element.control[constants.APPEARANCE] + if appearance and len(appearance) > 7: + is_search = bool(SEARCH_FUNCTION_REGEX.search(appearance)) except (KeyError, TypeError): - is_search = False + pass if is_search: - file_id, ext = os.path.splitext(element[constants.ITEMSET]) - if ext in EXTERNAL_INSTANCE_EXTENSIONS: + ext = os.path.splitext(element.itemset)[1] + if ext and ext in EXTERNAL_INSTANCE_EXTENSIONS: msg = ( - f"Question '{element[constants.NAME]}' is a select from file type, " + f"Question '{element.name}' is a select from file type, " "using 'search()'. This combination is not supported. " "Remove the 'search()' usage, or change the select type." ) raise PyXFormError(msg) - if self.choices: - element.children = self.choices.get(element[constants.ITEMSET], None) - element[constants.ITEMSET] = "" - if element.children is not None: - for i, opt in enumerate(element.children): - opt["_choice_itext_id"] = f"{element[constants.LIST_NAME_U]}-{i}" + + element.itemset = "" + itemset = element.choices + if not itemset.used_by_search: + itemset.used_by_search = True + for i, opt in enumerate(itemset.options): + opt._choice_itext_ref = f"jr:itext('{itemset.name}-{i}')" return is_search def _setup_translations(self): @@ -832,58 +797,35 @@ def _setup_translations(self): setup media and itext functions """ - def _setup_choice_translations( - name, choice_value, itext_id - ) -> Generator[tuple[list[str], str], None, None]: - for media_or_lang, value in choice_value.items(): - if isinstance(value, dict): - for language, val in value.items(): - yield ([language, itext_id, media_or_lang], val) - elif name == constants.MEDIA: - yield ([self.default_language, itext_id, media_or_lang], value) - else: - yield ([media_or_lang, itext_id, "long"], value) + def get_choice_content(name, idx, choice): + itext_id = f"{name}-{idx}" - itemsets_multi_language = set() - itemsets_has_media = set() - itemsets_has_dyn_label = set() + choice_label = choice.label + if choice_label: + if isinstance(choice_label, dict): + for lang, value in choice_label.items(): + if isinstance(value, dict): + for language, val in value.items(): + yield ([language, itext_id, lang], val) + else: + yield ([lang, itext_id, "long"], value) + else: + yield ([self.default_language, itext_id, "long"], choice_label) + + choice_media = choice.media + if choice_media: + for media, value in choice_media.items(): + if isinstance(value, dict): + for language, val in value.items(): + yield ([language, itext_id, media], val) + else: + yield ([self.default_language, itext_id, media], value) def get_choices(): - for list_name, choice_list in self.choices.items(): - multi_language = False - has_media = False - dyn_label = False - choices = [] - for idx, choice in enumerate(choice_list): - for col_name, choice_value in choice.items(): - 
lang_choice = None - if not choice_value: - continue - if col_name == constants.MEDIA: - has_media = True - lang_choice = choice_value - elif col_name == constants.LABEL: - if isinstance(choice_value, dict): - lang_choice = choice_value - multi_language = True - else: - lang_choice = {self.default_language: choice_value} - if is_label_dynamic(choice_value): - dyn_label = True - if lang_choice is not None: - # e.g. (label, {"default": "Yes"}, "consent", 0) - choices.append((col_name, lang_choice, list_name, idx)) - if multi_language or has_media or dyn_label: - if multi_language: - itemsets_multi_language.add(list_name) - if has_media: - itemsets_has_media.add(list_name) - if dyn_label: - itemsets_has_dyn_label.add(list_name) - for c in choices: - yield from _setup_choice_translations( - c[0], c[1], f"{c[2]}-{c[3]}" - ) + for name, itemset in self.choices.items(): + if itemset.requires_itext: + for idx, choice in enumerate(itemset.options): + yield from get_choice_content(name, idx, choice) if self.choices: for path, value in get_choices(): @@ -891,53 +833,40 @@ def get_choices(): leaf_value = {last_path: value, constants.TYPE: constants.CHOICE} self._add_to_nested_dict(self._translations, path, leaf_value) - select_types = set(aliases.select.keys()) search_lists = set() non_search_lists = set() for element in self.iter_descendants( condition=lambda i: isinstance(i, Question | Section) ): if isinstance(element, MultipleChoiceQuestion): - if element.itemset is not None: - element._itemset_multi_language = ( - element.itemset in itemsets_multi_language - ) - element._itemset_has_media = element.itemset in itemsets_has_media - element._itemset_dyn_label = element.itemset in itemsets_has_dyn_label - - if element.type in select_types: - select_ref = (element[constants.NAME], element[constants.LIST_NAME_U]) - if self._redirect_is_search_itext(element=element): - search_lists.add(select_ref) - self._search_lists.add(element[constants.LIST_NAME_U]) - else: - non_search_lists.add(select_ref) - - # Skip creation of translations for choices in selects. The creation of these - # translations is done above in this function. - parent = element.get("parent") - if parent is not None and parent[constants.TYPE] not in select_types: - for d in element.get_translations(self.default_language): - translation_path = d["path"] - form = "long" - - if "guidance_hint" in d["path"]: - translation_path = d["path"].replace("guidance_hint", "hint") - form = "guidance" - - self._translations[d["lang"]][translation_path] = self._translations[ - d["lang"] - ].get(translation_path, {}) - - self._translations[d["lang"]][translation_path].update( - { - form: { - "text": d["text"], - "output_context": d["output_context"], - }, - constants.TYPE: constants.QUESTION, - } - ) + select_ref = (element.name, element.list_name) + if self._redirect_is_search_itext(element=element): + search_lists.add(select_ref) + else: + non_search_lists.add(select_ref) + + # Create translations questions. 
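For orientation, the approximate shape of the `_translations` entries built by the loop below (the language key, path, and text are illustrative; annotation only):

    # self._translations["english (en)"]["/data/age:label"] = {
    #     "long": {"text": "How old are you?", "output_context": ...},
    #     "type": "question",
    # }
    # where the "long" form switches to "guidance" for guidance_hint content,
    # as handled just below.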
+ for d in element.get_translations(self.default_language): + translation_path = d["path"] + form = "long" + + if "guidance_hint" in d["path"]: + translation_path = d["path"].replace("guidance_hint", "hint") + form = "guidance" + + self._translations[d["lang"]][translation_path] = self._translations[ + d["lang"] + ].get(translation_path, {}) + + self._translations[d["lang"]][translation_path].update( + { + form: { + "text": d["text"], + "output_context": d["output_context"], + }, + constants.TYPE: constants.QUESTION, + } + ) for q_name, list_name in search_lists: choice_refs = [f"'{q}'" for q, c in non_search_lists if c == list_name] @@ -962,7 +891,7 @@ def _add_empty_translations(self): paths = {} for translation in self._translations.values(): for path, content in translation.items(): - paths[path] = paths.get(path, set()).union(content.keys()) + paths[path] = paths.get(path, set()).union(content) for lang in self._translations: for path, content_types in paths.items(): @@ -1128,12 +1057,16 @@ def __unicode__(self): return f"" def _setup_xpath_dictionary(self): + if self._xpath: + return + xpaths = {} for element in self.iter_descendants(lambda i: isinstance(i, Question | Section)): element_name = element.name - if element_name in self._xpath: - self._xpath[element_name] = None + if element_name in xpaths: + xpaths[element_name] = None else: - self._xpath[element_name] = element + xpaths[element_name] = element + self._xpath = xpaths def _var_repl_function( self, matchobj, context, use_current=False, reference_parent=False @@ -1353,7 +1286,7 @@ def print_xform_to_file( warnings.extend(enketo_validate.check_xform(path)) # Warn if one or more translation is missing a valid IANA subtag - translations = self._translations.keys() + translations = self._translations if translations: bad_languages = get_languages_with_bad_tags(translations) if bad_languages: diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index b6c851b1..509228f8 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -35,6 +35,7 @@ ) SURVEY_ELEMENT_EXTRA_FIELDS = ("_survey_element_xpath",) SURVEY_ELEMENT_SLOTS = (*SURVEY_ELEMENT_FIELDS, *SURVEY_ELEMENT_EXTRA_FIELDS) +_SURVEY_ELEMENT_FIELDS_SET = set(SURVEY_ELEMENT_FIELDS) class SurveyElement(Mapping): @@ -58,6 +59,9 @@ def get_slot_names() -> tuple[str, ...]: """Each subclass must provide a list of slots from itself and all parents.""" return SURVEY_ELEMENT_SLOTS + def __bool__(self): + return True + def __len__(self): return len(self.get_slot_names()) @@ -93,7 +97,7 @@ def __init__( if fields is not None: for key in fields: - if key not in SURVEY_ELEMENT_FIELDS: + if key not in _SURVEY_ELEMENT_FIELDS_SET: value = kwargs.pop(key, None) if value or not hasattr(self, key): self[key] = value @@ -295,10 +299,18 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: ] choices = result.pop("choices", None) if choices: - result["choices"] = { - list_name: [o.to_json_dict(delete_keys=("parent",)) for o in options] - for list_name, options in choices.items() - } + if isinstance(choices, dict): + result["choices"] = { + list_name: [ + o.to_json_dict(delete_keys=("parent",)) for o in itemset.options + ] + for list_name, itemset in choices.items() + } + else: + result["children"] = [ + o.to_json_dict(delete_keys=("parent",)) for o in choices.options + ] + # Translation items with "output_context" have circular references. 
if "_translations" in result: for lang in result["_translations"].values(): diff --git a/pyxform/utils.py b/pyxform/utils.py index e42445a6..f5562ba5 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -11,7 +11,6 @@ from io import StringIO from itertools import chain from json.decoder import JSONDecodeError -from typing import Any from xml.dom import Node from xml.dom.minidom import Element, Text, _write_data @@ -20,15 +19,15 @@ from pyxform import constants as const from pyxform.errors import PyXFormError from pyxform.parsing.expression import parse_expression +from pyxform.xls2json_backends import DefinitionData -SEP = "_" +BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}") INVALID_XFORM_TAG_REGEXP = re.compile(r"[^a-zA-Z:_][^a-zA-Z:_0-9\-.]*") LAST_SAVED_INSTANCE_NAME = "__last-saved" -BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}") -PYXFORM_REFERENCE_REGEX = re.compile(r"\$\{(.*?)\}") NODE_TYPE_TEXT = {Node.TEXT_NODE, Node.CDATA_SECTION_NODE} +PYXFORM_REFERENCE_REGEX = re.compile(r"\$\{(.*?)\}") +SPACE_TRANS_TABLE = str.maketrans({" ": "_"}) XML_TEXT_SUBS = {"&": "&", "<": "<", ">": ">"} -XML_TEXT_SUBS_KEYS = set(XML_TEXT_SUBS) XML_TEXT_TABLE = str.maketrans(XML_TEXT_SUBS) @@ -44,7 +43,7 @@ class DetachableElement(Element): """ def __init__(self, *args, **kwargs): - Element.__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) self.ownerDocument = None def writexml(self, writer, indent="", addindent="", newl=""): @@ -82,7 +81,7 @@ def writexml(self, writer, indent="", addindent="", newl=""): @lru_cache(maxsize=64) def escape_text_for_xml(text: str) -> str: - if any(c in set(text) for c in XML_TEXT_SUBS_KEYS): + if any(c in set(text) for c in XML_TEXT_SUBS): return text.translate(XML_TEXT_TABLE) else: return text @@ -171,7 +170,7 @@ def flatten(li): def external_choices_to_csv( - workbook_dict: dict[str, Any], warnings: list | None = None + workbook_dict: DefinitionData, warnings: list | None = None ) -> str | None: """ Convert the 'external_choices' sheet data to CSV. @@ -180,7 +179,7 @@ def external_choices_to_csv( :param warnings: The conversions warnings list. """ warnings = coalesce(warnings, []) - if const.EXTERNAL_CHOICES not in workbook_dict: + if not workbook_dict.external_choices: warnings.append( f"Could not export itemsets.csv, the '{const.EXTERNAL_CHOICES}' sheet is missing." ) @@ -189,11 +188,11 @@ def external_choices_to_csv( itemsets = StringIO(newline="") csv_writer = csv.writer(itemsets, quoting=csv.QUOTE_ALL) try: - header = workbook_dict["external_choices_header"][0] + header = workbook_dict.external_choices_header[0] except (IndexError, KeyError, TypeError): - header = {k for d in workbook_dict[const.EXTERNAL_CHOICES] for k in d} + header = {k for d in workbook_dict.external_choices for k in d} csv_writer.writerow(header) - for row in workbook_dict[const.EXTERNAL_CHOICES]: + for row in workbook_dict.external_choices: csv_writer.writerow(row.values()) return itemsets.getvalue() @@ -227,13 +226,13 @@ def default_is_dynamic(element_default, element_type=None): * Contains arithmetic operator, including 'div' and 'mod' (except '-' for 'date' type). * Contains brackets, parentheses or braces. """ - if not isinstance(element_default, str): + if not element_default or not isinstance(element_default, str): return False tokens, _ = parse_expression(element_default) for t in tokens: # Data types which are likely to have non-dynamic defaults containing a hyphen. 
- if element_type in ("date", "dateTime", "geopoint", "geotrace", "geoshape"): + if element_type in {"date", "dateTime", "geopoint", "geotrace", "geoshape"}: # Nested to avoid extra string comparisons if not a relevant data type. if t.name == "OPS_MATH" and t.value == "-": return False @@ -252,23 +251,6 @@ def default_is_dynamic(element_default, element_type=None): return False -def has_dynamic_label(choice_list: "list[dict[str, str]]") -> bool: - """ - If the first or second choice label includes a reference, we must use itext. - - Check the first two choices in case first is something like "Other". - """ - for c in choice_list[:2]: - choice_label = c.get("label") - if ( - choice_label is not None - and isinstance(choice_label, str) - and re.search(BRACKETED_TAG_REGEX, choice_label) is not None - ): - return True - return False - - def levenshtein_distance(a: str, b: str) -> int: """ Calculate Levenshtein distance between two strings. diff --git a/pyxform/validators/pyxform/choices.py b/pyxform/validators/pyxform/choices.py index 3b347c35..3e2ab55c 100644 --- a/pyxform/validators/pyxform/choices.py +++ b/pyxform/validators/pyxform/choices.py @@ -26,7 +26,7 @@ def validate_headers( headers: tuple[tuple[str, ...], ...], warnings: list[str] -) -> list[str]: +) -> tuple[str, ...]: def check(): for header in headers: header = header[0] @@ -34,7 +34,7 @@ def check(): warnings.append(INVALID_HEADER.format(column=header)) yield header - return list(check()) + return tuple(check()) def validate_choice_list( @@ -43,28 +43,38 @@ def validate_choice_list( seen_options = set() duplicate_errors = [] for option in options: - if "name" not in option: + if constants.NAME not in option: raise PyXFormError(INVALID_NAME.format(row=option["__row"])) - elif "label" not in option: + elif constants.LABEL not in option: warnings.append(INVALID_LABEL.format(row=option["__row"])) if not allow_duplicates: - name = option["name"] + name = option[constants.NAME] if name in seen_options: duplicate_errors.append(INVALID_DUPLICATE.format(row=option["__row"])) else: seen_options.add(name) - if 0 < len(duplicate_errors): + if duplicate_errors: raise PyXFormError("\n".join(duplicate_errors)) -def validate_choices( +def validate_and_clean_choices( choices: dict[str, list[dict]], warnings: list[str], headers: tuple[tuple[str, ...], ...], allow_duplicates: bool = False, -): +) -> dict[str, list[dict]]: + """ + Warn about invalid or duplicate choices, and remove choices with invalid headers. + + Choices columns are output as XML elements so they must be valid XML tags. + + :param choices: Choices data from the XLSForm. + :param warnings: Warnings list. + :param headers: choices data headers i.e. unique dict keys. + :param allow_duplicates: If True, duplicate choice names are allowed in the XLSForm. + """ invalid_headers = validate_headers(headers, warnings) for options in choices.values(): validate_choice_list( @@ -75,4 +85,5 @@ def validate_choices( for option in options: for invalid_header in invalid_headers: option.pop(invalid_header, None) - del option["__row"] + option.pop("__row", None) + return choices diff --git a/pyxform/validators/pyxform/parameters_generic.py b/pyxform/validators/pyxform/parameters_generic.py index cd95843d..a0524d9a 100644 --- a/pyxform/validators/pyxform/parameters_generic.py +++ b/pyxform/validators/pyxform/parameters_generic.py @@ -37,7 +37,7 @@ def validate( """ Raise an error if 'parameters' includes any keys not named in 'allowed'. 
""" - extras = set(parameters.keys()) - (set(allowed)) + extras = set(parameters) - (set(allowed)) if 0 < len(extras): msg = ( "Accepted parameters are '{a}'. " diff --git a/pyxform/validators/pyxform/pyxform_reference.py b/pyxform/validators/pyxform/pyxform_reference.py index a1b02783..45c6c60a 100644 --- a/pyxform/validators/pyxform/pyxform_reference.py +++ b/pyxform/validators/pyxform/pyxform_reference.py @@ -38,7 +38,7 @@ def validate_pyxform_reference_syntax( continue elif t.name == "PYXFORM_REF_END": start_token = None - elif t.name in ("PYXFORM_REF_START", "PYXFORM_REF"): + elif t.name in {"PYXFORM_REF_START", "PYXFORM_REF"}: msg = PYXFORM_REFERENCE_INVALID.format( sheet=sheet_name, row_number=row_number, column=key ) diff --git a/pyxform/validators/pyxform/sheet_misspellings.py b/pyxform/validators/pyxform/sheet_misspellings.py index c83fef31..06e3851b 100644 --- a/pyxform/validators/pyxform/sheet_misspellings.py +++ b/pyxform/validators/pyxform/sheet_misspellings.py @@ -1,10 +1,10 @@ -from collections.abc import KeysView +from collections.abc import Iterable from pyxform import constants from pyxform.utils import levenshtein_distance -def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None": +def find_sheet_misspellings(key: str, keys: Iterable) -> "str | None": """ Find possible sheet name misspellings to warn the user about. @@ -15,6 +15,8 @@ def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None": :param key: The sheet name to look for. :param keys: The workbook sheet names. """ + if not keys: + return None candidates = tuple( _k # thanks to black for _k in keys diff --git a/pyxform/validators/pyxform/translations_checks.py b/pyxform/validators/pyxform/translations_checks.py index b74d2b36..043b6742 100644 --- a/pyxform/validators/pyxform/translations_checks.py +++ b/pyxform/validators/pyxform/translations_checks.py @@ -33,7 +33,7 @@ def format_missing_translations_msg( def get_sheet_msg(name, sheet): if sheet is not None: - langs = sorted(sheet.keys()) + langs = sorted(sheet) if 0 < len(langs): lang_msgs = [] for lang in langs: @@ -94,8 +94,10 @@ def _find_translations( self, sheet_data: "SheetData", translatable_columns: dict[str, str] ): def process_header(head): - if head[0] in translatable_columns.keys(): + if head[0] in translatable_columns: name = translatable_columns[head[0]] + if isinstance(name, tuple): + name = head[0] if len(head) == 1: self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name) elif len(head) == 2: @@ -103,7 +105,7 @@ def process_header(head): self.columns_seen.add(name) for header in sheet_data: - if 1 < len(header) and header[0] in (const.MEDIA, const.BIND): + if 1 < len(header) and header[0] in {const.MEDIA, const.BIND}: process_header(head=header[1:]) else: process_header(head=header) diff --git a/pyxform/validators/updater.py b/pyxform/validators/updater.py index e47cbb2c..bd5df1b3 100644 --- a/pyxform/validators/updater.py +++ b/pyxform/validators/updater.py @@ -302,9 +302,9 @@ def _unzip_find_jobs(open_zip_file, bin_paths, out_path): if maybe_existing_match.CRC == zip_item.CRC: continue zip_jobs[file_out_path] = zip_item - if len(bin_paths) != len(zip_jobs.keys()): + if len(bin_paths) != len(zip_jobs): raise PyXFormError( - f"Expected {len(bin_paths)} zip job files, found: {len(zip_jobs.keys())}" + f"Expected {len(bin_paths)} zip job files, found: {len(zip_jobs)}" ) return zip_jobs diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 6d64ded3..f5cc62b3 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py 
@@ -6,7 +6,7 @@
 import os
 import re
 import sys
-from itertools import chain
+from collections.abc import Sequence
 from typing import IO, Any
 
 from pyxform import aliases, constants
@@ -22,19 +22,44 @@
 )
 from pyxform.errors import PyXFormError
 from pyxform.parsing.expression import is_pyxform_reference, is_xml_tag
-from pyxform.utils import PYXFORM_REFERENCE_REGEX, coalesce, default_is_dynamic
-from pyxform.validators.pyxform import choices as vc
+from pyxform.parsing.sheet_headers import dealias_and_group_headers
+from pyxform.utils import (
+    PYXFORM_REFERENCE_REGEX,
+    coalesce,
+    default_is_dynamic,
+)
 from pyxform.validators.pyxform import parameters_generic, select_from_file
 from pyxform.validators.pyxform import question_types as qt
 from pyxform.validators.pyxform.android_package_name import validate_android_package_name
+from pyxform.validators.pyxform.choices import validate_and_clean_choices
 from pyxform.validators.pyxform.pyxform_reference import validate_pyxform_reference_syntax
 from pyxform.validators.pyxform.sheet_misspellings import find_sheet_misspellings
 from pyxform.validators.pyxform.translations_checks import SheetTranslations
-from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict
+from pyxform.xls2json_backends import (
+    RE_WHITESPACE,
+    DefinitionData,
+    get_xlsform,
+)
 
 SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
 RE_SMART_QUOTES = re.compile(r"|".join(re.escape(old) for old in SMART_QUOTES))
-RE_WHITESPACE = re.compile(r"( )+")
+RE_BEGIN_CONTROL = re.compile(
+    r"^(?P<begin>begin)(\s|_)(?P<type>("
+    + "|".join(aliases.control)
+    + r"))( (over )?(?P<list_name>\S+))?$"
+)
+RE_END_CONTROL = re.compile(
+    r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(aliases.control) + r"))$"
+)
+RE_SELECT = re.compile(
+    r"^(?P<select_command>("
+    + "|".join(aliases.select)
+    + r")) (?P<list_name>\S+)"
+    + "( (?P<specify_other>(or specify other|or_other|or other)))?$"
+)
+RE_OSM = re.compile(
+    r"(?P<osm_command>(" + "|".join(aliases.osm) + r")) (?P<list_name>\S+)"
+)
 
 
 def print_pyobj_to_json(pyobj, path=None):
@@ -49,120 +74,6 @@
     sys.stdout.write(json.dumps(pyobj, ensure_ascii=False, indent=4))
 
 
-def merge_dicts(dict_a, dict_b, default_key="default"):
-    """
-    Recursively merge two nested dicts into a single dict.
-    When keys match their values are merged using
-    a recursive call to this function,
-    otherwise they are just added to the output dict.
-    """
-    if not dict_a:
-        return dict_b
-    if not dict_b:
-        return dict_a
-
-    if not isinstance(dict_a, dict):
-        if default_key in dict_b:
-            return dict_b
-        dict_a = {default_key: dict_a}
-    if not isinstance(dict_b, dict):
-        if default_key in dict_a:
-            return dict_a
-        dict_b = {default_key: dict_b}
-
-    # Union keys but retain order (as opposed to set()), preferencing dict_a then dict_b.
-    # E.g. {"a": 1, "b": 2} + {"c": 3, "a": 4} -> {"a": None, "b": None, "c": None}
-    all_keys = {k: None for k in (chain(dict_a.keys(), dict_b.keys()))}
-
-    out_dict = {}
-    for key in all_keys.keys():
-        out_dict[key] = merge_dicts(dict_a.get(key), dict_b.get(key), default_key)
-    return out_dict
-
-
-def list_to_nested_dict(lst):
-    """
-    [1,2,3,4] -> {1:{2:{3:4}}}
-    """
-    if len(lst) > 1:
-        return {lst[0]: list_to_nested_dict(lst[1:])}
-    else:
-        return lst[0]
-
-
-class DealiasAndGroupHeadersResult:
-    __slots__ = ("headers", "data")
-
-    def __init__(self, headers: tuple[tuple[str, ...], ...], data: list[dict]):
-        """
-        :param headers: Distinct headers seen in the sheet, parsed / split if applicable.
-        :param data: Sheet data rows, in grouped dict format.
- """ - self.headers: tuple[tuple[str, ...], ...] = headers - self.data: list[dict] = data - - -def dealias_and_group_headers( - dict_array: list[dict], - header_aliases: dict[str, str], - use_double_colons: bool, - default_language: str = constants.DEFAULT_LANGUAGE_VALUE, - ignore_case: bool = False, -) -> DealiasAndGroupHeadersResult: - """ - For each row in the worksheet, group all keys that contain a double colon. - So - {"text::english": "hello", "text::french" : "bonjour"} - becomes - {"text": {"english": "hello", "french" : "bonjour"}. - Dealiasing is done to the first token - (the first term separated by the delimiter). - default_language -- used to group labels/hints/etc - without a language specified with localized versions. - """ - group_delimiter = "::" - out_dict_array = [] - seen_headers = {} - for row in dict_array: - out_row = {} - for header, val in row.items(): - if ignore_case: - header = header.lower() - - if use_double_colons: - tokens = [t.strip() for t in header.split(group_delimiter)] - - # else: - # We do the initial parse using single colons - # for backwards compatibility and - # only the first single is used - # in order to avoid nesting jr:something tokens. - # if len(tokens) > 1: - # tokens[1:] = [u":".join(tokens[1:])] - else: - # I think the commented out section above - # break if there is something like media:image:english - # so maybe a better backwards compatibility hack - # is to join any jr token with the next token - tokens = [t.strip() for t in header.split(":")] - if "jr" in tokens: - jr_idx = tokens.index("jr") - tokens[jr_idx] = ":".join(tokens[jr_idx : jr_idx + 2]) - tokens.pop(jr_idx + 1) - - dealiased_first_token = header_aliases.get(tokens[0], tokens[0]) - tokens = dealiased_first_token.split(group_delimiter) + tokens[1:] - new_key = tokens[0] - new_value = list_to_nested_dict(tokens[1:] + [val]) - out_row = merge_dicts(out_row, {new_key: new_value}, default_language) - seen_headers[tuple(tokens)] = None - - out_dict_array.append(out_row) - return DealiasAndGroupHeadersResult( - headers=tuple(seen_headers.keys()), data=out_dict_array - ) - - def dealias_types(dict_array): """ Look at all the type values in a dict array and if any aliases are found, @@ -170,24 +81,24 @@ def dealias_types(dict_array): """ for row in dict_array: found_type = row.get(constants.TYPE) - if found_type in aliases._type_alias_map.keys(): + if found_type in aliases._type_alias_map: row[constants.TYPE] = aliases._type_alias_map[found_type] return dict_array def clean_text_values( sheet_name: str, - data: list[dict], + data: Sequence[dict], strip_whitespace: bool = False, add_row_number: bool = False, -) -> list[dict]: +) -> Sequence[dict]: """ Go though the dict array and strips all text values. Also replaces multiple spaces with single spaces. """ for row_number, row in enumerate(data, start=2): for key, value in row.items(): - if isinstance(value, str): + if isinstance(value, str) and value: # Remove extraneous whitespace characters. if strip_whitespace: value = RE_WHITESPACE.sub(" ", value.strip()) @@ -223,21 +134,6 @@ def group_dictionaries_by_key(list_of_dicts, key, remove_key=True): return dict_of_lists -def has_double_colon(workbook_dict) -> bool: - """ - Look for a column header with a doublecolon (::) and - return true if one is found. 
- """ - for sheet in workbook_dict.values(): - for row in sheet: - for column_header in row.keys(): - if not isinstance(column_header, str): - continue - if "::" in column_header: - return True - return False - - def add_flat_annotations(prompt_list, parent_relevant="", name_prefix=""): """ This is a helper function for generating flat instances @@ -373,7 +269,7 @@ def add_choices_info_to_question( def workbook_to_json( - workbook_dict, + workbook_dict: DefinitionData, form_name: str | None = None, fallback_form_name: str | None = None, default_language: str | None = None, @@ -398,88 +294,75 @@ def workbook_to_json( json form spec. """ warnings = coalesce(warnings, []) - is_valid = False - # Sheet names should be case-insensitive - workbook_dict = {x.lower(): y for x, y in workbook_dict.items()} - workbook_keys = workbook_dict.keys() - if constants.SURVEY not in workbook_dict: + sheet_names = workbook_dict.sheet_names + if not workbook_dict.survey and not workbook_dict.survey_header: msg = f"You must have a sheet named '{constants.SURVEY}'. " - similar = find_sheet_misspellings(key=constants.SURVEY, keys=workbook_keys) + similar = find_sheet_misspellings(key=constants.SURVEY, keys=sheet_names) if similar is not None: msg += similar raise PyXFormError(msg) - # ensure required headers are present - for row in workbook_dict.get(constants.SURVEY, []): - is_valid = "type" in [z.lower() for z in row] - if is_valid: - break - if not is_valid: - # TODO - could we state what headers are missing? - raise PyXFormError( - "The survey sheet is either empty or missing important column headers." - ) - # Make sure the passed in vars are unicode form_name = str(coalesce(form_name, constants.DEFAULT_FORM_NAME)) default_language = str(coalesce(default_language, constants.DEFAULT_LANGUAGE_VALUE)) - # We check for double columns to determine whether to use them - # or single colons to delimit grouped headers. - # Single colons are bad because they conflict with with the xform namespace - # syntax (i.e. jr:constraintMsg), - # so we only use them if we have to for backwards compatibility. - use_double_colons = has_double_colon(workbook_dict) - # Break the spreadsheet dict into easier to access objects # (settings, choices, survey_sheet): + # ########## Settings sheet ########## - k = constants.SETTINGS - if k not in workbook_dict: - similar = find_sheet_misspellings(key=k, keys=workbook_keys) + settings = {} + if workbook_dict.settings: + settings_sheet_headers = workbook_dict.settings_header or [] + settings_sheet = workbook_dict.settings or [] + try: + if ( + sum( + [ + element in {constants.ID_STRING, "form_id"} + for element in settings_sheet_headers[0] + ] + ) + == 2 + ): + settings_sheet_headers[0].pop(constants.ID_STRING, None) + settings_sheet[0].pop(constants.ID_STRING, None) + warnings.append( + "The form_id and id_string column headers are both" + " specified in the settings sheet provided." + " This may cause errors during conversion." + " In future, its best to avoid specifying both" + " column headers in the settings sheet." 
+ ) + except IndexError: # In case there is no settings sheet + pass + + from pyxform.survey import Survey + + settings_sheet = dealias_and_group_headers( + sheet_name=constants.SETTINGS, + sheet_data=settings_sheet, + sheet_header=settings_sheet_headers, + header_aliases=aliases.settings_header, + header_columns=set(Survey.get_slot_names()), + ) + settings = clean_text_values( + sheet_name=constants.SETTINGS, data=[settings_sheet.data[0]] + )[0] + else: + similar = find_sheet_misspellings(key=constants.SETTINGS, keys=sheet_names) if similar is not None: warnings.append(similar + _MSG_SUPPRESS_SPELLING) - settings_sheet_headers = workbook_dict.get(constants.SETTINGS, []) - try: - if ( - sum( - [ - element in [constants.ID_STRING, "form_id"] - for element in settings_sheet_headers[0].keys() - ] - ) - == 2 - ): - settings_sheet_headers[0].pop(constants.ID_STRING, None) - warnings.append( - "The form_id and id_sting column headers are both" - " specified in the settings sheet provided." - " This may cause errors during conversion." - " In future, its best to avoid specifying both" - " column headers in the settings sheet." - ) - except IndexError: # In case there is no settings sheet - settings_sheet_headers = [] - settings_sheet = dealias_and_group_headers( - dict_array=settings_sheet_headers, - header_aliases=aliases.settings_header, - use_double_colons=use_double_colons, - ) - settings = settings_sheet.data[0] if len(settings_sheet.data) > 0 else {} - settings = clean_text_values(sheet_name=constants.SETTINGS, data=[settings])[0] clean_text_values_enabled = aliases.yes_no.get( - settings.get("clean_text_values", "true()") + settings.get("clean_text_values", "yes"), True ) - default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language) - # add_none_option is a boolean that when true, # indicates a none option should automatically be added to selects. # It should probably be deprecated but I haven't checked yet. 
if "add_none_option" in settings: settings["add_none_option"] = aliases.yes_no.get( - settings["add_none_option"], False + settings.get("add_none_option", "no"), False ) # Here we create our json dict root with default settings: @@ -502,104 +385,144 @@ def workbook_to_json( } # Here the default settings are overridden by those in the settings sheet json_dict.update(settings) + from pyxform.question import Option + + option_fields = set(Option.get_slot_names()) # ########## External Choices sheet ########## - external_choices_sheet = workbook_dict.get(constants.EXTERNAL_CHOICES, []) - external_choices_sheet = clean_text_values( - sheet_name=constants.EXTERNAL_CHOICES, data=external_choices_sheet - ) - external_choices_sheet = dealias_and_group_headers( - dict_array=external_choices_sheet, - header_aliases=aliases.list_header, - use_double_colons=use_double_colons, - default_language=default_language, - ) - external_choices = group_dictionaries_by_key( - list_of_dicts=external_choices_sheet.data, key=constants.LIST_NAME_S - ) + external_choices = workbook_dict.external_choices + if external_choices: + external_choices = clean_text_values( + sheet_name=constants.EXTERNAL_CHOICES, data=external_choices + ) + external_choices = dealias_and_group_headers( + sheet_name=constants.EXTERNAL_CHOICES, + sheet_data=external_choices, + sheet_header=workbook_dict.external_choices_header, + header_aliases=aliases.list_header, + header_columns=option_fields, + default_language=default_language, + ) + external_choices = group_dictionaries_by_key( + list_of_dicts=external_choices.data, key=constants.LIST_NAME_S + ) # ########## Choices sheet ########## - choices_sheet = workbook_dict.get(constants.CHOICES, []) - choices_sheet = clean_text_values( - sheet_name=constants.CHOICES, - data=choices_sheet, - add_row_number=True, - ) - choices_sheet = dealias_and_group_headers( - dict_array=choices_sheet, - header_aliases=aliases.list_header, - use_double_colons=use_double_colons, - default_language=default_language, - ) - choices = group_dictionaries_by_key( - list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S - ) - # To combine the warning into one message, the check for missing choices translation - # columns is run with Survey sheet below. - - # Warn and remove invalid headers in case the form uses headers for notes. - allow_duplicates = aliases.yes_no.get( - settings.get("allow_choice_duplicates", False), False - ) - vc.validate_choices( - choices=choices, - warnings=warnings, - headers=choices_sheet.headers, - allow_duplicates=allow_duplicates, - ) - - if 0 < len(choices): - json_dict[constants.CHOICES] = choices + choices_sheet = workbook_dict.choices + choices = {} + if choices_sheet: + if clean_text_values_enabled: + choices_sheet = clean_text_values( + sheet_name=constants.CHOICES, + data=choices_sheet, + add_row_number=True, + ) + choices_sheet = dealias_and_group_headers( + sheet_name=constants.CHOICES, + sheet_data=choices_sheet, + sheet_header=workbook_dict.choices_header, + header_aliases=aliases.list_header, + header_columns=option_fields, + headers_required={constants.NAME}, + default_language=default_language, + ) + choices = group_dictionaries_by_key( + list_of_dicts=choices_sheet.data, key=constants.LIST_NAME_S + ) + # To combine the warning into one message, the check for missing choices translation + # columns is run with Survey sheet below. + + # Warn and remove invalid headers in case the form uses headers for notes. 
+ choices = validate_and_clean_choices( + choices=choices, + warnings=warnings, + headers=choices_sheet.headers, + allow_duplicates=aliases.yes_no.get( + settings.get("allow_choice_duplicates", "no"), False + ), + ) + if choices: + json_dict[constants.CHOICES] = choices # ########## Entities sheet ########### - entities_sheet = workbook_dict.get(constants.ENTITIES, []) - entities_sheet = clean_text_values(sheet_name=constants.ENTITIES, data=entities_sheet) - entities_sheet = dealias_and_group_headers( - dict_array=entities_sheet, - header_aliases=aliases.entities_header, - use_double_colons=False, - ) - entity_declaration = get_entity_declaration( - entities_sheet=entities_sheet.data, workbook_dict=workbook_dict, warnings=warnings - ) + entity_declaration = None + if workbook_dict.entities: + entities_sheet = clean_text_values( + sheet_name=constants.ENTITIES, data=workbook_dict.entities + ) + from pyxform.entities.entity_declaration import EntityDeclaration + + entities_sheet = dealias_and_group_headers( + sheet_name=constants.ENTITIES, + sheet_data=entities_sheet, + sheet_header=workbook_dict.entities_header, + header_aliases=aliases.entities_header, + # Entities treat some actual columns as if they are parameters. + header_columns={ + *EntityDeclaration.get_slot_names(), + *(i.value for i in constants.EntityColumns.value_list()), + }, + ) + entity_declaration = get_entity_declaration(entities_sheet=entities_sheet.data) + else: + similar = find_sheet_misspellings(key=constants.ENTITIES, keys=sheet_names) + if similar is not None: + warnings.append(similar + constants._MSG_SUPPRESS_SPELLING) # ########## Survey sheet ########### - survey_sheet = workbook_dict[constants.SURVEY] + survey_sheet = workbook_dict.survey # Process the headers: if clean_text_values_enabled: survey_sheet = clean_text_values( - sheet_name=constants.SURVEY, data=survey_sheet, strip_whitespace=True + sheet_name=constants.SURVEY, data=workbook_dict.survey, strip_whitespace=True ) + from pyxform.question import MultipleChoiceQuestion + survey_sheet = dealias_and_group_headers( - dict_array=survey_sheet, + sheet_name=constants.SURVEY, + sheet_data=survey_sheet, + sheet_header=workbook_dict.survey_header, header_aliases=aliases.survey_header, - use_double_colons=use_double_colons, + header_columns=set(MultipleChoiceQuestion.get_slot_names()), + headers_required={constants.TYPE}, default_language=default_language, ) survey_sheet.data = dealias_types(dict_array=survey_sheet.data) # Check for missing translations. The choices sheet is checked here so that the # warning can be combined into one message. + if not choices_sheet: + choices_headers = () + else: + choices_headers = choices_sheet.headers sheet_translations = SheetTranslations( survey_sheet=survey_sheet.headers, - choices_sheet=choices_sheet.headers, + choices_sheet=choices_headers, ) sheet_translations.missing_check(warnings=warnings) + # ########## OSM sheet ########### # No spell check for OSM sheet (infrequently used, many spurious matches). 
-    osm_sheet = dealias_and_group_headers(
-        dict_array=workbook_dict.get(constants.OSM, []),
-        header_aliases=aliases.list_header,
-        use_double_colons=True,
-    )
-    osm_tags = group_dictionaries_by_key(
-        list_of_dicts=osm_sheet.data, key=constants.LIST_NAME_S
-    )
+    osm_tags = None
+    if workbook_dict.osm:
+        osm_sheet = dealias_and_group_headers(
+            sheet_data=workbook_dict.osm,
+            sheet_name=constants.OSM,
+            sheet_header=workbook_dict.osm_header,
+            header_aliases=aliases.list_header,
+            header_columns=option_fields,
+        )
+        osm_tags = group_dictionaries_by_key(
+            list_of_dicts=osm_sheet.data, key=constants.LIST_NAME_S
+        )
+
+    # Clear references to original data for garbage collection.
+    del workbook_dict
 
     # #################################
     # Parse the survey sheet while generating a survey in our json format:
     # A stack is used to keep track of begin/end expressions
-    stack = [
+    stack: list[dict[str, Any]] = [
         {
             "control_type": None,
             "control_name": None,
@@ -610,28 +533,8 @@
     # this will be set to the name of the list
     table_list = None
 
-    # For efficiency we compile all the regular expressions
-    # that will be used to parse types:
-    end_control_regex = re.compile(
-        r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(aliases.control.keys()) + r"))$"
-    )
-    begin_control_regex = re.compile(
-        r"^(?P<begin>begin)(\s|_)(?P<type>("
-        + "|".join(aliases.control.keys())
-        + r"))( (over )?(?P<list_name>\S+))?$"
-    )
-    select_regexp = re.compile(
-        r"^(?P<select_command>("
-        + "|".join(aliases.select.keys())
-        + r")) (?P<list_name>\S+)"
-        + "( (?P<specify_other>(or specify other|or_other|or other)))?$"
-    )
-    osm_regexp = re.compile(
-        r"(?P<osm_command>(" + "|".join(aliases.osm.keys()) + r")) (?P<list_name>\S+)"
-    )
-    # Rows from the survey sheet that should be nested in meta
-    survey_meta = []
+    meta_children = []
 
     # To check that questions with triggers refer to other questions that exist.
question_names = set() trigger_references = [] @@ -657,7 +560,7 @@ def workbook_to_json( continue # skip empty rows - if len(row) == 0: + if not row: continue # Get question type @@ -683,7 +586,7 @@ def workbook_to_json( # Pull out questions that will go in meta block if question_type == "audit": # Force audit name to always be "audit" to follow XForms spec - if "name" in row and row["name"] not in [None, "", "audit"]: + if "name" in row and row["name"] not in {None, "", "audit"}: raise PyXFormError( ROW_FORMAT_STRING % row_number + " Audits must always be named 'audit.'" @@ -704,7 +607,7 @@ def workbook_to_json( ), ) - if constants.TRACK_CHANGES in parameters.keys(): + if constants.TRACK_CHANGES in parameters: if ( parameters[constants.TRACK_CHANGES] != "true" and parameters[constants.TRACK_CHANGES] != "false" @@ -724,7 +627,7 @@ def workbook_to_json( } ) - if constants.TRACK_CHANGES_REASONS in parameters.keys(): + if constants.TRACK_CHANGES_REASONS in parameters: if parameters[constants.TRACK_CHANGES_REASONS] != "on-form-edit": raise PyXFormError( constants.TRACK_CHANGES_REASONS + " must be set to on-form-edit" @@ -735,7 +638,7 @@ def workbook_to_json( {"odk:" + constants.TRACK_CHANGES_REASONS: "on-form-edit"} ) - if constants.IDENTIFY_USER in parameters.keys(): + if constants.IDENTIFY_USER in parameters: if ( parameters[constants.IDENTIFY_USER] != "true" and parameters[constants.IDENTIFY_USER] != "false" @@ -755,19 +658,19 @@ def workbook_to_json( } ) - location_parameters = ( + location_parameters = { constants.LOCATION_PRIORITY, constants.LOCATION_MIN_INTERVAL, constants.LOCATION_MAX_AGE, - ) - if any(k in parameters.keys() for k in location_parameters): - if all(k in parameters.keys() for k in location_parameters): - if parameters[constants.LOCATION_PRIORITY] not in [ + } + if any(k in parameters for k in location_parameters): + if all(k in parameters for k in location_parameters): + if parameters[constants.LOCATION_PRIORITY] not in { "no-power", "low-power", "balanced", "high-accuracy", - ]: + }: msg = ( f"Parameter {constants.LOCATION_PRIORITY} must be set to " "no-power, low-power, balanced, or high-accuracy:" @@ -843,7 +746,7 @@ def workbook_to_json( + " parameters." ) - survey_meta.append(new_dict) + meta_children.append(new_dict) continue if question_type == "calculate": @@ -872,7 +775,7 @@ def workbook_to_json( # Try to parse question as a end control statement # (i.e. end loop/repeat/group): - end_control_parse = end_control_regex.search(question_type) + end_control_parse = RE_END_CONTROL.search(question_type) if end_control_parse: parse_dict = end_control_parse.groupdict() if parse_dict.get("end") and "type" in parse_dict: @@ -914,11 +817,11 @@ def workbook_to_json( ) in_repeat = any(ancestor["control_type"] == "repeat" for ancestor in stack) - validate_entity_saveto(row, row_number, entity_declaration, in_repeat) + validate_entity_saveto(row, row_number, in_repeat, entity_declaration) # Try to parse question as begin control statement # (i.e. begin loop/repeat/group): - begin_control_parse = begin_control_regex.search(question_type) + begin_control_parse = RE_BEGIN_CONTROL.search(question_type) if begin_control_parse: parse_dict = begin_control_parse.groupdict() if parse_dict.get("begin") and "type" in parse_dict: @@ -983,7 +886,7 @@ def workbook_to_json( if repeat_count_expression: # Simple expressions don't require a new node, they can reference directly. 
if not is_pyxform_reference(value=repeat_count_expression): - generated_node_name = new_json_dict["name"] + "_count" + generated_node_name = f"""{new_json_dict["name"]}_count""" parent_children_array.append( { "name": generated_node_name, @@ -996,7 +899,7 @@ def workbook_to_json( ) # This re-directs the body/repeat ref to the above generated node. new_json_dict["control"]["jr:count"] = ( - "${" + generated_node_name + "}" + f"${{{generated_node_name}}}" ) # Code to deal with table_list appearance flags @@ -1050,7 +953,7 @@ def workbook_to_json( question_names.add(question_name) # Try to parse question as a select: - select_parse = select_regexp.search(question_type) + select_parse = RE_SELECT.search(question_type) if select_parse: parse_dict = select_parse.groupdict() if parse_dict.get("select_command"): @@ -1065,14 +968,11 @@ def workbook_to_json( ) list_name = parse_dict[constants.LIST_NAME_U] file_extension = os.path.splitext(list_name)[1] - if ( - select_type == constants.SELECT_ONE_EXTERNAL - and list_name not in external_choices - ): + if select_type == constants.SELECT_ONE_EXTERNAL: if not external_choices: k = constants.EXTERNAL_CHOICES msg = "There should be an external_choices sheet in this xlsform." - similar = find_sheet_misspellings(key=k, keys=workbook_keys) + similar = find_sheet_misspellings(key=k, keys=sheet_names) if similar is not None: msg = msg + " " + similar raise PyXFormError( @@ -1080,11 +980,12 @@ def workbook_to_json( + " Please ensure that the external_choices sheet has columns" " 'list name', and 'name'." ) - raise PyXFormError( - ROW_FORMAT_STRING % row_number - + "List name not in external choices sheet: " - + list_name - ) + if list_name not in external_choices: + raise PyXFormError( + ROW_FORMAT_STRING % row_number + + "List name not in external choices sheet: " + + list_name + ) select_from_file.validate_list_name_extension( select_command=parse_dict["select_command"], list_name=list_name, @@ -1099,11 +1000,11 @@ def workbook_to_json( if not choices: k = constants.CHOICES msg = "There should be a choices sheet in this xlsform." - similar = find_sheet_misspellings(key=k, keys=workbook_keys) + similar = find_sheet_misspellings(key=k, keys=sheet_names) if similar is not None: - msg = msg + " " + similar + msg = f"{msg} {similar}" raise PyXFormError( - msg + " Please ensure that the choices sheet has the" + f"{msg} Please ensure that the choices sheet has the" " mandatory columns 'list_name', 'name', and 'label'." ) raise PyXFormError( @@ -1188,10 +1089,10 @@ def workbook_to_json( new_json_dict[constants.TYPE] = select_type select_params_allowed = ["randomize", "seed"] - if parse_dict["select_command"] in ( + if parse_dict["select_command"] in { "select_one_from_file", "select_multiple_from_file", - ): + }: select_params_allowed += ["value", "label"] # Look at parameters column for select parameters @@ -1199,7 +1100,7 @@ def workbook_to_json( parameters=parameters, allowed=select_params_allowed ) - if "randomize" in parameters.keys(): + if "randomize" in parameters: if ( parameters["randomize"] != "true" and parameters["randomize"] != "false" @@ -1209,7 +1110,7 @@ def workbook_to_json( f"""'{parameters["randomize"]}' is an invalid value""" ) - if "seed" in parameters.keys(): + if "seed" in parameters: if not parameters["seed"].startswith("${"): try: float(parameters["seed"]) @@ -1217,18 +1118,18 @@ def workbook_to_json( raise PyXFormError( "seed value must be a number or a reference to another field." 
) from seed_err - elif "seed" in parameters.keys(): + elif "seed" in parameters: raise PyXFormError( "Parameters must include randomize=true to use a seed." ) - if "value" in parameters.keys(): + if "value" in parameters: select_from_file.value_or_label_check( name="value", value=parameters["value"], row_number=row_number, ) - if "label" in parameters.keys(): + if "label" in parameters: select_from_file.value_or_label_check( name="label", value=parameters["label"], @@ -1287,13 +1188,13 @@ def workbook_to_json( continue # Try to parse question as osm: - osm_parse = osm_regexp.search(question_type) + osm_parse = RE_OSM.search(question_type) if osm_parse: parse_dict = osm_parse.groupdict() new_dict = row.copy() new_dict["type"] = constants.OSM - if parse_dict.get(constants.LIST_NAME_U) is not None: + if osm_tags and parse_dict.get(constants.LIST_NAME_U) is not None: tags = osm_tags.get(parse_dict.get(constants.LIST_NAME_U)) for tag in tags: if osm_tags.get(tag.get("name")): @@ -1314,7 +1215,7 @@ def workbook_to_json( new_dict = row.copy() parameters_generic.validate(parameters=parameters, allowed=("rows",)) - if "rows" in parameters.keys(): + if "rows" in parameters: try: int(parameters["rows"]) except ValueError as rows_err: @@ -1341,7 +1242,7 @@ def workbook_to_json( "app", ), ) - if "max-pixels" in parameters.keys(): + if "max-pixels" in parameters: try: int(parameters["max-pixels"]) except ValueError as mp_err: @@ -1357,7 +1258,7 @@ def workbook_to_json( + " Use the max-pixels parameter to speed up submission sending and save storage space. Learn more: https://xlsform.org/#image" ) - if "app" in parameters.keys(): + if "app" in parameters: appearance = row.get("control", {}).get("appearance") if appearance is None or appearance == "annotate": app_package_name = str(parameters["app"]) @@ -1377,13 +1278,13 @@ def workbook_to_json( new_dict = row.copy() parameters_generic.validate(parameters=parameters, allowed=("quality",)) - if "quality" in parameters.keys(): - if parameters["quality"] not in [ + if "quality" in parameters: + if parameters["quality"] not in { constants.AUDIO_QUALITY_VOICE_ONLY, constants.AUDIO_QUALITY_LOW, constants.AUDIO_QUALITY_NORMAL, constants.AUDIO_QUALITY_EXTERNAL, - ]: + }: raise PyXFormError("Invalid value for quality.") new_dict["bind"] = new_dict.get("bind", {}) @@ -1396,12 +1297,12 @@ def workbook_to_json( new_dict = row.copy() parameters_generic.validate(parameters=parameters, allowed=("quality",)) - if "quality" in parameters.keys(): - if parameters["quality"] not in [ + if "quality" in parameters: + if parameters["quality"] not in { constants.AUDIO_QUALITY_VOICE_ONLY, constants.AUDIO_QUALITY_LOW, constants.AUDIO_QUALITY_NORMAL, - ]: + }: raise PyXFormError("Invalid value for quality.") new_dict["action"] = new_dict.get("action", {}) @@ -1410,7 +1311,7 @@ def workbook_to_json( parent_children_array.append(new_dict) continue - if question_type in ["geopoint", "geoshape", "geotrace"]: + if question_type in {"geopoint", "geoshape", "geotrace"}: new_dict = row.copy() if question_type == "geopoint": @@ -1427,8 +1328,8 @@ def workbook_to_json( parameters=parameters, allowed=("allow-mock-accuracy",) ) - if "allow-mock-accuracy" in parameters.keys(): - if parameters["allow-mock-accuracy"] not in ["true", "false"]: + if "allow-mock-accuracy" in parameters: + if parameters["allow-mock-accuracy"] not in {"true", "false"}: raise PyXFormError("Invalid value for allow-mock-accuracy.") new_dict["bind"] = new_dict.get("bind", {}) @@ -1437,7 +1338,7 @@ def workbook_to_json( ) 
new_dict["control"] = new_dict.get("control", {}) - if "capture-accuracy" in parameters.keys(): + if "capture-accuracy" in parameters: try: float(parameters["capture-accuracy"]) new_dict["control"].update( @@ -1448,7 +1349,7 @@ def workbook_to_json( "Parameter capture-accuracy must have a numeric value" ) from ca_err - if "warning-accuracy" in parameters.keys(): + if "warning-accuracy" in parameters: try: float(parameters["warning-accuracy"]) new_dict["control"].update( @@ -1485,8 +1386,6 @@ def workbook_to_json( # print "Generating flattened instance..." add_flat_annotations(stack[0]["parent_children"]) - meta_children = [*survey_meta] - if aliases.yes_no.get(settings.get("omit_instanceID")): if settings.get("public_key"): raise PyXFormError("Cannot omit instanceID, it is required for encryption.") @@ -1513,7 +1412,7 @@ def workbook_to_json( } ) - if len(entity_declaration) > 0: + if entity_declaration: json_dict[constants.ENTITY_FEATURES] = ["create", "update", "offline"] meta_children.append(entity_declaration) @@ -1531,38 +1430,6 @@ def workbook_to_json( return json_dict -def parse_file_to_workbook_dict(path, file_object=None): - """ - Given a xls or csv workbook file use xls2json_backends to create - a python workbook_dict. - workbook_dicts are organized as follows: - {sheetname : [{column_header : column_value_in_array_indexed_row}]} - """ - - (filepath, filename) = os.path.split(path) - if not filename: - raise PyXFormError("No filename.") - (shortname, extension) = os.path.splitext(filename) - if not extension: - raise PyXFormError("No extension.") - - if extension in constants.XLS_EXTENSIONS: - return xls_to_dict(file_object if file_object is not None else path) - elif extension in constants.XLSX_EXTENSIONS: - return xlsx_to_dict(file_object if file_object is not None else path) - elif extension == ".csv": - return csv_to_dict(file_object if file_object is not None else path) - else: - raise PyXFormError("File was not recognized") - - -def get_filename(path): - """ - Get the extensionless filename from a path - """ - return os.path.splitext(os.path.basename(path))[0] - - def parse_file_to_json( path: str, default_name: str = constants.DEFAULT_FORM_NAME, @@ -1575,12 +1442,11 @@ def parse_file_to_json( """ if warnings is None: warnings = [] - workbook_dict = parse_file_to_workbook_dict(path, file_object) - fallback_form_name = str(get_filename(path)) + workbook_dict = get_xlsform(xlsform=coalesce(path, file_object)) return workbook_to_json( workbook_dict=workbook_dict, form_name=default_name, - fallback_form_name=fallback_form_name, + fallback_form_name=workbook_dict.fallback_form_name, default_language=default_language, warnings=warnings, ) @@ -1613,9 +1479,9 @@ def __init__(self, path_or_file): path = path.name except AttributeError: pass - self._dict = parse_file_to_workbook_dict(path) + self._dict = get_xlsform(xlsform=path) self._path = path - self._id = str(get_filename(path)) + self._id = str(os.path.splitext(os.path.basename(path))[0]) self._name = self._print_name = self._title = self._id def to_json_dict(self): @@ -1670,13 +1536,13 @@ def __init__(self, path): self._setup_question_types_dictionary() def _setup_question_types_dictionary(self): - use_double_colons = has_double_colon(self._dict) types_sheet = "question types" self._dict = self._dict[types_sheet] self._dict = dealias_and_group_headers( - dict_array=self._dict, + sheet_name=types_sheet, + sheet_data=self._dict, header_aliases={}, - use_double_colons=use_double_colons, + header_columns=set(), 
default_language=constants.DEFAULT_LANGUAGE_VALUE, ).data self._dict = organize_by_values(self._dict, "name") diff --git a/pyxform/xls2json_backends.py b/pyxform/xls2json_backends.py index 27652915..306d53e0 100644 --- a/pyxform/xls2json_backends.py +++ b/pyxform/xls2json_backends.py @@ -5,14 +5,13 @@ import csv import datetime import re -from collections.abc import Callable, Iterator +from collections.abc import Callable, Iterable, Sequence from dataclasses import dataclass from enum import Enum -from functools import reduce from io import BytesIO, IOBase, StringIO from os import PathLike from pathlib import Path -from typing import Any +from typing import Any, BinaryIO from zipfile import BadZipFile from openpyxl import open as pyxl_open @@ -35,6 +34,29 @@ "The xls file provided has an invalid date on the %s sheet, under" " the %s column on row number %s" ) +RE_WHITESPACE = re.compile(r"( )+") + + +@dataclass(slots=True) +class DefinitionData: + # XLSForm definition sheets. + # survey is optional to allow processing to proceed to warnings / spell checks. + survey: Sequence[dict[str, str]] | None = None + survey_header: Sequence[dict[str, Any]] | None = None + choices: Sequence[dict[str, str]] | None = None + choices_header: Sequence[dict[str, Any]] | None = None + settings: Sequence[dict[str, str]] | None = None + settings_header: Sequence[dict[str, Any]] | None = None + external_choices: Sequence[dict[str, str]] | None = None + external_choices_header: Sequence[dict[str, Any]] | None = None + entities: Sequence[dict[str, str]] | None = None + entities_header: Sequence[dict[str, Any]] | None = None + osm: Sequence[dict[str, str]] | None = None + osm_header: Sequence[dict[str, Any]] | None = None + + # Extra metadata. + sheet_names: Sequence[str] | None = None + fallback_form_name: str | None = None def _list_to_dict_list(list_items): @@ -43,7 +65,7 @@ def _list_to_dict_list(list_items): Returns a list of the created dict or an empty list """ if list_items: - return [{str(i): "" for i in list_items}] + return [{str(i): None for i in list_items}] return [] @@ -57,7 +79,7 @@ def trim_trailing_empty(a_list: list, n_empty: int) -> list: return a_list -def get_excel_column_headers(first_row: Iterator[str | None]) -> list[str | None]: +def get_excel_column_headers(first_row: Iterable[str | None]) -> list[str | None]: """Get column headers from the first row; stop if there's a run of empty columns.""" max_adjacent_empty_columns = 20 column_header_list = [] @@ -76,15 +98,15 @@ def get_excel_column_headers(first_row: Iterator[str | None]) -> list[str | None if column_header in column_header_list: raise PyXFormError(f"Duplicate column header: {column_header}") # Strip whitespaces from the header. 
- clean_header = re.sub(r"( )+", " ", column_header.strip()) + clean_header = RE_WHITESPACE.sub(" ", column_header.strip()) column_header_list.append(clean_header) return trim_trailing_empty(column_header_list, adjacent_empty_cols) def get_excel_rows( - headers: Iterator[str | None], - rows: Iterator[tuple[aCell, ...]], + headers: Iterable[str | None], + rows: Iterable[tuple[aCell, ...]], cell_func: Callable[[aCell, int, str], Any], ) -> list[dict[str, Any]]: """Get rows of cleaned data; stop if there's a run of empty rows.""" @@ -165,12 +187,13 @@ def clean_func(cell: xlrdCell, row_n: int, col_key: str) -> str | None: return rows, _list_to_dict_list(column_header_list) def process_workbook(wb: xlrdBook): - result_book = {} + result_book = {"sheet_names": []} for wb_sheet in wb.sheets(): - # Note that the sheet exists but do no further processing here. - result_book[wb_sheet.name] = [] + # Note original in sheet_names for spelling check. + result_book["sheet_names"].append(wb_sheet.name) + sheet_name = wb_sheet.name.lower() # Do not process sheets that have nothing to do with XLSForm. - if wb_sheet.name not in constants.SUPPORTED_SHEET_NAMES: + if sheet_name not in constants.SUPPORTED_SHEET_NAMES: if len(wb.sheets()) == 1: ( result_book[constants.SURVEY], @@ -180,8 +203,8 @@ def process_workbook(wb: xlrdBook): continue else: ( - result_book[wb_sheet.name], - result_book[f"{wb_sheet.name}_header"], + result_book[sheet_name], + result_book[f"{sheet_name}_header"], ) = xls_to_dict_normal_sheet(wb=wb, wb_sheet=wb_sheet) return result_book @@ -255,25 +278,25 @@ def xlsx_to_dict_normal_sheet(sheet: pyxlWorksheet): return rows, _list_to_dict_list(column_header_list) def process_workbook(wb: pyxlWorkbook): - result_book = {} + result_book = {"sheet_names": []} for sheetname in wb.sheetnames: - wb_sheet = wb[sheetname] - # Note that the sheet exists but do no further processing here. - result_book[sheetname] = [] + # Note original in sheet_names for spelling check. + result_book["sheet_names"].append(sheetname) + sheet_name = sheetname.lower() # Do not process sheets that have nothing to do with XLSForm. 
- if sheetname not in constants.SUPPORTED_SHEET_NAMES: + if sheet_name not in constants.SUPPORTED_SHEET_NAMES: if len(wb.sheetnames) == 1: ( result_book[constants.SURVEY], result_book[f"{constants.SURVEY}_header"], - ) = xlsx_to_dict_normal_sheet(wb_sheet) + ) = xlsx_to_dict_normal_sheet(wb[sheetname]) else: continue else: ( - result_book[sheetname], - result_book[f"{sheetname}_header"], - ) = xlsx_to_dict_normal_sheet(wb_sheet) + result_book[sheet_name], + result_book[f"{sheet_name}_header"], + ) = xlsx_to_dict_normal_sheet(wb[sheetname]) return result_book try: @@ -306,14 +329,18 @@ def xlsx_value_to_str(value) -> str: # ensure unicode and replace nbsp spaces with normal ones # to avoid this issue: # https://github.com/modilabs/pyxform/issues/83 - return str(value).replace(chr(160), " ") + value = str(value) + if chr(160) in value: + return value.replace(chr(160), " ") + else: + return value def is_empty(value): if value is None: return True elif isinstance(value, str): - if value.strip() == "": + if not value or value.isspace(): return True return False @@ -358,25 +385,26 @@ def first_column_as_sheet_name(row): elif len(row) == 1: return row[0], None else: - s_or_c = row[0] - content = row[1:] - if s_or_c == "": - s_or_c = None - # concatenate all the strings in content - if reduce(lambda x, y: x + y, content) == "": + sheet_name = row[0].strip() + content = [str(v).strip() for v in row[1:]] + if sheet_name == "": + sheet_name = None + if not any(c != "" for c in content): # content is a list of empty strings content = None - return s_or_c, content + return sheet_name, content def process_csv_data(rd): - _dict = {} + _dict = {"sheet_names": []} sheet_name = None current_headers = None for row in rd: - survey_or_choices, content = first_column_as_sheet_name(row) - if survey_or_choices is not None: - sheet_name = survey_or_choices - if sheet_name not in _dict: + maybe_sheet_name, content = first_column_as_sheet_name(row) + if maybe_sheet_name is not None: + sheet_name = maybe_sheet_name + if sheet_name and sheet_name not in _dict: + _dict["sheet_names"].append(sheet_name) + sheet_name = sheet_name.lower() _dict[str(sheet_name)] = [] current_headers = None if content is not None: @@ -384,13 +412,11 @@ def process_csv_data(rd): current_headers = content _dict[f"{sheet_name}_header"] = _list_to_dict_list(current_headers) else: - _d = {} - for key, val in zip(current_headers, content, strict=False): - if val != "": - # Slight modification so values are striped - # this is because csvs often spaces following commas - # (but the csv reader might already handle that.) - _d[str(key)] = str(val.strip()) + _d = { + k: v + for k, v in zip(current_headers, content, strict=False) + if v != "" + } _dict[sheet_name].append(_d) return _dict @@ -440,6 +466,8 @@ def convert_file_to_csv_string(path): foo = StringIO(newline="") writer = csv.writer(foo, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL) for sheet_name, rows in imported_sheets.items(): + if sheet_name == "sheet_names": + continue writer.writerow([sheet_name]) out_keys = [] out_rows = [] @@ -526,85 +554,71 @@ def xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name): MD_PIPE_OR_ESCAPE = re.compile(r"(? 
str: + if not cell or cell.isspace(): return None - val = val.replace(r"\|", "|") - return val - + return cell.strip().replace(r"\|", "|") -def _md_extract_array(mdtablerow): - match = re.match(MD_CELL, mdtablerow) - if match: - mtchstr = match.groups()[0] - if re.match(MD_SEPARATOR, mtchstr): - return False - else: - return [_md_strp_cell(c) for c in re.split(MD_PIPE_OR_ESCAPE, mtchstr)] - - return False - -def _md_is_null_row(r_arr): - for cell in r_arr: - if cell is not None: - return False - - return True - - -def _md_table_to_ss_structure(mdstr: str) -> list[tuple[str, list[list[str]]]]: - ss_arr = [] - for item in mdstr.split("\n"): - arr = _md_extract_array(item) - if arr: - ss_arr.append(arr) +def _md_table_to_ss_structure(mdstr: str) -> dict[str, list[tuple[str, ...]]]: sheet_name = False sheet_arr = False - sheets = [] - for row in ss_arr: - if row[0] is not None: - if sheet_arr: - sheets.append((sheet_name, sheet_arr)) - sheet_arr = [] - sheet_name = row[0] - excluding_first_col = row[1:] - if sheet_name and not _md_is_null_row(excluding_first_col): - sheet_arr.append(excluding_first_col) - sheets.append((sheet_name, sheet_arr)) + sheets = {} + for line in mdstr.split("\n"): + if re.match(MD_COMMENT, line): + # ignore lines which start with pound sign + continue + elif re.match(MD_COMMENT_INLINE, line): + # keep everything before the # outside of the last occurrence of | + line = re.match(MD_COMMENT_INLINE, line).groups()[0] + match = re.match(MD_CELL, line) + if match: + mtchstr = match.groups()[0] + if not re.match(MD_SEPARATOR, mtchstr): + row_split = re.split(MD_PIPE_OR_ESCAPE, mtchstr) + first_col = _md_strp_cell(row_split[0]) + row = tuple(_md_strp_cell(c) for c in row_split[1:]) + if first_col is None and row is None: + continue + if first_col is not None: + if sheet_arr: + sheets[sheet_name] = sheet_arr + sheet_arr = [] + sheet_name = first_col + if sheet_name and any(c is not None for c in row): + sheet_arr.append(row) + sheets[sheet_name] = sheet_arr return sheets def md_to_dict(md: str | BytesIO): - def _row_to_dict(row, headers): - out_dict = {} - for i in range(len(row)): - col = row[i] - if col not in [None, ""]: - out_dict[headers[i]] = col - return out_dict - def list_to_dicts(arr): - return [_row_to_dict(r, arr[0]) for r in arr[1:]] + return [ + {arr[0][i]: v for i, v in enumerate(row) if v not in {None, ""}} + for row in arr[1:] + ] def process_md_data(md_: str): - _md = [] - for line in md_.split("\n"): - if re.match(MD_COMMENT, line): - # ignore lines which start with pound sign - continue - elif re.match(MD_COMMENT_INLINE, line): - # keep everything before the # outside of the last occurrence of | - _md.append(re.match(MD_COMMENT_INLINE, line).groups()[0].strip()) + result_book = {"sheet_names": []} + ss_structure = _md_table_to_ss_structure(md_) + for sheet, contents in ss_structure.items(): + # Note original in sheet_names for spelling check. + result_book["sheet_names"].append(sheet) + sheet_name = sheet.lower() + # Do not process sheets that have nothing to do with XLSForm. 
+ if sheet_name not in constants.SUPPORTED_SHEET_NAMES: + if len(ss_structure) == 1: + result_book[constants.SURVEY] = list_to_dicts(contents) + result_book[f"{constants.SURVEY}_header"] = _list_to_dict_list( + contents[0] + ) + else: + continue else: - _md.append(line.strip()) - md_ = "\n".join(_md) - sheets = {} - for sheet, contents in _md_table_to_ss_structure(md_): - sheets[sheet] = list_to_dicts(contents) - return sheets + result_book[sheet_name] = list_to_dicts(contents) + result_book[f"{sheet_name}_header"] = _list_to_dict_list(contents[0]) + return result_book try: md_data = get_definition_data(definition=md) @@ -622,7 +636,7 @@ def md_table_to_workbook(mdstr: str) -> pyxlWorkbook: """ md_data = _md_table_to_ss_structure(mdstr=mdstr) wb = pyxlWorkbook(write_only=True) - for key, rows in md_data: + for key, rows in md_data.items(): sheet = wb.create_sheet(title=key) for r in rows: sheet.append(r) @@ -685,13 +699,14 @@ class SupportedFileTypes(Enum): def get_processors(): return { SupportedFileTypes.xlsx: xlsx_to_dict, + SupportedFileTypes.xlsm: xlsx_to_dict, SupportedFileTypes.xls: xls_to_dict, SupportedFileTypes.md: md_to_dict, SupportedFileTypes.csv: csv_to_dict, } -@dataclass +@dataclass(slots=True) class Definition: data: BytesIO file_type: SupportedFileTypes | None @@ -701,7 +716,7 @@ class Definition: def definition_to_dict( definition: str | PathLike[str] | bytes | BytesIO | IOBase | Definition, file_type: str | None = None, -) -> dict: +) -> DefinitionData: """ Convert raw definition data to a dict ready for conversion to a XForm. @@ -724,7 +739,9 @@ def definition_to_dict( for func in processors.values(): try: - return func(definition) + return DefinitionData( + fallback_form_name=definition.file_path_stem, **func(definition) + ) except PyXFormReadError: # noqa: PERF203 continue @@ -790,3 +807,17 @@ def get_definition_data( file_type=file_type, file_path_stem=file_path_stem, ) + + +def get_xlsform( + xlsform: str | PathLike[str] | bytes | BytesIO | BinaryIO | dict, + file_type: str | None = None, +) -> DefinitionData: + if isinstance(xlsform, dict): + workbook_dict = DefinitionData(**xlsform) + else: + definition = get_definition_data(definition=xlsform) + if file_type is None: + file_type = definition.file_type + workbook_dict = definition_to_dict(definition=definition, file_type=file_type) + return workbook_dict diff --git a/pyxform/xls2xform.py b/pyxform/xls2xform.py index f9af81a3..a768764a 100644 --- a/pyxform/xls2xform.py +++ b/pyxform/xls2xform.py @@ -11,15 +11,17 @@ from os import PathLike from os.path import splitext from pathlib import Path -from typing import TYPE_CHECKING, BinaryIO +from typing import TYPE_CHECKING, BinaryIO, Optional -from pyxform import builder, xls2json -from pyxform.utils import coalesce, external_choices_to_csv, has_external_choices -from pyxform.validators.odk_validate import ODKValidateError -from pyxform.xls2json_backends import ( - definition_to_dict, - get_definition_data, +from pyxform.builder import create_survey_element_from_dict +from pyxform.utils import ( + coalesce, + external_choices_to_csv, + has_external_choices, ) +from pyxform.validators.odk_validate import ODKValidateError +from pyxform.xls2json import workbook_to_json +from pyxform.xls2json_backends import get_xlsform if TYPE_CHECKING: from pyxform.survey import Survey @@ -39,7 +41,7 @@ def get_xml_path(path): return splitext(path)[0] + ".xml" -@dataclass +@dataclass(slots=True) class ConvertResult: """ Result data from the XLSForm to XForm conversion. 
@@ -54,8 +56,8 @@ class ConvertResult:
     xform: str
     warnings: list[str]
     itemsets: str | None
-    _pyxform: dict
-    _survey: "Survey"
+    _pyxform: dict | None
+    _survey: Optional["Survey"]
 
 
 def convert(
@@ -93,32 +95,26 @@ def convert(
         xlsform is provided as a dict, then it is used directly and this argument is ignored.
     """
     warnings = coalesce(warnings, [])
-    if isinstance(xlsform, dict):
-        workbook_dict = xlsform
-        fallback_form_name = None
-    else:
-        definition = get_definition_data(definition=xlsform)
-        if file_type is None:
-            file_type = definition.file_type
-        workbook_dict = definition_to_dict(definition=definition, file_type=file_type)
-        fallback_form_name = definition.file_path_stem
-    pyxform_data = xls2json.workbook_to_json(
+    workbook_dict = get_xlsform(xlsform=xlsform, file_type=file_type)
+    pyxform_data = workbook_to_json(
         workbook_dict=workbook_dict,
         form_name=form_name,
-        fallback_form_name=fallback_form_name,
+        fallback_form_name=workbook_dict.fallback_form_name,
         default_language=default_language,
         warnings=warnings,
     )
-    survey = builder.create_survey_element_from_dict(pyxform_data)
+    itemsets = None
+    if has_external_choices(json_struct=pyxform_data):
+        itemsets = external_choices_to_csv(workbook_dict=workbook_dict)
+    del workbook_dict
+
+    survey = create_survey_element_from_dict(pyxform_data)
     xform = survey.to_xml(
         validate=validate,
         pretty_print=pretty_print,
         warnings=warnings,
         enketo=enketo,
     )
-    itemsets = None
-    if has_external_choices(json_struct=pyxform_data):
-        itemsets = external_choices_to_csv(workbook_dict=workbook_dict)
     return ConvertResult(
         xform=xform,
         warnings=warnings,
diff --git a/tests/example_xls/case_insensitivity.csv b/tests/example_xls/case_insensitivity.csv
new file mode 100644
index 00000000..0d267179
--- /dev/null
+++ b/tests/example_xls/case_insensitivity.csv
@@ -0,0 +1,23 @@
+SURVEY ,
+ , TYPE , NAME , LABEL::EN , CHOICE_FILTER
+ , select_one c1 , q1 , Are you good? ,
+ , select_one_external c1 , q2 , Where are you? , YES_NO=${q1}
+ , osm c1 , q3 , Where exactly? ,
+CHOICES ,
+ , LIST_NAME , NAME , LABEL::EN
+ , c1 , n1-c , l1-c
+ , c1 , n2-c , l2-c
+SETTINGS ,
+ , FORM_TITLE , FORM_ID , DEFAULT_LANGUAGE
+ , Yes or no , YesNo , EN
+EXTERNAL_CHOICES ,
+ , LIST_NAME , NAME , LABEL , YES_NO
+ , c1 , n1-e , l1-e , yes
+ , c1 , n2-e , l2-e , yes
+ENTITIES ,
+ , DATASET , LABEL
+ , e1 , l1
+OSM ,
+ , LIST_NAME , NAME , LABEL
+ , c1 , n1-o , l1-o
+ , c1 , n2-o , l2-o
diff --git a/tests/example_xls/case_insensitivity.md b/tests/example_xls/case_insensitivity.md
new file mode 100644
index 00000000..b4751f6b
--- /dev/null
+++ b/tests/example_xls/case_insensitivity.md
@@ -0,0 +1,23 @@
+| SURVEY |
+| | TYPE | NAME | LABEL::EN | CHOICE_FILTER |
+| | select_one c1 | q1 | Are you good? | |
+| | select_one_external c1 | q2 | Where are you? | YES_NO=${q1} |
+| | osm c1 | q3 | Where exactly? | |
+| CHOICES |
+| | LIST_NAME | NAME | LABEL::EN |
+| | c1 | n1-c | l1-c |
+| | c1 | n2-c | l2-c |
+| SETTINGS |
+| | FORM_TITLE | FORM_ID | DEFAULT_LANGUAGE |
+| | Yes or no | YesNo | EN |
+| EXTERNAL_CHOICES |
+| | LIST_NAME | NAME | LABEL | YES_NO |
+| | c1 | n1-e | l1-e | yes |
+| | c1 | n2-e | l2-e | yes |
+| ENTITIES |
+| | DATASET | LABEL |
+| | e1 | l1 |
+| OSM |
+| | LIST_NAME | NAME | LABEL |
+| | c1 | n1-o | l1-o |
+| | c1 | n2-o | l2-o |
diff --git a/tests/example_xls/case_insensitivity.xls b/tests/example_xls/case_insensitivity.xls
new file mode 100644
index 00000000..853b759d
Binary files /dev/null and b/tests/example_xls/case_insensitivity.xls differ
diff --git a/tests/example_xls/case_insensitivity.xlsx b/tests/example_xls/case_insensitivity.xlsx
new file mode 100644
index 00000000..852e8ba2
Binary files /dev/null and b/tests/example_xls/case_insensitivity.xlsx differ
diff --git a/tests/example_xls/include.md b/tests/example_xls/include.md
new file mode 100644
index 00000000..b83d34d7
--- /dev/null
+++ b/tests/example_xls/include.md
@@ -0,0 +1,6 @@
+| survey |
+| | type | name | label:English |
+| | text | name | What's your name? |
+| | include | yes_or_no_question | Yes or no question section |
+| choices |
+| | list name | name | label:english |
diff --git a/tests/example_xls/include_json.md b/tests/example_xls/include_json.md
new file mode 100644
index 00000000..5adc5396
--- /dev/null
+++ b/tests/example_xls/include_json.md
@@ -0,0 +1,5 @@
+| survey |
+| | type | name | label:English |
+| | include | how_old_are_you | |
+| choices |
+| | list name | name | label:english |
diff --git a/tests/example_xls/loop.md b/tests/example_xls/loop.md
new file mode 100644
index 00000000..0a6ad124
--- /dev/null
+++ b/tests/example_xls/loop.md
@@ -0,0 +1,11 @@
+| survey |
+| | type | name | label:english |
+| | select all that apply from toilet_type or specify other | available_toilet_types | What type of toilets are on the premises? |
+| | begin loop over toilet_type | loop_toilet_types |
+| | integer | number | How many %(label)s are on the premises? |
+| | end loop |
+| choices |
+| | list name | name | label:english |
+| | toilet_type | pit_latrine_with_slab | Pit latrine with slab |
+| | toilet_type | open_pit_latrine | Pit latrine without slab/open pit |
+| | toilet_type | bucket_system | Bucket system |
diff --git a/tests/example_xls/specify_other.md b/tests/example_xls/specify_other.md
new file mode 100644
index 00000000..0d888ff9
--- /dev/null
+++ b/tests/example_xls/specify_other.md
@@ -0,0 +1,7 @@
+| survey |
+| | type | name | label:English |
+| | select one from sexes or specify other | sex | What sex are you? |
+| choices |
+| | list name | name | label:English |
+| | sexes | male | Male |
+| | sexes | female | Female |
diff --git a/tests/example_xls/text_and_integer.md b/tests/example_xls/text_and_integer.md
new file mode 100644
index 00000000..4f9e33b5
--- /dev/null
+++ b/tests/example_xls/text_and_integer.md
@@ -0,0 +1,6 @@
+| survey |
+| | type | name | label:english |
+| | text | your_name | What is your name? |
+| | integer | your_age | How many years old are you? |
+| choices |
+| | list name | name | label:english |
diff --git a/tests/example_xls/text_and_integer.xlsx b/tests/example_xls/text_and_integer.xlsx
index b26558d3..2611c70d 100644
Binary files a/tests/example_xls/text_and_integer.xlsx and b/tests/example_xls/text_and_integer.xlsx differ
diff --git a/tests/example_xls/yes_or_no_question.md b/tests/example_xls/yes_or_no_question.md
new file mode 100644
index 00000000..07f3be72
--- /dev/null
+++ b/tests/example_xls/yes_or_no_question.md
@@ -0,0 +1,7 @@
+| survey |
+| | type | name | label:english |
+| | select one from yes_or_no | good_day | have you had a good day today? |
+| choices |
+| | list name | name | label:english |
+| | yes_or_no | yes | yes |
+| | yes_or_no | no | no |
diff --git a/tests/fixtures/strings.ini b/tests/fixtures/strings.ini
index 90dfedd3..5ca67d8a 100644
--- a/tests/fixtures/strings.ini
+++ b/tests/fixtures/strings.ini
@@ -5,14 +5,10 @@ test_answers_can_be_imported_from_xml =