diff --git a/cms/djangoapps/contentstore/views/tests/test_import_export.py b/cms/djangoapps/contentstore/views/tests/test_import_export.py index 8cbd11c40cb0..51c58162e8c7 100644 --- a/cms/djangoapps/contentstore/views/tests/test_import_export.py +++ b/cms/djangoapps/contentstore/views/tests/test_import_export.py @@ -1256,7 +1256,25 @@ def assert_problem_definition(self, course_location, expected_problem_content): '\n
\n    x=10 print("hello \n")\n  
\n ' '\n
\n' - ] + ], + [ + '' + '' + '' + '

' + '""' + 'Text' + '

' + '
', + + '\n ' + '\n ' + '

' + '""' + 'Text' + '

\n' + '
\n' + ], ) @ddt.unpack def test_problem_content_on_course_export_import(self, problem_data, expected_problem_content): diff --git a/common/lib/xmodule/xmodule/library_content_module.py b/common/lib/xmodule/xmodule/library_content_module.py index d81cb9804842..d3bce47bb8af 100644 --- a/common/lib/xmodule/xmodule/library_content_module.py +++ b/common/lib/xmodule/xmodule/library_content_module.py @@ -12,6 +12,7 @@ import bleach from lazy import lazy from lxml import etree +from lxml.etree import XMLSyntaxError from opaque_keys.edx.locator import LibraryLocator from pkg_resources import resource_string from web_fragments.fragment import Fragment @@ -666,10 +667,20 @@ def get_content_titles(self): @classmethod def definition_from_xml(cls, xml_object, system): - children = [ - system.process_xml(etree.tostring(child)).scope_ids.usage_id - for child in xml_object.getchildren() - ] + children = [] + + for child in xml_object.getchildren(): + try: + children.append(system.process_xml(etree.tostring(child)).scope_ids.usage_id) + except (XMLSyntaxError, AttributeError): + msg = ( + "Unable to load child when parsing Library Content Block. " + "This can happen when a comment is manually added to the course export." + ) + logger.error(msg) + if system.error_tracker is not None: + system.error_tracker(msg) + definition = { attr_name: json.loads(attr_value) for attr_name, attr_value in xml_object.attrib.items() diff --git a/common/lib/xmodule/xmodule/modulestore/xml.py b/common/lib/xmodule/xmodule/modulestore/xml.py index d87bee193247..c84c7be722ee 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml.py +++ b/common/lib/xmodule/xmodule/modulestore/xml.py @@ -39,8 +39,7 @@ from .exceptions import ItemNotFoundError from .inheritance import InheritanceKeyValueStore, compute_inherited_metadata, inheriting_field_data -edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, - remove_comments=True, remove_blank_text=True) +edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True) etree.set_default_parser(edx_xml_parser) diff --git a/common/lib/xmodule/xmodule/tests/test_library_content.py b/common/lib/xmodule/xmodule/tests/test_library_content.py index 8a287acf88f2..628f471e88eb 100644 --- a/common/lib/xmodule/xmodule/tests/test_library_content.py +++ b/common/lib/xmodule/xmodule/tests/test_library_content.py @@ -74,18 +74,14 @@ class TestLibraryContentExportImport(LibraryContentTest): """ Export and import tests for LibraryContentBlock """ + def setUp(self): + super().setUp() - maxDiff = None - - def test_xml_export_import_cycle(self): - """ - Test the export-import cycle. - """ # Children will only set after calling this. self.lc_block.refresh_children() - lc_block = self.store.get_item(self.lc_block.location) + self.lc_block = self.store.get_item(self.lc_block.location) - expected_olx = ( + self.expected_olx = ( '\n' ' \n' @@ -94,44 +90,78 @@ def test_xml_export_import_cycle(self): ' \n' '\n' ).format( - block=lc_block, + block=self.lc_block, ) - export_fs = MemoryFS() # Set the virtual FS to export the olx to. - lc_block.runtime._descriptor_system.export_fs = export_fs # pylint: disable=protected-access + self.export_fs = MemoryFS() + self.lc_block.runtime._descriptor_system.export_fs = self.export_fs # pylint: disable=protected-access + + # Prepare runtime for the import. + self.runtime = TestImportSystem(load_error_modules=True, course_id=self.lc_block.location.course_key) + self.runtime.resources_fs = self.export_fs + self.id_generator = Mock() # Export the olx. node = etree.Element("unknown_root") - lc_block.add_xml_to_node(node) + self.lc_block.add_xml_to_node(node) + + def _verify_xblock_properties(self, imported_lc_block): + """ + Check the new XBlock has the same properties as the old one. + """ + assert imported_lc_block.display_name == self.lc_block.display_name + assert imported_lc_block.source_library_id == self.lc_block.source_library_id + assert imported_lc_block.source_library_version == self.lc_block.source_library_version + assert imported_lc_block.mode == self.lc_block.mode + assert imported_lc_block.max_count == self.lc_block.max_count + assert imported_lc_block.capa_type == self.lc_block.capa_type + assert len(imported_lc_block.children) == len(self.lc_block.children) + assert imported_lc_block.children == self.lc_block.children - # Read it back - with export_fs.open('{dir}/{file_name}.xml'.format( - dir=lc_block.scope_ids.usage_id.block_type, - file_name=lc_block.scope_ids.usage_id.block_id + def test_xml_export_import_cycle(self): + """ + Test the export-import cycle. + """ + # Read back the olx. + with self.export_fs.open('{dir}/{file_name}.xml'.format( + dir=self.lc_block.scope_ids.usage_id.block_type, + file_name=self.lc_block.scope_ids.usage_id.block_id )) as f: exported_olx = f.read() # And compare. - assert exported_olx == expected_olx - - runtime = TestImportSystem(load_error_modules=True, course_id=lc_block.location.course_key) - runtime.resources_fs = export_fs + assert exported_olx == self.expected_olx # Now import it. olx_element = etree.fromstring(exported_olx) - id_generator = Mock() - imported_lc_block = LibraryContentBlock.parse_xml(olx_element, runtime, None, id_generator) - - # Check the new XBlock has the same properties as the old one. - assert imported_lc_block.display_name == lc_block.display_name - assert imported_lc_block.source_library_id == lc_block.source_library_id - assert imported_lc_block.source_library_version == lc_block.source_library_version - assert imported_lc_block.mode == lc_block.mode - assert imported_lc_block.max_count == lc_block.max_count - assert imported_lc_block.capa_type == lc_block.capa_type - assert len(imported_lc_block.children) == 4 - assert imported_lc_block.children == lc_block.children + imported_lc_block = LibraryContentBlock.parse_xml(olx_element, self.runtime, None, self.id_generator) + + self._verify_xblock_properties(imported_lc_block) + + def test_xml_import_with_comments(self): + """ + Test that XML comments within LibraryContentBlock are ignored during the import. + """ + olx_with_comments = ( + '\n' + '\n' + '\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ).format( + block=self.lc_block, + ) + + # Import the olx. + olx_element = etree.fromstring(olx_with_comments) + imported_lc_block = LibraryContentBlock.parse_xml(olx_element, self.runtime, None, self.id_generator) + + self._verify_xblock_properties(imported_lc_block) class LibraryContentBlockTestMixin: diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index ef6f01b8fe6f..1bfcf03d03c4 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -18,9 +18,7 @@ log = logging.getLogger(__name__) # assume all XML files are persisted as utf-8. -EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False, - remove_comments=True, remove_blank_text=True, - encoding='utf-8') +EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True, encoding='utf-8') def name_to_pathname(name):