diff --git a/doc/release-notes/10837-exclude-others-ns-harvesting-oai-dc.md b/doc/release-notes/10837-exclude-others-ns-harvesting-oai-dc.md new file mode 100644 index 00000000000..c1826bfaed5 --- /dev/null +++ b/doc/release-notes/10837-exclude-others-ns-harvesting-oai-dc.md @@ -0,0 +1,3 @@ +Some repositories extend the "oai_dc" metadata prefix with specific namespaces. Previously, harvesting these datasets was not possible, because an XML parsing error was raised. + +PR [#10837](https://github.com/IQSS/dataverse/pull/10837) allows these datasets to be harvested by excluding tags with namespaces that are not "dc:", so that only metadata with the "dc" namespace is harvested. diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index d32a548c8bf..41a57665010 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -205,8 +205,17 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException private void processXMLElement(XMLStreamReader xmlr, String currentPath, String openingTag, ForeignMetadataFormatMapping foreignFormatMapping, DatasetDTO datasetDTO) throws XMLStreamException { logger.fine("entering processXMLElement; ("+currentPath+")"); - - for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + + while (xmlr.hasNext()) { + + int event; + try { + event = xmlr.next(); + } catch (XMLStreamException ex) { + logger.warning("Error occurred in the XML parsing : " + ex.getMessage()); + continue; // Skip Undeclared namespace prefix and Unexpected close tag related to com.ctc.wstx.exc.WstxParsingException + } + if (event == XMLStreamConstants.START_ELEMENT) { String currentElement = xmlr.getLocalName();