Skip to content

Commit

Permalink
OOXML strict format (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
pjfanning authored Feb 13, 2021
1 parent 759c5df commit ca5431e
Show file tree
Hide file tree
Showing 12 changed files with 656 additions and 37 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.github.pjfanning</groupId>
<artifactId>excel-streaming-reader</artifactId>
<version>3.0.1</version>
<version>3.0.2-SNAPSHOT</version>
<name>Streaming Excel reader</name>
<description>Streaming Excel reader</description>
<url>https://github.com/pjfanning/excel-streaming-reader</url>
Expand Down
4 changes: 0 additions & 4 deletions src/main/java/com/github/pjfanning/xlsx/StreamingReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import com.github.pjfanning.xlsx.impl.*;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.util.Beta;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
Expand All @@ -17,8 +15,6 @@
* row.
*/
public class StreamingReader implements AutoCloseable {
private static final Logger log = LoggerFactory.getLogger(StreamingReader.class);

private final StreamingWorkbookReader workbook;

public StreamingReader(StreamingWorkbookReader workbook) {
Expand Down
32 changes: 22 additions & 10 deletions src/main/java/com/github/pjfanning/xlsx/XmlUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.*;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;

public class XmlUtils {

private static NamespaceContext transitionalFormatNamespaceContext =
new NamespaceContextImpl("ss", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
private static NamespaceContextImpl strictFormatNamespaceContext =
new NamespaceContextImpl("ss", "http://purl.oclc.org/ooxml/spreadsheetml/main");

public static final String FALSE_AS_STRING = "0";
public static final String TRUE_AS_STRING = "1";

Expand All @@ -29,11 +31,13 @@ public static Document readDocument(InputStream inp) throws IOException, SAXExce
public static NodeList searchForNodeList(Document document, String xpath) {
try {
XPath xp = XPathFactory.newInstance().newXPath();
NamespaceContextImpl nc = new NamespaceContextImpl();
nc.addNamespace("ss", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
xp.setNamespaceContext(nc);
return (NodeList)xp.compile(xpath)
.evaluate(document, XPathConstants.NODESET);
xp.setNamespaceContext(transitionalFormatNamespaceContext);
NodeList nl = (NodeList)xp.compile(xpath).evaluate(document, XPathConstants.NODESET);
if (nl.getLength() == 0) {
xp.setNamespaceContext(strictFormatNamespaceContext);
nl = (NodeList)xp.compile(xpath).evaluate(document, XPathConstants.NODESET);
}
return nl;
} catch(XPathExpressionException e) {
throw new ParseException(e);
}
Expand All @@ -53,7 +57,12 @@ public NamespaceContextImpl() {
addNamespace(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
}

public synchronized void addNamespace(String prefix, String namespaceURI) {
public NamespaceContextImpl(String prefix, String uri) {
this();
addNamespace(prefix, uri);
}

private void addNamespace(String prefix, String namespaceURI) {
urisByPrefix.put(prefix, namespaceURI);
if (prefixesByURI.containsKey(namespaceURI)) {
(prefixesByURI.get(namespaceURI)).add(prefix);
Expand All @@ -64,6 +73,7 @@ public synchronized void addNamespace(String prefix, String namespaceURI) {
}
}

@Override
public String getNamespaceURI(String prefix) {
if (prefix == null)
throw new IllegalArgumentException("prefix cannot be null");
Expand All @@ -73,10 +83,12 @@ public String getNamespaceURI(String prefix) {
return XMLConstants.NULL_NS_URI;
}

@Override
public String getPrefix(String namespaceURI) {
return (String) getPrefixes(namespaceURI).next();
}

@Override
public Iterator getPrefixes(String namespaceURI) {
if (namespaceURI == null)
throw new IllegalArgumentException("namespaceURI cannot be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,17 +161,19 @@ && isSpreadsheetTag(event.asStartElement().getName())) {
currentCell.setType("n");
}

Attribute style = startElement.getAttributeByName(new QName("s"));
if (style != null) {
String indexStr = style.getValue();
try {
int index = Integer.parseInt(indexStr);
currentCell.setCellStyle(stylesTable.getStyleAt(index));
} catch (NumberFormatException nfe) {
log.warn("Ignoring invalid style index {}", indexStr);
if (stylesTable != null) {
Attribute style = startElement.getAttributeByName(new QName("s"));
if (style != null) {
String indexStr = style.getValue();
try {
int index = Integer.parseInt(indexStr);
currentCell.setCellStyle(stylesTable.getStyleAt(index));
} catch (NumberFormatException nfe) {
log.warn("Ignoring invalid style index {}", indexStr);
}
} else {
currentCell.setCellStyle(stylesTable.getStyleAt(0));
}
} else {
currentCell.setCellStyle(stylesTable.getStyleAt(0));
}
} else if("v".equals(tagLocalName) || "t".equals(tagLocalName)) {
insideCharElement = true;
Expand Down Expand Up @@ -304,10 +306,12 @@ void setFormatString(StartElement startElement, StreamingCell cell) {
String cellStyleString = (cellStyle != null) ? cellStyle.getValue() : null;
XSSFCellStyle style = null;

if(cellStyleString != null) {
style = stylesTable.getStyleAt(Integer.parseInt(cellStyleString));
} else if(stylesTable.getNumCellStyles() > 0) {
style = stylesTable.getStyleAt(0);
if (stylesTable != null) {
if(cellStyleString != null) {
style = stylesTable.getStyleAt(Integer.parseInt(cellStyleString));
} else if(stylesTable.getNumCellStyles() > 0) {
style = stylesTable.getStyleAt(0);
}
}

if(style != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import com.github.pjfanning.xlsx.exceptions.ParseException;
import com.github.pjfanning.xlsx.exceptions.ReadException;
import com.github.pjfanning.xlsx.impl.ooxml.OoXmlStrictConverterInputStream;
import com.github.pjfanning.xlsx.impl.ooxml.OoxmlStrictHelper;
import com.github.pjfanning.xlsx.impl.ooxml.XSSFReader;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
Expand All @@ -17,10 +19,9 @@
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFReader.SheetIterator;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.model.ThemesTable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
Expand Down Expand Up @@ -163,8 +164,15 @@ private void loadPackage(OPCPackage pkg) throws IOException, OpenXML4JException,
try {
styles = reader.getStylesTable();
} catch (Exception e) {
log.warn("Failed to read styles table {}", e.toString());
styles = null;
try {
ThemesTable themesTable = OoxmlStrictHelper.getThemesTable(pkg);
StylesTable stylesTable = OoxmlStrictHelper.getStylesTable(pkg);
stylesTable.setTheme(themesTable);
styles = stylesTable;
} catch (Exception e2) {
log.warn("Failed to read styles table {}", e.toString());
styles = null;
}
}

use1904Dates = WorkbookUtil.use1904Dates(reader);
Expand All @@ -184,7 +192,7 @@ void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTab
//Some workbooks have multiple references to the same sheet. Need to filter
//them out before creating the XMLEventReader by keeping track of their URIs.
//The sheets are listed in order, so we must keep track of insertion order.
SheetIterator iter = (SheetIterator) reader.getSheetsData();
XSSFReader.SheetIterator iter = reader.getSheetsData();
Map<URI, InputStream> sheetStreams = new LinkedHashMap<>();
while(iter.hasNext()) {
InputStream is = iter.next();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.github.pjfanning.xlsx.impl;

import com.github.pjfanning.xlsx.impl.ooxml.XSSFReader;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package com.github.pjfanning.xlsx.impl.ooxml;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart;
import org.apache.poi.util.TempFile;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.model.ThemesTable;
import org.apache.poi.xssf.usermodel.XSSFRelation;

import javax.xml.stream.XMLStreamException;
import java.io.*;
import java.util.List;

/**
 * Static helpers that load the theme and style parts of a workbook package after
 * passing them through {@link OoXmlStrictConverter}.
 *
 * <p>Both public methods share one pipeline: take the first package part with the
 * requested content type, stream it through the converter into a temporary file,
 * reload the converted bytes into a {@link MemoryPackagePart}, and build the POI
 * model object from that in-memory part.
 */
public class OoxmlStrictHelper {

  /**
   * Builds a {@link ThemesTable} from the first theme part found in the package.
   *
   * @param pkg the package to search for a theme part
   * @return the themes table, or {@code null} when the package has no theme part
   * @throws IOException if reading the part or reading/writing the temp file fails
   * @throws XMLStreamException if the converter fails while processing the XML
   * @throws InvalidFormatException if the in-memory part cannot be created or loaded
   */
  public static ThemesTable getThemesTable(OPCPackage pkg) throws IOException, XMLStreamException, InvalidFormatException {
    MemoryPackagePart converted =
        convertFirstPart(pkg, XSSFRelation.THEME.getContentType(), "ooxml-strict-themes");
    return converted == null ? null : new ThemesTable(converted);
  }

  /**
   * Builds a {@link StylesTable} from the first styles part found in the package.
   *
   * @param pkg the package to search for a styles part
   * @return the styles table, or {@code null} when the package has no styles part
   * @throws IOException if reading the part or reading/writing the temp file fails
   * @throws XMLStreamException if the converter fails while processing the XML
   * @throws InvalidFormatException if the in-memory part cannot be created or loaded
   */
  public static StylesTable getStylesTable(OPCPackage pkg) throws IOException, XMLStreamException, InvalidFormatException {
    MemoryPackagePart converted =
        convertFirstPart(pkg, XSSFRelation.STYLES.getContentType(), "ooxml-strict-styles");
    return converted == null ? null : new StylesTable(converted);
  }

  /**
   * Runs the first part with the given content type through {@link OoXmlStrictConverter}
   * and returns the converted content as an in-memory part carrying the original part's
   * name and content type.
   *
   * @param pkg the package that owns the part
   * @param contentType content type used to look the part up
   * @param tempFilePrefix prefix of the temporary file that receives the converter output
   * @return the converted part, or {@code null} when no part has that content type
   */
  private static MemoryPackagePart convertFirstPart(OPCPackage pkg, String contentType, String tempFilePrefix)
      throws IOException, XMLStreamException, InvalidFormatException {
    List<PackagePart> parts = pkg.getPartsByContentType(contentType);
    if (parts.isEmpty()) {
      return null;
    }
    // Only the first matching part is used; any further matches are ignored.
    PackagePart part = parts.get(0);
    File tempFile = TempFile.createTempFile(tempFilePrefix, ".xml");
    try {
      try (
          InputStream is = part.getInputStream();
          OutputStream os = new FileOutputStream(tempFile);
          OoXmlStrictConverter converter = new OoXmlStrictConverter(is, os)
      ) {
        // Drain the converter; each call handles the next element until none remain.
        while (converter.convertNextElement()) {
          //continue
        }
      }
      MemoryPackagePart newPart = new MemoryPackagePart(pkg, part.getPartName(), part.getContentType());
      // The file stream is closed before the part is used, so the part does not
      // depend on the temp file once load() has returned.
      try (InputStream is = new FileInputStream(tempFile)) {
        newPart.load(is);
      }
      return newPart;
    } finally {
      // Best-effort cleanup: a failed delete is deliberately not treated as an error.
      tempFile.delete();
    }
  }
}
Loading

0 comments on commit ca5431e

Please sign in to comment.