Skip to content

Commit

Permalink
#326 - Setup PDF/A conformance testing module using VeraPDF.
Browse files Browse the repository at this point in the history
  • Loading branch information
danfickle committed Feb 13, 2019
1 parent 5c939ad commit 13b4e3b
Show file tree
Hide file tree
Showing 14 changed files with 1,092 additions and 6 deletions.
1 change: 0 additions & 1 deletion openhtmltopdf-examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
<configuration>
<skip>true</skip>
</configuration>
Expand Down
674 changes: 674 additions & 0 deletions openhtmltopdf-pdfa-testing/LICENSE-GPL-3.txt

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions openhtmltopdf-pdfa-testing/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# PDF/A Testing Module
This module is for the automatic testing of PDF/A documents using [VeraPDF](http://docs.verapdf.org/develop/). It is not deployed to Maven Central.

## License
Unlike the rest of the project, this module is distributed under the GPL3, due to the license of VeraPDF.
85 changes: 85 additions & 0 deletions openhtmltopdf-pdfa-testing/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven build descriptor for the PDF/A testing module.
The module renders documents with openhtmltopdf and checks them with VeraPDF.
It is GPL licensed (unlike the rest of the project) and is never deployed.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

<modelVersion>4.0.0</modelVersion>

<!-- Inherits shared build settings from the project-wide parent POM. -->
<parent>
<groupId>com.openhtmltopdf</groupId>
<artifactId>openhtmltopdf-parent</artifactId>
<version>0.0.1-RC18-SNAPSHOT</version>
</parent>

<artifactId>openhtmltopdf-pdfa-testing</artifactId>

<packaging>jar</packaging>

<name>Openhtmltopdf PDF/A Testing</name>
<description>Code to test against the PDF/A standard. It is not deployed with a release and unlike the rest of the project, is licensed under the GPL.</description>

<!-- Declares this module's own licence; it differs from the rest of the project because of the VeraPDF dependency below. -->
<licenses>
<license>
<name>GNU General public license GPLv3+</name>
<url>https://www.gnu.org/licenses/gpl-3.0.en.html</url>
</license>
</licenses>

<dependencies>
<!-- Sibling modules under test; pinned to the same version as this module. -->
<dependency>
<groupId>com.openhtmltopdf</groupId>
<artifactId>openhtmltopdf-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.openhtmltopdf</groupId>
<artifactId>openhtmltopdf-pdfbox</artifactId>
<version>${project.version}</version>
</dependency>

<!-- PDF/A validator used by the conformance tests. -->
<dependency>
<!-- NOTE: VeraPDF is licensed under the GPL or MPL. -->
<groupId>org.verapdf</groupId>
<artifactId>validation-model</artifactId>
<version>1.12.1</version>
</dependency>

<!-- Test framework; only on the test classpath. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Never deploy this artifact: the module exists only for testing. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>

<!-- Turns off the javadoc "use" option for this module. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<configuration>
<use>false</use>
</configuration>
</plugin>
</plugins>

<resources>
<!-- Standard main resources directory. -->
<resource>
<directory>src/main/resources</directory>
</resource>
<!--
Copies files matching LICENSE* from the parent directory into META-INF of the build output.
NOTE(review): this module keeps its GPL text in LICENSE-GPL-3.txt inside its own directory,
which "../" does not cover. Confirm that the intended licence file ends up in META-INF.
-->
<resource>
<directory>../</directory>
<targetPath>${project.build.outputDirectory}/META-INF</targetPath>
<includes>
<include>LICENSE*</include>
</includes>
</resource>
</resources>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package com.openhtmltopdf.pdfa.testing;

import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.verapdf.pdfa.Foundries;
import org.verapdf.pdfa.PDFAParser;
import org.verapdf.pdfa.PDFAValidator;
import org.verapdf.pdfa.VeraGreenfieldFoundryProvider;
import org.verapdf.pdfa.flavours.PDFAFlavour;
import org.verapdf.pdfa.results.TestAssertion.Status;
import org.verapdf.pdfa.results.TestAssertion;
import org.verapdf.pdfa.results.ValidationResult;

import com.openhtmltopdf.pdfboxout.PdfBoxRenderer;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder.PdfAConformance;

/**
 * Renders HTML test resources to PDF with a requested PDF/A conformance level
 * and validates the result with VeraPDF.
 *
 * <p>Each test renders one resource from {@code /html/} on the classpath, writes
 * the PDF under {@code target/test/pdf/} for manual inspection and asserts that
 * VeraPDF reports no failed assertions.
 */
public class PdfATester {
    /** Directory that receives files copied out of the classpath for the renderer. */
    private static final String ARTEFACT_DIR = "target/test/artefacts/";

    /** On-disk copy of the test font; the builder is given a {@link File} for it. */
    private static final String FONT_FILE = ARTEFACT_DIR + "Karla-Bold.ttf";

    /** Directory that receives the rendered PDFs. */
    private static final String PDF_DIR = "target/test/pdf/";

    /** Stand-in for {@code null} keys, which the concurrent key set cannot store. */
    private static final Object NULL_KEY = new Object();

    /**
     * Initialises the VeraPDF Greenfield foundry provider once before the tests in
     * this class run.
     */
    @BeforeClass
    public static void initialize() {
        VeraGreenfieldFoundryProvider.initialise();
    }

    /**
     * Creates a stateful predicate that accepts an element only the first time its
     * extracted key is seen.
     *
     * <p>A {@code null} key is treated as one ordinary key value rather than causing
     * a {@link NullPointerException} in the backing concurrent set.
     *
     * @param keyExtractor maps an element to the key used for de-duplication
     * @param <T> element type
     * @return a predicate suitable for {@code Stream.filter}; create a new one per stream
     */
    public static <T> Predicate<T> distinctByKey(Function<? super T, ?> keyExtractor) {
        Set<Object> seen = ConcurrentHashMap.newKeySet();
        return t -> {
            Object key = keyExtractor.apply(t);
            return seen.add(key == null ? NULL_KEY : key);
        };
    }

    /**
     * Opens a classpath resource, failing with a descriptive message when it is absent.
     *
     * @param path absolute classpath location, e.g. {@code /fonts/Karla-Bold.ttf}
     * @return an open stream that the caller must close
     * @throws IllegalStateException if the resource is not on the classpath
     */
    private static InputStream openResource(String path) {
        InputStream stream = PdfATester.class.getResourceAsStream(path);
        if (stream == null) {
            // Without this check the failure surfaces later as an anonymous NullPointerException.
            throw new IllegalStateException("Test resource not found on classpath: " + path);
        }
        return stream;
    }

    /**
     * Renders {@code /html/<resource>.html} to PDF and validates it against a PDF/A flavour.
     *
     * <p>The PDF is written to {@code target/test/pdf/<resource>--<flavour>.pdf} and the
     * distinct failed rules (one entry per rule id) are printed to {@code System.err}.
     *
     * @param resource base name of the HTML resource, without directory or extension
     * @param flavour VeraPDF flavour to validate against
     * @param conform conformance level requested from the renderer
     * @return {@code true} if there are no failed assertions and VeraPDF reports compliance
     * @throws IllegalStateException if a required classpath resource is missing
     * @throws Exception if reading, rendering, saving or validating fails
     */
    public boolean run(String resource, PDFAFlavour flavour, PdfAConformance conform) throws Exception {
        byte[] htmlBytes;
        try (InputStream is = openResource("/html/" + resource + ".html")) {
            htmlBytes = IOUtils.toByteArray(is);
        }
        String html = new String(htmlBytes, StandardCharsets.UTF_8);

        // The builder takes the font as a File, so copy it out of the classpath once.
        Files.createDirectories(Paths.get(ARTEFACT_DIR));
        if (!Files.exists(Paths.get(FONT_FILE))) {
            try (InputStream in = openResource("/fonts/Karla-Bold.ttf")) {
                Files.copy(in, Paths.get(FONT_FILE));
            }
        }

        byte[] pdfBytes;

        try (PDDocument doc = new PDDocument()) {
            PdfRendererBuilder builder = new PdfRendererBuilder();
            builder.usePDDocument(doc);
            builder.useFastMode();
            builder.usePdfAConformance(conform);
            builder.useFont(new File(FONT_FILE), "TestFont");
            builder.withHtmlContent(html, PdfATester.class.getResource("/html/").toString());

            // Render into our own PDDocument and keep it open so the output intent can be added.
            try (PdfBoxRenderer renderer = builder.buildPdfRenderer()) {
                renderer.createPDFWithoutClosing();
            }

            // Attach an sRGB output intent to the document before saving.
            try (InputStream colorProfile = openResource("/colorspaces/sRGB.icc")) {
                PDOutputIntent oi = new PDOutputIntent(doc, colorProfile);
                oi.setInfo("sRGB IEC61966-2.1");
                oi.setOutputCondition("sRGB IEC61966-2.1");
                oi.setOutputConditionIdentifier("sRGB IEC61966-2.1");
                oi.setRegistryName("http://www.color.org");
                doc.getDocumentCatalog().addOutputIntent(oi);
            }

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            doc.save(baos);
            pdfBytes = baos.toByteArray();
        }

        // Keep a copy on disk so failures can be inspected by hand.
        Files.createDirectories(Paths.get(PDF_DIR));
        Files.write(Paths.get(PDF_DIR + resource + "--" + flavour + ".pdf"), pdfBytes);

        PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, true);
        try (InputStream is = new ByteArrayInputStream(pdfBytes);
             PDFAParser parser = Foundries.defaultInstance().createParser(is, flavour)) {

            ValidationResult result = validator.validate(parser);

            // Report each failed rule once, however many times it was violated.
            List<TestAssertion> asserts = result.getTestAssertions().stream()
                    .filter(ta -> ta.getStatus() == Status.FAILED)
                    .filter(distinctByKey(TestAssertion::getRuleId))
                    .collect(Collectors.toList());

            String errs = asserts.stream()
                    .map(ta -> String.format("%s\n %s", ta.getMessage().replaceAll("\\s+", " "), ta.getLocation().getContext()))
                    .collect(Collectors.joining("\n ", "[\n ", "\n]"));

            System.err.format("\nDISTINCT ERRORS(%s--%s) (%d): %s\n", resource, flavour, asserts.size(), errs);

            return asserts.isEmpty() && result.isCompliant();
        }
    }

    @Ignore("Failing, multiple. See issue number 326.")
    @Test
    public void testAllInOnePdfA1b() throws Exception {
        assertTrue(run("all-in-one", PDFAFlavour.PDFA_1_B, PdfAConformance.PDFA_1_B));
    }

    // NOTE(review): the remaining tests are presumably ignored for the same failures as
    // the PDF/A-1b test above; confirm and record the reason on each annotation.
    @Ignore
    @Test
    public void testAllInOnePdfA1a() throws Exception {
        assertTrue(run("all-in-one", PDFAFlavour.PDFA_1_A, PdfAConformance.PDFA_1_A));
    }

    @Ignore
    @Test
    public void testAllInOnePdfA2b() throws Exception {
        assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_B, PdfAConformance.PDFA_2_B));
    }

    @Ignore
    @Test
    public void testAllInOnePdfA2a() throws Exception {
        assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_A, PdfAConformance.PDFA_2_A));
    }

    @Ignore
    @Test
    public void testAllInOnePdfA2u() throws Exception {
        assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_U, PdfAConformance.PDFA_2_U));
    }

}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
The profiles in the base directory are provided according to different licenses.


Group A
sRGB, LCMSLAB.ICM, LCMSXYZ.ICM, the compatibleWithAdobeRGB.icc and the
Gray.icc, CineonLog_M*.icc, CineLogCurve.icc profiles are all zlib licensed.
Even so, it is highly recommended to rename them before editing.


Group B
The eciRGB*.icc profiles come with their license in license.rtf.


Group C
PhotoGamutRGB_avg6c.icc is licensed to be distributed freely. Modifications
are not allowed.



Additionally all profiles come with the following disclaimer. The provided
ICC Profiles in the package are called DATA in the following statement.


NO WARRANTY

BECAUSE THE DATA IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE DATA, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE DATA "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE DATA IS WITH YOU. SHOULD THE
DATA PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE DATA AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE DATA (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE DATA TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
The zlib/libpng License

Copyright (c) 2008 Kai-Uwe Behrmann

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.

2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.

3. This notice may not be removed or altered from any source
distribution.

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Included are profiles created by Marti Maria (littleCMS) <http://www.littlecms.com>:

# CIE*Lab
# CIE*XYZ
# sRGB

Various contributors:

# LStar-RGB from ColorSolutions <http://www.basICColor.com>
# Photogamut-RGB from the Photogamut workgroup <http://www.photogamut.org>
# Cineon and Gray from Kai-Uwe Behrmann <http://www.behrmann.name>
# compatibleWithAdobeRGB1998 from Graeme Gill <http://www.argyllcms.com>
Binary file not shown.
Loading

0 comments on commit 13b4e3b

Please sign in to comment.