-
Notifications
You must be signed in to change notification settings - Fork 363
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#326 - Setup PDF/A conformance testing module using VeraPDF.
- Loading branch information
Showing
14 changed files
with
1,092 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# PDF/A Testing Module | ||
This module is for the automatic testing of PDF/A documents using [VeraPDF](http://docs.verapdf.org/develop/). It is not deployed to Maven Central. | ||
|
||
## License | ||
Unlike the rest of the project, this module is distributed under the GPL3, due to the license of VeraPDF. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
|
||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<parent> | ||
<groupId>com.openhtmltopdf</groupId> | ||
<artifactId>openhtmltopdf-parent</artifactId> | ||
<version>0.0.1-RC18-SNAPSHOT</version> | ||
</parent> | ||
|
||
<artifactId>openhtmltopdf-pdfa-testing</artifactId> | ||
|
||
<packaging>jar</packaging> | ||
|
||
<name>Openhtmltopdf PDF/A Testing</name> | ||
<description>Code to test against the PDF/A standard. It is not deployed with a release and, unlike the rest of the project, is licensed under the GPL.</description> | ||
|
||
<licenses> | ||
<license> | ||
<name>GNU General public license GPLv3+</name> | ||
<url>https://www.gnu.org/licenses/gpl-3.0.en.html</url> | ||
</license> | ||
</licenses> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>com.openhtmltopdf</groupId> | ||
<artifactId>openhtmltopdf-core</artifactId> | ||
<version>${project.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.openhtmltopdf</groupId> | ||
<artifactId>openhtmltopdf-pdfbox</artifactId> | ||
<version>${project.version}</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<!-- NOTE: VeraPDF is licensed under the GPL or MPL. --> | ||
<groupId>org.verapdf</groupId> | ||
<artifactId>validation-model</artifactId> | ||
<version>1.12.1</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<version>4.12</version> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
|
||
<build> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-deploy-plugin</artifactId> | ||
<configuration> | ||
<skip>true</skip> | ||
</configuration> | ||
</plugin> | ||
|
||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-javadoc-plugin</artifactId> | ||
<configuration> | ||
<use>false</use> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
|
||
<resources> | ||
<resource> | ||
<directory>src/main/resources</directory> | ||
</resource> | ||
<resource> | ||
<directory>../</directory> | ||
<targetPath>${project.build.outputDirectory}/META-INF</targetPath> | ||
<includes> | ||
<include>LICENSE*</include> | ||
</includes> | ||
</resource> | ||
</resources> | ||
</build> | ||
</project> |
146 changes: 146 additions & 0 deletions
146
openhtmltopdf-pdfa-testing/src/test/java/com/openhtmltopdf/pdfa/testing/PdfATester.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
package com.openhtmltopdf.pdfa.testing; | ||
|
||
import static org.junit.Assert.assertTrue; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.ByteArrayOutputStream; | ||
import java.io.File; | ||
import java.io.InputStream; | ||
import java.nio.charset.StandardCharsets; | ||
import java.nio.file.Files; | ||
import java.nio.file.Paths; | ||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.function.Function; | ||
import java.util.function.Predicate; | ||
import java.util.stream.Collectors; | ||
|
||
import org.apache.pdfbox.io.IOUtils; | ||
import org.apache.pdfbox.pdmodel.PDDocument; | ||
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent; | ||
import org.junit.BeforeClass; | ||
import org.junit.Ignore; | ||
import org.junit.Test; | ||
import org.verapdf.pdfa.Foundries; | ||
import org.verapdf.pdfa.PDFAParser; | ||
import org.verapdf.pdfa.PDFAValidator; | ||
import org.verapdf.pdfa.VeraGreenfieldFoundryProvider; | ||
import org.verapdf.pdfa.flavours.PDFAFlavour; | ||
import org.verapdf.pdfa.results.TestAssertion.Status; | ||
import org.verapdf.pdfa.results.TestAssertion; | ||
import org.verapdf.pdfa.results.ValidationResult; | ||
|
||
import com.openhtmltopdf.pdfboxout.PdfBoxRenderer; | ||
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder; | ||
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder.PdfAConformance; | ||
|
||
public class PdfATester { | ||
@BeforeClass | ||
public static void initialize() { | ||
VeraGreenfieldFoundryProvider.initialise(); | ||
} | ||
|
||
public static <T> Predicate<T> distinctByKey(Function<? super T, ?> keyExtractor) { | ||
Set<Object> seen = ConcurrentHashMap.newKeySet(); | ||
return t -> seen.add(keyExtractor.apply(t)); | ||
} | ||
|
||
public boolean run(String resource, PDFAFlavour flavour, PdfAConformance conform) throws Exception { | ||
byte[] htmlBytes = null; | ||
try (InputStream is = PdfATester.class.getResourceAsStream("/html/" + resource + ".html")) { | ||
htmlBytes = IOUtils.toByteArray(is); | ||
} | ||
String html = new String(htmlBytes, StandardCharsets.UTF_8); | ||
|
||
Files.createDirectories(Paths.get("target/test/artefacts/")); | ||
if (!Files.exists(Paths.get("target/test/artefacts/Karla-Bold.ttf"))) { | ||
try (InputStream in = PdfATester.class.getResourceAsStream("/fonts/Karla-Bold.ttf")) { | ||
Files.copy(in, Paths.get("target/test/artefacts/Karla-Bold.ttf")); | ||
} | ||
} | ||
|
||
byte[] pdfBytes; | ||
|
||
try (PDDocument doc = new PDDocument()) { | ||
PdfRendererBuilder builder = new PdfRendererBuilder(); | ||
builder.usePDDocument(doc); | ||
builder.useFastMode(); | ||
//builder.testMode(true); | ||
builder.usePdfAConformance(conform); | ||
builder.useFont(new File("target/test/artefacts/Karla-Bold.ttf"), "TestFont"); | ||
builder.withHtmlContent(html, PdfATester.class.getResource("/html/").toString()); | ||
|
||
try (PdfBoxRenderer renderer = builder.buildPdfRenderer()) { | ||
renderer.createPDFWithoutClosing(); | ||
} | ||
|
||
try (InputStream colorProfile = PdfATester.class.getResourceAsStream("/colorspaces/sRGB.icc")) { | ||
PDOutputIntent oi = new PDOutputIntent(doc, colorProfile); | ||
oi.setInfo("sRGB IEC61966-2.1"); | ||
oi.setOutputCondition("sRGB IEC61966-2.1"); | ||
oi.setOutputConditionIdentifier("sRGB IEC61966-2.1"); | ||
oi.setRegistryName("http://www.color.org"); | ||
doc.getDocumentCatalog().addOutputIntent(oi); | ||
} | ||
|
||
ByteArrayOutputStream baos = new ByteArrayOutputStream(); | ||
doc.save(baos); | ||
pdfBytes = baos.toByteArray(); | ||
} | ||
|
||
Files.createDirectories(Paths.get("target/test/pdf/")); | ||
Files.write(Paths.get("target/test/pdf/" + resource + "--" + flavour + ".pdf"), pdfBytes); | ||
|
||
PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, true); | ||
try (InputStream is = new ByteArrayInputStream(pdfBytes); | ||
PDFAParser parser = Foundries.defaultInstance().createParser(is, flavour)) { | ||
|
||
ValidationResult result = validator.validate(parser); | ||
|
||
List<TestAssertion> asserts = result.getTestAssertions().stream() | ||
.filter(ta -> ta.getStatus() == Status.FAILED) | ||
.filter(distinctByKey(TestAssertion::getRuleId)) | ||
.collect(Collectors.toList()); | ||
|
||
String errs = asserts.stream() | ||
.map(ta -> String.format("%s\n %s", ta.getMessage().replaceAll("\\s+", " "), ta.getLocation().getContext())) | ||
.collect(Collectors.joining("\n ", "[\n ", "\n]")); | ||
|
||
System.err.format("\nDISTINCT ERRORS(%s--%s) (%d): %s\n", resource, flavour, asserts.size(), errs); | ||
|
||
return asserts.isEmpty() && result.isCompliant(); | ||
} | ||
} | ||
|
||
@Ignore // Failing, multiple. See issue number 326. | ||
@Test | ||
public void testAllInOnePdfA1b() throws Exception { | ||
assertTrue(run("all-in-one", PDFAFlavour.PDFA_1_B, PdfAConformance.PDFA_1_B)); | ||
} | ||
|
||
@Ignore | ||
@Test | ||
public void testAllInOnePdfA1a() throws Exception { | ||
assertTrue(run("all-in-one", PDFAFlavour.PDFA_1_A, PdfAConformance.PDFA_1_A)); | ||
} | ||
|
||
@Ignore | ||
@Test | ||
public void testAllInOnePdfA2b() throws Exception { | ||
assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_B, PdfAConformance.PDFA_2_B)); | ||
} | ||
|
||
@Ignore | ||
@Test | ||
public void testAllInOnePdfA2a() throws Exception { | ||
assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_A, PdfAConformance.PDFA_2_A)); | ||
} | ||
|
||
@Ignore | ||
@Test | ||
public void testAllInOnePdfA2u() throws Exception { | ||
assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_U, PdfAConformance.PDFA_2_U)); | ||
} | ||
|
||
} |
Binary file not shown.
45 changes: 45 additions & 0 deletions
45
openhtmltopdf-pdfa-testing/src/test/resources/colorspaces/sRGB.icc.COPYING.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
The profiles in the base directory are provided according to different licenses. | ||
|
||
|
||
Group A | ||
sRGB, LCMSLAB.ICM, LCMSXYZ.ICM, the compatibleWithAdobeRGB.icc and the | ||
Gray.icc, CineonLog_M*.icc, CineLogCurve.icc profiles are all zlib licensed. | ||
Even though it is highly recommended to rename them before editing. | ||
|
||
|
||
Group B | ||
The eciRGB*.icc profiles come with their license in license.rtf. | ||
|
||
|
||
Group C | ||
PhotoGamutRGB_avg6c.icc is licensed to be distributed freely. Modifications | ||
are not allowed. | ||
|
||
|
||
|
||
Additionally all profiles come with the following disclaimer. The provided | ||
ICC Profiles in the package are called DATA in the following statement. | ||
|
||
|
||
NO WARRANTY | ||
|
||
BECAUSE THE DATA IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY | ||
FOR THE DATA, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN | ||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES | ||
PROVIDE THE DATA "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED | ||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS | ||
TO THE QUALITY AND PERFORMANCE OF THE DATA IS WITH YOU. SHOULD THE | ||
DATA PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, | ||
REPAIR OR CORRECTION. | ||
|
||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING | ||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR | ||
REDISTRIBUTE THE DATA AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, | ||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING | ||
OUT OF THE USE OR INABILITY TO USE THE DATA (INCLUDING BUT NOT LIMITED | ||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY | ||
YOU OR THIRD PARTIES OR A FAILURE OF THE DATA TO OPERATE WITH ANY OTHER | ||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE | ||
POSSIBILITY OF SUCH DAMAGES. | ||
|
23 changes: 23 additions & 0 deletions
23
openhtmltopdf-pdfa-testing/src/test/resources/colorspaces/sRGB.icc.LICENSE-ZLIB.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
The zlib/libpng License | ||
|
||
Copyright (c) 2008 Kai-Uwe Behrmann | ||
|
||
This software is provided 'as-is', without any express or implied | ||
warranty. In no event will the authors be held liable for any damages | ||
arising from the use of this software. | ||
|
||
Permission is granted to anyone to use this software for any purpose, | ||
including commercial applications, and to alter it and redistribute it | ||
freely, subject to the following restrictions: | ||
|
||
1. The origin of this software must not be misrepresented; you must not | ||
claim that you wrote the original software. If you use this software | ||
in a product, an acknowledgment in the product documentation would be | ||
appreciated but is not required. | ||
|
||
2. Altered source versions must be plainly marked as such, and must not be | ||
misrepresented as being the original software. | ||
|
||
3. This notice may not be removed or altered from any source | ||
distribution. | ||
|
12 changes: 12 additions & 0 deletions
12
openhtmltopdf-pdfa-testing/src/test/resources/colorspaces/sRGB.icc.README.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
Included are profiles created by Marti Maria (littleCMS) <http://www.littlecms.com>: | ||
|
||
# CIE*Lab | ||
# CIE*XYZ | ||
# sRGB | ||
|
||
Various contributors: | ||
|
||
# LStar-RGB from ColorSolutions <http://www.basICColor.com> | ||
# Photogamut-RGB from the Photogamut workgroup <http://www.photogamut.org> | ||
# Cineon and Gray from Kai-Uwe Behrmann <http://www.behrmann.name> | ||
# compatibleWithAdobeRGB1998 from Graeme Gill <http://www.argyllcms.com> |
Binary file not shown.
Oops, something went wrong.