Skip to content

Commit

Permalink
[RELEASE] iText pdfSweep 4.0.3
Browse files Browse the repository at this point in the history
  • Loading branch information
iText-CI committed Jul 12, 2024
2 parents 97729a1 + 0e953a5 commit 524c8ed
Show file tree
Hide file tree
Showing 42 changed files with 777 additions and 348 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
<parent>
<groupId>com.itextpdf</groupId>
<artifactId>root</artifactId>
<version>8.0.3</version>
<version>8.0.5</version>
<relativePath />
</parent>

<artifactId>cleanup</artifactId>
<version>4.0.2</version>
<version>4.0.3</version>

<name>pdfSweep</name>
<description>Redact PDF documents. If you have to share PDFs with different departments or send them out of house, but they
Expand Down
6 changes: 5 additions & 1 deletion sharpenConfiguration.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
<file path="com/itextpdf/pdfcleanup/util/CleanUpCsCompareUtil.java"/>
<file path="com/itextpdf/pdfcleanup/CleanUpCsCompareUtilTest.java"/>
</fileset>
<fileset reason="This class contains a test which checks results of a bug regarding image type.
This has been fixed for specific JDKs but not all. Needs different implementation for .NET.">
<file path="com/itextpdf/pdfcleanup/UnsupportedImageTypeTest.java"/>
</fileset>
</java>
<resource>
<file path="com/itextpdf/pdfcleanup/CleanUpTaggedPdfTest/cmp_cleanImage_partial.pdf" />
Expand Down Expand Up @@ -37,7 +41,7 @@
<file path="com/itextpdf/pdfcleanup/FilteredImagesCacheTest/cmp_filteredImagesCacheTest04.pdf" />
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCs.pdf" />
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCsJpegBaselineEncoded.pdf" />
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCsJpegBaselineEncodedWithApp14Segment.pdf" />
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCsJpegBaselineEncodedWithApp14Segment.pdf" />
</resource>
</ignored>
<overwritten>
Expand Down
34 changes: 34 additions & 0 deletions src/main/java/com/itextpdf/pdfcleanup/CleanUpProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ This file is part of the iText (R) project.
package com.itextpdf.pdfcleanup;

import com.itextpdf.commons.actions.contexts.IMetaInfo;
import com.itextpdf.pdfcleanup.exceptions.CleanupExceptionMessageConstant;

/**
* Contains properties for {@link PdfCleanUpTool} operations.
Expand All @@ -31,6 +32,7 @@ public class CleanUpProperties {

private IMetaInfo metaInfo;
private boolean processAnnotations;
private Double overlapRatio;

/**
* Creates default CleanUpProperties instance.
Expand Down Expand Up @@ -76,4 +78,36 @@ public boolean isProcessAnnotations() {
public void setProcessAnnotations(boolean processAnnotations) {
this.processAnnotations = processAnnotations;
}

/**
 * Returns the overlap ratio configured for clean-up.
 * <p>
 * The ratio is a value between 0 and 1 describing how much of a content region has to overlap
 * with the redaction area before that content is removed.
 *
 * @return the configured overlap ratio, or {@code null} when no ratio has been set
 */
public Double getOverlapRatio() {
    return this.overlapRatio;
}

/**
 * Sets the overlap ratio.
 * This is a value between 0 and 1 that indicates how much the content region should overlap with the
 * redaction area to be removed.
 * <p>
 * Example: if the overlap ratio is set to 0.3, the content region will be removed if it overlaps with
 * the redaction area by at least 30%.
 *
 * @param overlapRatio the overlap ratio to set, in the range (0, 1]; {@code null} clears a previously
 *                     set ratio
 * @throws IllegalArgumentException if the value is not {@code null} and is NaN or lies outside (0, 1]
 */
public void setOverlapRatio(Double overlapRatio) {
    // The range check is phrased positively on purpose: every comparison with NaN evaluates to false,
    // so a check of the form "ratio <= 0 || ratio > 1" would silently accept NaN.
    if (overlapRatio != null && !(overlapRatio > 0 && overlapRatio <= 1)) {
        throw new IllegalArgumentException(CleanupExceptionMessageConstant.OVERLAP_RATIO_SHOULD_BE_IN_RANGE);
    }
    this.overlapRatio = overlapRatio;
}
}
90 changes: 53 additions & 37 deletions src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,13 @@ class PdfCleanUpFilter {
private static final Set<PdfName> NOT_SUPPORTED_FILTERS_FOR_DIRECT_CLEANUP = Collections.unmodifiableSet(
new LinkedHashSet<>(Arrays.asList(PdfName.JBIG2Decode, PdfName.DCTDecode, PdfName.JPXDecode)));

private List<Rectangle> regions;
private final List<Rectangle> regions;

public PdfCleanUpFilter(List<Rectangle> regions) {
private final CleanUpProperties properties;

public PdfCleanUpFilter(List<Rectangle> regions, CleanUpProperties properties) {
this.regions = regions;
this.properties = properties;
}

static boolean imageSupportsDirectCleanup(PdfImageXObject image) {
Expand All @@ -118,7 +121,7 @@ static boolean imageSupportsDirectCleanup(PdfImageXObject image) {
* are never considered as intersecting.
* @return true if the rectangles intersect, false otherwise
*/
static boolean checkIfRectanglesIntersect(Point[] rect1, Point[] rect2) {
boolean checkIfRectanglesIntersect(Point[] rect1, Point[] rect2) {
IClipper clipper = new DefaultClipper();
// If the redaction area is degenerate, the result will be false
if (!ClipperBridge.addPolygonToClipper(clipper, rect2, PolyType.CLIP)) {
Expand Down Expand Up @@ -170,29 +173,44 @@ static boolean checkIfRectanglesIntersect(Point[] rect1, Point[] rect2) {
// working with paths is considered to be a bit faster in terms of performance.
Paths paths = new Paths();
clipper.execute(ClipType.INTERSECTION, paths, PolyFillType.NON_ZERO, PolyFillType.NON_ZERO);
return !checkIfIntersectionRectangleDegenerate(paths.getBounds(), false)
&& !paths.isEmpty();
} else {
int rect1Size = rect1.length;
return checkIfIntersectionOccurs(paths, rect1, false);
}
intersectionSubjectAdded = ClipperBridge.addPolylineSubjectToClipper(clipper, rect1);
if (!intersectionSubjectAdded) {
// According to the comment above,
// this could have happened only if all four passed points are actually the same point.
// Adding here a point really close to the original point, to make sure it's not covered by the
// intersecting rectangle.
final double SMALL_DIFF = 0.01;
final Point[] expandedRect1 = new Point[rect1.length + 1];
System.arraycopy(rect1, 0, expandedRect1, 0, rect1.length);
expandedRect1[rect1.length] = new Point(rect1[0].getX() + SMALL_DIFF, rect1[0].getY());
rect1 = expandedRect1;

intersectionSubjectAdded = ClipperBridge.addPolylineSubjectToClipper(clipper, rect1);
if (!intersectionSubjectAdded) {
// According to the comment above,
// this could have happened only if all four passed points are actually the same point.
// Adding here a point really close to the original point, to make sure it's not covered by the
// intersecting rectangle.
double smallDiff = 0.01;
List<Point> rect1List = new ArrayList<Point>(Arrays.asList(rect1));
rect1List.add(new Point(rect1[0].getX() + smallDiff, rect1[0].getY()));
rect1 = rect1List.toArray(new Point[rect1Size]);
intersectionSubjectAdded = ClipperBridge.addPolylineSubjectToClipper(clipper, rect1);
assert intersectionSubjectAdded;
}
PolyTree polyTree = new PolyTree();
clipper.execute(ClipType.INTERSECTION, polyTree, PolyFillType.NON_ZERO, PolyFillType.NON_ZERO);
Paths paths = Paths.makePolyTreeToPaths(polyTree);
return !checkIfIntersectionRectangleDegenerate(paths.getBounds(), true)
&& !paths.isEmpty();
assert intersectionSubjectAdded;
}
PolyTree polyTree = new PolyTree();
clipper.execute(ClipType.INTERSECTION, polyTree, PolyFillType.NON_ZERO, PolyFillType.NON_ZERO);
return checkIfIntersectionOccurs(Paths.makePolyTreeToPaths(polyTree), rect1, true);
}

/**
 * Decides whether the clipping result counts as an intersection.
 * <p>
 * Without a configured overlap ratio, the result counts as an intersection when the bounds of
 * {@code paths} are not degenerate. With a configured overlap ratio, the area of those bounds is
 * divided by the polygon area of {@code rect1} and compared against the ratio.
 *
 * @param paths        result of the clipping operation
 * @param rect1        points of the rectangle whose area is used as the denominator
 * @param isDegenerate forwarded to the degeneracy check when no overlap ratio is configured
 * @return {@code true} if an intersection is considered to occur, {@code false} otherwise
 */
private boolean checkIfIntersectionOccurs(Paths paths, Point[] rect1, boolean isDegenerate) {
    if (paths.isEmpty()) {
        return false;
    }
    final LongRect bounds = paths.getBounds();
    final Double requiredRatio = properties.getOverlapRatio();
    if (requiredRatio == null) {
        // No overlap ratio was defined by the user: fall back to the plain degeneracy check.
        return !checkIfIntersectionRectangleDegenerate(bounds, isDegenerate);
    }
    // The user defined an overlap ratio, so it is used to decide whether the overlap is large enough
    // to pass as an intersection.
    final double subjectArea = CleanUpHelperUtil.calculatePolygonArea(rect1);
    final double boundsArea = ClipperBridge.longRectCalculateHeight(bounds) *
            ClipperBridge.longRectCalculateWidth(bounds);
    final float roundingTolerance = 1e-5f;
    return boundsArea / subjectArea + roundingTolerance > requiredRatio;
}

/**
Expand Down Expand Up @@ -274,7 +292,7 @@ FilteredImagesCache.FilteredImageKey createFilteredImageKey(PdfImageXObject imag
* @return a filtered {@link com.itextpdf.kernel.geom.Path} object.
*/
private com.itextpdf.kernel.geom.Path filterFillPath(com.itextpdf.kernel.geom.Path path,
Matrix ctm, int fillingRule) {
Matrix ctm, int fillingRule) {
path.closeAllSubpaths();

IClipper clipper = new DefaultClipper();
Expand Down Expand Up @@ -336,8 +354,8 @@ private List<Rectangle> getImageAreasToBeCleaned(Matrix imageCtm) {
}

private com.itextpdf.kernel.geom.Path filterStrokePath(com.itextpdf.kernel.geom.Path sourcePath, Matrix ctm,
float lineWidth, int lineCapStyle, int lineJoinStyle,
float miterLimit, LineDashPattern lineDashPattern) {
float lineWidth, int lineCapStyle, int lineJoinStyle,
float miterLimit, LineDashPattern lineDashPattern) {
com.itextpdf.kernel.geom.Path path = sourcePath;
JoinType joinType = ClipperBridge.getJoinType(lineJoinStyle);
EndType endType = ClipperBridge.getEndType(lineCapStyle);
Expand Down Expand Up @@ -420,15 +438,14 @@ private static FilterResult<ImageData> filterImage(PdfImageXObject image, List<R
* is true) and it is included in the intersecting rectangle, this method returns false,
* even though the intersection rectangle is degenerate.
*
* @param rect intersection rectangle
* @param rect intersection rectangle
* @param isIntersectSubjectDegenerate value, specifying if the intersection subject
* is degenerate.
* @return true - if the intersection rectangle is degenerate.
*/
private static boolean checkIfIntersectionRectangleDegenerate(LongRect rect,
boolean isIntersectSubjectDegenerate) {
float width = (float)(Math.abs(rect.left - rect.right) / ClipperBridge.floatMultiplier);
float height = (float)(Math.abs(rect.top - rect.bottom) / ClipperBridge.floatMultiplier);
private static boolean checkIfIntersectionRectangleDegenerate(LongRect rect, boolean isIntersectSubjectDegenerate) {
    final float rectWidth = ClipperBridge.longRectCalculateWidth(rect);
    final float rectHeight = ClipperBridge.longRectCalculateHeight(rect);
    final boolean widthCollapsed = rectWidth < EPS;
    final boolean heightCollapsed = rectHeight < EPS;
    if (isIntersectSubjectDegenerate) {
        // For a degenerate subject the rectangle is reported as degenerate only when both
        // dimensions are below EPS.
        return widthCollapsed && heightCollapsed;
    }
    // Otherwise a single dimension below EPS is enough.
    return widthCollapsed || heightCollapsed;
}

Expand Down Expand Up @@ -466,7 +483,7 @@ private static boolean isSupportedFilterForDirectImageCleanup(PdfObject filter)
return true;
}
if (filter.isName()) {
return !NOT_SUPPORTED_FILTERS_FOR_DIRECT_CLEANUP.contains((PdfName)filter);
return !NOT_SUPPORTED_FILTERS_FOR_DIRECT_CLEANUP.contains((PdfName) filter);
} else if (filter.isArray()) {
PdfArray filterArray = (PdfArray) filter;
for (int i = 0; i < filterArray.size(); ++i) {
Expand Down Expand Up @@ -508,7 +525,7 @@ private static Rectangle transformRectIntoImageCoordinates(Rectangle rect, Matri
* Filters image content using direct manipulation over PDF image samples stream. Implemented according to ISO 32000-2,
* "8.9.3 Sample representation".
*
* @param image image XObject which will be filtered
* @param image image XObject which will be filtered
* @param imageAreasToBeCleaned list of rectangle areas for clean up with coordinates in (0,1)x(0,1) space
* @return raw bytes of the PDF image samples stream which is already cleaned.
*/
Expand All @@ -529,7 +546,7 @@ private static byte[] processImageDirectly(PdfImageXObject image, List<Rectangle
throw new IllegalArgumentException("/BitsPerComponent only allowed values are: 1, 2, 4, 8 and 16.");
}

double bytesInComponent = (double)bpc / 8;
double bytesInComponent = (double) bpc / 8;
int firstComponentInByte = 0;
if (bpc < 16) {
for (int i = 0; i < bpc; ++i) {
Expand All @@ -544,7 +561,7 @@ private static byte[] processImageDirectly(PdfImageXObject image, List<Rectangle
rowPadding = (int) (8 - (width * bpc) % 8);
}
for (Rectangle rect : imageAreasToBeCleaned) {
int[] cleanImgRect = CleanUpHelperUtil.getImageRectToClean(rect, (int)width, (int)height);
int[] cleanImgRect = CleanUpHelperUtil.getImageRectToClean(rect, (int) width, (int) height);
for (int j = cleanImgRect[Y]; j < cleanImgRect[Y] + cleanImgRect[H]; ++j) {
for (int i = cleanImgRect[X]; i < cleanImgRect[X] + cleanImgRect[W]; ++i) {
// based on assumption that numOfComponents always equals 1, because this method is only for monochrome and grayscale images
Expand Down Expand Up @@ -751,7 +768,6 @@ private static Point[] transformPoints(Matrix transformationMatrix, boolean inve
private static Point[] getTextRectangle(TextRenderInfo renderInfo) {
LineSegment ascent = renderInfo.getAscentLine();
LineSegment descent = renderInfo.getDescentLine();

return new Point[]{
new Point(ascent.getStartPoint().get(0), ascent.getStartPoint().get(1)),
new Point(ascent.getEndPoint().get(0), ascent.getEndPoint().get(1)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,15 @@ public class PdfCleanUpProcessor extends PdfCanvasProcessor {
private TextPositioning textPositioning;
private FilteredImagesCache filteredImagesCache;


/**
 * Creates a processor that uses default clean-up properties.
 * <p>
 * Delegates to the three-argument constructor, passing a newly created {@link CleanUpProperties}.
 *
 * @param cleanUpRegions the clean-up regions, forwarded unchanged to the three-argument constructor
 * @param document       the document, forwarded unchanged to the three-argument constructor
 */
PdfCleanUpProcessor(List<Rectangle> cleanUpRegions, PdfDocument document) {
this(cleanUpRegions, document, new CleanUpProperties());
}

PdfCleanUpProcessor(List<Rectangle> cleanUpRegions, PdfDocument document, CleanUpProperties properties) {
super(new PdfCleanUpEventListener());
this.document = document;
this.filter = new PdfCleanUpFilter(cleanUpRegions);
this.filter = new PdfCleanUpFilter(cleanUpRegions, properties);
this.canvasStack = new Stack<>();
this.notAppliedGsParams = new ArrayDeque<>();
this.notAppliedGsParams.push(new NotAppliedGsParams());
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public class PdfCleanUpTool {

private PdfDocument pdfDocument;

private boolean processAnnotations;
private CleanUpProperties properties;

/**
* Key - page number, value - list of locations related to the page.
Expand Down Expand Up @@ -137,14 +137,14 @@ public PdfCleanUpTool(PdfDocument pdfDocument, boolean cleanRedactAnnotations, C
if (pdfDocument.getReader() == null || pdfDocument.getWriter() == null) {
throw new PdfException(CleanupExceptionMessageConstant.PDF_DOCUMENT_MUST_BE_OPENED_IN_STAMPING_MODE);
}
this.properties = properties;
this.pdfDocument = pdfDocument;
this.pdfCleanUpLocations = new HashMap<>();
this.filteredImagesCache = new FilteredImagesCache();

if (cleanRedactAnnotations) {
addCleanUpLocationsBasedOnRedactAnnotations();
}
processAnnotations = properties.isProcessAnnotations();
}

/**
Expand Down Expand Up @@ -215,10 +215,10 @@ private void cleanUpPage(int pageNumber, List<PdfCleanUpLocation> cleanUpLocatio
}

PdfPage page = pdfDocument.getPage(pageNumber);
PdfCleanUpProcessor cleanUpProcessor = new PdfCleanUpProcessor(regions, pdfDocument);
PdfCleanUpProcessor cleanUpProcessor = new PdfCleanUpProcessor(regions, pdfDocument, this.properties);
cleanUpProcessor.setFilteredImagesCache(filteredImagesCache);
cleanUpProcessor.processPageContent(page);
if (processAnnotations) {
if (properties.isProcessAnnotations()) {
cleanUpProcessor.processPageAnnotations(page, regions, redactAnnotations != null);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class PdfSweepProductData {
public static final String PDF_SWEEP_PRODUCT_NAME = "pdfSweep";
public static final String PDF_SWEEP_PUBLIC_PRODUCT_NAME = PDF_SWEEP_PRODUCT_NAME;

private static final String PDF_SWEEP_VERSION = "4.0.2";
private static final String PDF_SWEEP_VERSION = "4.0.3";
private static final int PDF_SWEEP_COPYRIGHT_SINCE = 2000;
private static final int PDF_SWEEP_COPYRIGHT_TO = 2024;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ public final class CleanupExceptionMessageConstant {
// Do not remove, it's used in .NET
// This same exception message is thrown in CleanUpImageUtil#cleanImage when the image format is unsupported
public static final String UNSUPPORTED_IMAGE_TYPE = "Unsupported image type";
public static final String OVERLAP_RATIO_SHOULD_BE_IN_RANGE = "Overlap ratio should be in range (0, 1]";

private CleanupExceptionMessageConstant(){}
private CleanupExceptionMessageConstant(){
//empty constructor
}
}
16 changes: 16 additions & 0 deletions src/main/java/com/itextpdf/pdfcleanup/util/CleanUpHelperUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ This file is part of the iText (R) project.
*/
package com.itextpdf.pdfcleanup.util;

import com.itextpdf.kernel.geom.Point;
import com.itextpdf.kernel.geom.Rectangle;

/**
Expand Down Expand Up @@ -56,4 +57,19 @@ public static int[] getImageRectToClean(Rectangle rect, int imgWidth, int imgHei
int h = scaledTopY - scaledBottomY;
return new int[]{x, y, w, h};
}


/**
 * Calculates the area enclosed by a polygon given by its vertices.
 * <p>
 * For every vertex, its x coordinate is multiplied by the difference between the y coordinates of the
 * next and the previous vertex (neighbours wrap around at both ends of the array); half of the
 * absolute value of the accumulated sum is the area.
 *
 * @param vertices the polygon vertices in traversal order
 * @return the polygon area; {@code 0} if fewer than three vertices are passed
 */
public static double calculatePolygonArea(Point[] vertices) {
    final int count = vertices.length;
    if (count < 3) {
        // Nothing, a point, or a segment encloses no area. This also avoids indexing past the
        // end of a single-element array.
        return 0;
    }
    double sum = 0;
    for (int i = 0; i < count; i++) {
        // Modular indexing wraps the first and last vertex around to each other.
        final Point previous = vertices[(i + count - 1) % count];
        final Point next = vertices[(i + 1) % count];
        sum += vertices[i].getX() * (next.getY() - previous.getY());
    }
    return 0.5 * Math.abs(sum);
}
}
Loading

0 comments on commit 524c8ed

Please sign in to comment.