Skip to content

Commit

Permalink
#79 - Even more robust parent/child relationships based on box tree i…
Browse files Browse the repository at this point in the history
…nstead of DOM elements.

Also started to use correct tags.
  • Loading branch information
danfickle committed Jan 18, 2019
1 parent 9c985ac commit 35bf7bd
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 66 deletions.
10 changes: 10 additions & 0 deletions openhtmltopdf-core/src/main/java/com/openhtmltopdf/render/Box.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ public abstract class Box implements Styleable, DisplayListItem {
private Area _absoluteClipBox;
private boolean _clipBoxCalculated = false;

private Object _accessibilityObject;

/**
 * No-argument constructor for subclasses; it performs no work of its own.
 */
protected Box() {
}

Expand Down Expand Up @@ -552,6 +554,14 @@ public boolean hasNonTextContent(CssContext c) {

return false;
}

/**
 * Attaches an arbitrary object to this box, replacing whatever was stored before.
 * The value is kept as-is; this class does not inspect it.
 *
 * @param object the object to remember for this box (may be {@code null})
 */
public void setAccessiblityObject(Object object) {
    _accessibilityObject = object;
}

/**
 * Returns the object previously stored with {@code setAccessiblityObject}.
 *
 * @return the stored object, or {@code null} if none has been set
 */
public Object getAccessibilityObject() {
    return _accessibilityObject;
}

public void paintRootElementBackground(RenderingContext c) {
PaintingInfo pI = getPaintingInfo();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
Expand All @@ -18,14 +16,13 @@
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import com.openhtmltopdf.extend.StructureType;
import com.openhtmltopdf.render.BlockBox;
import com.openhtmltopdf.render.Box;
import com.openhtmltopdf.render.LineBox;
import com.openhtmltopdf.render.RenderingContext;

public class PdfBoxAccessibilityHelper {
private final Map<Element, StructureItem> _structureMap = new HashMap<>();
private final List<List<StructureItem>> _pageContentItems = new ArrayList<>();
private final PdfBoxFastOutputDevice _od;

Expand All @@ -43,11 +40,12 @@ public PdfBoxAccessibilityHelper(PdfBoxFastOutputDevice od) {

private static class StructureItem {
private final StructureType type;
private final Box box;
private Box box;
private final List<StructureItem> children = new ArrayList<>();

private COSDictionary dict;
private PDStructureElement elem;
private PDStructureElement parentElem;
private int mcid = -1;
private StructureItem parent;
private PDPage page;
Expand Down Expand Up @@ -92,9 +90,8 @@ public void finishPdfUa() {

root.appendKid(rootElem);

StructureItem rootStruct = _structureMap.get(_doc.getDocumentElement());
rootStruct.elem = rootElem;
finishStructure(rootStruct);
_root.elem = rootElem;
finishStructure(_root, _root.elem);

_od.getWriter().getDocumentCatalog().setStructureTreeRoot(root);
}
Expand All @@ -107,8 +104,8 @@ public void finishPdfUa() {

COSArray mcidParentReferences = new COSArray();
for (StructureItem item : pageItems) {
System.out.println("item = " + item + ", parent = " + item.parent + " ,," + item.parent.elem);
mcidParentReferences.add(item.parent.elem);
System.out.println("%%%%%%%item = " + item + ", parent = " + item.parentElem);
mcidParentReferences.add(item.parentElem);
}

numTree.add(COSInteger.get(i));
Expand All @@ -126,31 +123,59 @@ public void finishPdfUa() {
}

/**
 * Picks the PDF structure type name for a structure item based on its box.
 *
 * <ul>
 *   <li>a box with a non-null layer becomes {@code Sect};</li>
 *   <li>a floated {@link BlockBox} becomes {@code Note};</li>
 *   <li>an inline {@link BlockBox} becomes {@code Span};</li>
 *   <li>a {@link BlockBox} whose element is named {@code p} becomes {@code P};</li>
 *   <li>any other {@link BlockBox} becomes {@code Div};</li>
 *   <li>everything else, including an item with no box, becomes {@code Span}.</li>
 * </ul>
 *
 * @param item the structure item to classify
 * @return one of the {@link StandardStructureTypes} constants listed above
 */
private String chooseTag(StructureItem item) {
    if (item.box != null) {
        if (item.box.getLayer() != null) {
            // The layer check comes first, so it wins over the block-level rules below.
            return StandardStructureTypes.SECT;
        } else if (item.box instanceof BlockBox) {
            BlockBox block = (BlockBox) item.box;

            // TODO: Tables.
            if (block.isFloated()) {
                return StandardStructureTypes.NOTE;
            } else if (block.isInline()) {
                return StandardStructureTypes.SPAN;
            } else if (block.getElement() != null && block.getElement().getNodeName().equals("p")) {
                return StandardStructureTypes.P;
            } else {
                return StandardStructureTypes.DIV;
            }
        }
    }

    // Non-block boxes and items without a box fall back to a generic span.
    return StandardStructureTypes.SPAN;
}

private void finishStructure(StructureItem item) {
System.out.println("item = " + item + " ,," + item.mcid);
private void finishStructure(StructureItem item, PDStructureElement parent) {
for (StructureItem child : item.children) {
if (child.mcid == -1) {
if (child.children.isEmpty()) {
continue;
}

String pdfTag = chooseTag(child);

child.elem = new PDStructureElement(pdfTag, item.elem);
System.out.println("child = " + child + "!!!!!!" + child.elem);
child.elem.setParent(item.elem);
child.elem.setPage(child.page);

item.elem.appendKid(child.elem);
if (child.box instanceof LineBox &&
!child.box.hasNonTextContent(_ctx)) {
finishStructure(child, parent);
} else {
String pdfTag = chooseTag(child);

finishStructure(child);
child.parentElem = parent;
child.elem = new PDStructureElement(pdfTag, parent);
child.elem.setParent(parent);
child.elem.setPage(child.page);
System.out.println("ADDING$$: " + child + " :::: " + child.elem + "-----" + pdfTag);
parent.appendKid(child.elem);

finishStructure(child, child.elem);
}
} else if (child.type == StructureType.TEXT) {
item.elem.appendKid(new PDMarkedContent(COSName.getPDFName("Span"), child.dict));
child.parentElem = parent;
parent.appendKid(new PDMarkedContent(COSName.getPDFName("Span"), child.dict));
} else if (child.type == StructureType.BACKGROUND) {
item.elem.appendKid(new PDArtifactMarkedContent(child.dict));
child.parentElem = parent;
parent.appendKid(new PDArtifactMarkedContent(child.dict));
}
}
}
Expand All @@ -165,65 +190,69 @@ private Element getBoxElement(Box box) {
}
}

/**
 * Looks up the structure item registered for the DOM parent of the element
 * backing {@code box}.
 *
 * @param box the box whose element's parent is looked up
 * @return {@code _root} when the element has no parent node or its parent is the
 *         {@link Document}; otherwise whatever {@code _structureMap} holds for the
 *         parent node (which may be {@code null} if nothing was registered)
 */
private StructureItem findParentStructualElement(Box box) {
    Element elem = getBoxElement(box);
    Node parent = elem.getParentNode();

    StructureItem item;

    if (parent == null || parent instanceof Document) {
        item = _root;
    } else {
        item = _structureMap.get(parent);
    }

    // Reuse the already-fetched parent: dereferencing elem.getParentNode() again
    // here would throw when the element has no parent node.
    String parentName = (parent == null) ? null : parent.getNodeName();
    System.out.println("ch = " + box + " parent = " + item + ", " + parentName);

    return item;
}

/**
 * Returns the structure item registered in {@code _structureMap} for the
 * element that {@code getBoxElement} reports for the given box.
 *
 * @param box the box to look up
 * @return the mapped structure item, or {@code null} if the map has no entry
 */
private StructureItem findCurrentStructualElement(Box box) {
    return _structureMap.get(getBoxElement(box));
}

/**
 * Builds a dictionary carrying the next marked-content id under the
 * {@code MCID} key, then advances the {@code _nextMcid} counter by one.
 *
 * @return a new dictionary holding the id that was current on entry
 */
private COSDictionary createMarkedContentDictionary() {
    COSDictionary markedContent = new COSDictionary();
    markedContent.setInt(COSName.MCID, _nextMcid);
    _nextMcid += 1;
    return markedContent;
}

/**
 * Climbs the box tree from {@code parent} upwards, stopping at the first
 * ancestor that already has an accessibility object (or when the chain ends).
 * Each ancestor visited on the way gets a structure item created for it, and
 * the item from the level below is linked in as its child.
 *
 * @param child  the structure item to hang beneath the first created ancestor
 * @param parent the box at which to start climbing; may be {@code null}
 */
private void ensureAncestorTree(StructureItem child, Box parent) {
    StructureItem descendant = child;

    for (Box ancestor = parent;
         ancestor != null && ancestor.getAccessibilityObject() == null;
         ancestor = ancestor.getParent()) {
        StructureItem ancestorItem = createStructureItem(null, ancestor);
        ancestor.setAccessiblityObject(ancestorItem);

        // Link the lower level under the freshly created ancestor item.
        ancestorItem.children.add(descendant);
        descendant.parent = ancestorItem;

        descendant = ancestorItem;
    }
}

private StructureItem createStructureItem(StructureType type, Box box) {
StructureItem child = (StructureItem) box.getAccessibilityObject();

Element elem = getBoxElement(box);
StructureItem item = _structureMap.get(elem);

if (item == null) {
item = new StructureItem(type, box);
_structureMap.put(elem, item);

item.parent = findParentStructualElement(box);
item.parent.children.add(item);
if (child == null) {
child = new StructureItem(type, box);
child.page = _page;

item.page = _page;
box.setAccessiblityObject(child);

ensureAncestorTree(child, box.getParent());
ensureParent(box, child);
} else if (child.box == null) {
child.box = box;
}

//System.out.println("-------ADD: " + item + " , &&" + item.parent);
return item;
System.out.println("-------ADD: " + child + " && " + child.parent);
return child;
}

/**
 * Gives {@code child} a parent item if it does not have one yet: the item
 * stored on the parent of {@code box}, or {@code _root} when the box has no
 * parent. The child is also appended to the chosen parent's children list.
 *
 * @param box   the box whose parent determines where the child is attached
 * @param child the structure item to attach; left untouched if already parented
 */
public void ensureParent(Box box, StructureItem child) {
    if (child.parent != null) {
        return;
    }

    Box parentBox = box.getParent();
    StructureItem owner = (parentBox != null)
            ? (StructureItem) parentBox.getAccessibilityObject()
            : _root;

    owner.children.add(child);
    child.parent = owner;
}

/**
 * Creates a new marked-content structure item for {@code box}, links it to a
 * parent item, assigns it the next marked-content id and dictionary, and
 * records it in the content-item list of the most recently added page.
 *
 * @param type the structure type of the marked content
 * @param box  the box the marked content belongs to
 * @return the newly created item
 */
private StructureItem createMarkedContentStructureItem(StructureType type, Box box) {
    StructureItem current = new StructureItem(type, box);

    // Parent linkage comes only from the box tree; it must not be reassigned
    // afterwards or the item would end up in two children lists.
    ensureAncestorTree(current, box.getParent());
    ensureParent(box, current);

    // Capture the id before createMarkedContentDictionary() advances the counter.
    current.mcid = _nextMcid;
    current.dict = createMarkedContentDictionary();

    _pageContentItems.get(_pageContentItems.size() - 1).add(current);

    System.out.println("+++++++ADD: " + current + " !! " + current.parent + " !! " + current.mcid);

    return current;
}
Expand Down Expand Up @@ -300,7 +329,6 @@ public void setDocument(Document doc) {
this._doc = doc;

StructureItem rootStruct = new StructureItem(null, null);
_structureMap.put(_doc.getDocumentElement(), rootStruct);
_root = rootStruct;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,10 @@ public void start(Document doc) {
_bmManager = new PdfBoxBookmarkManager(doc, _writer, _sharedContext, _dotsPerPoint, this);
_linkManager = new PdfBoxFastLinkManager(_sharedContext, _dotsPerPoint, _root, this);
loadMetadata(doc);
_pdfUa.setDocument(doc);

if (_pdfUa != null) {
_pdfUa.setDocument(doc);
}
}

public void finish(RenderingContext c, Box root) {
Expand Down

0 comments on commit 35bf7bd

Please sign in to comment.