Skip to content

Commit

Permalink
Parser/scanner for Doctype
Browse files Browse the repository at this point in the history
Fixes part of #221
Sets up Doctype parsing. Internal DTD parsing is not complete yet; the internal DTD is stored as a raw string instead.

Signed-off-by: Nikolas Komonen <nikolaskomonen@gmail.com>
  • Loading branch information
NikolasKomonen committed Nov 15, 2018
1 parent ae60f52 commit d550ef1
Show file tree
Hide file tree
Showing 11 changed files with 455 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,42 @@
*/
public class DocumentType extends Node implements org.w3c.dom.DocumentType {

/**
 * Kind of external identifier declared by a DOCTYPE.
 */
public enum DocumentTypeKind{
// Assigned by the parser when it reads a DocTypeKindPUBLIC token.
PUBLIC,
// Assigned by the parser when it reads a DocTypeKindSYSTEM token.
SYSTEM
}

/** Document type name. */
String name;

private DocumentTypeKind kind;
private String publicId;
private String systemId;
private String internalDTD; //TODO: THIS IS TEMPORARY. Implement actual parsing.

private String content;
int startContent;
int endContent;



/**
 * Creates a document type node; every argument is forwarded unchanged to the
 * {@code Node} constructor.
 *
 * @param start         start of the node (presumably a character offset in the
 *                      document text - TODO confirm against Node)
 * @param end           end of the node (presumably a character offset in the
 *                      document text - TODO confirm against Node)
 * @param ownerDocument the document passed to {@code Node} as the owner of this node
 */
public DocumentType(int start, int end, XMLDocument ownerDocument) {
super(start, end, ownerDocument);
}

/**
 * Returns the internal DTD subset as an unparsed string.
 *
 * @return the value last passed to {@link #setInternalDTD(String)}, or
 *         {@code null} if it was never set
 */
public String getInternalDTD() {
return internalDTD;
}

/**
 * Stores the internal DTD subset as an unparsed string. The value is kept
 * exactly as given; no parsing or validation is performed here.
 *
 * @param internalDTD the internal DTD text to store (may be {@code null})
 */
public void setInternalDTD(String internalDTD) {
this.internalDTD = internalDTD;
}

public String getContent() {
if (content == null) {
content = getOwnerDocument().getText().substring(getStartContent(), getEndContent());
Expand All @@ -52,6 +77,24 @@ public String getName() {
return name;
}

/**
 * Sets the document type name. The value is stored as given.
 *
 * @param name the document type name to store
 */
public void setName(String name) {
this.name = name;
}

/**
 * Returns the kind of external identifier recorded for this DOCTYPE.
 *
 * @return the value last passed to {@link #setKind(DocumentTypeKind)}, or
 *         {@code null} if it was never set
 */
public DocumentTypeKind getKind() {
return kind;
}

/**
 * Records the kind of external identifier for this DOCTYPE.
 *
 * @param kind {@link DocumentTypeKind#PUBLIC} or {@link DocumentTypeKind#SYSTEM};
 *             stored as given
 */
public void setKind(DocumentTypeKind kind) {
this.kind = kind;
}

/*
* (non-Javadoc)
*
Expand Down Expand Up @@ -109,7 +152,14 @@ public NamedNodeMap getNotations() {
*/
@Override
public String getPublicId() {
    // Returns the identifier stored by setPublicId(String), or null if none was
    // set. This accessor no longer throws UnsupportedOperationException; the
    // leftover throw statement made the return below unreachable.
    return this.publicId;
}

/**
 * Stores the public identifier. The value is passed through
 * {@code cleanURL} first, which drops a leading and a trailing double quote
 * when present, so the quoted literal can be given as-is.
 *
 * @param publicId the public identifier, optionally wrapped in double quotes
 *                 (may be {@code null})
 */
public void setPublicId(String publicId) {
this.publicId = cleanURL(publicId);
}

/*
Expand All @@ -119,6 +169,30 @@ public String getPublicId() {
*/
@Override
public String getSystemId() {
    // Returns the identifier stored by setSystemId(String), or null if none was
    // set. This accessor no longer throws UnsupportedOperationException; the
    // leftover throw statement made the return below unreachable.
    return this.systemId;
}

/**
 * Stores the system identifier. The value is passed through
 * {@code cleanURL} first, which drops a leading and a trailing double quote
 * when present, so the quoted literal can be given as-is.
 *
 * @param systemId the system identifier, optionally wrapped in double quotes
 *                 (may be {@code null})
 */
public void setSystemId(String systemId) {
this.systemId = cleanURL(systemId);
}

/**
 * Removes one leading and one trailing double quote from an identifier literal.
 * Each end is handled independently, so an unbalanced quote is still stripped.
 *
 * @param url the literal, optionally wrapped in double quotes (may be {@code null})
 * @return {@code null} for {@code null} input, the input itself when empty,
 *         otherwise the text without the surrounding double quotes
 */
private static String cleanURL(String url) {
    if (url == null || url.isEmpty()) {
        return url;
    }
    int last = url.length() - 1;
    int start = url.charAt(0) == '"' ? 1 : 0;
    int end = url.charAt(last) == '"' ? last : url.length();
    // A lone quote is both the first and the last character, which would yield
    // start = 1 and end = 0; clamp so substring never gets end < start.
    return url.substring(start, Math.max(start, end));
}





}
Original file line number Diff line number Diff line change
Expand Up @@ -334,11 +334,11 @@ public List<Node> getChildren() {
}

/**
* Add node child
* Add node child and set child.parent to this.
*
* @param child the node child to add.
*/
public void addChild(Node child) {
public void addChildAndSetItsParent(Node child) {
child.parent = this;
if (children == null) {
children = new XMLNodeList<Node>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import org.eclipse.lsp4xml.commons.BadLocationException;
import org.eclipse.lsp4xml.commons.TextDocument;
import org.eclipse.lsp4xml.dom.DocumentType.DocumentTypeKind;
import org.eclipse.lsp4xml.dom.parser.Scanner;
import org.eclipse.lsp4xml.dom.parser.TokenType;
import org.eclipse.lsp4xml.dom.parser.XMLScanner;
Expand Down Expand Up @@ -59,7 +60,7 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso
case StartTagOpen: {
Element child = xmlDocument.createElement(scanner.getTokenOffset(), text.length());
child.startTagOpenOffset = scanner.getTokenOffset();
curr.addChild(child);
curr.addChildAndSetItsParent(child);
curr = child;
break;
}
Expand Down Expand Up @@ -116,7 +117,7 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso
Element element = xmlDocument.createElement(scanner.getTokenOffset() - 2, text.length());
element.endTagOpenOffset = endTagOpenOffset;
element.tag = closeTag;
current.addChild(element);
current.addChildAndSetItsParent(element);
curr = element;
}
break;
Expand Down Expand Up @@ -159,7 +160,7 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso

case CDATATagOpen: {
CDataSection cdataNode = xmlDocument.createCDataSection(scanner.getTokenOffset(), text.length());
curr.addChild(cdataNode);
curr.addChildAndSetItsParent(cdataNode);
curr = cdataNode;
break;
}
Expand All @@ -181,7 +182,7 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso
case StartPrologOrPI: {
ProcessingInstruction prologOrPINode = xmlDocument.createProcessingInstruction(scanner.getTokenOffset(),
text.length());
curr.addChild(prologOrPINode);
curr.addChildAndSetItsParent(prologOrPINode);
curr = prologOrPINode;
break;
}
Expand Down Expand Up @@ -217,7 +218,7 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso

case StartCommentTag: {
Comment comment = xmlDocument.createComment(scanner.getTokenOffset(), text.length());
curr.addChild(comment);
curr.addChildAndSetItsParent(comment);
curr = comment;
try {
int endLine = document.positionAt(lastClosed.end).getLine();
Expand All @@ -228,7 +229,6 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso
} catch (BadLocationException e) {
LOGGER.log(Level.SEVERE, "XMLParser StartCommentTag bad offset in document", e);
}
// }
break;
}

Expand All @@ -241,15 +241,45 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso

case StartDoctypeTag: {
DocumentType doctype = xmlDocument.createDocumentType(scanner.getTokenOffset(), text.length());
curr.addChild(doctype);
curr.addChildAndSetItsParent(doctype);
doctype.parent = curr;
curr = doctype;
break;
}

case Doctype: {
case DoctypeName: {
DocumentType doctype = (DocumentType) curr;
doctype.startContent = scanner.getTokenOffset();
doctype.endContent = scanner.getTokenEnd();
doctype.setName(scanner.getTokenText());
break;
}

case DocTypeKindPUBLIC: {
DocumentType doctype = (DocumentType) curr;
doctype.setKind(DocumentTypeKind.PUBLIC);
break;
}

case DocTypeKindSYSTEM: {
DocumentType doctype = (DocumentType) curr;
doctype.setKind(DocumentTypeKind.SYSTEM);
break;
}

case DoctypePublicId: {
DocumentType doctype = (DocumentType) curr;
doctype.setPublicId(scanner.getTokenText());
break;
}

case DoctypeSystemId: {
DocumentType doctype = (DocumentType) curr;
doctype.setSystemId(scanner.getTokenText());
break;
}

case InternalDTDContent: {
DocumentType doctype = (DocumentType) curr;
doctype.setInternalDTD(scanner.getTokenText());
break;
}

Expand All @@ -271,14 +301,14 @@ public XMLDocument parse(TextDocument document, URIResolverExtensionManager reso
// FIXME: don't use getTokenText (substring) to know if the content is only
// spaces or line feed (scanner should know that).
String content = scanner.getTokenText();
if (content.trim().length() == 0) {
if (content.trim().length() == 0) { // if string is only whitespaces
break;
}
int start = scanner.getTokenOffset();
int end = scanner.getTokenEnd();
Text textNode = xmlDocument.createText(start, end);
textNode.closed = true;
curr.addChild(textNode);
curr.addChildAndSetItsParent(textNode);
break;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

public class Constants {

public final static int _BNG = "!".codePointAt(0);
public final static int _EXL = "!".codePointAt(0);
public final static int _MIN = "-".codePointAt(0);
public final static int _LAN = "<".codePointAt(0);
public final static int _RAN = ">".codePointAt(0);
Expand Down Expand Up @@ -49,10 +49,14 @@ public class Constants {

public static final Pattern ATTRIBUTE_VALUE_REGEX = Pattern.compile("^[^\\s\"'`=<>\\/]+");

public static final Pattern PROLOG_NAME_OPTIONS = Pattern.compile("^(xml|xml-stylesheet)$");
public static final Pattern URL_VALUE_REGEX = Pattern.compile("^\"[^<>\"]*\"");

public static final Pattern PROLOG_NAME_OPTIONS = Pattern.compile("^(xml|xml-stylesheet)");

public static final Pattern DOCTYPE_KIND_OPTIONS = Pattern.compile("^(PUBLIC|SYSTEM)");

public static final Pattern PI_TAG_NAME = Pattern.compile("^[a-zA-Z0-9]+");

// public static final Pattern DOCTYPE_NAME =
// Pattern.compile("^[_:\\w][_:\\w-.\\d]*");
public static final Pattern DOCTYPE_NAME =
Pattern.compile("^[_:\\w][_:\\w-.\\d]*");
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ public int peekChar() {
return peekChar(0);
}

/**
* Peeks at next char at position + n. peekChar() == peekChar(0)
* @param n
* @return
*/
public int peekChar(int n) {
int pos = this.position + n;
if (pos >= len) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@
/**
 * Enumerates the scanner states. Declaration order is kept as-is so that
 * ordinals of the existing constants do not change; the doctype-related states
 * are appended after {@code WithinPI}.
 */
public enum ScannerState {
    WithinContent, AfterOpeningStartTag, AfterOpeningEndTag, WithinProlog, WithinDoctype, WithinTag, WithinEndTag,
    WithinComment, AfterAttributeName, BeforeAttributeValue, WithinCDATA, AfterClosingCDATATag, StartCDATATag, AfterPrologOpen, PrologOrPI,
    WithinPI,
    // States entered while scanning a DOCTYPE declaration.
    AfterDoctypeName, AfterDoctypePUBLIC, AfterDoctypeSYSTEM, AfterDoctypePublicId, AfterDoctypeSystemId,
    AfterInternalDTDStartBracket, WithinDTD, WithinInternalDTD

}
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,21 @@ public enum TokenType {
AttributeName,
AttributeValue,
StartPrologOrPI,
StartDoctypeTag,
PrologName,
PIName,
PIContent,
PIEnd,
PrologEnd,
Doctype,
EndDoctypeTag,
Content,
Whitespace,
Unknown,
EOS
EOS,
StartDoctypeTag,
DoctypeName,
DocTypeKindPUBLIC,
DocTypeKindSYSTEM,
DoctypePublicId,
DoctypeSystemId,
InternalDTDStart,
EndDoctypeTag, InternalDTDEnd, InternalDTDContent
}
Loading

0 comments on commit d550ef1

Please sign in to comment.