diff --git a/jvm/src/test/scala/scala/xml/XMLTest.scala b/jvm/src/test/scala/scala/xml/XMLTest.scala index d5a606d4..b96d2329 100644 --- a/jvm/src/test/scala/scala/xml/XMLTest.scala +++ b/jvm/src/test/scala/scala/xml/XMLTest.scala @@ -1,14 +1,10 @@ package scala.xml import language.postfixOps - import org.junit.{Test => UnitTest} -import org.junit.Assert.assertTrue -import org.junit.Assert.assertFalse -import org.junit.Assert.assertEquals +import org.junit.Assert.{assertEquals, assertFalse, assertTrue} import java.io.StringWriter import java.io.ByteArrayOutputStream -import java.io.StringReader import scala.xml.dtd.{DocType, PublicID} import scala.xml.parsing.ConstructingParser import scala.xml.Utility.sort @@ -610,7 +606,7 @@ class XMLTestJVM { | section]]> suffix""".stripMargin) } - def roundtripNodes(xml: String): Unit = assertEquals(xml, XML.loadStringNodes(xml).map(_.toString).mkString("")) + def roundtripNodes(xml: String): Unit = assertEquals(xml, XML.loadStringDocument(xml).children.map(_.toString).mkString("")) @UnitTest def xmlLoaderLoadNodes(): Unit = { diff --git a/shared/src/main/scala/scala/xml/XML.scala b/shared/src/main/scala/scala/xml/XML.scala index 16a23aa0..1aad3139 100755 --- a/shared/src/main/scala/scala/xml/XML.scala +++ b/shared/src/main/scala/scala/xml/XML.scala @@ -16,7 +16,7 @@ package xml import factory.XMLLoader import java.io.{File, FileDescriptor, FileInputStream, FileOutputStream, InputStream, Reader, StringReader, Writer} import java.nio.channels.Channels -import scala.util.control.Exception.ultimately +import scala.util.control.Exception object Source { def fromFile(name: String): InputSource = fromFile(new File(name)) @@ -25,8 +25,8 @@ object Source { def fromSysId(sysID: String): InputSource = new InputSource(sysID) def fromFile(fd: FileDescriptor): InputSource = fromInputStream(new FileInputStream(fd)) def fromInputStream(is: InputStream): InputSource = new InputSource(is) - def fromReader(reader: Reader): InputSource = new 
InputSource(reader) def fromString(string: String): InputSource = fromReader(new StringReader(string)) + def fromReader(reader: Reader): InputSource = new InputSource(reader) } /** @@ -68,12 +68,14 @@ object XML extends XMLLoader[Elem] { val encoding: String = "UTF-8" /** Returns an XMLLoader whose load* methods will use the supplied SAXParser. */ - def withSAXParser(p: SAXParser): XMLLoader[Elem] = - new XMLLoader[Elem] { override val parser: SAXParser = p } + def withSAXParser(p: SAXParser): XMLLoader[Elem] = new XMLLoader[Elem] { + override val parser: SAXParser = p + } /** Returns an XMLLoader whose load* methods will use the supplied XMLReader. */ - def withXMLReader(r: XMLReader): XMLLoader[Elem] = - new XMLLoader[Elem] { override val reader: XMLReader = r } + def withXMLReader(r: XMLReader): XMLLoader[Elem] = new XMLLoader[Elem] { + override val reader: XMLReader = r + } /** * Saves a node to a file with given filename using given encoding @@ -94,15 +96,15 @@ object XML extends XMLLoader[Elem] { node: Node, enc: String = "UTF-8", xmlDecl: Boolean = false, - doctype: dtd.DocType = null): Unit = - { - val fos: FileOutputStream = new FileOutputStream(filename) - val w: Writer = Channels.newWriter(fos.getChannel, enc) + doctype: dtd.DocType = null + ): Unit = { + val fos: FileOutputStream = new FileOutputStream(filename) + val w: Writer = Channels.newWriter(fos.getChannel, enc) - ultimately(w.close())( - write(w, node, enc, xmlDecl, doctype) - ) - } + Exception.ultimately(w.close())( + write(w, node, enc, xmlDecl, doctype) + ) + } /** * Writes the given node using writer, optionally with xml decl and doctype. 
@@ -114,7 +116,14 @@ object XML extends XMLLoader[Elem] { * @param xmlDecl if true, write xml declaration * @param doctype if not null, write doctype declaration */ - final def write(w: java.io.Writer, node: Node, enc: String, xmlDecl: Boolean, doctype: dtd.DocType, minimizeTags: MinimizeMode.Value = MinimizeMode.Default): Unit = { + final def write( + w: Writer, + node: Node, + enc: String, + xmlDecl: Boolean, + doctype: dtd.DocType, + minimizeTags: MinimizeMode.Value = MinimizeMode.Default + ): Unit = { /* TODO: optimize by giving writer parameter to toXML*/ if (xmlDecl) w.write("<?xml version='1.0' encoding='" + enc + "'?>\n") if (doctype ne null) w.write(doctype.toString + "\n") diff --git a/shared/src/main/scala/scala/xml/factory/XMLLoader.scala b/shared/src/main/scala/scala/xml/factory/XMLLoader.scala index acaf6353..0aa36e69 100644 --- a/shared/src/main/scala/scala/xml/factory/XMLLoader.scala +++ b/shared/src/main/scala/scala/xml/factory/XMLLoader.scala @@ -14,9 +14,9 @@ package scala package xml package factory -import org.xml.sax.{SAXNotRecognizedException, SAXNotSupportedException, XMLReader} +import org.xml.sax.XMLReader +import scala.xml.Source import javax.xml.parsers.SAXParserFactory -import parsing.{FactoryAdapter, NoBindingFactoryAdapter} import java.io.{File, FileDescriptor, InputStream, Reader} import java.net.URL @@ -25,9 +25,6 @@ import java.net.URL * created by "def parser" or the reader created by "def reader". */ trait XMLLoader[T <: Node] { - import scala.xml.Source._ - def adapter: FactoryAdapter = new NoBindingFactoryAdapter() - private def setSafeDefaults(parserFactory: SAXParserFactory): Unit = { parserFactory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true) parserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false) @@ -54,69 +51,49 @@ trait XMLLoader[T <: Node] { def reader: XMLReader = parser.getXMLReader /** - * Loads XML from the given InputSource, using the supplied parser. 
+ * Loads XML from the given InputSource, using the supplied parser or reader. * The methods available in scala.xml.XML use the XML parser in the JDK * (unless another parser is present on the classpath). */ - def loadXML(inputSource: InputSource, parser: SAXParser): T = loadXML(inputSource, parser.getXMLReader) - - def loadXMLNodes(inputSource: InputSource, parser: SAXParser): Seq[Node] = loadXMLNodes(inputSource, parser.getXMLReader) - - private def loadXML(inputSource: InputSource, reader: XMLReader): T = { - val result: FactoryAdapter = parse(inputSource, reader) - result.rootElem.asInstanceOf[T] - } - - private def loadXMLNodes(inputSource: InputSource, reader: XMLReader): Seq[Node] = { - val result: FactoryAdapter = parse(inputSource, reader) - result.prolog ++ (result.rootElem :: result.epilogue) - } - - private def parse(inputSource: InputSource, xmlReader: XMLReader): FactoryAdapter = { - if (inputSource == null) throw new IllegalArgumentException("InputSource cannot be null") - - val result: FactoryAdapter = adapter - - xmlReader.setContentHandler(result) - xmlReader.setDTDHandler(result) - /* Do not overwrite pre-configured EntityResolver. */ - if (xmlReader.getEntityResolver == null) xmlReader.setEntityResolver(result) - /* Do not overwrite pre-configured ErrorHandler. */ - if (xmlReader.getErrorHandler == null) xmlReader.setErrorHandler(result) - - try { - xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", result) - } catch { - case _: SAXNotRecognizedException => - case _: SAXNotSupportedException => - } - - result.scopeStack = TopScope :: result.scopeStack - xmlReader.parse(inputSource) - result.scopeStack = result.scopeStack.tail - - result - } - - /** Loads XML. 
*/ - def load(inputSource: InputSource): T = loadXML(inputSource, reader) - def loadFile(fileName: String): T = load(fromFile(fileName)) - def loadFile(file: File): T = load(fromFile(file)) - def load(url: URL): T = load(fromUrl(url)) - def load(sysId: String): T = load(fromSysId(sysId)) - def loadFile(fileDescriptor: FileDescriptor): T = load(fromFile(fileDescriptor)) - def load(inputStream: InputStream): T = load(fromInputStream(inputStream)) - def load(reader: Reader): T = load(fromReader(reader)) - def loadString(string: String): T = load(fromString(string)) + private def getDocElem(document: Document): T = document.docElem.asInstanceOf[T] + + def loadXML(inputSource: InputSource, parser: SAXParser): T = getDocElem(loadDocument(inputSource, parser)) + def loadXMLNodes(inputSource: InputSource, parser: SAXParser): Seq[Node] = loadDocument(inputSource, parser).children + + private def loadDocument(inputSource: InputSource, parser: SAXParser): Document = adapter.loadDocument(inputSource, parser) + private def loadDocument(inputSource: InputSource, reader: XMLReader): Document = adapter.loadDocument(inputSource, reader) + def adapter: parsing.FactoryAdapter = new parsing.NoBindingFactoryAdapter() + + /** Loads XML Document. 
*/ + def loadDocument(source: InputSource): Document = loadDocument(source, reader) + def loadFileDocument(fileName: String): Document = loadDocument(Source.fromFile(fileName)) + def loadFileDocument(file: File): Document = loadDocument(Source.fromFile(file)) + def loadDocument(url: URL): Document = loadDocument(Source.fromUrl(url)) + def loadDocument(sysId: String): Document = loadDocument(Source.fromSysId(sysId)) + def loadFileDocument(fileDescriptor: FileDescriptor): Document = loadDocument(Source.fromFile(fileDescriptor)) + def loadDocument(inputStream: InputStream): Document = loadDocument(Source.fromInputStream(inputStream)) + def loadDocument(reader: Reader): Document = loadDocument(Source.fromReader(reader)) + def loadStringDocument(string: String): Document = loadDocument(Source.fromString(string)) + + /** Loads XML element. */ + def load(inputSource: InputSource): T = getDocElem(loadDocument(inputSource)) + def loadFile(fileName: String): T = getDocElem(loadFileDocument(fileName)) + def loadFile(file: File): T = getDocElem(loadFileDocument(file)) + def load(url: URL): T = getDocElem(loadDocument(url)) + def load(sysId: String): T = getDocElem(loadDocument(sysId)) + def loadFile(fileDescriptor: FileDescriptor): T = getDocElem(loadFileDocument(fileDescriptor)) + def load(inputStream: InputStream): T = getDocElem(loadDocument(inputStream)) + def load(reader: Reader): T = getDocElem(loadDocument(reader)) + def loadString(string: String): T = getDocElem(loadStringDocument(string)) /** Load XML nodes, including comments and processing instructions that precede and follow the root element. 
*/ - def loadNodes(inputSource: InputSource): Seq[Node] = loadXMLNodes(inputSource, reader) - def loadFileNodes(fileName: String): Seq[Node] = loadNodes(fromFile(fileName)) - def loadFileNodes(file: File): Seq[Node] = loadNodes(fromFile(file)) - def loadNodes(url: URL): Seq[Node] = loadNodes(fromUrl(url)) - def loadNodes(sysId: String): Seq[Node] = loadNodes(fromSysId(sysId)) - def loadFileNodes(fileDescriptor: FileDescriptor): Seq[Node] = loadNodes(fromFile(fileDescriptor)) - def loadNodes(inputStream: InputStream): Seq[Node] = loadNodes(fromInputStream(inputStream)) - def loadNodes(reader: Reader): Seq[Node] = loadNodes(fromReader(reader)) - def loadStringNodes(string: String): Seq[Node] = loadNodes(fromString(string)) + def loadNodes(inputSource: InputSource): Seq[Node] = loadDocument(inputSource).children + def loadFileNodes(fileName: String): Seq[Node] = loadFileDocument(fileName).children + def loadFileNodes(file: File): Seq[Node] = loadFileDocument(file).children + def loadNodes(url: URL): Seq[Node] = loadDocument(url).children + def loadNodes(sysId: String): Seq[Node] = loadDocument(sysId).children + def loadFileNodes(fileDescriptor: FileDescriptor): Seq[Node] = loadFileDocument(fileDescriptor).children + def loadNodes(inputStream: InputStream): Seq[Node] = loadDocument(inputStream).children + def loadNodes(reader: Reader): Seq[Node] = loadDocument(reader).children + def loadStringNodes(string: String): Seq[Node] = loadStringDocument(string).children } diff --git a/shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala b/shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala index d3103346..536d269f 100644 --- a/shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala +++ b/shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala @@ -15,7 +15,7 @@ package xml package parsing import scala.collection.Seq -import org.xml.sax.Attributes +import org.xml.sax.{Attributes, SAXNotRecognizedException, SAXNotSupportedException} import 
org.xml.sax.ext.DefaultHandler2 // can be mixed into FactoryAdapter if desired @@ -37,9 +37,15 @@ trait ConsoleErrorHandler extends DefaultHandler2 { /** * SAX adapter class, for use with Java SAX parser. Keeps track of * namespace bindings, without relying on namespace handling of the - * underlying SAX parser. + * underlying SAX parser (but processing the parser's namespace-related events if it is namespace-aware). */ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] { + val normalizeWhitespace: Boolean = false + + private var document: Option[Document] = None + + private var prefixMappings: List[(String, String)] = List.empty + var prolog: List[Node] = List.empty var rootElem: Node = _ var epilogue: List[Node] = List.empty @@ -79,6 +85,52 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod var curTag: String = _ var capture: Boolean = false + /** + * Captures text or cdata. + */ + def captureText(): Unit = { + if (capture && buffer.nonEmpty) { + val text: String = buffer.toString + val newNode: Node = if (inCDATA) createPCData(text) else createText(text) + hStack ::= newNode + } + + buffer.clear() + inCDATA = false + } + + /** + * Load XML document from the source using the parser. + */ + def loadDocument(source: InputSource, parser: SAXParser): Document = + loadDocument(source, parser.getXMLReader) + + /** + * Load XML document from the source using the reader. + */ + def loadDocument(source: InputSource, xmlReader: XMLReader): Document = { + if (source == null) throw new IllegalArgumentException("InputSource cannot be null") + + xmlReader.setContentHandler(this) + xmlReader.setDTDHandler(this) + /* Do not overwrite pre-configured EntityResolver. */ + if (xmlReader.getEntityResolver == null) xmlReader.setEntityResolver(this) + /* Do not overwrite pre-configured ErrorHandler. 
*/ + if (xmlReader.getErrorHandler == null) xmlReader.setErrorHandler(this) + + /* Use LexicalHandler if it is supported by the xmlReader. */ + try { + xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", this) + } catch { + case _: SAXNotRecognizedException => + case _: SAXNotSupportedException => + } + + xmlReader.parse(source) + + document.get + } + // abstract methods /** @@ -121,121 +173,100 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod */ def createComment(characters: String): Seq[Comment] - // - // ContentHandler methods - // - - val normalizeWhitespace: Boolean = false + /* ContentHandler methods */ - /** - * Capture characters, possibly normalizing whitespace. - * @param ch - * @param offset - * @param length - */ - override def characters(ch: Array[Char], offset: Int, length: Int): Unit = { - if (!capture) () - // compliant: report every character - else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length) - // normalizing whitespace is not compliant, but useful - else { - var it: Iterator[Char] = ch.slice(offset, offset + length).iterator - while (it.hasNext) { - val c: Char = it.next() - val isSpace: Boolean = c.isWhitespace - buffer append (if (isSpace) ' ' else c) - if (isSpace) - it = it dropWhile (_.isWhitespace) - } - } + override def startDocument(): Unit = { + scopeStack ::= TopScope // TODO remove } - /** - * Start of a CDATA section. 
- */ - override def startCDATA(): Unit = { - captureText() - inCDATA = true + override def endDocument(): Unit = { + // capture the epilogue at the end of the document + epilogue = hStack.init.reverse + + val document = new Document + this.document = Some(document) + document.children = prolog ++ rootElem ++ epilogue + document.docElem = rootElem + document.dtd = null + document.baseURI = null + document.encoding = None + document.standAlone = None + document.version = None + + // Note: resetting to the freshly-created state; needed only if this instance is reused, which we do not do... + hStack = hStack.last :: Nil // TODO List.empty + scopeStack = scopeStack.tail // TODO List.empty + + rootElem = null + prolog = List.empty + epilogue = List.empty + + buffer.clear() + inCDATA = false + capture = false + curTag = null + + attribStack = List.empty + tagStack = List.empty } - /** - * End of a CDATA section. - */ - override def endCDATA(): Unit = captureText() + override def startPrefixMapping(prefix: String, uri: String): Unit = + prefixMappings ::= (prefix, uri) - /* ContentHandler methods */ + override def endPrefixMapping(prefix: String): Unit = () /* Start element. 
*/ override def startElement( uri: String, _localName: String, qname: String, - attributes: Attributes): Unit = - { - captureText() - - // capture the prolog at the start of the root element - if (tagStack.isEmpty) { - prolog = hStack.reverse - hStack = List.empty - } - - tagStack = curTag :: tagStack - curTag = qname - - val localName: String = Utility.splitName(qname)._2 - capture = nodeContainsText(localName) - - hStack = null :: hStack - var m: MetaData = Null - var scpe: NamespaceBinding = - if (scopeStack.isEmpty) TopScope - else scopeStack.head - - for (i <- (0 until attributes.getLength).reverse) { - val qname: String = attributes getQName i - val value: String = attributes getValue i - val (pre: Option[String], key: String) = Utility.splitName(qname) - def nullIfEmpty(s: String): String = if (s == "") null else s - - if (pre.contains("xmlns") || (pre.isEmpty && qname == "xmlns")) { - val arg: String = if (pre.isEmpty) null else key - scpe = NamespaceBinding(arg, nullIfEmpty(value), scpe) - } else - m = Attribute(pre, key, Text(value), m) - } - - // Add namespace bindings for the prefix mappings declared by this element - // (if there are any, the parser is namespace-aware, and no namespace bindings were delivered as attributes). - // All `startPrefixMapping()` events will occur immediately before the corresponding `startElement()` event. - for ((prefix: String, uri: String) <- prefixMappings) - scpe = NamespaceBinding(if (prefix.isEmpty) null else prefix, uri, scpe) - - // Once the `prefixMappings` are processed into `scpe`, the list is emptied out - // so that already-declared namespaces are not re-declared on the nested elements. 
- prefixMappings = List.empty + attributes: Attributes + ): Unit = { + captureText() - scopeStack = scpe :: scopeStack - attribStack = m :: attribStack + // capture the prolog at the start of the root element + if (tagStack.isEmpty) { + prolog = hStack.reverse + hStack = List.empty } - private var prefixMappings: List[(String, String)] = List.empty + tagStack ::= curTag + curTag = qname + + val localName: String = Utility.splitName(qname)._2 + capture = nodeContainsText(localName) + + hStack ::= null + var m: MetaData = Null + var scpe: NamespaceBinding = + if (scopeStack.isEmpty) TopScope + else scopeStack.head + + for (i <- (0 until attributes.getLength).reverse) { + val qname: String = attributes getQName i + val value: String = attributes getValue i + val (pre: Option[String], key: String) = Utility.splitName(qname) + def nullIfEmpty(s: String): String = if (s == "") null else s + + if (pre.contains("xmlns") || (pre.isEmpty && qname == "xmlns")) { + val arg: String = if (pre.isEmpty) null else key + scpe = NamespaceBinding(arg, nullIfEmpty(value), scpe) + } else + m = Attribute(pre, key, Text(value), m) + } - override def startPrefixMapping(prefix: String, uri: String): Unit = - prefixMappings = (prefix, uri) :: prefixMappings + // Add namespace bindings for the prefix mappings declared by this element + // (if there are any, the parser is namespace-aware, and no namespace bindings were delivered as attributes). + // All `startPrefixMapping()` events will occur immediately before the corresponding `startElement()` event. + for ((prefix: String, uri: String) <- prefixMappings) + scpe = NamespaceBinding(if (prefix.isEmpty) null else prefix, uri, scpe) - /** - * Captures text or cdata. 
- */ - def captureText(): Unit = { - if (capture && buffer.nonEmpty) { - val text: String = buffer.toString - val newNode: Node = if (inCDATA) createPCData(text) else createText(text) - hStack = newNode :: hStack - } + // Once the `prefixMappings` are processed into `scpe`, the list is emptied out + // so that already-declared namespaces are not re-declared on the nested elements. + prefixMappings = List.empty - buffer.clear() - inCDATA = false + scopeStack ::= scpe + attribStack ::= m } /** @@ -262,18 +293,35 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod // create element rootElem = createNode(pre.orNull, localName, metaData, scp, v) - hStack = rootElem :: hStack + hStack ::= rootElem curTag = tagStack.head tagStack = tagStack.tail capture = curTag != null && nodeContainsText(curTag) // root level } - override def endDocument(): Unit = { - // capture the epilogue at the end of the document - epilogue = hStack.init.reverse - hStack = hStack.last :: Nil + /** + * Capture characters, possibly normalizing whitespace. + * + * @param ch + * @param offset + * @param length + */ + override def characters(ch: Array[Char], offset: Int, length: Int): Unit = { + if (!capture) () + // compliant: report every character + else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length) + // normalizing whitespace is not compliant, but useful + else { + var it: Iterator[Char] = ch.slice(offset, offset + length).iterator + while (it.hasNext) { + val c: Char = it.next() + val isSpace: Boolean = c.isWhitespace + buffer append (if (isSpace) ' ' else c) + if (isSpace) + it = it dropWhile (_.isWhitespace) + } + } } - /** * Processing instruction. */ @@ -282,6 +330,19 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod hStack = hStack.reverse_:::(createProcInstr(target, data).toList) } + /** + * Start of a CDATA section. 
+ */ + override def startCDATA(): Unit = { + captureText() + inCDATA = true + } + + /** + * End of a CDATA section. + */ + override def endCDATA(): Unit = captureText() + /** * Comment. */