diff --git a/docs/changes/1.x/1.2.0.md b/docs/changes/1.x/1.2.0.md index 9900e7b663..30d3b35202 100644 --- a/docs/changes/1.x/1.2.0.md +++ b/docs/changes/1.x/1.2.0.md @@ -31,6 +31,7 @@ - Added Support for Language, both for document overall and individual text elements - Template : Set a checkbox by [@nxtpge](https://github.com/nxtpge) in [#2509](https://github.com/PHPOffice/PHPWord/pull/2509) - ODText / RTF / Word2007 Writer : Add field FILENAME by [@milkyway-git](https://github.com/milkyway-git) in [#2510](https://github.com/PHPOffice/PHPWord/pull/2510) +- ODText Reader : Improve Section Reader by [@oleibman](https://github.com/oleibman) in [#2507](https://github.com/PHPOffice/PHPWord/pull/2507) ### Bug fixes diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index e7918d9174..2e44745b3d 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -165,11 +165,6 @@ parameters: count: 1 path: src/PhpWord/Reader/HTML.php - - - message: "#^Call to an undefined method DOMNode\\:\\:getAttribute\\(\\)\\.$#" - count: 2 - path: src/PhpWord/Reader/ODText/Content.php - - message: "#^Offset 'textNodes' on array\\{changed\\: PhpOffice\\\\PhpWord\\\\Element\\\\TrackChange, textNodes\\: DOMNodeList\\\\} in isset\\(\\) always exists and is not nullable\\.$#" count: 1 diff --git a/src/PhpWord/Element/TextRun.php b/src/PhpWord/Element/TextRun.php index fc8727592a..33c55f6584 100644 --- a/src/PhpWord/Element/TextRun.php +++ b/src/PhpWord/Element/TextRun.php @@ -78,4 +78,16 @@ public function setParagraphStyle($style = null) return $this->paragraphStyle; } + + public function getText(): string + { + $outstr = ''; + foreach ($this->getElements() as $element) { + if ($element instanceof Text) { + $outstr .= $element->getText(); + } + } + + return $outstr; + } } diff --git a/src/PhpWord/Reader/ODText/Content.php b/src/PhpWord/Reader/ODText/Content.php index 45cb0704db..15c76c27b9 100644 --- a/src/PhpWord/Reader/ODText/Content.php +++ 
b/src/PhpWord/Reader/ODText/Content.php @@ -18,7 +18,10 @@ namespace PhpOffice\PhpWord\Reader\ODText; use DateTime; +use DOMElement; +use DOMNodeList; use PhpOffice\Math\Reader\MathML; +use PhpOffice\PhpWord\Element\Section; use PhpOffice\PhpWord\Element\TrackChange; use PhpOffice\PhpWord\PhpWord; use PhpOffice\PhpWord\Shared\XMLReader; @@ -30,6 +33,9 @@ */ class Content extends AbstractPart { + /** @var ?Section Section that receives parsed elements; null until getSection() or a text:section node creates one */ + private $section; + /** * Read content.xml. */ @@ -41,17 +47,28 @@ public function read(PhpWord $phpWord): void $trackedChanges = []; $nodes = $xmlReader->getElements('office:body/office:text/*'); + $this->section = null; + $this->processNodes($nodes, $xmlReader, $phpWord); + $this->section = null; + } + + /** @param DOMNodeList $nodes Nodes to convert; recurses into the children of text:section. NOTE(review): $trackedChanges is initialised in read(), not in this method - confirm tracked changes still resolve here */ + public function processNodes(DOMNodeList $nodes, XMLReader $xmlReader, PhpWord $phpWord): void + { if ($nodes->length > 0) { - $section = $phpWord->addSection(); foreach ($nodes as $node) { // $styleName = $xmlReader->getAttribute('text:style-name', $node); switch ($node->nodeName) { case 'text:h': // Heading $depth = $xmlReader->getAttribute('text:outline-level', $node); - $section->addTitle($node->nodeValue, $depth); + $this->getSection($phpWord)->addTitle($node->nodeValue, $depth); break; case 'text:p': // Paragraph + $styleName = $xmlReader->getAttribute('text:style-name', $node); + if (substr($styleName, 0, 2) === 'SB') { + break; + } $element = $xmlReader->getElement('draw:frame/draw:object', $node); if ($element) { $mathFile = str_replace('./', '', $element->getAttribute('xlink:href')) . 
'/content.xml'; @@ -65,11 +82,13 @@ public function read(PhpWord $phpWord): void $reader = new MathML(); $math = $reader->read($mathXML); - $section->addFormula($math); + $this->getSection($phpWord)->addFormula($math); } } } else { $children = $node->childNodes; + $spans = false; + /** @var DOMElement $child */ foreach ($children as $child) { switch ($child->nodeName) { case 'text:change-start': @@ -89,16 +108,49 @@ public function read(PhpWord $phpWord): void $changed = $trackedChanges[$changeId]; } + break; + case 'text:span': + $spans = true; + break; } } - $element = $section->addText($node->nodeValue); + if ($spans) { + $element = $this->getSection($phpWord)->addTextRun(); + foreach ($children as $child) { + switch ($child->nodeName) { + case 'text:span': + /** @var DOMElement $child2 Typed so getAttribute() resolves below; text nodes pass through this loop too. NOTE(review): paragraph text outside any text:span is not copied into the TextRun - confirm intended */ + foreach ($child->childNodes as $child2) { + switch ($child2->nodeName) { + case '#text': + $element->addText($child2->nodeValue); + + break; + case 'text:tab': + $element->addText("\t"); + + break; + case 'text:s': + $spaces = (int) $child2->getAttribute('text:c') ?: 1; + $element->addText(str_repeat(' ', $spaces)); + + break; + } + } + + break; + } + } + } else { + $element = $this->getSection($phpWord)->addText($node->nodeValue); + } if (isset($changed) && is_array($changed)) { $element->setTrackChange($changed['changed']); if (isset($changed['textNodes'])) { foreach ($changed['textNodes'] as $changedNode) { - $element = $section->addText($changedNode->nodeValue); + $element = $this->getSection($phpWord)->addText($changedNode->nodeValue); $element->setTrackChange($changed['changed']); } } @@ -110,7 +162,7 @@ public function read(PhpWord $phpWord): void $listItems = $xmlReader->getElements('text:list-item/text:p', $node); foreach ($listItems as $listItem) { // $listStyleName = $xmlReader->getAttribute('text:style-name', $listItem); - $section->addListItem($listItem->nodeValue, 0); + $this->getSection($phpWord)->addListItem($listItem->nodeValue, 0); } break; @@ -129,9 
+181,26 @@ public function read(PhpWord $phpWord): void $trackedChanges[$changedRegion->getAttribute('text:id')] = ['changed' => $changed, 'textNodes' => $textNodes]; } + break; + case 'text:section': // Section: add a new section, then read the child nodes into it + // $sectionStyleName = $xmlReader->getAttribute('text:style-name', $node); + $this->section = $phpWord->addSection(); + $children = $node->childNodes; + $this->processNodes($children, $xmlReader, $phpWord); + break; } } } } + + private function getSection(PhpWord $phpWord): Section + { + $section = $this->section; + if ($section === null) { + $section = $this->section = $phpWord->addSection(); + } + + return $section; + } } diff --git a/tests/PhpWordTests/Reader/ODText/ODTextSectionTest.php b/tests/PhpWordTests/Reader/ODText/ODTextSectionTest.php new file mode 100644 index 0000000000..0a1a4512db --- /dev/null +++ b/tests/PhpWordTests/Reader/ODText/ODTextSectionTest.php @@ -0,0 +1,83 @@ +filename !== '') { + unlink($this->filename); + $this->filename = ''; + } + } + + public function testWriteThenReadSection(): void + { + $dir = 'tests/PhpWordTests/_files'; + Settings::setOutputEscapingEnabled(true); + $phpWord = new PhpWord(); + $section = $phpWord->addSection(); + $inputText = ['days', 'monday', 'tuesday']; + $inputText[] = "Tab\tthen two spaces then done."; + foreach ($inputText as $text) { + $section->addText($text); + } + $writer = IOFactory::createWriter($phpWord, 'ODText'); + $this->filename = "$dir/sectiontest.odt"; + $writer->save($this->filename); + + $reader = IOFactory::createReader('ODText'); + $phpWord2 = $reader->load($this->filename); + $outputText = []; + foreach ($phpWord2->getSections() as $section) { + foreach ($section->getElements() as $element) { + if (is_object($element) && method_exists($element, 'getText')) { + $outputText[] = $element->getText(); + } + } + } + self::assertSame($inputText, $outputText); + } + + public function testReadNoSections(): void + { + $dir = 'tests/PhpWordTests/_files/documents'; + $inputText = 
['days', 'monday', 'tuesday']; + + $reader = IOFactory::createReader('ODText'); + $filename = "$dir/word.2493.nosection.odt"; + $phpWord2 = $reader->load($filename); + $outputText = []; + foreach ($phpWord2->getSections() as $section) { + foreach ($section->getElements() as $element) { + if (is_object($element) && method_exists($element, 'getText')) { + $outputText[] = $element->getText(); + } + } + } + self::assertSame($inputText, $outputText); + } +} diff --git a/tests/PhpWordTests/_files/documents/word.2493.nosection.odt b/tests/PhpWordTests/_files/documents/word.2493.nosection.odt new file mode 100644 index 0000000000..eb0fa20764 Binary files /dev/null and b/tests/PhpWordTests/_files/documents/word.2493.nosection.odt differ