Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ODText Reader : Improve Section Reader #2507

Merged
merged 1 commit into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes/1.x/1.2.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
- Added Support for Language, both for document overall and individual text elements
- Template : Set a checkbox by [@nxtpge](https://github.com/nxtpge) in [#2509](https://github.com/PHPOffice/PHPWord/pull/2509)
- ODText / RTF / Word2007 Writer : Add field FILENAME by [@milkyway-git](https://github.com/milkyway-git) in [#2510](https://github.com/PHPOffice/PHPWord/pull/2510)
- ODText Reader : Improve Section Reader by [@oleibman](https://github.com/oleibman) in [#2507](https://github.com/PHPOffice/PHPWord/pull/2507)

### Bug fixes

Expand Down
5 changes: 0 additions & 5 deletions phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,6 @@ parameters:
count: 1
path: src/PhpWord/Reader/HTML.php

-
message: "#^Call to an undefined method DOMNode\\:\\:getAttribute\\(\\)\\.$#"
count: 2
path: src/PhpWord/Reader/ODText/Content.php

-
message: "#^Offset 'textNodes' on array\\{changed\\: PhpOffice\\\\PhpWord\\\\Element\\\\TrackChange, textNodes\\: DOMNodeList\\<DOMElement\\>\\} in isset\\(\\) always exists and is not nullable\\.$#"
count: 1
Expand Down
12 changes: 12 additions & 0 deletions src/PhpWord/Element/TextRun.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,16 @@ public function setParagraphStyle($style = null)

return $this->paragraphStyle;
}

/**
 * Build the plain text of this run from its direct Text children.
 *
 * Child elements that are not Text instances contribute nothing to
 * the result.
 */
public function getText(): string
{
    $pieces = [];
    foreach ($this->getElements() as $child) {
        if (!($child instanceof Text)) {
            continue;
        }
        $pieces[] = $child->getText();
    }

    return implode('', $pieces);
}
}
81 changes: 75 additions & 6 deletions src/PhpWord/Reader/ODText/Content.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
namespace PhpOffice\PhpWord\Reader\ODText;

use DateTime;
use DOMElement;
use DOMNodeList;
use PhpOffice\Math\Reader\MathML;
use PhpOffice\PhpWord\Element\Section;
use PhpOffice\PhpWord\Element\TrackChange;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Shared\XMLReader;
Expand All @@ -30,6 +33,9 @@
*/
class Content extends AbstractPart
{
/** @var ?Section */
private $section;

/**
* Read content.xml.
*/
Expand All @@ -41,17 +47,28 @@ public function read(PhpWord $phpWord): void
$trackedChanges = [];

$nodes = $xmlReader->getElements('office:body/office:text/*');
$this->section = null;
$this->processNodes($nodes, $xmlReader, $phpWord);
$this->section = null;
}

/** @param DOMNodeList<DOMElement> $nodes */
public function processNodes(DOMNodeList $nodes, XMLReader $xmlReader, PhpWord $phpWord): void
{
if ($nodes->length > 0) {
$section = $phpWord->addSection();
foreach ($nodes as $node) {
// $styleName = $xmlReader->getAttribute('text:style-name', $node);
switch ($node->nodeName) {
case 'text:h': // Heading
$depth = $xmlReader->getAttribute('text:outline-level', $node);
$section->addTitle($node->nodeValue, $depth);
$this->getSection($phpWord)->addTitle($node->nodeValue, $depth);

break;
case 'text:p': // Paragraph
$styleName = $xmlReader->getAttribute('text:style-name', $node);
if (substr($styleName, 0, 2) === 'SB') {
break;
}
$element = $xmlReader->getElement('draw:frame/draw:object', $node);
if ($element) {
$mathFile = str_replace('./', '', $element->getAttribute('xlink:href')) . '/content.xml';
Expand All @@ -65,11 +82,13 @@ public function read(PhpWord $phpWord): void
$reader = new MathML();
$math = $reader->read($mathXML);

$section->addFormula($math);
$this->getSection($phpWord)->addFormula($math);
}
}
} else {
$children = $node->childNodes;
$spans = false;
/** @var DOMElement $child */
foreach ($children as $child) {
switch ($child->nodeName) {
case 'text:change-start':
Expand All @@ -89,16 +108,49 @@ public function read(PhpWord $phpWord): void
$changed = $trackedChanges[$changeId];
}

break;
case 'text:span':
$spans = true;

break;
}
}

$element = $section->addText($node->nodeValue);
if ($spans) {
$element = $this->getSection($phpWord)->addTextRun();
foreach ($children as $child) {
switch ($child->nodeName) {
case 'text:span':
/** @var DOMElement $child2 */
foreach ($child->childNodes as $child2) {
switch ($child2->nodeName) {
case '#text':
$element->addText($child2->nodeValue);

break;
case 'text:tab':
$element->addText("\t");

break;
case 'text:s':
$spaces = (int) $child2->getAttribute('text:c') ?: 1;
$element->addText(str_repeat(' ', $spaces));

break;
}
}

break;
}
}
} else {
$element = $this->getSection($phpWord)->addText($node->nodeValue);
}
if (isset($changed) && is_array($changed)) {
$element->setTrackChange($changed['changed']);
if (isset($changed['textNodes'])) {
foreach ($changed['textNodes'] as $changedNode) {
$element = $section->addText($changedNode->nodeValue);
$element = $this->getSection($phpWord)->addText($changedNode->nodeValue);
$element->setTrackChange($changed['changed']);
}
}
Expand All @@ -110,7 +162,7 @@ public function read(PhpWord $phpWord): void
$listItems = $xmlReader->getElements('text:list-item/text:p', $node);
foreach ($listItems as $listItem) {
// $listStyleName = $xmlReader->getAttribute('text:style-name', $listItem);
$section->addListItem($listItem->nodeValue, 0);
$this->getSection($phpWord)->addListItem($listItem->nodeValue, 0);
}

break;
Expand All @@ -129,9 +181,26 @@ public function read(PhpWord $phpWord): void
$trackedChanges[$changedRegion->getAttribute('text:id')] = ['changed' => $changed, 'textNodes' => $textNodes];
}

break;
case 'text:section': // Section
// $sectionStyleName = $xmlReader->getAttribute('text:style-name', $node);
$this->section = $phpWord->addSection();
$children = $node->childNodes;
$this->processNodes($children, $xmlReader, $phpWord);

break;
}
}
}
}

/**
 * Return the section that new elements should be appended to.
 *
 * When no section is currently tracked, one is added to the document,
 * remembered on this reader, and returned.
 */
private function getSection(PhpWord $phpWord): Section
{
    if ($this->section !== null) {
        return $this->section;
    }

    $created = $phpWord->addSection();
    $this->section = $created;

    return $created;
}
}
83 changes: 83 additions & 0 deletions tests/PhpWordTests/Reader/ODText/ODTextSectionTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?php
/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/

namespace PhpOffice\PhpWordTests\Reader\ODText;

use PhpOffice\PhpWord\IOFactory;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Settings;

class ODTextSectionTest extends \PHPUnit\Framework\TestCase
{
    /** @var string Path of the document written by the current test; '' when nothing was written. */
    private $filename = '';

    /** @var bool Output-escaping setting captured before the test ran. */
    private $outputEscapingEnabled = false;

    /**
     * Remember the global output-escaping setting so the test cannot leak
     * a changed value into tests that run afterwards.
     */
    protected function setUp(): void
    {
        $this->outputEscapingEnabled = Settings::isOutputEscapingEnabled();
    }

    /**
     * Restore global settings and remove the temporary document, if any.
     */
    protected function tearDown(): void
    {
        Settings::setOutputEscapingEnabled($this->outputEscapingEnabled);

        if ($this->filename !== '') {
            // The file may be missing when the writer failed; avoid an
            // unlink() warning that would hide the real failure.
            if (is_file($this->filename)) {
                unlink($this->filename);
            }
            $this->filename = '';
        }
    }

    /**
     * Write a document with the ODText writer, read it back, and check
     * that the paragraph texts survive the round trip.
     */
    public function testWriteThenReadSection(): void
    {
        $dir = self::filesDirectory();
        Settings::setOutputEscapingEnabled(true);
        $phpWord = new PhpWord();
        $section = $phpWord->addSection();
        $inputText = ['days', 'monday', 'tuesday'];
        // NOTE(review): the sentence mentions two spaces; confirm the literal
        // really contains a double space so that text:s handling is exercised.
        $inputText[] = "Tab\tthen two spaces then done.";
        foreach ($inputText as $text) {
            $section->addText($text);
        }
        $writer = IOFactory::createWriter($phpWord, 'ODText');
        $this->filename = "$dir/sectiontest.odt";
        $writer->save($this->filename);

        $reader = IOFactory::createReader('ODText');
        $phpWord2 = $reader->load($this->filename);

        self::assertSame($inputText, self::collectText($phpWord2));
    }

    /**
     * Read a fixture document that has no text:section element and check
     * that its paragraphs are still returned.
     */
    public function testReadNoSections(): void
    {
        $dir = self::filesDirectory() . '/documents';
        $inputText = ['days', 'monday', 'tuesday'];

        $reader = IOFactory::createReader('ODText');
        $filename = "$dir/word.2493.nosection.odt";
        $phpWord2 = $reader->load($filename);

        self::assertSame($inputText, self::collectText($phpWord2));
    }

    /**
     * Absolute path of tests/PhpWordTests/_files, independent of the
     * directory PHPUnit was started from.
     */
    private static function filesDirectory(): string
    {
        return dirname(__DIR__, 2) . '/_files';
    }

    /**
     * Gather getText() from every section element that offers it,
     * in document order.
     *
     * @return array<int, mixed>
     */
    private static function collectText(PhpWord $phpWord): array
    {
        $texts = [];
        foreach ($phpWord->getSections() as $section) {
            foreach ($section->getElements() as $element) {
                if (is_object($element) && method_exists($element, 'getText')) {
                    $texts[] = $element->getText();
                }
            }
        }

        return $texts;
    }
}
Binary file not shown.
Loading