AbstractPart.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. <?php
  2. /**
  3. * This file is part of PHPWord - A pure PHP library for reading and writing
  4. * word processing documents.
  5. *
  6. * PHPWord is free software distributed under the terms of the GNU Lesser
  7. * General Public License version 3 as published by the Free Software Foundation.
  8. *
  9. * For the full copyright and license information, please read the LICENSE
  10. * file that was distributed with this source code. For the full list of
  11. * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
  12. *
  13. * @link https://github.com/PHPOffice/PHPWord
  14. * @copyright 2010-2014 PHPWord contributors
  15. * @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
  16. */
  17. namespace PhpOffice\PhpWord\Reader\Word2007;
  18. use PhpOffice\PhpWord\PhpWord;
  19. use PhpOffice\PhpWord\Shared\XMLReader;
  20. /**
  21. * Abstract part reader
  22. *
  23. * This class is inherited by ODText reader
  24. *
  25. * @since 0.10.0
  26. */
  27. abstract class AbstractPart
  28. {
  29. /**
  30. * Conversion method
  31. *
  32. * @const int
  33. */
  34. const READ_VALUE = 'attributeValue'; // Read attribute value
  35. const READ_EQUAL = 'attributeEquals'; // Read `true` when attribute value equals specified value
  36. const READ_TRUE = 'attributeTrue'; // Read `true` when element exists
  37. const READ_FALSE = 'attributeFalse'; // Read `false` when element exists
  38. const READ_SIZE = 'attributeMultiplyByTwo'; // Read special attribute value for Font::$size
  39. /**
  40. * Document file
  41. *
  42. * @var string
  43. */
  44. protected $docFile;
  45. /**
  46. * XML file
  47. *
  48. * @var string
  49. */
  50. protected $xmlFile;
  51. /**
  52. * Part relationships
  53. *
  54. * @var array
  55. */
  56. protected $rels = array();
  57. /**
  58. * Read part.
  59. */
  60. abstract public function read(PhpWord $phpWord);
  61. /**
  62. * Create new instance
  63. *
  64. * @param string $docFile
  65. * @param string $xmlFile
  66. */
  67. public function __construct($docFile, $xmlFile)
  68. {
  69. $this->docFile = $docFile;
  70. $this->xmlFile = $xmlFile;
  71. }
  72. /**
  73. * Set relationships.
  74. *
  75. * @param array $value
  76. * @return void
  77. */
  78. public function setRels($value)
  79. {
  80. $this->rels = $value;
  81. }
  82. /**
  83. * Read w:p.
  84. *
  85. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  86. * @param \DOMElement $domNode
  87. * @param mixed $parent
  88. * @param string $docPart
  89. * @return void
  90. *
  91. * @todo Get font style for preserve text
  92. */
  93. protected function readParagraph(XMLReader $xmlReader, \DOMElement $domNode, $parent, $docPart = 'document')
  94. {
  95. // Paragraph style
  96. $paragraphStyle = null;
  97. $headingMatches = array();
  98. if ($xmlReader->elementExists('w:pPr', $domNode)) {
  99. $paragraphStyle = $this->readParagraphStyle($xmlReader, $domNode);
  100. if (is_array($paragraphStyle) && isset($paragraphStyle['styleName'])) {
  101. preg_match('/Heading(\d)/', $paragraphStyle['styleName'], $headingMatches);
  102. }
  103. }
  104. // PreserveText
  105. if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) {
  106. $ignoreText = false;
  107. $textContent = '';
  108. $fontStyle = $this->readFontStyle($xmlReader, $domNode);
  109. $nodes = $xmlReader->getElements('w:r', $domNode);
  110. foreach ($nodes as $node) {
  111. $instrText = $xmlReader->getValue('w:instrText', $node);
  112. if ($xmlReader->elementExists('w:fldChar', $node)) {
  113. $fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
  114. if ($fldCharType == 'begin') {
  115. $ignoreText = true;
  116. } elseif ($fldCharType == 'end') {
  117. $ignoreText = false;
  118. }
  119. }
  120. if (!is_null($instrText)) {
  121. $textContent .= '{' . $instrText . '}';
  122. } else {
  123. if ($ignoreText === false) {
  124. $textContent .= $xmlReader->getValue('w:t', $node);
  125. }
  126. }
  127. }
  128. $parent->addPreserveText($textContent, $fontStyle, $paragraphStyle);
  129. // List item
  130. } elseif ($xmlReader->elementExists('w:pPr/w:numPr', $domNode)) {
  131. $textContent = '';
  132. $numId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:numId');
  133. $levelId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:ilvl');
  134. $nodes = $xmlReader->getElements('w:r', $domNode);
  135. foreach ($nodes as $node) {
  136. $textContent .= $xmlReader->getValue('w:t', $node);
  137. }
  138. $parent->addListItem($textContent, $levelId, null, "PHPWordList{$numId}", $paragraphStyle);
  139. // Heading
  140. } elseif (!empty($headingMatches)) {
  141. $textContent = '';
  142. $nodes = $xmlReader->getElements('w:r', $domNode);
  143. foreach ($nodes as $node) {
  144. $textContent .= $xmlReader->getValue('w:t', $node);
  145. }
  146. $parent->addTitle($textContent, $headingMatches[1]);
  147. // Text and TextRun
  148. } else {
  149. $runCount = $xmlReader->countElements('w:r', $domNode);
  150. $linkCount = $xmlReader->countElements('w:hyperlink', $domNode);
  151. $runLinkCount = $runCount + $linkCount;
  152. if ($runLinkCount == 0) {
  153. $parent->addTextBreak(null, $paragraphStyle);
  154. } else {
  155. $nodes = $xmlReader->getElements('*', $domNode);
  156. foreach ($nodes as $node) {
  157. $this->readRun(
  158. $xmlReader,
  159. $node,
  160. ($runLinkCount > 1) ? $parent->addTextRun($paragraphStyle) : $parent,
  161. $docPart,
  162. $paragraphStyle
  163. );
  164. }
  165. }
  166. }
  167. }
  168. /**
  169. * Read w:r.
  170. *
  171. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  172. * @param \DOMElement $domNode
  173. * @param mixed $parent
  174. * @param string $docPart
  175. * @param mixed $paragraphStyle
  176. * @return void
  177. *
  178. * @todo Footnote paragraph style
  179. */
  180. protected function readRun(XMLReader $xmlReader, \DOMElement $domNode, $parent, $docPart, $paragraphStyle = null)
  181. {
  182. if (!in_array($domNode->nodeName, array('w:r', 'w:hyperlink'))) {
  183. return;
  184. }
  185. $fontStyle = $this->readFontStyle($xmlReader, $domNode);
  186. // Link
  187. if ($domNode->nodeName == 'w:hyperlink') {
  188. $rId = $xmlReader->getAttribute('r:id', $domNode);
  189. $textContent = $xmlReader->getValue('w:r/w:t', $domNode);
  190. $target = $this->getMediaTarget($docPart, $rId);
  191. if (!is_null($target)) {
  192. $parent->addLink($target, $textContent, $fontStyle, $paragraphStyle);
  193. }
  194. } else {
  195. // Footnote
  196. if ($xmlReader->elementExists('w:footnoteReference', $domNode)) {
  197. $parent->addFootnote();
  198. // Endnote
  199. } elseif ($xmlReader->elementExists('w:endnoteReference', $domNode)) {
  200. $parent->addEndnote();
  201. // Image
  202. } elseif ($xmlReader->elementExists('w:pict', $domNode)) {
  203. $rId = $xmlReader->getAttribute('r:id', $domNode, 'w:pict/v:shape/v:imagedata');
  204. $target = $this->getMediaTarget($docPart, $rId);
  205. if (!is_null($target)) {
  206. $imageSource = "zip://{$this->docFile}#{$target}";
  207. $parent->addImage($imageSource);
  208. }
  209. // Object
  210. } elseif ($xmlReader->elementExists('w:object', $domNode)) {
  211. $rId = $xmlReader->getAttribute('r:id', $domNode, 'w:object/o:OLEObject');
  212. // $rIdIcon = $xmlReader->getAttribute('r:id', $domNode, 'w:object/v:shape/v:imagedata');
  213. $target = $this->getMediaTarget($docPart, $rId);
  214. if (!is_null($target)) {
  215. $textContent = "<Object: {$target}>";
  216. $parent->addText($textContent, $fontStyle, $paragraphStyle);
  217. }
  218. // TextRun
  219. } else {
  220. $textContent = $xmlReader->getValue('w:t', $domNode);
  221. $parent->addText($textContent, $fontStyle, $paragraphStyle);
  222. }
  223. }
  224. }
  225. /**
  226. * Read w:tbl.
  227. *
  228. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  229. * @param \DOMElement $domNode
  230. * @param mixed $parent
  231. * @param string $docPart
  232. * @return void
  233. */
  234. protected function readTable(XMLReader $xmlReader, \DOMElement $domNode, $parent, $docPart = 'document')
  235. {
  236. // Table style
  237. $tblStyle = null;
  238. if ($xmlReader->elementExists('w:tblPr', $domNode)) {
  239. $tblStyle = $this->readTableStyle($xmlReader, $domNode);
  240. }
  241. /** @var \PhpOffice\PhpWord\Element\Table $table Type hint */
  242. $table = $parent->addTable($tblStyle);
  243. $tblNodes = $xmlReader->getElements('*', $domNode);
  244. foreach ($tblNodes as $tblNode) {
  245. if ($tblNode->nodeName == 'w:tblGrid') { // Column
  246. // @todo Do something with table columns
  247. } elseif ($tblNode->nodeName == 'w:tr') { // Row
  248. $rowHeight = $xmlReader->getAttribute('w:val', $tblNode, 'w:trPr/w:trHeight');
  249. $rowHRule = $xmlReader->getAttribute('w:hRule', $tblNode, 'w:trPr/w:trHeight');
  250. $rowHRule = $rowHRule == 'exact' ? true : false;
  251. $rowStyle = array(
  252. 'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tblNode),
  253. 'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tblNode),
  254. 'exactHeight' => $rowHRule,
  255. );
  256. $row = $table->addRow($rowHeight, $rowStyle);
  257. $rowNodes = $xmlReader->getElements('*', $tblNode);
  258. foreach ($rowNodes as $rowNode) {
  259. if ($rowNode->nodeName == 'w:trPr') { // Row style
  260. // @todo Do something with row style
  261. } elseif ($rowNode->nodeName == 'w:tc') { // Cell
  262. $cellWidth = $xmlReader->getAttribute('w:w', $rowNode, 'w:tcPr/w:tcW');
  263. $cellStyle = null;
  264. $cellStyleNode = $xmlReader->getElement('w:tcPr', $rowNode);
  265. if (!is_null($cellStyleNode)) {
  266. $cellStyle = $this->readCellStyle($xmlReader, $cellStyleNode);
  267. }
  268. $cell = $row->addCell($cellWidth, $cellStyle);
  269. $cellNodes = $xmlReader->getElements('*', $rowNode);
  270. foreach ($cellNodes as $cellNode) {
  271. if ($cellNode->nodeName == 'w:p') { // Paragraph
  272. $this->readParagraph($xmlReader, $cellNode, $cell, $docPart);
  273. }
  274. }
  275. }
  276. }
  277. }
  278. }
  279. }
  280. /**
  281. * Read w:pPr.
  282. *
  283. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  284. * @param \DOMElement $domNode
  285. * @return array|null
  286. */
  287. protected function readParagraphStyle(XMLReader $xmlReader, \DOMElement $domNode)
  288. {
  289. if (!$xmlReader->elementExists('w:pPr', $domNode)) {
  290. return null;
  291. }
  292. $styleNode = $xmlReader->getElement('w:pPr', $domNode);
  293. $styleDefs = array(
  294. 'styleName' => array(self::READ_VALUE, 'w:pStyle'),
  295. 'align' => array(self::READ_VALUE, 'w:jc'),
  296. 'basedOn' => array(self::READ_VALUE, 'w:basedOn'),
  297. 'next' => array(self::READ_VALUE, 'w:next'),
  298. 'indent' => array(self::READ_VALUE, 'w:ind', 'w:left'),
  299. 'hanging' => array(self::READ_VALUE, 'w:ind', 'w:hanging'),
  300. 'spaceAfter' => array(self::READ_VALUE, 'w:spacing', 'w:after'),
  301. 'spaceBefore' => array(self::READ_VALUE, 'w:spacing', 'w:before'),
  302. 'widowControl' => array(self::READ_FALSE, 'w:widowControl'),
  303. 'keepNext' => array(self::READ_TRUE, 'w:keepNext'),
  304. 'keepLines' => array(self::READ_TRUE, 'w:keepLines'),
  305. 'pageBreakBefore' => array(self::READ_TRUE, 'w:pageBreakBefore'),
  306. );
  307. return $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
  308. }
  309. /**
  310. * Read w:rPr
  311. *
  312. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  313. * @param \DOMElement $domNode
  314. * @return array|null
  315. */
  316. protected function readFontStyle(XMLReader $xmlReader, \DOMElement $domNode)
  317. {
  318. if (is_null($domNode)) {
  319. return null;
  320. }
  321. // Hyperlink has an extra w:r child
  322. if ($domNode->nodeName == 'w:hyperlink') {
  323. $domNode = $xmlReader->getElement('w:r', $domNode);
  324. }
  325. if (!$xmlReader->elementExists('w:rPr', $domNode)) {
  326. return null;
  327. }
  328. $styleNode = $xmlReader->getElement('w:rPr', $domNode);
  329. $styleDefs = array(
  330. 'styleName' => array(self::READ_VALUE, 'w:rStyle'),
  331. 'name' => array(self::READ_VALUE, 'w:rFonts', 'w:ascii'),
  332. 'hint' => array(self::READ_VALUE, 'w:rFonts', 'w:hint'),
  333. 'size' => array(self::READ_SIZE, 'w:sz'),
  334. 'color' => array(self::READ_VALUE, 'w:color'),
  335. 'underline' => array(self::READ_VALUE, 'w:u'),
  336. 'bold' => array(self::READ_TRUE, 'w:b'),
  337. 'italic' => array(self::READ_TRUE, 'w:i'),
  338. 'strikethrough' => array(self::READ_TRUE, 'w:strike'),
  339. 'doubleStrikethrough' => array(self::READ_TRUE, 'w:dstrike'),
  340. 'smallCaps' => array(self::READ_TRUE, 'w:smallCaps'),
  341. 'allCaps' => array(self::READ_TRUE, 'w:caps'),
  342. 'superScript' => array(self::READ_EQUAL, 'w:vertAlign', 'w:val', 'superscript'),
  343. 'subScript' => array(self::READ_EQUAL, 'w:vertAlign', 'w:val', 'subscript'),
  344. 'fgColor' => array(self::READ_VALUE, 'w:highlight'),
  345. 'rtl' => array(self::READ_TRUE, 'w:rtl'),
  346. );
  347. return $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
  348. }
  349. /**
  350. * Read w:tblPr
  351. *
  352. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  353. * @param \DOMElement $domNode
  354. * @return string|array|null
  355. * @todo Capture w:tblStylePr w:type="firstRow"
  356. */
  357. protected function readTableStyle(XMLReader $xmlReader, \DOMElement $domNode)
  358. {
  359. $style = null;
  360. $margins = array('top', 'left', 'bottom', 'right');
  361. $borders = $margins + array('insideH', 'insideV');
  362. if ($xmlReader->elementExists('w:tblPr', $domNode)) {
  363. if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) {
  364. $style = $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle');
  365. } else {
  366. $styleNode = $xmlReader->getElement('w:tblPr', $domNode);
  367. $styleDefs = array();
  368. // $styleDefs['styleName'] = array(self::READ_VALUE, 'w:tblStyle');
  369. foreach ($margins as $side) {
  370. $ucfSide = ucfirst($side);
  371. $styleDefs["cellMargin$ucfSide"] = array(self::READ_VALUE, "w:tblCellMar/w:$side", 'w:w');
  372. }
  373. foreach ($borders as $side) {
  374. $ucfSide = ucfirst($side);
  375. $styleDefs["border{$ucfSide}Size"] = array(self::READ_VALUE, "w:tblBorders/w:$side", 'w:sz');
  376. $styleDefs["border{$ucfSide}Color"] = array(self::READ_VALUE, "w:tblBorders/w:$side", 'w:color');
  377. }
  378. $style = $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
  379. }
  380. }
  381. return $style;
  382. }
  383. /**
  384. * Read w:tcPr
  385. *
  386. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  387. * @param \DOMElement $domNode
  388. * @return array
  389. */
  390. private function readCellStyle(XMLReader $xmlReader, \DOMElement $domNode)
  391. {
  392. $styleDefs = array(
  393. 'valign' => array(self::READ_VALUE, 'w:vAlign'),
  394. 'textDirection' => array(self::READ_VALUE, 'w:textDirection'),
  395. 'gridSpan' => array(self::READ_VALUE, 'w:gridSpan'),
  396. 'vMerge' => array(self::READ_VALUE, 'w:vMerge'),
  397. 'bgColor' => array(self::READ_VALUE, 'w:shd/w:fill'),
  398. );
  399. return $this->readStyleDefs($xmlReader, $domNode, $styleDefs);
  400. }
  401. /**
  402. * Read style definition
  403. *
  404. * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
  405. * @param \DOMElement $parentNode
  406. * @param array $styleDefs
  407. * @ignoreScrutinizerPatch
  408. * @return array
  409. */
  410. protected function readStyleDefs(XMLReader $xmlReader, \DOMElement $parentNode = null, $styleDefs = array())
  411. {
  412. $styles = array();
  413. foreach ($styleDefs as $styleProp => $styleVal) {
  414. @list($method, $element, $attribute, $expected) = $styleVal;
  415. if ($xmlReader->elementExists($element, $parentNode)) {
  416. $node = $xmlReader->getElement($element, $parentNode);
  417. // Use w:val as default if no attribute assigned
  418. $attribute = ($attribute === null) ? 'w:val' : $attribute;
  419. $attributeValue = $xmlReader->getAttribute($attribute, $node);
  420. $styleValue = $this->readStyleDef($method, $attributeValue, $expected);
  421. if ($styleValue !== null) {
  422. $styles[$styleProp] = $styleValue;
  423. }
  424. }
  425. }
  426. return $styles;
  427. }
  428. /**
  429. * Return style definition based on conversion method
  430. *
  431. * @param string $method
  432. * @ignoreScrutinizerPatch
  433. * @param mixed $attributeValue
  434. * @param mixed $expected
  435. * @return mixed
  436. */
  437. private function readStyleDef($method, $attributeValue, $expected)
  438. {
  439. $style = $attributeValue;
  440. if ($method == self::READ_SIZE) {
  441. $style = $attributeValue / 2;
  442. } elseif ($method == self::READ_TRUE) {
  443. $style = true;
  444. } elseif ($method == self::READ_FALSE) {
  445. $style = false;
  446. } elseif ($method == self::READ_EQUAL) {
  447. $style = $attributeValue == $expected;
  448. }
  449. return $style;
  450. }
  451. /**
  452. * Returns the target of image, object, or link as stored in ::readMainRels
  453. *
  454. * @param string $docPart
  455. * @param string $rId
  456. * @return string|null
  457. */
  458. private function getMediaTarget($docPart, $rId)
  459. {
  460. $target = null;
  461. if (isset($this->rels[$docPart]) && isset($this->rels[$docPart][$rId])) {
  462. $target = $this->rels[$docPart][$rId]['target'];
  463. }
  464. return $target;
  465. }
  466. }