From c938cc35effeafba26489a127b9202fc16b71df0 Mon Sep 17 00:00:00 2001 From: aVadim Date: Sat, 12 Oct 2024 22:19:39 +0300 Subject: [PATCH] fix error: Images inserted to cells in Excel 365 are not detected --- src/FastExcelReader/Excel.php | 152 ++++++++++++++++++++++++++++++++-- src/FastExcelReader/Sheet.php | 109 ++++++++++++++++++++---- tests/FastExcelReaderTest.php | 1 + 3 files changed, 239 insertions(+), 23 deletions(-) diff --git a/src/FastExcelReader/Excel.php b/src/FastExcelReader/Excel.php index ff8d19a..4fcd2f1 100644 --- a/src/FastExcelReader/Excel.php +++ b/src/FastExcelReader/Excel.php @@ -45,6 +45,8 @@ class Excel implements InterfaceBookReader protected array $styles = []; + protected array $valueMetadataImages = []; + /** @var Sheet[] */ protected array $sheets = []; @@ -64,6 +66,9 @@ class Excel implements InterfaceBookReader protected ?array $themeColors = null; + protected int $countImages = -1; // -1 - unknown + + /** * Excel constructor * @@ -176,6 +181,9 @@ protected function _prepare(string $file) if (isset($this->relations['styles'])) { $this->_loadStyles(reset($this->relations['styles'])); } + if (isset($this->relations['sheetMetadata'], $this->relations['richValueRel'])) { + $this->_loadMetadataImages(reset($this->relations['sheetMetadata']), reset($this->relations['richValueRel'])); + } if ($this->sheets) { // set current sheet @@ -332,6 +340,99 @@ protected function _loadStyles(string $innerFile = null) $this->xmlReader->close(); } + /** + * @param string|null $metadataFile + */ + protected function _loadMetadataImages(string $metadataFile, string $richValueRelFile) + { + $this->xmlReader->openZip($metadataFile); + $metadataTypesCount = 0; + $metadataTypes = []; + while ($this->xmlReader->read()) { + if ($this->xmlReader->name === 'metadataType') { + if ($this->xmlReader->nodeType === \XMLReader::ELEMENT) { + $metadataTypesCount++; + if ((string)$this->xmlReader->getAttribute('name') === 'XLRICHVALUE') { + // we need only + $metadataTypes[$metadataTypesCount] = 'XLRICHVALUE'; + } + } + else { + break; + } + } + } + $futureMetadata = []; + while ($this->xmlReader->read()) { + if ($this->xmlReader->name === 'futureMetadata') { + if ($this->xmlReader->nodeType === \XMLReader::ELEMENT && (string)$this->xmlReader->getAttribute('name') === 'XLRICHVALUE') { + while ($this->xmlReader->read()) { + if ($this->xmlReader->name === 'xlrd:rvb') { + $futureMetadata[] = (int)$this->xmlReader->getAttribute('i'); + } + elseif ($this->xmlReader->name === 'futureMetadata' && $this->xmlReader->nodeType === \XMLReader::END_ELEMENT) { + break 2; + } + } + } + elseif ($this->xmlReader->nodeType === \XMLReader::END_ELEMENT) { + break; + } + } + } + + while ($this->xmlReader->read()) { + if ($this->xmlReader->name === 'rc') { + $type = (int)$this->xmlReader->getAttribute('t'); + $value = (int)$this->xmlReader->getAttribute('v'); + if (isset($metadataTypes[$type])) { // metadataType name="XLRICHVALUE" + if (isset($futureMetadata[$value])) { + $this->valueMetadataImages[] = ['i' => $futureMetadata[$value]]; + } + } + } + } + $this->xmlReader->close(); + + $this->xmlReader->openZip($richValueRelFile); + $count = 0; + while ($this->xmlReader->read()) { + if ($this->xmlReader->name === 'rel' && ($rId = $this->xmlReader->getAttribute('r:id'))) { + $this->valueMetadataImages[$count++]['r_id'] = $rId; + } + } + $this->xmlReader->close(); + + $images = []; + $xmlRels = 'xl/richData/_rels/richValueRel.xml.rels'; + $this->xmlReader->openZip($xmlRels); + while ($this->xmlReader->read()) { + if ($this->xmlReader->name === 'Relationship' && $this->xmlReader->nodeType === \XMLReader::ELEMENT && ($Id = (string)$this->xmlReader->getAttribute('Id'))) { + if (substr((string)$this->xmlReader->getAttribute('Type'), -6) === '/image') { + $images[$Id] = (string)$this->xmlReader->getAttribute('Target'); + } + } + } + $this->xmlReader->close(); + + foreach ($this->valueMetadataImages as $index => $metadataImage) { + $rId = $this->valueMetadataImages[$index]['r_id']; + if (isset($images[$rId])) { + $this->valueMetadataImages[$index]['file_name'] = str_replace('../media/', 'xl/media/', $images[$rId]); + } + } + } + + /** + * @param int $vmIndex + * + * @return string|null + */ + public function metadataImage(int $vmIndex): ?string + { + return $this->valueMetadataImages[$vmIndex - 1]['file_name'] ?? null; + } + /** * @param int|null $numFmtId * @param string $pattern @@ -1191,6 +1292,24 @@ public function hasImages(): bool return false; } + /** + * @return array + */ + public function mediaImageFiles(): array + { + $result = []; + if (!empty($this->relations['media'])) { + foreach ($this->relations['media'] as $mediaFile) { + $extension = strtolower(pathinfo($mediaFile, PATHINFO_EXTENSION)); + if (in_array($extension, ['jpg', 'jpeg', 'png', 'bmp', 'ico', 'webp', 'tif', 'tiff', 'gif'])) { + $result[] = basename($mediaFile); + } + } + } + + return $result; + } + /** * Returns the total count of images in the workbook * @@ -1198,14 +1317,16 @@ public function hasImages(): bool */ public function countImages(): int { - $result = 0; - if ($this->hasDrawings()) { - foreach ($this->sheets as $sheet) { - $result += $sheet->countImages(); + if ($this->countImages === -1) { + $this->countImages = 0; + if ($this->hasDrawings() || $this->mediaImageFiles()) { + foreach ($this->sheets as $sheet) { + $this->countImages += $sheet->countImages(); + } } } - return $result; + return $this->countImages; } /** @@ -1216,7 +1337,7 @@ public function countImages(): int public function getImageList(): array { $result = []; - if ($this->hasDrawings()) { + if ($this->countImages()) { foreach ($this->sheets as $sheet) { $result[$sheet->name()] = $sheet->getImageList(); } @@ -1225,6 +1346,25 @@ public function getImageList(): array return $result; } + /** + * @return bool + */ + public function hasExtraImages(): bool + { + $drawingImageFiles = []; + if ($this->hasDrawings()) { + foreach ($this->sheets as $sheet) { + $imageFiles = $sheet->_getDrawingsImageFiles(); + if ($imageFiles) { + $drawingImageFiles += $imageFiles; + } + } + } + $imageFiles = $this->mediaImageFiles(); + + return (count($imageFiles) !== count($drawingImageFiles)); + } + /** * @return array */ diff --git a/src/FastExcelReader/Sheet.php b/src/FastExcelReader/Sheet.php index 34a9543..c5c3982 100644 --- a/src/FastExcelReader/Sheet.php +++ b/src/FastExcelReader/Sheet.php @@ -19,7 +19,7 @@ class Sheet implements InterfaceSheetReader protected string $state = ''; - protected string $path; + protected string $pathInZip; protected ?array $dimension = null; @@ -30,6 +30,8 @@ class Sheet implements InterfaceSheetReader protected array $props = []; + protected array $images = []; + protected ?array $mergedCells = null; /** @var Reader */ @@ -37,8 +39,12 @@ class Sheet implements InterfaceSheetReader protected int $readRowNum = 0; + /** @var mixed */ protected $preReadFunc = null; + + /** @var mixed */ protected $postReadFunc = null; + protected array $readNodeFunc = []; /** @@ -50,6 +56,8 @@ class Sheet implements InterfaceSheetReader protected array $sharedFormulas = []; + protected int $countImages = -1; // -1 - unknown + public function __construct($sheetName, $sheetId, $file, $path, $excel) { @@ -57,7 +65,7 @@ public function __construct($sheetName, $sheetId, $file, $path, $excel) $this->name = $sheetName; $this->sheetId = $sheetId; $this->zipFilename = $file; - $this->path = $path; + $this->pathInZip = $path; $this->area = [ 'row_min' => 1, @@ -257,7 +265,7 @@ public function name(): string */ public function path(): string { - return $this->path; + return $this->pathInZip; } /** @@ -344,7 +352,7 @@ protected function _readHeader() 'range' => '', ]; $xmlReader = $this->getReader(); - $xmlReader->openZip($this->path); + $xmlReader->openZip($this->pathInZip); while ($xmlReader->read()) { if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'dimension') { $range = (string)$xmlReader->getAttribute('ref'); @@ -379,7 +387,7 @@ protected function _readBottom() { if ($this->mergedCells === null) { $xmlReader = $this->getReader(); - $xmlReader->openZip($this->path); + $xmlReader->openZip($this->pathInZip); while ($xmlReader->read()) { if ($xmlReader->nodeType === \XMLReader::END_ELEMENT && $xmlReader->name === 'sheetData') { break; @@ -966,7 +974,7 @@ public function nextRow($columnKeys = [], int $resultMode = null, ?bool $styleId $this->readRowNum = $this->countReadRows = 0; $xmlReader = $this->getReader(); - $xmlReader->openZip($this->path); + $xmlReader->openZip($this->pathInZip); $rowData = $rowTemplate; $rowNum = 0; @@ -1210,11 +1218,26 @@ public function mergedRange(string $cellAddress): ?string */ protected function drawingFilename(): ?string { - $findName = str_replace('/worksheets/sheet', '/drawings/drawing', $this->path); + $findName = str_replace('/worksheets/sheet', '/drawings/drawing', $this->pathInZip); return in_array($findName, $this->excel->innerFileList(), true) ? $findName : null; } + /** + * @param string $cell + * @param string $fileName + * @param string|null $imageName + * + * @return void + */ + protected function addImage(string $cell, string $fileName, ?string $imageName = null) + { + $this->images[$cell] = [ + 'image_name' => $imageName, + 'file_name' => $fileName, + ]; + } + /** * @param $xmlName * @@ -1293,12 +1316,38 @@ protected function extractDrawingInfo($xmlName): array } $result['images'][$addr] = $media; $result['rows'][$media['row']][] = $addr; + $this->addImage($addr, basename($media['target']), $media['name']); } } return $result; } + protected function extractRichValueImages() + { + $xmlReader = $this->getReader(); + $xmlReader->openZip($this->pathInZip); + while ($xmlReader->read()) { + // seek + if ($xmlReader->name === 'sheetData') { + break; + } + } + while ($xmlReader->read()) { + // loop until + if ($xmlReader->name === 'sheetData' && $xmlReader->nodeType === \XMLReader::END_ELEMENT) { + break; + } + if ($xmlReader->name === 'c' && $xmlReader->nodeType === \XMLReader::ELEMENT) { + $vm = (string)$xmlReader->getAttribute('vm'); + $cell = (string)$xmlReader->getAttribute('r'); + if ($vm && ($imageFile = $this->excel->metadataImage($vm))) { + $this->addImage($cell, basename($imageFile)); + } + } + } + } + /** * @return bool */ @@ -1308,9 +1357,29 @@ public function hasDrawings(): bool } /** + * Count images of the sheet + * * @return int */ public function countImages(): int + { + if ($this->countImages === -1) { + $this->_countDrawingsImages(); + if ($this->excel->hasExtraImages()) { + $this->extractRichValueImages(); + } + $this->countImages = count($this->images); + } + + return $this->countImages; + } + + /** + * Count images form drawings of the sheet + * + * @return int + */ + public function _countDrawingsImages(): int { $result = 0; if ($this->hasDrawings()) { @@ -1333,21 +1402,28 @@ public function countImages(): int /** * @return array */ - public function getImageList(): array + public function _getDrawingsImageFiles(): array { $result = []; - if ($this->countImages()) { - foreach ($this->props['drawings']['images'] as $addr => $image) { - $result[$addr] = [ - 'image_name' => $image['name'], - 'file_name' => basename($image['target']), - ]; - } + if ($this->_countDrawingsImages()) { + $result = array_column($this->props['drawings']['images'], 'target'); } return $result; } + /** + * @return array + */ + public function getImageList(): array + { + if ($this->countImages()) { + return $this->images; + } + + return []; + } + /** * @param $row * @@ -1380,8 +1456,7 @@ public function getImageListByRow($row): array public function hasImage(string $cell): bool { if ($this->countImages()) { - - return isset($this->props['drawings']['images'][strtoupper($cell)]); + return isset($this->images[strtoupper($cell)]); } return false; diff --git a/tests/FastExcelReaderTest.php b/tests/FastExcelReaderTest.php index a8702bc..c2518da 100644 --- a/tests/FastExcelReaderTest.php +++ b/tests/FastExcelReaderTest.php @@ -220,6 +220,7 @@ public function testExcelReader03Excel365() $this->assertFalse($excel->sheet()->hasImage('c1')); $this->assertTrue($excel->sheet()->hasImage('c2')); + $this->assertTrue($excel->sheet()->hasImage('C3')); } public function testExcelReader04()