From 660f74d95716e6f17b9479addf90e4a83666bd42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Sun, 27 Aug 2023 21:46:27 +0200 Subject: [PATCH 1/9] 2.0.13 - add `getDescriptionHtml()` method to `Ebook::class`, which can contain the description with HTML tags if it is available, HTML is sanitized, original description is still available via `getDescription()` method with plain text - Bugfixes --- README.md | 1 + composer.json | 2 +- src/Ebook.php | 19 +++++++++++++ src/Formats/EbookModule.php | 44 +++++++++++++++++++++++++++++++ src/Formats/Epub/EpubMetadata.php | 17 ++++++------ src/Tools/BookMeta.php | 4 +-- tests/EbookTest.php | 7 +++++ tests/EntityTest.php | 4 +-- 8 files changed, 84 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index e89bd77..fac0116 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,7 @@ $ebook->getTitle(); // string $ebook->getAuthors(); // BookAuthor[] (`name`: string, `role`: string) $ebook->getAuthorMain(); // ?BookAuthor => First BookAuthor (`name`: string, `role`: string) $ebook->getDescription(); // ?string +$ebook->getDescriptionHtml(); // ?string $ebook->getCopyright(); // ?string $ebook->getPublisher(); // ?string $ebook->getIdentifiers(); // BookIdentifier[] (`value`: string, `scheme`: string) diff --git a/composer.json b/composer.json index d0a95bd..ab289f4 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "kiwilan/php-ebook", "description": "PHP package to read metadata and extract covers from eBooks (.epub, .cbz, .cbr, .cb7, .cbt, .pdf) and audiobooks (.mp3, .m4a, .m4b, .flac, .ogg).", - "version": "2.0.12", + "version": "2.0.13", "keywords": [ "php", "ebook", diff --git a/src/Ebook.php b/src/Ebook.php index ca47bc1..6d45df1 100755 --- a/src/Ebook.php +++ b/src/Ebook.php @@ -30,6 +30,8 @@ class Ebook protected ?string $description = null; + protected ?string $descriptionHtml = null; + protected ?string $publisher = null; /** @var BookIdentifier[] */ @@ -216,6 +218,7 @@
private function convertEbook(): self $this->authorMain = $ebook->getAuthorMain(); $this->authors = $ebook->getAuthors(); $this->description = $ebook->getDescription(); + $this->descriptionHtml = $ebook->getDescriptionHtml(); $this->publisher = $ebook->getPublisher(); $this->identifiers = $ebook->getIdentifiers(); $this->publishDate = $ebook->getPublishDate(); @@ -305,6 +308,14 @@ public function getDescription(int $limit = null): ?string return $this->description; } + /** + * Description of the book with HTML sanitized. + */ + public function getDescriptionHtml(): ?string + { + return $this->descriptionHtml; + } + /** * Publisher of the book. */ @@ -601,6 +612,13 @@ public function setDescription(?string $description): self return $this; } + public function setDescriptionHtml(?string $descriptionHtml): self + { + $this->descriptionHtml = $descriptionHtml; + + return $this; + } + public function setPublisher(?string $publisher): self { $this->publisher = $publisher; @@ -705,6 +723,7 @@ public function toArray(): array 'authorMain' => $this->authorMain?->getName(), 'authors' => array_map(fn (BookAuthor $author) => $author->getName(), $this->authors), 'description' => $this->description, + 'descriptionHtml' => $this->descriptionHtml, 'publisher' => $this->publisher, 'identifiers' => array_map(fn (BookIdentifier $identifier) => $identifier->toArray(), $this->identifiers), 'date' => $this->publishDate?->format('Y-m-d H:i:s'), diff --git a/src/Formats/EbookModule.php b/src/Formats/EbookModule.php index b6ef4ca..6e3e886 100644 --- a/src/Formats/EbookModule.php +++ b/src/Formats/EbookModule.php @@ -22,6 +22,50 @@ abstract public function toCounts(): Ebook; abstract public function toArray(): array; + protected function htmlToString(?string $html): ?string + { + if (! $html) { + return null; + } + + $html = strip_tags($html); + $html = $this->formatText($html); + + return $html; + } + + protected function sanitizeHtml(?string $html): ?string + { + if (! 
$html) { + return null; + } + + $html = strip_tags($html, [ + 'div', + 'p', + 'br', + 'b', + 'i', + 'u', + 'strong', + 'em', + ]); + $html = $this->formatText($html); + + return $html; + } + + private function formatText(string $text): string + { + $text = str_replace("\n", '', $text); + $text = str_replace("\r", '', $text); + $text = str_replace("\t", '', $text); + $text = trim($text); + $text = preg_replace('/\s+/', ' ', $text); + + return $text; + } + public function toJson(): string { return json_encode($this->toArray(), JSON_PRETTY_PRINT); diff --git a/src/Formats/Epub/EpubMetadata.php b/src/Formats/Epub/EpubMetadata.php index 4ea98f9..45e9491 100644 --- a/src/Formats/Epub/EpubMetadata.php +++ b/src/Formats/Epub/EpubMetadata.php @@ -75,9 +75,8 @@ public function toEbook(): Ebook $authors = array_values($this->opf->getDcCreators()); $this->ebook->setAuthors($authors); - if ($this->opf->getDcDescription()) { - $this->ebook->setDescription(strip_tags($this->opf->getDcDescription())); - } + $this->ebook->setDescription($this->htmlToString($this->opf->getDcDescription())); + $this->ebook->setDescriptionHtml($this->sanitizeHtml($this->opf->getDcDescription())); $this->ebook->setCopyright(! empty($this->opf->getDcRights()) ? implode(', ', $this->opf->getDcRights()) : null); $this->ebook->setPublisher($this->opf->getDcPublisher()); $this->ebook->setIdentifiers($this->opf->getDcIdentifiers()); @@ -97,14 +96,14 @@ public function toEbook(): Ebook $rating = null; if (! 
empty($this->opf->getMeta())) { foreach ($this->opf->getMeta() as $meta) { - if ($meta->name() === 'calibre:series') { - $this->ebook->setSeries($meta->content()); + if ($meta->getName() === 'calibre:series') { + $this->ebook->setSeries($meta->getContent()); } - if ($meta->name() === 'calibre:series_index') { - $this->ebook->setVolume((int) $meta->content()); + if ($meta->getName() === 'calibre:series_index') { + $this->ebook->setVolume((int) $meta->getContent()); } - if ($meta->name() === 'calibre:rating') { - $rating = (float) $meta->content(); + if ($meta->getName() === 'calibre:rating') { + $rating = (float) $meta->getContent(); } } } diff --git a/src/Tools/BookMeta.php b/src/Tools/BookMeta.php index 95dfc68..3bda055 100644 --- a/src/Tools/BookMeta.php +++ b/src/Tools/BookMeta.php @@ -10,12 +10,12 @@ public function __construct( ) { } - public function name(): ?string + public function getName(): ?string { return $this->name; } - public function content(): ?string + public function getContent(): ?string { return $this->content; } diff --git a/tests/EbookTest.php b/tests/EbookTest.php index f1b0a84..1556175 100644 --- a/tests/EbookTest.php +++ b/tests/EbookTest.php @@ -56,3 +56,10 @@ expect($metadata->getAudiobook())->toBeInstanceOf(AudiobookMetadata::class); } })->with([EPUB, CBZ, PDF, AUDIOBOOK]); + +it('can have description with HTML', function (string $path) { + $ebook = Ebook::read($path); + + expect($ebook->getDescription())->toBe('A natural disaster leaves the young girl wandering alone in an unfamiliar and dangerous land until she is found by a woman of the Clan, people very different from her own kind. To them, blond, blue-eyed Ayla looks peculiar and ugly—she is one of the Others, those who have moved into their ancient homeland; but Iza cannot leave the girl to die and takes her with them. Iza and Creb, the old Mog-ur, grow to love her, and as Ayla learns the ways of the Clan and Iza’s way of healing, most come to accept her. 
But the brutal and proud youth who is destined to become their next leader sees her differences as a threat to his authority. He develops a deep and abiding hatred for the strange girl of the Others who lives in their midst, and is determined to get his revenge.'); + expect($ebook->getDescriptionHtml())->toBe('

A natural disaster leaves the young girl wandering alone in an unfamiliar and dangerous land until she is found by a woman of the Clan, people very different from her own kind. To them, blond, blue-eyed Ayla looks peculiar and ugly—she is one of the Others, those who have moved into their ancient homeland; but Iza cannot leave the girl to die and takes her with them. Iza and Creb, the old Mog-ur, grow to love her, and as Ayla learns the ways of the Clan and Iza’s way of healing, most come to accept her. But the brutal and proud youth who is destined to become their next leader sees her differences as a threat to his authority. He develops a deep and abiding hatred for the strange girl of the Others who lives in their midst, and is determined to get his revenge.

'); +})->with([EPUB]); diff --git a/tests/EntityTest.php b/tests/EntityTest.php index 153274f..c04c7b3 100644 --- a/tests/EntityTest.php +++ b/tests/EntityTest.php @@ -143,8 +143,8 @@ it('can use BookMeta', function (string $name, string $content) { $item = new BookMeta($name, $content); - expect($item->name())->toBe($name); - expect($item->content())->toBe($content); + expect($item->getName())->toBe($name); + expect($item->getContent())->toBe($content); expect($item->toArray())->toBe([ 'name' => $name, 'content' => $content, From 38bd55a74fda1ff274493b290ff4e682961c7510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Sun, 27 Aug 2023 21:55:26 +0200 Subject: [PATCH 2/9] add test for author empty --- tests/EpubOpfTest.php | 20 ++++++++++++++++++++ tests/media/opf-epub2-no-tags.opf | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/EpubOpfTest.php b/tests/EpubOpfTest.php index 7540ba0..93b5546 100644 --- a/tests/EpubOpfTest.php +++ b/tests/EpubOpfTest.php @@ -83,3 +83,23 @@ expect($opf->getCoverPath())->toBeString(); expect($opf->getEpubVersion())->toBeGreaterThanOrEqual(2); }); + +it('can parse epub opf without author', function (string $path) { + $opf = OpfMetadata::make(file_get_contents($path), $path); + ray($opf); + + // expect($opf)->tobeInstanceOf(OpfMetadata::class); + // expect($path)->toBeReadableFile(); + // expect($opf->getDcTitle())->toBeString(); + // expect($opf->getDcCreators())->toBeArray(); + // expect($opf->getDcDescription())->toBeString(); + // expect($opf->getDcContributors())->toBeArray(); + // expect($opf->getDcRights())->toBeArray(); + // expect($opf->getDcPublisher())->toBeString(); + // expect($opf->getDcIdentifiers())->toBeArray(); + // expect($opf->getDcSubject())->toBeArray(); + // expect($opf->getDcLanguage())->toBeString(); + // expect($opf->getMeta())->toBeArray(); + // expect($opf->getCoverPath())->toBeString(); + // expect($opf->getEpubVersion())->toBeGreaterThanOrEqual(2); 
+})->with([EPUB_OPF_EPUB2_NO_TAGS]); diff --git a/tests/media/opf-epub2-no-tags.opf b/tests/media/opf-epub2-no-tags.opf index 878935b..18203be 100644 --- a/tests/media/opf-epub2-no-tags.opf +++ b/tests/media/opf-epub2-no-tags.opf @@ -4,7 +4,7 @@ xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"> Le clan de l'ours des cavernes - Jean M. Auel + calibre (6.12.0) [https://calibre-ebook.com] &lt;div&gt; &lt;p&gt;Quelque part en Europe, 35 000 ans avant notre ère. Petite fille Cro-Magnon de cinq From 4730505e12630f71ec8260addc5878a48b137300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Mon, 28 Aug 2023 08:29:23 +0200 Subject: [PATCH 3/9] add 6722038 --- src/Formats/Epub/OpfMetadata.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Formats/Epub/OpfMetadata.php b/src/Formats/Epub/OpfMetadata.php index 9a88740..2a59cb0 100644 --- a/src/Formats/Epub/OpfMetadata.php +++ b/src/Formats/Epub/OpfMetadata.php @@ -384,6 +384,10 @@ private function setDcCreators(): array foreach ($core as $item) { $name = XmlReader::parseContent($item); + // if `<dc:creator>` is empty + if (is_array($name)) { + continue; + } $attributes = XmlReader::parseAttributes($item); $items[$name] = new BookAuthor( name: $name, From 6c927878e88f66380cab0af606c8abaf11ac6e86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Mon, 28 Aug 2023 09:15:51 +0200 Subject: [PATCH 4/9] 2.0.20 - add `getDescriptionHtml()` method to `Ebook::class`, which can contain the description with HTML tags if it is available, HTML is sanitized, original description is still available via `getDescription()` method with plain text - add `getBasename()` method to `Ebook::class`, which returns basename of ebook file, `getFilename()` now returns real filename of ebook file - add `isValid(string $path)` static method to `Ebook::class`, which checks if ebook file is valid, thanks to @SergioMendolia: - fix empty `<dc:creator>` tag in
`opf` file, thanks to @SergioMendolia: - Bugfixes --- README.md | 1 + composer.json | 2 +- src/Ebook.php | 92 ++++++++++++++++++------- tests/EpubOpfTest.php | 20 +----- tests/EpubTest.php | 8 ++- tests/Pest.php | 1 + tests/media/opf-epub2-empty-creator.opf | 81 ++++++++++++++++++++++ tests/media/opf-epub2-no-tags.opf | 2 +- 8 files changed, 163 insertions(+), 44 deletions(-) create mode 100644 tests/media/opf-epub2-empty-creator.opf diff --git a/README.md b/README.md index fac0116..e79d74f 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ $ebook->getMetadata(); // ?EbookMetadata => metadata with parsers $ebook->getMetaTitle(); // ?MetaTitle, with slug and sort properties for `title` and `series` $ebook->getFormat(); // ?EbookFormatEnum => `epub`, `pdf`, `cba` $ebook->getCover(); // ?EbookCover => cover of book +$ebook->getArchive(); // ?BaseArchive => archive of book from `kiwilan/php-archive` ``` And to test if some data exists: diff --git a/composer.json b/composer.json index ab289f4..35fd9ac 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "kiwilan/php-ebook", "description": "PHP package to read metadata and extract covers from eBooks (.epub, .cbz, .cbr, .cb7, .cbt, .pdf) and audiobooks (.mp3, .m4a, .m4b, .flac, .ogg).", - "version": "2.0.13", + "version": "2.0.20", "keywords": [ "php", "ebook", diff --git a/src/Ebook.php b/src/Ebook.php index 6d45df1..bfa65b7 100755 --- a/src/Ebook.php +++ b/src/Ebook.php @@ -68,6 +68,7 @@ class Ebook protected function __construct( protected string $path, protected string $filename, + protected string $basename, protected string $extension, protected ?BaseArchive $archive = null, protected ?Audio $audio = null, @@ -85,7 +86,46 @@ protected function __construct( public static function read(string $path): ?self { $start = microtime(true); - $filename = pathinfo($path, PATHINFO_BASENAME); + $self = self::parseFile($path); + + $format = match ($self->format) { + EbookFormatEnum::EPUB => 
$self->epub(), + EbookFormatEnum::MOBI => $self->mobi(), + EbookFormatEnum::CBA => $self->cba(), + EbookFormatEnum::PDF => $self->pdf(), + EbookFormatEnum::AUDIOBOOK => $self->audiobook(), + default => null, + }; + + if ($format === null) { + return null; + } + + $self->metaTitle = MetaTitle::make($self); + + $time = microtime(true) - $start; + $self->execTime = (float) number_format((float) $time, 5, '.', ''); + + return $self; + } + + /** + * Check if an ebook file is valid. + */ + public static function isValid(string $path): bool + { + $self = self::parseFile($path); + + return ! $self->isBadFile; + } + + /** + * Parse an ebook file. + */ + private static function parseFile(string $path): Ebook + { + $basename = pathinfo($path, PATHINFO_BASENAME); + $filename = pathinfo($path, PATHINFO_FILENAME); $extension = pathinfo($path, PATHINFO_EXTENSION); $cbaExtensions = ['cbz', 'cbr', 'cb7', 'cbt']; @@ -102,7 +142,7 @@ public static function read(string $path): ?self throw new \Exception("Unknown archive type: {$extension}"); } - $self = new self($path, $filename, $extension); + $self = new self($path, $filename, $basename, $extension); $self->format = match ($extension) { 'epub' => $self->format = EbookFormatEnum::EPUB, @@ -143,24 +183,6 @@ public static function read(string $path): ?self $self->audio = Audio::get($path); } - $format = match ($self->format) { - EbookFormatEnum::EPUB => $self->epub(), - EbookFormatEnum::MOBI => $self->mobi(), - EbookFormatEnum::CBA => $self->cba(), - EbookFormatEnum::PDF => $self->pdf(), - EbookFormatEnum::AUDIOBOOK => $self->audiobook(), - default => null, - }; - - if ($format === null) { - return null; - } - - $self->metaTitle = MetaTitle::make($self); - - $time = microtime(true) - $start; - $self->execTime = (float) number_format((float) $time, 5, '.', ''); - return $self; } @@ -298,6 +320,8 @@ public function getAuthors(): array /** * Description of the book. + * + * @param int|null $limit Limit the length of the description. 
*/ public function getDescription(int $limit = null): ?string { @@ -310,6 +334,8 @@ public function getDescription(int $limit = null): ?string /** * Description of the book with HTML sanitized. + * + * If original description doesn't have HTML, it will be the same as `getDescription()`. */ public function getDescriptionHtml(): ?string { @@ -399,7 +425,7 @@ public function getPath(): string } /** - * Filename of the ebook. + * Filename of the ebook, e.g. `The Clan of the Cave Bear`. */ public function getFilename(): string { @@ -407,7 +433,15 @@ public function getFilename(): string } /** - * Extension of the ebook. + * Basename of the ebook, e.g. `The Clan of the Cave Bear.epub`. + */ + public function getBasename(): string + { + return $this->basename; + } + + /** + * Extension of the ebook, e.g. `epub`. */ public function getExtension(): string { @@ -415,15 +449,24 @@ public function getExtension(): string } /** - * Archive reader. + * Archive reader, from `kiwilan/php-archive`. + * + * @docs https://github.com/kiwilan/php-archive */ public function getArchive(): ?BaseArchive { + // if (! $this->archive) { + // error_log("{$this->path} can't be read as archive."); + // throw new \Exception("{$this->path} can't be read as archive."); + // } + return $this->archive; } /** - * Audio reader. + * Audio reader, from `kiwilan/php-audio`. 
+ * + * @docs https://github.com/kiwilan/php-audio */ public function getAudio(): ?Audio { @@ -735,6 +778,7 @@ public function toArray(): array 'pagesCount' => $this->pagesCount, 'path' => $this->path, 'filename' => $this->filename, + 'basename' => $this->basename, 'extension' => $this->extension, 'format' => $this->format, 'metadata' => $this->metadata?->toArray(), diff --git a/tests/EpubOpfTest.php b/tests/EpubOpfTest.php index 93b5546..0a1d981 100644 --- a/tests/EpubOpfTest.php +++ b/tests/EpubOpfTest.php @@ -84,22 +84,8 @@ expect($opf->getEpubVersion())->toBeGreaterThanOrEqual(2); }); -it('can parse epub opf without author', function (string $path) { +it('can parse epub opf with empty dc:creator', function (string $path) { $opf = OpfMetadata::make(file_get_contents($path), $path); - ray($opf); - // expect($opf)->tobeInstanceOf(OpfMetadata::class); - // expect($path)->toBeReadableFile(); - // expect($opf->getDcTitle())->toBeString(); - // expect($opf->getDcCreators())->toBeArray(); - // expect($opf->getDcDescription())->toBeString(); - // expect($opf->getDcContributors())->toBeArray(); - // expect($opf->getDcRights())->toBeArray(); - // expect($opf->getDcPublisher())->toBeString(); - // expect($opf->getDcIdentifiers())->toBeArray(); - // expect($opf->getDcSubject())->toBeArray(); - // expect($opf->getDcLanguage())->toBeString(); - // expect($opf->getMeta())->toBeArray(); - // expect($opf->getCoverPath())->toBeString(); - // expect($opf->getEpubVersion())->toBeGreaterThanOrEqual(2); -})->with([EPUB_OPF_EPUB2_NO_TAGS]); + expect($opf->getDcCreators())->toBeEmpty(); +})->with([EPUB_OPF_EMPTY_CREATOR]); diff --git a/tests/EpubTest.php b/tests/EpubTest.php index b564957..6e5e23f 100644 --- a/tests/EpubTest.php +++ b/tests/EpubTest.php @@ -10,10 +10,12 @@ it('can parse epub entity', function () { $ebook = Ebook::read(EPUB); $firstAuthor = $ebook->getAuthors()[0]; + $filename = pathinfo(EPUB, PATHINFO_FILENAME); $basename = pathinfo(EPUB, PATHINFO_BASENAME); 
expect($ebook->getpath())->toBe(EPUB); - expect($ebook->getFilename())->toBe($basename); + expect($ebook->getFilename())->toBe($filename); + expect($ebook->getBasename())->toBe($basename); expect($ebook->hasMetadata())->toBeTrue(); expect($ebook)->toBeInstanceOf(Ebook::class); @@ -47,6 +49,7 @@ $metadata = $ebook->getMetadata(); expect($metadata->toArray())->toBeArray(); expect($metadata->toJson())->toBeString(); + expect(Ebook::isValid(EPUB))->toBeTrue(); }); it('can get epub cover', function () { @@ -160,8 +163,11 @@ it('can handle bad file', function () { $ebook = Ebook::read(EPUB_BAD_FILE); + expect(Ebook::isValid(EPUB_BAD_FILE))->toBeFalse(); expect($ebook->hasMetadata())->toBeFalse(); expect($ebook->isBadFile())->toBeTrue(); + // expect(fn () => $ebook->getArchive())->toThrow(Exception::class); + expect(fn () => $ebook->getArchive()?->filter('opf'))->not()->toThrow(Exception::class); }); it('can handle bad epub', function (string $epub) { diff --git a/tests/Pest.php b/tests/Pest.php index d539909..646198f 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -27,6 +27,7 @@ define('EPUB_OPF_LAGUERREETERNELLE', __DIR__.'/media/opf-la-guerre-eternelle.opf'); define('EPUB_OPF_EPEEETMORT', __DIR__.'/media/opf-content-epee-et-mort.opf'); define('EPUB_OPF_NOT_FORMATTED', __DIR__.'/media/opf-not-formatted.opf'); +define('EPUB_OPF_EMPTY_CREATOR', __DIR__.'/media/opf-epub2-empty-creator.opf'); define('EPUB', __DIR__.'/media/test-epub.epub'); define('EPUB_ONE_TAG', __DIR__.'/media/epub-one-tag.epub'); diff --git a/tests/media/opf-epub2-empty-creator.opf b/tests/media/opf-epub2-empty-creator.opf new file mode 100644 index 0000000..18203be --- /dev/null +++ b/tests/media/opf-epub2-empty-creator.opf @@ -0,0 +1,81 @@ + + + + Le clan de l'ours des cavernes + + calibre (6.12.0) [https://calibre-ebook.com] + <div> + <p>Quelque part en Europe, 35 000 ans avant notre ère. 
Petite fille Cro-Magnon de cinq + ans, Ayla est séparée de ses parents à la suite d'un violent tremblement de terre. Elle est + recueillie par le clan de l'ours des cavernes, une tribu Neandertal qui l'adopte, non sans + réticence, ayant reconnu en elle la représentante d'une autre espèce, plus évoluée. + <br><br>Iza, la guérisseuse, Brun, le chef et Creb, le magicien lui enseignent les + règles de la vie communautaire, leurs rites, leurs peurs, leurs audaces. Mais Ayla, la + fillette blonde aux yeux bleus les surprend par sa puissance de raisonnement qui lui permet de + s'adapter, de réagir rapidement et de ne pas être totalement dépendante de son environnement. + Une différence qui ne tarde pas à faire d'elle une menace pour tout le clan, et à attiser la + convoitise de Brud, le fils du chef...</p></div> + Presses de la cité + a2cf2f25-4de2-4f77-82cc-0198352b0851 + 1980-01-13T21:00:00+00:00 + Fiction + fr + a2cf2f25-4de2-4f77-82cc-0198352b0851 + 63CTHAAACAAJ + 9782266122122 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/media/opf-epub2-no-tags.opf b/tests/media/opf-epub2-no-tags.opf index 18203be..878935b 100644 --- a/tests/media/opf-epub2-no-tags.opf +++ b/tests/media/opf-epub2-no-tags.opf @@ -4,7 +4,7 @@ xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"> Le clan de l'ours des cavernes - + Jean M. Auel calibre (6.12.0) [https://calibre-ebook.com] <div> <p>Quelque part en Europe, 35 000 ans avant notre ère. 
Petite fille Cro-Magnon de cinq From 7855f0dbbc14b573c26ef17f6855533e76b9a4ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Mon, 28 Aug 2023 09:18:01 +0200 Subject: [PATCH 5/9] docs --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index e79d74f..a1a7842 100644 --- a/README.md +++ b/README.md @@ -265,6 +265,7 @@ Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed re - [`spatie`](https://github.com/spatie) for `spatie/package-skeleton-php` - [`kiwilan`](https://github.com/kiwilan) for `kiwilan/php-archive`, `kiwilan/php-audio`, `kiwilan/php-xml-reader` +- [All Contributors](../../contributors) ## License From 3effc3e76e0762893d3ddc90caa778ee8dd6ede9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Mon, 28 Aug 2023 09:22:05 +0200 Subject: [PATCH 6/9] ci --- .github/workflows/run-tests.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 026b3aa..f39dd36 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -17,9 +17,10 @@ jobs: steps: - name: Install run: | + sudo apt update sudo apt -y install p7zip-full ghostscript imagemagick - sudo apt-get install -y unrar - sudo apt-get install -y libunrar-dev + sudo apt install -y unrar + sudo apt install -y libunrar-dev sudo sed -i '/disable ghostscript format types/,+6d' /etc/ImageMagick-6/policy.xml shell: bash From 494f114d87224deb6854497ca3a17105084d28f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Mon, 28 Aug 2023 09:27:53 +0200 Subject: [PATCH 7/9] docs --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a1a7842..141ad8d 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,6 @@ composer require kiwilan/php-ebook ## Usage -### Main - With eBook files (`.epub`, `.cbz`, `.cba`, `.cbr`, `.cb7`, `.cbt`, 
`.pdf`) or audiobook files (`mp3`, `m4a`, `m4b`, `flac`, `ogg`). ```php @@ -143,6 +141,14 @@ $ebook->getExtras(); // array => additional data for book $ebook->getExtra(string $key); // mixed => safely extract data from `extras` array ``` +To know if eBook is valid, you can use `isValid()` static method, before `read()`. + +```php +use Kiwilan\Ebook\Ebook; + +$isValid = Ebook::isValid('path/to/ebook.epub'); +``` + To get additional data, you can use these methods: ```php From 2e9a30dbf79c5327059e1a93aa2e85fcfe649e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Mon, 28 Aug 2023 09:28:47 +0200 Subject: [PATCH 8/9] ci --- .github/workflows/codecov.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index bd22b79..dd5b7a1 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -15,9 +15,10 @@ jobs: steps: - name: Install for Linux run: | + sudo apt update sudo apt -y install p7zip-full ghostscript imagemagick - sudo apt-get install -y unrar - sudo apt-get install -y libunrar-dev + sudo apt install -y unrar + sudo apt install -y libunrar-dev sudo sed -i '/disable ghostscript format types/,+6d' /etc/ImageMagick-6/policy.xml shell: bash From 384cf2345c559fdd74cab0efe3b73dcf77a4975d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ewilan=20Rivi=C3=A8re?= Date: Mon, 28 Aug 2023 09:33:10 +0200 Subject: [PATCH 9/9] add documentation --- src/Ebook.php | 5 ++++- src/Formats/EbookModule.php | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Ebook.php b/src/Ebook.php index bfa65b7..fb345de 100755 --- a/src/Ebook.php +++ b/src/Ebook.php @@ -319,7 +319,10 @@ public function getAuthors(): array } /** - * Description of the book. + * Description of the book, without HTML. + * + * If original description has HTML, all HTML will be removed and text will be trimmed. 
+ * You can use `getDescriptionHtml()` to get the original description sanitized. * * @param int|null $limit Limit the length of the description. */ diff --git a/src/Formats/EbookModule.php b/src/Formats/EbookModule.php index 6e3e886..4fefc15 100644 --- a/src/Formats/EbookModule.php +++ b/src/Formats/EbookModule.php @@ -22,6 +22,9 @@ abstract public function toCounts(): Ebook; abstract public function toArray(): array; + /** + * Convert HTML to string, remove all tags. + */ protected function htmlToString(?string $html): ?string { if (! $html) { @@ -34,6 +37,9 @@ protected function htmlToString(?string $html): ?string return $html; } + /** + * Sanitize HTML, remove all tags except div, p, br, b, i, u, strong, em. + */ protected function sanitizeHtml(?string $html): ?string { if (! $html) { @@ -55,6 +61,9 @@ protected function sanitizeHtml(?string $html): ?string return $html; } + /** + * Clean string, remove tabs, new lines, carriage returns, and multiple spaces. + */ private function formatText(string $text): string { $text = str_replace("\n", '', $text);