From 408da0c17a7c198a2f0de44450902c711b9ef4c6 Mon Sep 17 00:00:00 2001 From: Alexander Kurilo Date: Sun, 23 Oct 2016 22:18:59 +0300 Subject: [PATCH] Make HTML checks more strict --- src/PhpSpreadsheet/Reader/HTML.php | 59 +++++++++++++++++-- tests/PhpSpreadsheetTests/Reader/HTMLTest.php | 19 ++++++ .../Reader/HTML/csv_with_angle_bracket.csv | 1 + 3 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Reader/HTMLTest.php create mode 100644 tests/data/Reader/HTML/csv_with_angle_bracket.csv diff --git a/src/PhpSpreadsheet/Reader/HTML.php b/src/PhpSpreadsheet/Reader/HTML.php index 178d110a..983a3937 100644 --- a/src/PhpSpreadsheet/Reader/HTML.php +++ b/src/PhpSpreadsheet/Reader/HTML.php @@ -33,6 +33,11 @@ use PhpOffice\PhpSpreadsheet\Spreadsheet; /** PhpSpreadsheet root directory */ class HTML extends BaseReader implements IReader { + /** + * Sample size to read to determine if it's HTML or not + */ + const TEST_SAMPLE_SIZE = 2048; + /** * Input encoding * @@ -126,14 +131,56 @@ class HTML extends BaseReader implements IReader */ protected function isValidFormat() { - // Reading 2048 bytes should be enough to validate that the format is HTML - $data = fread($this->fileHandle, 2048); - if ((strpos($data, '<') !== false) && - (strlen($data) !== strlen(strip_tags($data)))) { - return true; + $beginning = $this->readBeginning(); + + if (!self::startsWithTag($beginning)) { + return false; } - return false; + if (!self::containsTags($beginning)) { + return false; + } + + if (!self::endsWithTag($this->readEnding())) { + return false; + } + + return true; + } + + private function readBeginning() + { + fseek($this->fileHandle, 0); + + return fread($this->fileHandle, self::TEST_SAMPLE_SIZE); + } + + private function readEnding() + { + $meta = stream_get_meta_data($this->fileHandle); + $filename = $meta['uri']; + + $size = filesize($filename); + $blockSize = self::TEST_SAMPLE_SIZE; + + fseek($this->fileHandle, $size - $blockSize); + + return fread($this->fileHandle, $blockSize); + } + + private static function startsWithTag($data) + { + return '<' === substr(trim($data), 0, 1); + } + + private static function endsWithTag($data) + { + return '>' === substr(trim($data), -1, 1); + } + + private static function containsTags($data) + { + return strlen($data) !== strlen(strip_tags($data)); } /** diff --git a/tests/PhpSpreadsheetTests/Reader/HTMLTest.php b/tests/PhpSpreadsheetTests/Reader/HTMLTest.php new file mode 100644 index 00000000..96196408 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/HTMLTest.php @@ -0,0 +1,19 @@ +assertFalse($this->getInstance()->canRead($filename)); + } + + private function getInstance() + { + return new HTML(); + } +} diff --git a/tests/data/Reader/HTML/csv_with_angle_bracket.csv b/tests/data/Reader/HTML/csv_with_angle_bracket.csv new file mode 100644 index 00000000..6e1b0abb --- /dev/null +++ b/tests/data/Reader/HTML/csv_with_angle_bracket.csv @@ -0,0 +1 @@ +Collection Name,Number of items with weight <= 50kg