Last active
December 15, 2021 13:49
-
-
Save artoodetoo/5e73ce8e9e81fdae36b7258a1d3f7303 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Iterates over the sections of a JPEG image held in memory as a binary string.
 *
 * Every iteration yields an array with three keys:
 *  - 'name'   one of the class constants below (UNKNOWN when the marker is not recognised),
 *  - 'length' size of the section in bytes, the two marker bytes included,
 *  - 'word'   the two marker bytes read as a big-endian unsigned integer.
 * The iteration key is the byte offset of the section inside the content.
 *
 * Markers that are not listed in the constants are reported as UNKNOWN and
 * treated as two-byte sections without payload.
 */
class JpegSectionIterator implements \Iterator
{
    public const UNKNOWN = 'unknown';
    public const SOI = 'SOI';   // Start Of Image
    public const SOF0 = 'SOF0'; // Start Of Frame (baseline DCT)
    public const SOF1 = 'SOF1'; // Start Of Frame (extended DCT)
    public const SOF2 = 'SOF2'; // Start Of Frame (progressive DCT)
    public const DHT = 'DHT';   // Define Huffman Table(s)
    public const DQT = 'DQT';   // Define Quantization Table(s)
    public const DRI = 'DRI';   // Define Restart Interval
    public const SOS = 'SOS';   // Start Of Scan
    public const RSTn = 'RSTn'; // Restart
    public const APPn = 'APPn'; // Application-specific, e.g. an Exif JPEG file uses an APP1
    public const COM = 'COM';   // Comment
    public const EOI = 'EOI';   // End Of Image

    /** Markers identified by one exact value; RSTn and APPn are ranges and handled separately. */
    private const MARKER_NAMES = [
        0xFFD8 => self::SOI,
        0xFFC0 => self::SOF0,
        0xFFC1 => self::SOF1,
        0xFFC2 => self::SOF2,
        0xFFC4 => self::DHT,
        0xFFDB => self::DQT,
        0xFFDD => self::DRI,
        0xFFDA => self::SOS,
        0xFFFE => self::COM,
        0xFFD9 => self::EOI,
    ];

    private string $content;
    private int $contentLength;
    private int $offset;

    /** Result of the last detection, reused while the offset does not move. */
    private ?array $cachedSection = null;
    private int $cachedOffset = -1;

    /**
     * @param string $content Raw bytes of the JPEG file.
     *
     * @throws \InvalidArgumentException When the content is shorter than four bytes.
     */
    public function __construct(string $content)
    {
        $length = strlen($content);
        if ($length < 4) {
            throw new \InvalidArgumentException('Wrong JPEG');
        }
        $this->content = $content;
        $this->contentLength = $length;
        $this->offset = 0;
    }

    /**
     * @return array{name: string, length: int, word: int} Description of the section at the current offset.
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->detectCurrentSection();
    }

    /**
     * @return int Byte offset of the current section.
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->offset;
    }

    /**
     * Moves to the section that follows the current one.
     *
     * @return int The new offset (kept for callers that relied on this return value).
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->offset += $this->detectCurrentSection()['length'];

        return $this->offset;
    }

    public function rewind(): void
    {
        $this->offset = 0;
    }

    public function valid(): bool
    {
        return isset($this->content[$this->offset]);
    }

    /**
     * @return string Raw bytes of the current section, marker included.
     */
    public function currentSection(): string
    {
        $section = $this->detectCurrentSection();

        return (string) substr($this->content, $this->offset, $section['length']);
    }

    /**
     * Identifies the section at the current offset and measures its length.
     *
     * The reported length never exceeds the number of bytes left in the
     * content, so truncated input ends the iteration instead of raising errors.
     *
     * @return array{name: string, length: int, word: int}
     */
    private function detectCurrentSection(): array
    {
        if ($this->cachedSection !== null && $this->cachedOffset === $this->offset) {
            return $this->cachedSection;
        }

        $word = $this->readWord($this->offset);
        $name = $this->sectionName($word);

        switch ($name) {
            // markers followed by payload length
            case self::SOF0:
            case self::SOF1:
            case self::SOF2:
            case self::DHT:
            case self::DQT:
            case self::APPn:
            case self::COM:
                $length = $this->readWord($this->offset + 2) + 2;
                break;
            case self::DRI:
                $length = 6;
                break;
            // marker followed by header length, then the header and the scan data itself
            case self::SOS:
                $length = $this->scanSectionLength();
                break;
            // markers without payload
            default:
                // 0xFF followed by 0xFF is a fill byte: step one byte only so the
                // second 0xFF can still start the marker that follows it.
                $length = $word === 0xFFFF ? 1 : 2;
        }

        $remaining = max(0, $this->contentLength - $this->offset);
        $length = min($length, $remaining);

        $this->cachedOffset = $this->offset;
        $this->cachedSection = compact('name', 'length', 'word');

        return $this->cachedSection;
    }

    /**
     * Measures the SOS section at the current offset: marker, header and scan data.
     *
     * The length field of SOS covers the header only, so the scan data is
     * walked until a 0xFF byte is found that starts a real marker, i.e. one that
     * is not a stuffed 0xFF00, a restart marker (0xFFD0-0xFFD7) or a fill byte.
     */
    private function scanSectionLength(): int
    {
        $end = $this->contentLength;
        $cursor = $this->offset + 2 + $this->readWord($this->offset + 2);

        while ($cursor < $end) {
            $found = strpos($this->content, "\xFF", $cursor);
            if ($found === false || $found + 1 >= $end) {
                // no further marker; a lone trailing 0xFF cannot start one
                break;
            }

            $second = ord($this->content[$found + 1]);
            if ($second === 0xFF) {
                $cursor = $found + 1;
                continue;
            }
            if ($second === 0x00 || ($second >= 0xD0 && $second <= 0xD7)) {
                $cursor = $found + 2;
                continue;
            }

            return $found - $this->offset;
        }

        return $end - $this->offset;
    }

    /**
     * Reads a big-endian 16-bit unsigned integer at the given position.
     * Bytes missing at the end of the content are read as zero.
     */
    private function readWord(int $position): int
    {
        $bytes = (string) substr($this->content, $position, 2);

        return unpack('n', str_pad($bytes, 2, "\x00"))[1];
    }

    /**
     * Maps a marker word to one of the section name constants.
     */
    private function sectionName(int $word): string
    {
        if (isset(self::MARKER_NAMES[$word])) {
            return self::MARKER_NAMES[$word];
        }
        if ($word >= 0xFFD0 && $word <= 0xFFD7) {
            return self::RSTn;
        }
        // APP0..APP15 only; a wider mask would also swallow COM (0xFFFE)
        if (($word & 0xFFF0) === 0xFFE0) {
            return self::APPn;
        }

        return self::UNKNOWN;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use JpegSectionIterator; | |
/**
 * Removes every application-specific (APPn, e.g. Exif) and comment (COM)
 * section from a JPEG file, rewriting the file in place.
 *
 * Files whose detected MIME type is not image/jpeg are left untouched, and the
 * file is only rewritten when at least one section was actually removed.
 *
 * @param string $filename Path of the file to clean.
 *
 * @throws \InvalidArgumentException When the file content is too short to be a JPEG.
 * @throws \RuntimeException When the file cannot be read or written back.
 */
function stripExif($filename): void
{
    if (finfo_file(finfo_open(FILEINFO_MIME_TYPE), $filename) !== 'image/jpeg') {
        return;
    }

    $original = file_get_contents($filename);
    if ($original === false) {
        throw new \RuntimeException("Unable to read {$filename}");
    }

    $stripped = '';
    $changed = false;
    $iterator = new JpegSectionIterator($original);
    foreach ($iterator as $section) {
        // Skip application-specific sections (like Exif) and comments
        if (in_array($section['name'], [JpegSectionIterator::APPn, JpegSectionIterator::COM], true)) {
            $changed = true;
            continue;
        }
        $stripped .= $iterator->currentSection();
    }

    // A failed write would leave the metadata in place, so it must not pass silently.
    if ($changed && file_put_contents($filename, $stripped) === false) {
        throw new \RuntimeException("Unable to write {$filename}");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ php artisan tinker | |
Psy Shell v0.10.9 (PHP 7.4.25 — cli) by Justin Hileman | |
>>> $iter = new JpegSectionIterator(file_get_contents('~/240666275_4734447076631485_5341300544913327001_n.jpg')) | |
=> JpegSectionIterator {#4179} | |
>>> foreach($iter as $key => $value) { echo sprintf("%08X %s %04X\n", $key, $value['name'], $value['word']); } | |
00000000 SOI FFD8 | |
00000002 APPn FFE0 | |
00000014 APPn FFED | |
00000092 DQT FFDB | |
000000D7 DQT FFDB | |
0000011C SOF2 FFC2 | |
0000012F DHT FFC4 | |
0000014D DHT FFC4 | |
00000169 SOS FFDA | |
00001B28 DHT FFC4 | |
00001B5D SOS FFDA | |
00004B99 DHT FFC4 | |
00004BC4 SOS FFDA | |
00004EA2 DHT FFC4 | |
00004ECD SOS FFDA | |
00005258 DHT FFC4 | |
000052A3 SOS FFDA | |
000091EC DHT FFC4 | |
00009215 SOS FFDA | |
0000E56F SOS FFDA | |
0000EA37 DHT FFC4 | |
0000EA5C SOS FFDA | |
0000EBEE DHT FFC4 | |
0000EC12 SOS FFDA | |
0000EE3E DHT FFC4 | |
0000EE67 SOS FFDA | |
0001588B EOI FFD9 | |
>>> q | |
Exit: Goodbye |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The aim was to strip EXIF information without depending on Imagick. I believe it can be useful for other purposes as well.
Some background: a JPEG file consists of sections. There is no "index" for navigating the file; it can only be read sequentially. Each section starts with a two-byte FFxx marker, optionally followed by a payload length and the payload. The file starts with an "SOI" marker and ends with an "EOI" marker, neither of which has a payload.
The "SOS" section contains the graphical data, but its length field does NOT reflect the data length! It covers only the section header. So we have to scan through the data to find the next FFxx marker. That is what the JpegSectionIterator class is for.
Thanks to JPEG Wiki and SO topics about file parsing.