From d912d35b87688ce85a27a91868c33d4c92b55aa1 Mon Sep 17 00:00:00 2001 From: Deon George Date: Tue, 24 Sep 2024 21:55:40 +1000 Subject: [PATCH] Added AVI x-msvideo parsing --- app/Jobs/CatalogScan.php | 8 +- app/Media/Base.php | 24 ++ app/Media/Factory.php | 1 + app/Media/MSVideo.php | 248 +++++++++++++++++++ app/Media/MSVideo/Container.php | 31 +++ app/Media/MSVideo/Containers/Unknown.php | 23 ++ app/Media/MSVideo/Containers/junk.php | 26 ++ app/Media/MSVideo/Containers/rlist.php | 104 ++++++++ app/Media/MSVideo/Containers/rlist/avih.php | 50 ++++ app/Media/MSVideo/Containers/rlist/isft.php | 30 +++ app/Media/MSVideo/Containers/rlist/junk.php | 10 + app/Media/MSVideo/Containers/rlist/movi.php | 59 +++++ app/Media/MSVideo/Containers/rlist/rlist.php | 10 + app/Media/MSVideo/Containers/rlist/strh.php | 61 +++++ app/Media/QuickTime/Atoms/SubAtom.php | 23 -- 15 files changed, 681 insertions(+), 27 deletions(-) create mode 100644 app/Media/MSVideo.php create mode 100644 app/Media/MSVideo/Container.php create mode 100644 app/Media/MSVideo/Containers/Unknown.php create mode 100644 app/Media/MSVideo/Containers/junk.php create mode 100644 app/Media/MSVideo/Containers/rlist.php create mode 100644 app/Media/MSVideo/Containers/rlist/avih.php create mode 100644 app/Media/MSVideo/Containers/rlist/isft.php create mode 100644 app/Media/MSVideo/Containers/rlist/junk.php create mode 100644 app/Media/MSVideo/Containers/rlist/movi.php create mode 100644 app/Media/MSVideo/Containers/rlist/rlist.php create mode 100644 app/Media/MSVideo/Containers/rlist/strh.php diff --git a/app/Jobs/CatalogScan.php b/app/Jobs/CatalogScan.php index 1cbce10..e322e5a 100644 --- a/app/Jobs/CatalogScan.php +++ b/app/Jobs/CatalogScan.php @@ -32,7 +32,7 @@ class CatalogScan implements ShouldQueue, ShouldBeUnique public function middleware(): array { - return [new WithoutOverlapping($this->o::config.'|'.$this->o->id)]; + return [new WithoutOverlapping($this->uniqueId())]; } /** @@ -40,7 +40,7 @@ class CatalogScan implements ShouldQueue, ShouldBeUnique */ public function uniqueId(): string { - return $this->o->id; + return $this->o::config.'|'.$this->o->id; } /** @@ -62,7 +62,7 @@ class CatalogScan implements ShouldQueue, ShouldBeUnique // Check the details are valid if ($this->o->file_signature === $this->o->getObjectOriginal('file_signature')) { // For sanity, we'll check a couple of other attrs - if (($this->o->width != $this->o->getObjectOriginal('width')) || ($this->o->height != $this->o->getObjectOriginal('height'))) { + if (($this->o->width && ($this->o->width != $this->o->getObjectOriginal('width'))) || (($this->o->height && $this->o->height != $this->o->getObjectOriginal('height')))) { Log::alert(sprintf('Dimensions [%s] (%s x %s) mismatch for [%s]', $this->o->dimensions, $this->o->getObjectOriginal('width'), @@ -76,7 +76,7 @@ class CatalogScan implements ShouldQueue, ShouldBeUnique $this->o->file_name(FALSE))); } - } else { + } elseif ($this->o->file_signature) { Log::alert(sprintf('File Signature [%s] doesnt match [%s] for [%s]', $x=$this->o->getObjectOriginal('file_signature'), $this->o->file_signature, diff --git a/app/Media/Base.php b/app/Media/Base.php index 953b1de..bc052b9 100644 --- a/app/Media/Base.php +++ b/app/Media/Base.php @@ -2,6 +2,7 @@ namespace App\Media; +use Illuminate\Support\Collection; use Illuminate\Support\Facades\Log; abstract class Base @@ -16,6 +17,8 @@ abstract class Base private mixed $fh; private int $fp; + protected ?string $unused_data; + public function __construct(string $filename,string $type) { Log::info(sprintf('Create a media type [%s] for [%s]',get_class($this),$filename)); @@ -43,6 +46,27 @@ abstract class Base } } + /** + * Unpack data into our cache + * + * @param string|null $data + * @return Collection + * @throws \Exception + */ + protected function cache(?string $data=NULL): Collection + { + $data = $data ?: $this->data(); + + if (! count($this->cache) && $this->size) { + $this->cache = collect(unpack($this->unpack(),$data)); + + if ($this->size > ($x=$this->unpack_size())) + $this->unused_data = substr($data,$x); + } + + return $this->cache; + } + protected function data(int $size=4096): string { // Quick validation diff --git a/app/Media/Factory.php b/app/Media/Factory.php index 2e45c26..9891339 100644 --- a/app/Media/Factory.php +++ b/app/Media/Factory.php @@ -12,6 +12,7 @@ class Factory { */ public const map = [ 'video/quicktime' => QuickTime::class, + 'video/x-msvideo' => MSVideo::class, ]; /** diff --git a/app/Media/MSVideo.php b/app/Media/MSVideo.php new file mode 100644 index 0000000..f648fc2 --- /dev/null +++ b/app/Media/MSVideo.php @@ -0,0 +1,248 @@ +size = $this->filesize; + $this->offset = 0; + $this->containers = collect(); + + $this->fopen(); + + $format = $this->fread(4); + switch ($format) { + case 'RIFF': // AVI, WAV, etc + case 'SDSS': // SDSS is identical to RIFF, just renamed. Used by SmartSound QuickTracks (www.smartsound.com) + case 'RMP3': // RMP3 is identical to RIFF, just renamed. Used by [unknown program] when creating RIFF-MP3s + $be = FALSE; // Big Endian ints + $data = unpack('Vsize/a4subtype',$this->fread(8)); + + // RMP3 is identical to WAVE, just renamed. Used by [unknown program] when creating RIFF-MP3s + if ($data['subtype'] === 'RMP3') + $data['subtype'] = 'WAVE'; + + // AMV files are RIFF-AVI files with parts of the spec deliberately broken, such as chunk size fields hardcoded to zero (because players known in hardware that these fields are always a certain size + if ($data['subtype'] !== 'AMV ') { + // Handled separately in ParseRIFFAMV() + $this->containers = $this->get_containers(self::container_classes,Unknown::class,$x=$this->ftell(),$this->filesize-$x,$be); + } + + break; + + default: + $data = unpack('Nsize/a4subtype',$this->fread(8)); + dump($data); + throw new \Exception('Cannot handle this RIFF file format yet: '.$format); + } + + $this->fclose(); + } + + public function __get(string $key): mixed + { + switch ($key) { + case 'audio_channels': + return $this->getAudioAtoms() + ->count(); + + case 'audio_codec': + case 'audio_samplerate': + return $this->getAudioAtoms() + ->map(fn($item)=>$item->{$key}) + ->join(','); + + // Signatures are calculated by the sha of the MDAT atom. + case 'signature': + $container = $this->find_containers(movi::class,1); + + return $container?->{$key}; + + // Creation Time is in the MOOV/MVHD atom + case 'creation_date': + return NULL; // I dont think create date is in an AVI file + + case 'duration': + return $this->getVideoAtoms() + ->map(fn($item)=>$item->{$key}) + ->join(','); + + // Height/Width is in the rlist/avih container + case 'height': + case 'width': + $container = $this->find_containers(avih::class,1); + + return $container?->{$key}; + + case 'gps_altitude': + case 'gps_lat': + case 'gps_lon': + return NULL; // No GPFS details in an AVI file? + + case 'make': + case 'model': + return NULL; // Make/Model of camera not in an avi file + + case 'software': + $container = $this->find_containers(isft::class,1); + + return $container?->software; + + case 'time_scale': + $container = $this->find_containers(avih::class,1); + + return $container?->time_scale; + + case 'type': + return parent::__get($key); + + case 'video_codec': + case 'video_framerate': + return $this->getVideoAtoms() + ->map(fn($item)=>$item->{$key}) + ->join(','); + + default: + throw new \Exception('Unknown key: '.$key); + } + } + + protected function get_containers(string $class_prefix,string $unknown,int $offset,int $size,bool $be=TRUE,string $bytes=NULL,string $passthru=NULL,\Closure $callback=NULL): Collection + { + $rp = 0; + if (! $bytes) { + $fh = fopen($this->filename,'r'); + fseek($fh,$offset); + } + + $result = collect(); + + while ($rp < $size) { + $read = $bytes ? substr($bytes,$rp,8) : fread($fh,8); + $rp += 8; + + $header = unpack(sprintf('a4name/%ssize',($be ? 'N' : 'V')),$read); + + if (strlen($header['name'] < 4)) + throw new \Exception(sprintf('Name is less than 4 chars: [%s]',$header['name'])); + + if (($header['size'] === 0) && ($header['name'] !== 'JUNK')) + throw new \Exception(sprintf('Chunk [%s] is unexpectedly zero',$header['name'])); + + // all structures are packed on word boundaries + if (($header['size']%2) != 0) + $header['size']++; + + // We cant have a php function named 'list', so we change it to rlist + if ($header['name'] === 'LIST') + $header['name'] = 'RLIST'; + + // Load our class for this supplier + $class = $class_prefix.$header['name']; + + $data = $bytes + ? substr($bytes,$rp,$header['size']) + : ($header['size'] && ($header['size'] <= self::record_size) ? fread($fh,$header['size']) : NULL); + + if ($header['size'] >= 8) { + $o = class_exists($class) + ? new $class($offset+$rp,$header['size'],$this->filename,$be,$data,$passthru) + : new $unknown($offset+$rp,$header['size'],$this->filename,$header['name'],$be,$data); + + $result->push($o); + + $rp += $header['size']; + + // Only need to seek if we didnt read all the data + if ((! $bytes) && ($header['size'] > self::record_size)) + fseek($fh,$offset+$rp); + + } else { + dd([get_class($this) => $data]); + } + + // Work out if data from the last container next to be passed onto the next one + if ($callback) + $passthru = $callback($o); + } + + if (! $bytes) { + fclose($fh); + unset($fh); + } + + return $result; + } + + /** + * Find all the video track atoms + * + * @return Collection + * @throws \Exception + */ + public function getAudioAtoms(): Collection + { + return $this->find_containers(strh::class) + ->filter(fn($item)=>$item->type==='auds'); + } + + /** + * Find all the video track atoms + * + * @return Collection + * @throws \Exception + */ + public function getVideoAtoms(): Collection + { + return $this->find_containers(strh::class) + ->filter(fn($item)=>$item->type==='vids'); + } + + /** + * Recursively look through our object hierarchy of containers looking for a specific one + * + * @param string $subcontainer + * @param int|NULL $expect + * @param int $depth + * @return Collection|Container|NULL + * @throws \Exception + */ + protected function find_containers(string $subcontainer,?int $expect=NULL,int $depth=100): Collection|Container|NULL + { + if (! isset($this->containers) || ($depth < 0)) + return NULL; + + $subcontainero = $this->containers->filter(fn($item)=>get_class($item)===$subcontainer); + + $subcontainero = $subcontainero + ->merge($this->containers->map(fn($item)=>$item->find_containers($subcontainer,NULL,$depth-1)) + ->filter(fn($item)=>$item ? $item->count() : NULL) + ->flatten()); + + if (! $subcontainero->count()) + return $subcontainero; + + if ($expect && ($subcontainero->count() !== $expect)) + throw new \Exception(sprintf('! Expected %d sub containers of %s, but have %d',$expect,$subcontainer,$subcontainero->count())); + + return ($expect === 1) ? $subcontainero->pop() : $subcontainero; + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Container.php b/app/Media/MSVideo/Container.php new file mode 100644 index 0000000..e6ec7f4 --- /dev/null +++ b/app/Media/MSVideo/Container.php @@ -0,0 +1,31 @@ +offset = $offset; + + // Quick validation + if ($size < 0) + throw new \Exception(sprintf('Container cannot be negative. (%d)',$size)); + + $this->size = $size; + $this->filename = $filename; + $this->cache = collect(); + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Containers/Unknown.php b/app/Media/MSVideo/Containers/Unknown.php new file mode 100644 index 0000000..e1783b2 --- /dev/null +++ b/app/Media/MSVideo/Containers/Unknown.php @@ -0,0 +1,23 @@ +container = $container; + + // For debugging + if (FALSE) + $this->debug = hex_dump($data ?: $this->data()); + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Containers/junk.php b/app/Media/MSVideo/Containers/junk.php new file mode 100644 index 0000000..6b93d41 --- /dev/null +++ b/app/Media/MSVideo/Containers/junk.php @@ -0,0 +1,26 @@ +debug = hex_dump($data ?: $this->data()); + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Containers/rlist.php b/app/Media/MSVideo/Containers/rlist.php new file mode 100644 index 0000000..787045b --- /dev/null +++ b/app/Media/MSVideo/Containers/rlist.php @@ -0,0 +1,104 @@ +containers = $this->parseContainer($data); + + // For debugging + if (FALSE) + $this->debug = hex_dump($data ?: $this->data(min($size,256))); + } + + private function parseContainer(string $bytes=NULL): Collection + { + $this->be = FALSE; // @todo + + $rp = 0; + if (! $bytes) { + $fh = fopen($this->filename,'r'); + fseek($fh,$this->offset); + } + + $result = collect(); + + // Our first container should be hrl + $read = $bytes ? substr($bytes,$rp,4) : fread($fh,4); + $rp += 4; + $header = unpack('a4name',$read); + + switch ($header['name']) { + case 'hdrl': + case 'strl': + case 'INFO': + while ($rp < $this->size) { + $read = $bytes ? substr($bytes,$rp,8) : fread($fh,8); + $rp += 8; + + $header = unpack('a4name/Vsize',$read); + + // We cant have a php function named 'list', so we change it to rlist + if ($header['name'] === 'LIST') + $header['name'] = 'RLIST'; + + $class = self::container_classes.$header['name']; + + $data = $bytes + ? substr($bytes,$rp,$header['size']) + : ($header['size'] && ($header['size'] <= self::record_size) ? fread($fh,$header['size']) : NULL); + + if ($header['size']) { + $o = class_exists($class) + ? new $class($this->offset+$rp,$header['size'],$this->filename,$this->be,$data) + : new Unknown($this->offset+$rp,$header['size'],$this->filename,$header['name'],$this->be,$data); + + $result->push($o); + + $rp += $header['size']; + + // Only need to seek if we didnt read all the data + if ((! $bytes) && ($header['size'] > self::record_size)) + fseek($fh,$this->offset+$rp); + + } else { + dd([get_class($this) => $data,'header'=>$header,'ptr'=>$rp,'size'=>$this->size,'bytes'=>$bytes]); + } + } + + break; + + case 'movi': + $result->push(new movi($this->offset,$this->size,$this->filename,$this->be)); + + break; + + default: + throw new \Exception('Unhandled header type: '.$header['name']); + } + + if (! $bytes) { + fclose($fh); + unset($fh); + } + + return $result; + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Containers/rlist/avih.php b/app/Media/MSVideo/Containers/rlist/avih.php new file mode 100644 index 0000000..d641a91 --- /dev/null +++ b/app/Media/MSVideo/Containers/rlist/avih.php @@ -0,0 +1,50 @@ +['V',4], + 'mdr'=>['V',4], + 'PG'=>['V',4], + 'flags'=>['V',4], + 'frames'=>['V',4], + 'init_frames'=>['V',4], + 'streams'=>['V',4], + 'buffer_size'=>['V',4], + 'width'=>['V',4], + 'height'=>['V',4], + 'time_scale'=>['V',4], + 'data_rate'=>['V',4], + 'start_time'=>['V',4], + 'data_length'=>['V',4], + ]; + + public function __construct(int $offset,int $size,string $filename,bool $be,?string $data) + { + parent::__construct($offset,$size,$filename,$be,$data); + + $this->cache = $this->cache($data); + + // For debugging + if (FALSE) + $this->debug = hex_dump($data ?: $this->data()); + } + + public function __get(string $key): mixed + { + switch ($key) { + case 'height': + case 'width': + return Arr::get($this->cache,$key); + + default: + return parent::__get($key); + } + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Containers/rlist/isft.php b/app/Media/MSVideo/Containers/rlist/isft.php new file mode 100644 index 0000000..8291f7b --- /dev/null +++ b/app/Media/MSVideo/Containers/rlist/isft.php @@ -0,0 +1,30 @@ +cache = collect(['software'=>rtrim($data)]); + + // For debugging + if (FALSE) + $this->debug = hex_dump($data ?: $this->data()); + } + + public function __get(string $key): mixed + { + switch ($key) { + case 'software': + return $this->cache->get('software'); + + default: + return parent::__get($key); + } + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Containers/rlist/junk.php b/app/Media/MSVideo/Containers/rlist/junk.php new file mode 100644 index 0000000..7d3b1c7 --- /dev/null +++ b/app/Media/MSVideo/Containers/rlist/junk.php @@ -0,0 +1,10 @@ +debug = hex_dump($data ?: $this->data()); + } + + public function __get(string $key): mixed + { + switch ($key) { + case 'signature': + return $this->signature(); + + default: + return parent::__get($key); + } + } + + /** + * Calculate the signature of the data + * + * @param string $alg + * @return string + */ + private function signature(string $alg='sha1'): string + { + if (! Arr::has($this->cache,'signature')) { + if ($this->size) { + $this->fopen(); + + $hash = hash_init($alg); + + while (!is_null($read = $this->fread(16384))) + hash_update($hash, $read); + + $this->fclose(); + + $this->cache['signature'] = hash_final($hash); + + } else { + $this->cache['signature'] = NULL; + } + } + + return $this->cache['signature']; + } +} \ No newline at end of file diff --git a/app/Media/MSVideo/Containers/rlist/rlist.php b/app/Media/MSVideo/Containers/rlist/rlist.php new file mode 100644 index 0000000..9e03fe0 --- /dev/null +++ b/app/Media/MSVideo/Containers/rlist/rlist.php @@ -0,0 +1,10 @@ +['a4',4], // FCC type + 'handler'=>['a4',4], // FourCC of codec to be used + 'flags'=>['a4',4], + 'priority'=>['v',2], + 'language'=>['v',2], + 'init_frames'=>['V',4], // Number of the First block of the stream that is present in the file. + 'scale'=>['V',4], + 'rate'=>['V',4], + 'start'=>['V',4], // Start time of stream. + 'length'=>['V',4], // Size of stream in units as defined in dwRate and dwScale + 'buffer_size'=>['V',4], // Size of Buffer necessary to store blocks of that stream. Can be 0 (in that case the application has to guess) + 'quality'=>['V',4], + 'sample_size'=>['V',4], // number of bytes of one stream atom + 'frame'=>['V',4], + ]; + + public function __construct(int $offset,int $size,string $filename,bool $be,?string $data) + { + parent::__construct($offset,$size,$filename,$be,$data); + + $this->cache = $this->cache($data); + + $this->type = Arr::get($this->cache,'type'); + + // For debugging + if (FALSE) + $this->debug = hex_dump($data ?: $this->data()); + } + + public function __get(string $key): mixed + { + switch ($key) { + case 'audio_samplerate': + case 'video_framerate': + return Arr::get($this->cache,'rate') / Arr::get($this->cache,'scale',1); + + case 'audio_codec': + case 'video_codec': + return Arr::get($this->cache,'handler'); + + case 'duration': + return Arr::get($this->cache,'length'); + + default: + return parent::__get($key); + } + } +} \ No newline at end of file diff --git a/app/Media/QuickTime/Atoms/SubAtom.php b/app/Media/QuickTime/Atoms/SubAtom.php index c8f99b2..8315843 100644 --- a/app/Media/QuickTime/Atoms/SubAtom.php +++ b/app/Media/QuickTime/Atoms/SubAtom.php @@ -12,8 +12,6 @@ abstract class SubAtom extends Atom { use ObjectIssetFix; - protected ?string $unused_data; - protected const atom_record = [ 'version'=>['c',1], 'flags'=>['a3',3], @@ -35,25 +33,4 @@ abstract class SubAtom extends Atom throw new \Exception('Unknown key: '.$key); } } - - /** - * Unpack data into our cache - * - * @param string|null $data - * @return Collection - * @throws \Exception - */ - protected function cache(?string $data=NULL): Collection - { - $data = $data ?: $this->data(); - - if (! count($this->cache) && $this->size) { - $this->cache = collect(unpack($this->unpack(),$data)); - - if ($this->size > ($x=$this->unpack_size())) - $this->unused_data = substr($data,$x); - } - - return $this->cache; - } } \ No newline at end of file