Skip to content

Instantly share code, notes, and snippets.

@qvoid
Last active September 27, 2023 01:27
Show Gist options
  • Save qvoid/d4689a74253df704bd843d1377e36024 to your computer and use it in GitHub Desktop.
Save qvoid/d4689a74253df704bd843d1377e36024 to your computer and use it in GitHub Desktop.
Parse video or JPEG files under a directory using mediainfo and format the results for import into Excel
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import json
import os
import os.path
import re
import shlex
import subprocess
import sys
import time
from enum import Enum
from os import listdir
from os.path import isfile, join
class FILE_TYPE(Enum):
    """Kind of file system entry reported by the path check."""

    # Path is missing or is neither a regular file nor a directory.
    UNKNOWN = 0
    # Path is a regular file.
    FILE = 1
    # Path is a directory.
    DIR = 2
# Directory to scan, taken from the first command-line argument.
# A missing argument is mapped to "" so the usage message below is shown
# instead of an IndexError traceback.
media_dir = sys.argv[1] if len(sys.argv) > 1 else ""
if not media_dir:
    sys.exit("Please input directory path to scan!!!\n")
# Length of the scan root as typed by the user.
root_dir_str_len = len(media_dir)
def cmd(command):
    """Run *command* through the shell and capture its output.

    Args:
        command: Shell command line to execute.

    Returns:
        A ``(ok, stdout, stderr)`` tuple.  ``ok`` is True only when the
        process exited with status 0; ``stdout`` and ``stderr`` hold the
        captured output decoded as UTF-8.
    """
    subp = subprocess.Popen(
        command,
        shell=True,
        close_fds=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        # Undecodable bytes must not abort the caller with a
        # UnicodeDecodeError; substitute them instead.
        errors="replace",
    )
    try:
        std_out_data, std_err_data = subp.communicate(timeout=60)
    except subprocess.TimeoutExpired:
        # Kill the stuck child, then collect whatever it wrote so the
        # process is reaped and the pipes are closed.
        subp.kill()
        std_out_data, std_err_data = subp.communicate()
        print("Timeout : ", command)
    # communicate() has waited for the child, so returncode is set here;
    # a killed child has a non-zero return code and is reported as failed.
    return (subp.returncode == 0, std_out_data, std_err_data)
def check_path(path):
    """Classify *path* and report whether this process may use it.

    Args:
        path: File system path to inspect.

    Returns:
        A ``(FILE_TYPE, bool)`` tuple: the kind of entry found and whether
        it is usable.  A directory is usable when it can be both listed and
        traversed, a file when it can be read.  A missing path, or one that
        is neither a file nor a directory, yields
        ``(FILE_TYPE.UNKNOWN, False)``.
    """
    if not os.path.exists(path):
        print("Not exist : ", path)
        return (FILE_TYPE.UNKNOWN, False)

    if os.path.isdir(path):
        # Listing entries needs read permission and descending needs
        # execute permission; os.walk ignores listing errors by default,
        # so an unreadable directory would otherwise look empty.
        if os.access(path, os.R_OK | os.X_OK):
            return (FILE_TYPE.DIR, True)
        print("Could not access directory : ", path)
        return (FILE_TYPE.DIR, False)

    if os.path.isfile(path):
        if os.access(path, os.R_OK):
            return (FILE_TYPE.FILE, True)
        print("Could not read file : ", path)
        return (FILE_TYPE.FILE, False)

    print("Check failed for : ", path)
    return (FILE_TYPE.UNKNOWN, False)
# Abort early unless the scan root is an accessible directory.
file_type, permission = check_path(media_dir)
if file_type != FILE_TYPE.DIR or not permission:
    # f-string so the offending path, not the literal "{media_dir}",
    # appears in the message.
    sys.exit(f"Access {media_dir} failed!!")
# Field names of interest in a MediaInfo track, with sample values.
# "Height" is capitalised like "Width" and like the 'Height' export column.
INFO_KEYS = [
    "@type",                                   # General, Video, Image, Audio
    "Format",                                  # HEVC, AVC, JPEG, PNG
    "Format_Profile",                          # Main, Baseline, Main 10
    "Format_Level",                            # 4, 6
    "Format_Tier",                             # Main, High
    "CodecID",                                 # hev1
    "Duration",                                # 0.33 (in sec)
    "Width", "Height",
    "Sampled_Width", "Sampled_Height",
    "PixelAspectRatio", "DisplayAspectRatio",
    "FrameRate",                               # 30.000
    "FrameCount",                              # 10
    "ColorSpace",                              # RGB, YUV
    "ChromaSubsampling",                       # 4:2:0
    "BitDepth",                                # 8, 10
    "ScanType",                                # Progressive
    "colour_description_present",              # Yes
    "colour_description_present_Source",       # Stream
    "colour_range",                            # Limited, Full
    "colour_range_Source",                     # Stream
    "colour_primaries",                        # BT.709, BT.2020
    "colour_primaries_Source",                 # Stream
    "transfer_characteristics",                # BT.709, "PQ"
    "transfer_characteristics_Source",         # Stream
    "matrix_coefficients",                     # BT.709, "BT.2020 non-constant"
    "matrix_coefficients_Source",              # Stream
    "MasteringDisplay_ColorPrimaries",         # Display P3,
    "MasteringDisplay_ColorPrimaries_Source",  # Stream
    "MasteringDisplay_Luminance",              # min: 0.0200 cd/m2, max: 1200 cd/m2
    "MasteringDisplay_Luminance_Source",       # Stream,
    "OverallBitRate",
    "BitRate",                                 # 4788391
    "ColorSpace_ICC",                          # RGB
    "HDR_Format",                              # SMPTE ST 2086,
    "HDR_Format_Compatibility",                # "HDR10"
]
# Columns of the exported report, in output order.  Apart from the columns
# filled in explicitly by the scanner, each name is looked up verbatim in the
# Video/Image track, so the spelling must match MediaInfo's field names
# (hence "MasteringDisplay_*", not "MasteringDispaly_*").
Export_Keys = (
    'Path',              # file path
    # in General part
    'Container',         # container format, key "Format": MPEG-4, JPEG, etc
    'Duration',
    'FileSize',
    'OverallBitRate',
    # in Video or Image part
    'Type',              # file type: Video, Image
    'Format',            # codec format: HEVC, AVC, JPEG
    'Profile',
    'CodecID',           # hvc1, avc1
    'Level',
    'Width',
    'Height',
    'ScanType',
    'BitRate',
    'BitRate_Maximum',
    'FrameRate_Mode',
    'FrameRate',
    'FrameCount',
    'ColorSpace',
    'ChromaSubsampling',
    'BitDepth',
    'colour_range',
    'colour_primaries',
    'transfer_characteristics',
    'matrix_coefficients',
    'MasteringDisplay_ColorPrimaries',
    'MasteringDisplay_Luminance',
    'ColorSpace_ICC',
    'HDR_Format',
    'HDR_Format_Compatibility',
)
# File suffixes (without the leading dot) that are never handed to mediainfo.
exclude_files = ('json', 'db', 'rar', 'zip', 'mp3', 'm4a', 'txt', 'link', 'png', 'xls', 'xlsx', 'DS_Store', 'log', 'img', 'bin', 'suo', 'sqlite', 'yuv', 'rgb')
# Base command line; the path of the file to inspect is appended to it.
mediainfo_cmd = "mediainfo --Output=JSON"
# Report file.  The encoding is pinned to UTF-8, matching how the mediainfo
# output is decoded, so non-ASCII paths do not depend on the locale default.
output_fp = open("./scan_res.log", "w", encoding="utf-8")
# Header row: every column name is followed by the ' | ' separator.
output_fp.write("".join(f"{item} | " for item in Export_Keys))
output_fp.write('\n')
def _is_excluded(file_name):
    """Return True when *file_name* ends in a suffix from ``exclude_files``."""
    # Work on the bare file name so dots in directory names are never
    # mistaken for a suffix; a name without any dot has no suffix at all.
    _, dot, suffix = file_name.rpartition(".")
    return bool(dot) and suffix in exclude_files


def _build_record(track_list, rel_path):
    """Build one report row from the tracks MediaInfo returned for a file.

    Returns a dict keyed by ``Export_Keys`` when a Video track or a JPEG
    Image track was found, otherwise None.
    """
    record = dict.fromkeys(Export_Keys)
    is_media = False
    general_found = False
    video_found = False
    for track in track_list:
        track_type = track.get('@type', '')
        print("found type ", track_type)
        if track_type == 'General':
            general_found = True
            record['Path'] = rel_path
            record['Container'] = track.get('Format', '')
            record['FileSize'] = track.get('FileSize', '')
            record['Duration'] = track.get('Duration', '')
            record['OverallBitRate'] = track.get('OverallBitRate', '')
            continue
        if track_type in ('Video', 'Image'):
            video_found = True
            # JPEG is the only image format that is reported.
            if track_type == 'Image' and track.get('Format', '') != 'JPEG':
                is_media = False
                break
            is_media = True
            record['Type'] = track_type
            record['Profile'] = track.get('Format_Profile', '')
            record['Level'] = track.get('Format_Level', '')
            # Copy every export column the track provides under the same name.
            for key in Export_Keys:
                value = track.get(key, '')
                if value:
                    record[key] = value
        if video_found and general_found:
            print("parse finished")
            break
    return record if is_media else None


def scan_dir(dst_dir):
    """Walk *dst_dir*, run mediainfo on each file and append rows to the report.

    Files whose suffix is listed in ``exclude_files`` are skipped.  For every
    other file the mediainfo JSON output is parsed and, when it describes a
    video or a JPEG image, one ' | '-separated row is written to
    ``output_fp``.  Progress and failures are printed to stdout.

    Args:
        dst_dir: Root directory to scan recursively.
    """
    for root, _dirs, files in os.walk(dst_dir):
        for file_name in files:
            full_path = os.path.join(root, file_name)
            if _is_excluded(file_name):
                print("Skip : ", full_path)
                continue
            print("Process : ", full_path)

            # Path relative to the scan root; unlike slicing by the length
            # of the root string this is correct with a trailing separator.
            rel_path = os.path.relpath(full_path, dst_dir)

            # The command runs through the shell, so quote the path properly:
            # spaces, quotes, '$' and backticks must reach mediainfo verbatim.
            cmd_ret, cmd_stdout, _cmd_stderr = cmd(
                f"{mediainfo_cmd} {shlex.quote(full_path)}")
            if not cmd_ret or not cmd_stdout:
                print("Failed : ", rel_path)
                continue

            try:
                info_dict = json.loads(cmd_stdout)
            except json.JSONDecodeError:
                # Unparseable output affects this file only, not the scan.
                print("Failed : ", rel_path)
                continue

            if not isinstance(info_dict, dict) or 'media' not in info_dict:
                print("Not a media file: ", full_path)
                continue
            media_dict = info_dict['media']
            if media_dict == "null" or not media_dict:
                print("Failed : ", rel_path)
                continue
            if 'track' not in media_dict:
                print("Not a media file: ", full_path)
                continue

            record = _build_record(media_dict['track'], rel_path)
            if record is None:
                print("No Video or Image : ", full_path)
                continue

            print("Success : ", record['Path'])
            # Columns that were never filled are written as a blank instead
            # of the text "None".
            cells = (' ' if value is None else str(value)
                     for value in record.values())
            output_fp.write("".join(f"{cell} | " for cell in cells))
            output_fp.write('\n')
# Run the scan; the report file is closed even when the scan raises.
try:
    scan_dir(media_dir)
finally:
    output_fp.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment