Last active
September 27, 2023 01:27
-
-
Save qvoid/d4689a74253df704bd843d1377e36024 to your computer and use it in GitHub Desktop.
Parse video or JPEG files under a directory using mediainfo and format the results for import into Excel.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import json
import os
import os.path
import re
import shlex
import subprocess
import sys
import time
from enum import Enum
from os import listdir
from os.path import isfile, join
class FILE_TYPE(Enum):
    """Kind of filesystem entry, as reported by ``check_path``."""

    # The path does not exist, or is neither a regular file nor a directory.
    UNKNOWN = 0
    # The path is a regular file.
    FILE = 1
    # The path is a directory.
    DIR = 2
# Directory to scan, taken from the first command-line argument.
# The lookup is guarded because indexing sys.argv[1] directly raises
# IndexError when the script is started without arguments, which would hide
# the usage message below.
media_dir = sys.argv[1] if len(sys.argv) > 1 else ""
if not media_dir:
    sys.exit("Please input directory path to scan!!!\n")

# Length of the scan root string; used to cut the root prefix off full paths.
root_dir_str_len = len(media_dir)
def cmd(command, timeout=60):
    """Run *command* and capture its output as UTF-8 text.

    Args:
        command: Either a command-line string, which is run through the
            shell, or a sequence of program arguments, which is executed
            directly without a shell.
        timeout: Seconds to wait for the process before it is killed.

    Returns:
        A tuple ``(ok, stdout, stderr)``. ``ok`` is True only when the
        process exited with status 0. When the process could not be started
        at all, ``(False, None, None)`` is returned.
    """
    try:
        subp = subprocess.Popen(
            command,
            # Only a string needs the shell; an argument list is passed to
            # the program as-is, so its items are never re-parsed.
            shell=isinstance(command, (str, bytes)),
            close_fds=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
            # Undecodable bytes in the output must not abort the caller.
            errors="replace",
        )
    except OSError as err:
        print("Failed to start : ", command, err)
        return (False, None, None)

    try:
        std_out_data, std_err_data = subp.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        subp.kill()
        # Second communicate() reaps the killed process and collects
        # whatever it wrote before being killed.
        std_out_data, std_err_data = subp.communicate()
        print("Timeout : ", command)

    return (subp.returncode == 0, std_out_data, std_err_data)
# Classify a path as file or directory and check the matching permission.
def check_path(path):
    """Return ``(FILE_TYPE, accessible)`` for *path*.

    Directories are checked with ``os.X_OK`` and regular files with
    ``os.R_OK``. A path that does not exist, or that is neither a directory
    nor a regular file, yields ``(FILE_TYPE.UNKNOWN, False)``. A message is
    printed for every failing case.
    """
    if not os.path.exists(path):
        print("Not exist : ", path)
        return (FILE_TYPE.UNKNOWN, False)

    if os.path.isdir(path):
        if os.access(path, os.X_OK):
            return (FILE_TYPE.DIR, True)
        print("Could not access directory : ", path)
        return (FILE_TYPE.DIR, False)

    if os.path.isfile(path):
        if os.access(path, os.R_OK):
            return (FILE_TYPE.FILE, True)
        print("Could not read file : ", path)
        return (FILE_TYPE.FILE, False)

    # Exists, but is neither a directory nor a regular file.
    print("Check failed for : ", path)
    return (FILE_TYPE.UNKNOWN, False)
# Abort before scanning unless the root is a directory that passed the
# permission check.
file_type, permission = check_path(media_dir)
if file_type != FILE_TYPE.DIR or not permission:
    # f-string so the offending path is interpolated into the message
    # instead of the literal text "{media_dir}".
    sys.exit(f"Access {media_dir} failed!!")
# Field names with example values in the trailing comments.
# NOTE(review): this list is not referenced anywhere else in this file; it
# looks like a reference of mediainfo track fields - confirm before removing.
INFO_KEYS = [
    "@type",                                   # General, Video, Image, Audio
    "Format",                                  # HEVC, AVC, JPEG, PNG
    "Format_Profile",                          # Main, Baseline, Main 10
    "Format_Level",                            # 4, 6
    "Format_Tier",                             # Main, High
    "CodecID",                                 # hev1
    "Duration",                                # 0.33 (in sec)
    # "Height" is capitalised like "Width" and the 'Height' export column.
    "Width", "Height",
    "Sampled_Width", "Sampled_Height",
    "PixelAspectRatio", "DisplayAspectRatio",
    "FrameRate",                               # 30.000
    "FrameCount",                              # 10
    "ColorSpace",                              # RGB, YUV
    "ChromaSubsampling",                       # 4:2:0
    "BitDepth",                                # 8, 10
    "ScanType",                                # Progressive
    "colour_description_present",              # Yes
    "colour_description_present_Source",       # Stream
    "colour_range",                            # Limited, Full
    "colour_range_Source",                     # Stream
    "colour_primaries",                        # BT.709, BT.2020
    "colour_primaries_Source",                 # Stream
    "transfer_characteristics",                # BT.709, "PQ"
    "transfer_characteristics_Source",         # Stream
    "matrix_coefficients",                     # BT.709, "BT.2020 non-constant"
    "matrix_coefficients_Source",              # Stream
    "MasteringDisplay_ColorPrimaries",         # Display P3,
    "MasteringDisplay_ColorPrimaries_Source",  # Stream
    "MasteringDisplay_Luminance",              # min: 0.0200 cd/m2, max: 1200 cd/m2
    "MasteringDisplay_Luminance_Source",       # Stream,
    "OverallBitRate",
    "BitRate",                                 # 4788391
    "ColorSpace_ICC",                          # RGB
    "HDR_Format",                              # SMPTE ST 2086,
    "HDR_Format_Compatibility",                # "HDR10"
]
# Column names of the exported table, in output order. Besides naming the
# header, the names are also looked up verbatim in each Video/Image track by
# scan_dir, so they must match the field names in the mediainfo output.
Export_Keys = (
    'Path',                             # file path
    # in General part
    'Container',                        # container format, key "Format": MPEG-4, JPEG, etc
    'Duration',
    'FileSize',
    'OverallBitRate',
    # in Video or Image part
    'Type',                             # file type: Video, Image
    'Format',                           # codec format: HEVC, AVC, JPEG
    'Profile',
    'CodecID',                          # hvc1, avc1
    'Level',
    'Width',
    'Height',
    'ScanType',
    'BitRate',
    'BitRate_Maximum',
    'FrameRate_Mode',
    'FrameRate',
    'FrameCount',
    'ColorSpace',
    'ChromaSubsampling',
    'BitDepth',
    'colour_range',
    'colour_primaries',
    'transfer_characteristics',
    'matrix_coefficients',
    # Spelled "Display" (not "Dispaly") so the per-track lookup can match.
    'MasteringDisplay_ColorPrimaries',
    'MasteringDisplay_Luminance',
    'ColorSpace_ICC',
    'HDR_Format',
    'HDR_Format_Compatibility',
)
# File-name suffixes (text after the last dot) that are skipped without
# running mediainfo on them.
exclude_files = (
    'json', 'db', 'rar', 'zip', 'mp3', 'm4a', 'txt', 'link', 'png',
    'xls', 'xlsx', 'DS_Store', 'log', 'img', 'bin', 'suo', 'sqlite',
    'yuv', 'rgb',
)

# Command prefix; the path of the file to inspect is appended to it.
mediainfo_cmd = "mediainfo --Output=JSON"
# Result file: one " | "-separated row per exported media file, preceded by
# a header row made of the column names.
# The encoding is fixed to UTF-8 so rows containing non-ASCII file paths are
# written the same way regardless of the locale's default encoding.
output_fp = open("./scan_res.log", "w", encoding="utf-8")
# Every column name is followed by the separator, including the last one.
output_fp.write("".join(f"{item} | " for item in Export_Keys))
output_fp.write('\n')
def _has_excluded_suffix(file_name):
    """Return True when the text after the last dot of *file_name* is listed in ``exclude_files``."""
    _, dot, suffix = file_name.rpartition(".")
    return bool(dot) and suffix in exclude_files


def _build_export_row(track_list, rel_path):
    """Collect the exported columns for one file from its mediainfo tracks.

    Args:
        track_list: The ``track`` entry of the parsed mediainfo JSON.
        rel_path: Path of the file relative to the scan root; stored in the
            ``Path`` column.

    Returns:
        A tuple ``(is_media, row)``. ``is_media`` is True when a Video track
        or a JPEG Image track was found. ``row`` maps every name in
        ``Export_Keys`` to its value, or to None when nothing was found.
    """
    row = dict.fromkeys(Export_Keys)
    # Set up front so the path is present even when no General track exists.
    row['Path'] = rel_path
    is_media = False
    general_found = False

    for track in track_list:
        track_type = track.get('@type', '')
        print("found type ", track_type)

        if track_type == 'General':
            general_found = True
            row['Container'] = track.get('Format', '')
            row['FileSize'] = track.get('FileSize', '')
            row['Duration'] = track.get('Duration', '')
            row['OverallBitRate'] = track.get('OverallBitRate', '')
            continue

        if track_type not in ('Video', 'Image'):
            continue

        if track_type == 'Image' and track.get('Format', '') != 'JPEG':
            # JPEG is the only image format that gets exported.
            is_media = False
            break

        is_media = True
        row['Type'] = track_type
        row['Profile'] = track.get('Format_Profile', '')
        row['Level'] = track.get('Format_Level', '')
        # Copy every column the track carries under the column's own name;
        # empty values do not overwrite what is already in the row.
        for key in Export_Keys:
            value = track.get(key, '')
            if value:
                row[key] = value

        if general_found:
            print("parse finished")
            break

    return is_media, row


def scan_dir(dst_dir):
    """Walk *dst_dir* recursively and export one row per video or JPEG file.

    Files whose suffix is listed in ``exclude_files`` are skipped. Every
    other file is passed to mediainfo through ``cmd``; the JSON result is
    parsed and, when it describes a Video track or a JPEG Image track, one
    row is appended to ``output_fp``. Progress and failures are printed.

    Args:
        dst_dir: Root directory of the scan. Paths in the ``Path`` column
            and in "Failed" messages are relative to it.
    """
    for root, _dirs, files in os.walk(dst_dir):
        for file_name in files:
            full_path = os.path.join(root, file_name)
            if _has_excluded_suffix(file_name):
                print("Skip : ", full_path)
                continue
            print("Process : ", full_path)

            # Relative to the directory actually being walked, so the result
            # does not depend on a trailing separator in the root path.
            rel_path = os.path.relpath(full_path, dst_dir)

            # The command goes through the shell: quote the path so spaces,
            # quotes, "$" or backticks in a file name stay part of the path.
            cmd_ret, cmd_stdout, _ = cmd(f"{mediainfo_cmd} {shlex.quote(full_path)}")
            if not cmd_ret or not cmd_stdout:
                print("Failed : ", rel_path)
                continue

            try:
                info_dict = json.loads(cmd_stdout)
            except json.JSONDecodeError:
                # Unparseable output for one file must not stop the scan.
                print("Failed : ", rel_path)
                continue

            if not isinstance(info_dict, dict) or 'media' not in info_dict:
                print("Not a media file: ", full_path)
                continue

            media_dict = info_dict['media']
            if not media_dict or media_dict == "null":
                print("Failed : ", rel_path)
                continue
            if 'track' not in media_dict:
                continue

            is_media, row = _build_export_row(media_dict['track'], rel_path)
            if not is_media:
                print("No Video or Image : ", full_path)
                continue

            print("Success : ", row['Path'])
            # Unset columns are written as a blank instead of the text "None".
            output_fp.write(
                "".join(f"{' ' if row[key] is None else row[key]} | " for key in row)
            )
            output_fp.write('\n')
# Run the scan; the result file is closed (and thereby flushed) even when the
# scan is interrupted by an exception.
try:
    scan_dir(media_dir)
finally:
    output_fp.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment