Last active
March 31, 2023 02:54
-
-
Save sc0tt/dd0c3bfb9f70d907291e0780fcee3d5f to your computer and use it in GitHub Desktop.
I needed to sort around 5k files into different directories based on extension. I used this to try out ChatGPT.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import shutil | |
import json | |
from pathlib import Path | |
# Where the unsorted files are read from, and the root of the sorted tree.
source_dir = Path('root')
destination_dir = Path('root_sorted')

# Category folder name -> file extensions (lower-case, no leading dot) that
# are filed under it. Extensions are lower-cased before being looked up.
category_mappings = {
    'audio': ['mp3', 'ogg', 'm4a', 'wav'],
    'video': ['mp4', 'webm', 'gifv', 'mov', 'mpg'],
    'image': [
        'svg', 'png', 'gif', 'jpg', 'jpeg', 'webp', 'heic', 'ico',
    ],
    'document': [
        'zip', 'csv', 'epub', 'mobi', '7z', 'gz',
        'txt', 'json', 'pdf', 'log', 'xlsx', 'docx',
    ],
    'application': ['apk', 'iso', 'exe'],
    'database': ['db', 'sqlite'],
}
def copy_files(debug=False, source=None, destination=None, mappings=None):
    """Copy files from a flat directory into per-category sub-directories.

    Each regular file directly inside the source directory is classified by
    its lower-cased extension. Files whose extension is listed in the mapping
    are copied with ``shutil.copy2`` to ``<destination>/<category>/<name>``.
    Files with an unlisted (or no) extension are only reported, not copied.
    Sub-directories of the source are ignored.

    Args:
        debug: When true, perform a dry run: destination directories are
            still created and files are counted as if copied, but no file
            is actually copied.
        source: Directory to read from. Defaults to the module-level
            ``source_dir``.
        destination: Root of the sorted tree. Defaults to the module-level
            ``destination_dir``.
        mappings: Dict of category name -> list of extensions (lower-case,
            without the dot). Defaults to the module-level
            ``category_mappings``.

    Returns:
        A JSON-serialisable dict with the keys ``file_counts`` (files copied
        per category), ``total_files_copied``, ``total_files_not_copied``
        (files skipped because of an unlisted extension),
        ``total_conflicts``, ``total_invalid_extensions``,
        ``invalid_extensions`` (extension -> count), ``conflicts`` (names of
        files that already exist at the destination with a different size or
        modification time) and ``errors`` (names of files whose copy failed).
    """
    source = source_dir if source is None else Path(source)
    destination = destination_dir if destination is None else Path(destination)
    mappings = category_mappings if mappings is None else mappings

    # Build the extension lookup once. setdefault keeps the first category
    # that lists an extension, the same result as scanning the mapping in
    # order and stopping at the first hit.
    category_by_extension = {}
    for category_name, extensions in mappings.items():
        for known_extension in extensions:
            category_by_extension.setdefault(known_extension, category_name)

    file_counts = {}
    invalid_extensions = {}
    copied_files = []
    not_copied_files = []
    conflicts = []
    errors = []

    # Create the destination directories up front (also done in debug mode).
    for category in mappings:
        (destination / category).mkdir(parents=True, exist_ok=True)
    (destination / 'unknown').mkdir(parents=True, exist_ok=True)

    # List the source once. Only regular files are considered: a directory
    # cannot be copied with copy2 and would otherwise skew the counts.
    # Sorting makes the order of the report lists reproducible.
    files = sorted(path for path in source.glob('*') if path.is_file())
    total_files = len(files)

    for file_count, file_path in enumerate(files, start=1):
        extension = file_path.suffix.lower()[1:]
        category = category_by_extension.get(extension)

        if category is None:
            # Unlisted extension: report it, but leave the file where it is.
            invalid_extensions[extension] = invalid_extensions.get(extension, 0) + 1
            not_copied_files.append(str(file_path))
        else:
            destination_path = destination / category / file_path.name
            if destination_path.exists():
                # Never overwrite. An existing file only counts as a conflict
                # when it differs in size or modification time.
                source_stat = file_path.stat()
                destination_stat = destination_path.stat()
                if (source_stat.st_size != destination_stat.st_size
                        or source_stat.st_mtime != destination_stat.st_mtime):
                    conflicts.append(file_path.name)
            else:
                copied = True
                if not debug:
                    try:
                        shutil.copy2(file_path, destination_path)
                    except OSError:
                        # shutil.Error derives from OSError, so this covers
                        # both filesystem and shutil-level failures.
                        errors.append(file_path.name)
                        copied = False
                # Count the file only if the copy succeeded (or was skipped
                # on purpose by a dry run).
                if copied:
                    file_counts[category] = file_counts.get(category, 0) + 1
                    copied_files.append(str(destination_path))

        print(f'Progress: {file_count}/{total_files} files processed.', end='\r')

    if files:
        # Finish the carriage-return progress line so that later output does
        # not overwrite it.
        print()

    report = {
        'file_counts': file_counts,
        'total_files_copied': len(copied_files),
        'total_files_not_copied': len(not_copied_files),
        'total_conflicts': len(conflicts),
        'total_invalid_extensions': sum(invalid_extensions.values()),
        'invalid_extensions': invalid_extensions,
        'conflicts': conflicts,
        'errors': errors,
    }
    return report
# GitHub Copilot generated the following function:
def create_directory_report_json():
    """Write ``directory_report.json`` describing each category directory.

    For every category in ``category_mappings`` the report holds the paths
    found directly inside ``destination_dir / category`` and how many there
    are. The file is written to the current working directory.
    """
    def describe(category):
        # Paths directly inside this category's folder, as strings.
        entries = []
        for entry in (destination_dir / category).glob('*'):
            entries.append(str(entry))
        return {
            'total_files': len(entries),
            'files': entries
        }

    summary = {category: describe(category) for category in category_mappings}

    # Serialise the collected listing with the same indentation as before.
    with Path('directory_report.json').open(mode='w') as out:
        json.dump(summary, out, indent=2)
def main():
    """Sort the files, then write both JSON reports to the working directory."""
    # Run for real (not a dry run) and keep the summary for the report file.
    summary = copy_files(False)

    with Path('category_report.json').open(mode='w') as out:
        json.dump(summary, out, indent=2)

    # Second report: what each category directory now contains.
    create_directory_report_json()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment