Last active
October 26, 2020 03:21
-
-
Save zlalanne/4728645 to your computer and use it in GitHub Desktop.
Python script to merge a folder of PDFs; the merge order can be defined in a list.txt file contained within the directory.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""
Simple Python script that combines a folder of pdfs into a single
pdf. By default the pdfs are merged in alphabetical order and
only appear once. To change this, create a "list.txt" file in the
directory with the pdfs to merge and list the pdfs in the order
they should be merged, one per line.

Ex list.txt:
    doc2.pdf
    doc1.pdf
    doc2.pdf

To use the script:
    python PDFMerge.py /path/to/pdf/dir output.pdf
"""
import os | |
import sys | |
from PyPDF2 import PdfFileWriter, PdfFileReader | |
def main():
    """Merge the PDFs found in a directory into a single output PDF.

    Reads the directory from ``sys.argv[1]`` and the output name from
    ``sys.argv[2]`` (".pdf" is appended when it is missing).  The process
    changes into that directory, so a relative output name is created
    inside it; an existing file with the output name is removed first.

    When the directory contains a ``list.txt``, its lines give the merge
    order (a file may be listed more than once).  Lines that do not end in
    ".pdf" or do not name an existing file are skipped.  Without a
    ``list.txt``, every ``*.pdf`` in the directory is merged in sorted order.

    Exits with status 1 when the directory is missing (or is not a
    directory) or when there is nothing to merge.
    """
    # Saving arguments
    pdfDir = sys.argv[1]
    filename = sys.argv[2]
    if not filename.endswith(".pdf"):
        filename += ".pdf"

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Directory to merge: %s" % pdfDir)
    print("File to create: %s" % filename)

    # Check the directory up front: os.chdir() on a missing path or on a
    # regular file would otherwise abort with a traceback.
    if not os.path.isdir(pdfDir):
        print("Path doesn't exist")
        sys.exit(1)

    print("Changing to directory")
    os.chdir(pdfDir)
    # Remove a stale output so it is never picked up as one of the inputs.
    if os.path.isfile(filename):
        os.remove(filename)

    # Getting list of files
    if os.path.isfile("list.txt"):
        print("Found list.txt")
        with open("list.txt", "r") as listing:
            requested = [line.strip() for line in listing]
        files = [
            name for name in requested
            if name.endswith(".pdf") and os.path.isfile(name)
        ]
    else:
        print("No list.txt, doing in alphabetical order")
        files = [
            name for name in sorted(os.listdir("."))
            if name.endswith(".pdf")
        ]

    if not files:
        print("No files found for merging")
        sys.exit(1)

    # Doing the pdf merge
    pdfOutput = PdfFileWriter()
    inputStreams = []
    try:
        for pdfFile in files:
            print("Adding %s" % pdfFile)
            stream = open(pdfFile, "rb")
            inputStreams.append(stream)
            pdfInput = PdfFileReader(stream)
            for i in range(pdfInput.getNumPages()):
                pdfOutput.addPage(pdfInput.getPage(i))

        # The source streams stay open until here because the writer may
        # still read page data from them while producing the output.
        with open(filename, "wb") as outputStream:
            pdfOutput.write(outputStream)
    finally:
        for stream in inputStreams:
            stream.close()
if __name__ == "__main__":
    # Both the PDF directory and the output file name are required.
    if len(sys.argv) < 3:
        print("Incorrect number of parameters.")
        print("Usage: python PDFMerge.py path/to/pdf/dir output.pdf")
        # A non-zero status lets a calling shell or script detect the
        # usage error.
        sys.exit(1)
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Awesome,
I have multiple folders, each one containing multiple pdf image files like this:
Folder 1
file1.pdf
file2.pdf
file3.pdf
...
file100.pdf
Folder 2
file1.pdf
file2.pdf
file3.pdf
...
file100.pdf
Is there any way to write a script that, in a single run, outputs each folder as one PDF file, like this?
Folder1.pdf
Folder2.pdf
...
Folder94.pdf