Skip to content

Instantly share code, notes, and snippets.

@beancurd1
Created January 1, 2016 03:43
Show Gist options
  • Save beancurd1/f850890fe7e7e5069f06 to your computer and use it in GitHub Desktop.
Save beancurd1/f850890fe7e7e5069f06 to your computer and use it in GitHub Desktop.
A PowerShell script that uses the itextsharp.dll library to extract dates from PDF files located on a network share and check whether they are valid, then emails the result to people.
# PDF Files Check Script (created by beancurd1; please distribute the code with this section, thanks)
# It uses itextsharp.dll (downloaded from SourceForge) to parse PDF files, extract the first date it finds,
# and compare it against a predefined date. The names of PDF files whose date doesn't match the predefined date are emailed to people.
# Load the iTextSharp assembly from the current directory; it provides the PDF reader/text extractor types.
Add-Type -Path .\itextsharp.dll

# The date every PDF is expected to carry.
$validDate = "11 Dec 2015"
$parsedValidDate = [datetime]$validDate

# Day of month without a leading zero (the search pattern re-adds an optional "0").
$day = $parsedValidDate.ToString('dd').TrimStart('0')

# Accepted Month+Year renderings; each one is appended to $day when searching a line.
$validMYArray = @(
    ' MMM yyyy', ' MMMM yyyy', '/MM/yyyy', '-MMM-yy' |
        ForEach-Object { $parsedValidDate.ToString($_) }
)

# PDF counters
$countTotal = 0
$countGood = 0
$countBad = 0

# Per-file scratch values used while scanning
$badDate = ""
$foundDate = ""

# Network location to scan and the heading of the "bad PDF" report
$PDFPath = "\\server\share"
$badPDF = "Bad PDF (e.g. incorrect date, empty date):`n`n"
Write-Host "Mapping a Drive..."
New-PSDrive -Name NetworkDrive -PSProvider FileSystem -Root $PDFPath
Write-Host "Parsing PDF Files..."
# Time the scan; the summary below reports the elapsed seconds.
$stopWatch = [System.Diagnostics.Stopwatch]::StartNew()
# The Month+Year fragments are spliced into regular expressions, so escape them once up front.
# Otherwise a culture-specific date separator such as "." would act as a wildcard.
$escapedMYArray = @($validMYArray | ForEach-Object { [regex]::Escape($_) })
#################################################################
### Search PDF Files from UNC folder, parse each PDF          ###
### output PDFs with incorrect date                           ###
#################################################################
Get-ChildItem -Path NetworkDrive:\ -Filter *.pdf -Recurse |
ForEach-Object {
    $pdfFile = $_          # keep the file object; $_ is rebound inside catch
    $countTotal++
    # Reset per-file state so nothing leaks in from the previous PDF.
    $foundDate = $false
    $badDate = ""
    $pageText = @()
    $reader = $null
    try {
        $reader = New-Object iTextSharp.text.pdf.pdfreader -ArgumentList $pdfFile.FullName
        # Only the first page is inspected; split its text into lines.
        $pageText = [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($reader, 1) -join "" -split "`n"
    }
    catch {
        # An unreadable PDF yields no lines and is therefore reported as bad with an empty date.
        Write-Warning "Unable to read '$($pdfFile.FullName)': $($_.Exception.Message)"
    }
    finally {
        # Release the reader for every file, not just the last one; it locks the PDF otherwise.
        if ($null -ne $reader) { $reader.Dispose() }
    }
    # Search each line for a date in one of the accepted formats; stop at the first date found.
    :loop ForEach ($line in $pageText) {
        ForEach ($validMY in $escapedMYArray) {
            # (?<!\d) stops e.g. "21 Dec 2015" from being accepted when the valid day is 1.
            if ($line -cmatch "(?<!\d)0?$day$validMY") {
                $countGood++
                $foundDate = $true
                break loop
            } elseif ($line -cmatch "[0-3]?\d$validMY") {
                # Right month/year but wrong day: remember the offending date for the report.
                $badDate = $Matches[0]
                break loop
            }
        }
    }
    if (-not $foundDate) {
        $countBad++
        $badPDF += $pdfFile.FullName + " ($badDate)`t`n" #<-Insert a Tab character before `n to avoid Outlook Extra Line Break issue
    }
}
# Unmap the drive
Remove-PSDrive -Name NetworkDrive
$stopWatch.Stop()
# Strip everything from the leading "\\" up to the last "\" on each line, leaving only the file name
$badPDF = $badPDF -replace "\\\\.*\\", ""
Write-Host "$badPDF`n`n$countTotal PDFs, Good=$countGood, Bad=$countBad $($stopWatch.Elapsed.TotalSeconds) sec
$PDFPath $validDate"
Write-Host "Email Result..."
# Collect the Send-MailMessage arguments in a hashtable and splat them onto the cmdlet.
$messageParameters = @{
    SmtpServer = "mailserver"
    From       = "a@yahoo.com"
    To         = "b@yahoo.com"
    Subject    = "PDF Checked has finish"
    Body       = "Say something here"
}
Send-MailMessage @messageParameters
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment