Last active
August 3, 2022 20:22
-
-
Save lucienbill/3c4d7ce97168ecf586c89ac92788ad15 to your computer and use it in GitHub Desktop.
Given a folder that contains some zip files, this script lists the files within the zipped files that contain a specific string.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### What is this script? ###
# Given a folder that contains some zip files, this script lists the files
# within the zipped files that contain a specific string.
#
# Example behavior (if you run the script with the example values):
# This script will search for files that have the following characteristics:
# - ".csv" files contained within a zip, in the folder
#   "C:\Users\lulu\Downloads\tmp\dev" or its subfolders
# - The file must contain the string "foo"
# - The script will return 2 files or fewer
### How to use this script? ###
# 0) !!! tested only on Windows 10 !!!
# 1) download this PowerShell script
# 2) open this script in an editor (e.g. Notepad++)
# 3) change the parameters (listed below) and save
# 4) open a PowerShell terminal
# 5) within the terminal, execute the script
### PARAMETERS ###

# Folder to scan for zip files (its subfolders are scanned too).
# Example: "C:\Users\lulu\Downloads\tmp\dev"
$folder = "C:\Users\lulu\Downloads\tmp\dev"

# Regular expression selecting which files inside each zip are read.
# It is matched against the name of each zip entry.
# Example: "\.csv$" -> anything that ends with ".csv"
$filesTypesRegex = "\.csv$"

# String to look for inside the selected files.
# Example: "foo" -> files whose content contains "foo"
$stringToSearch = "foo"

# Maximum number of matching files to report before the search stops.
# Example: 2 -> 2 files ; $null -> no limit
$numberOfFilesToGet = 2

### END OF PARAMETERS ###
# Load the .NET assembly that provides [System.IO.Compression.ZipFile].
Add-Type -AssemblyName 'System.IO.Compression.FileSystem'

$filesFound = 0

# A limit of $null (or any value below 1) means "no limit".
$hasLimit = ($null -ne $numberOfFilesToGet) -and ($numberOfFilesToGet -gt 0)

# Every zip in the designated folder and its subfolders.
$zipFiles = Get-ChildItem -Path $folder -Filter *.zip -Recurse -File
# Alternative to the line above: 'the zip name must contain "202105"':
# $zipFiles = Get-ChildItem -Path $folder -Filter *.zip -Recurse -File | Where-Object { $_.Name -match "202105" }

:zipLoop foreach ($zipFile in $zipFiles) {
    $zipPath = $zipFile.FullName
    $zip = $null
    try {
        # Open the zip file (read-only) to access its content.
        $zip = [System.IO.Compression.ZipFile]::OpenRead($zipPath)

        foreach ($entry in $zip.Entries) {
            # Only look at entries whose name matches the requested file type.
            if ($entry.Name -notmatch $filesTypesRegex) { continue }

            # Read the whole entry as text. Closing the reader also closes the
            # underlying entry stream, and the finally block guarantees it
            # happens even if reading fails.
            $reader = New-Object System.IO.StreamReader($entry.Open())
            try {
                $text = $reader.ReadToEnd()
            }
            finally {
                $reader.Close()
            }

            if ($text.Contains($stringToSearch)) {
                $filesFound += 1
                Write-Host ('Match: {0}\{1}' -f $zipPath, $entry.FullName)

                # Stop searching once enough files were found. Leaving through
                # the labelled break (instead of Exit) lets the finally block
                # below release the zip and does not terminate the host session.
                if ($hasLimit -and ($filesFound -ge $numberOfFilesToGet)) {
                    break zipLoop
                }
            }
        }
    }
    catch {
        # An unreadable or corrupt zip must not abort the whole search.
        Write-Warning ("Skipping '{0}': {1}" -f $zipPath, $_.Exception.Message)
    }
    finally {
        # Always release the handle on the zip file, including on early stop.
        if ($null -ne $zip) { $zip.Dispose() }
    }
}

Write-Host 'Search: done.' $filesFound 'files found.'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment