PoshCode Archive  Artifact Content

Artifact 22ed5791193ab6be4c331a13853b4489f534d11371b2e278e6226e63283a969f:

  • File SearchZIP-psm1-Module-Fi.ps1 — part of check-in [7267e4a881] at 2018-06-10 13:22:42 on branch trunk — If there are a large number of, for example, saved Web Pages (mht,htm,html,pdf) (user: Archdeacon size: 5236)

# encoding: ascii
# api: powershell
# title: SearchZIP.psm1 Module Fi
# description: If there are a large number of, for example, saved Web Pages (mht,htm,html,pdf)
# version: 0.1
# type: function
# author: Archdeacon
# license: CC0
# x-poshcode-id: 3236
# x-archived: 2012-02-18T15:19:47
# x-published: 2012-02-14T20:15:00
#
# stored in compressed ZIP or RAR files, it can be very difficult to locate any 
# particular one.
# In any such directory containing a number of these files this (module) procedure
# will search each individual file name for simple text strings, listing both the
# source RAR/ZIP file and the individual file name containing the string. The 
# relevant RAR/ZIP can then be subsequently opened in the usual way.
# The first step will find any lines containing the simple selected pattern 
# (which can be anywhere in the file name). Each successful find will then be 
# split into 2 headings: ‘Source’ and ‘File’.
# Note that there may be the odd occasion where a ‘non-readable’ character in the
# returned string slips through the net!
# The results can be displayed with ‘Out-GridView’ by use of the ‘-Table’ switch and
# then filtered more extensively if desired.
#
function SearchZIPfiles {
<#
.SYNOPSIS 
Search for (filename) strings inside compressed ZIP or RAR files.
.DESCRIPTION
In any directory containing a large number of ZIP/RAR compressed Web Page files 
this procedure will search each individual file name for simple text strings, 
listing both the source RAR/ZIP file and the individual file name containing
the string. The relevant RAR/ZIP can then be subsequently opened in the usual
way.
Using the '-Table' switch will show the results with the Out-GridView display.
.EXAMPLE
extract -find 'library' -path d:\scripts

Searching for 'library' - please wait... (Use CTRL+C to quit)
[Editor.zip] File: Windows 7 Library Procedures.mht
[Editor.rar] File: My Library collection from 1974 Idi Amin.html
[Test2.rar] File: Playlists from library - Windows 7 Forums.mht
[Test3.rar] File: Module library functions UserGroup.pdf
A total of 4 matches for 'library' found in folder 'D:\Scripts'.
.EXAMPLE
extract -find 'backup' -path desktop

Searching for 'backup' - please wait... (Use CTRL+C to quit)
[Web.zip] File: How To Backup User and System Files.mht
[Pages.rar] File: Create an Image Backup.pdf
A total of 2 matches for 'backup' found in folder 'C:\Users\Sam\Desktop'.
.NOTES
The first step will find any lines containing the selected pattern (which can
be anywhere in the line). Each of these lines will then be split into 2 
headings: Source and Filename.
Note that there may be the odd occasion where a 'non-readable' character in the
returned string slips through the net! 
.LINK
Web Address Http://www.SeaStarDevelopment.BraveHost.com
#>
   [CmdletBinding()]
   param([string][string][Parameter(Mandatory=$true)]$Find,
         [string][ValidateNotNullOrEmpty()]$path = $pwd,
         [switch]$table)

   [int]$count = 0
   if ($path -like 'desk*') {
      $path = Join-Path $home 'desktop\*'
   }
   else {
      $path = Join-Path $path '*'
   }
   if (!(Test-path $path)) {
      Write-Warning "Path '$($path.Replace('\*',''))' is invalid - resubmit"
      return
   }
   $folder = $path.Replace('*','')
   $lines = @{}
   $regex = '(?s)^(?<zip>.+?\.(?:zip|rar)):(?:\d+):.*\\(?<file>.*\.(mht|html?|pdf))(.*)$'

   Get-ChildItem $path -include '*.rar','*.zip' |
      Select-String -SimpleMatch -Pattern $find |
         foreach-Object `
            -begin { Write-Host "Searching for '$find' - please wait... (Use CTRL+C to quit)" } `
            -process {
                $_ = $_ -replace '/','\'
                if ( $_ -match $regex ) {
                   $source = $matches.zip.Replace($folder,'')
                   $file   = $matches.file
                   $file = $file -replace '\s+',' '         #Single space words.
                   $file = $file -replace '\p{S}',' '   #Remove some 'Dingbats'.
                   if ($table) {
                      $key = "{0:D4}" -f $count
                      $lines["$key $source"] = $file       #Create a unique key.
                   }
                   else {
                      Write-Host "[$source] File: $file"
                   }
                   $count++ 
                }
            } `
            -end { 
                $title = "A total of $($count) matches for '$find' found in host folder '$($path.Replace('\*',''))'."
                if ($table -and ($lines.count -gt 0)) {        
                   $lines.GetEnumerator() | 
                      Select-Object @{name = 'Source';expression = {$_.Key.SubString(5)}},
                                    @{name = 'File'  ;expression = {$_.Value}} |
                         Sort-Object File |
                            Out-GridView -Title $title
                }
                else {
                   Write-Host $title
                } 
            }
} #End function.

New-Alias extract SearchZIPfiles    -Description 'Find Web files inside ZIP/RAR'
Export-ModuleMember -Function SearchZIPfiles -Alias Extract