# encoding: utf-8
# api: powershell
# title: finddupe.ps1
# description: Find duplicates in comparing size then SHA-512 hash of files. Usage: finddupe.ps1 ... [-delete] [-noprompt] [-recurse] ; -delete will prompt to delete duplicates. -delete and -noprompt together will delete duplicates without prompting. -recurse looks in all subdirectories of all listed directories. The first file in a duplicate pair will not be deleted ever.
# version: 0.1
# type: function
# author: James Gentile
# license: CC0
# function: Get-SHA512
# x-poshcode-id: 2617
# x-derived-from-id: 3328
# x-archived: 2017-04-30T12:38:04
# x-published: 2011-04-16T11:55:00
#
#

# Computes the SHA-512 digest of a file.
#   $file   - the file to hash; callers in this script pass a full path string,
#             which the [System.IO.FileInfo] parameter type converts.
#   returns - the digest as an upper-case hexadecimal string, two digits per byte.
# The file stream and the hash provider are released on every exit path.
# The function catches nothing: any error opening or reading the file reaches the caller.
function Get-SHA512([System.IO.FileInfo] $file = $(throw 'Usage: Get-SHA512 [System.IO.FileInfo]'))
{
    $stream = $null
    $hashAlgorithm = New-Object System.Security.Cryptography.SHA512CryptoServiceProvider
    try
    {
        $stream = $file.OpenRead()
        $hashByteArray = $hashAlgorithm.ComputeHash($stream)
    }
    finally
    {
        # Release the file handle and the crypto provider even when hashing fails.
        if ($null -ne $stream) { $stream.Close() }
        $hashAlgorithm.Clear()
    }

    # BitConverter yields "AB-CD-..."; removing the dashes leaves two upper-case
    # hex digits per byte. Building the string in one expression also avoids
    # appending to a variable that might already exist in a parent scope.
    return [System.BitConverter]::ToString($hashByteArray) -replace '-', ''
}

$starttime = [datetime]::now

write-host "FindDupe.ps1 - find and optionally delete duplicates. FindDupe.ps1 -help or FindDupe.ps1 -h for usage options."

# Summary counters. The duplicate counter is deliberately not called $matches:
# that name is a PowerShell automatic variable.
$dupecount    = 0   # number of duplicate pairs found
$filesdeleted = 0   # number of files deleted
$bytesrec     = 0   # number of bytes recovered by deleting duplicates

if ($args -eq "-help" -or $args -eq "-h")   # help requested: display usage and stop
{
    ""
    "Usage:"
    " PS>.\FindDupe.ps1 ... [-delete] [-noprompt] [-recurse] [-hidden] [-help]"
    "Options:"
    " -recurse recurses through all subdirectories of any specified directories."
    " -delete prompts to delete duplicates (but not originals.)"
    " -delete with -noprompt deletes duplicates without prompts (but again not originals.)"
    " -hidden checks hidden files, default is to ignore hidden files."
    " -help displays this usage option data, and ignores all other arguments."
    ""
    "Examples:"
    " PS>.\finddupe.ps1 c:\data d:\finance -recurse"
    " PS>.\finddupe.ps1 d: -recurse -delete -noprompt"
    " PS>.\finddupe.ps1 c:\users\alice\pictures\ -recurse -delete"
    exit
}

# Evaluate the switches once. "$args -eq <switch>" filters the argument array,
# and a non-empty result converts to $true.
$delete   = [bool]($args -eq "-delete")
$noprompt = [bool]($args -eq "-noprompt")
$recurse  = [bool]($args -eq "-recurse")
$hidden   = [bool]($args -eq "-hidden")

# Every argument that is not one of the switches above is a path to scan.
$switches = "-delete", "-noprompt", "-recurse", "-hidden"
$paths    = @($args | Where-Object { $switches -notcontains $_ })

# Build the list of files (directories excluded). @() keeps the result an array
# even when only one file is found, so it can be indexed and assigned below.
$files = @()
if ($paths.Count -gt 0)
{
    $files = @(Get-ChildItem $paths -Recurse:$recurse -Force:$hidden | Where-Object { -not $_.PSIsContainer })
}

if ($files.Count -lt 2)   # nothing to compare with fewer than two files
{
    "Need at least two files to check.`a"
    exit
}

for ($i = 0; $i -lt $files.Count; $i++)   # outer loop: each file is the "original" once
{
    if ($null -eq $files[$i]) { continue }   # $null = already identified as a duplicate

    $filecheck = $files[$i]   # keep the file object
    $files[$i] = $null        # clear the slot so the file is never matched against itself

    for ($c = $i + 1; $c -lt $files.Count; $c++)   # inner loop: every later file
    {
        $candidate = $files[$c]
        if ($null -eq $candidate) { continue }   # $null = already checked/matched

        # The same file listed twice (e.g. overlapping path arguments) is not a duplicate.
        if ($filecheck.FullName -eq $candidate.FullName) { $files[$c] = $null; continue }

        # Different sizes cannot be duplicates, so no hashing is needed.
        if ($filecheck.Length -ne $candidate.Length) { continue }

        # Hashes are computed lazily and cached on the file object as a SHA512
        # note property, so no file is hashed twice.
        if ($null -eq $filecheck.SHA512)
        {
            $hash = Get-SHA512 $filecheck.FullName
            $filecheck = Add-Member -InputObject $filecheck -Name SHA512 -MemberType NoteProperty -Value $hash -PassThru
        }
        if ($null -eq $candidate.SHA512)
        {
            $hash = Get-SHA512 $candidate.FullName
            $candidate = Add-Member -InputObject $candidate -Name SHA512 -MemberType NoteProperty -Value $hash -PassThru
            $files[$c] = $candidate
        }

        if ($filecheck.SHA512 -ne $candidate.SHA512) { continue }

        # Size and SHA-512 both match: it is a duplicate.
        write-host "Size and SHA512 match: " -fore red -nonewline
        write-host "`"$($filecheck.fullname)`" and `"$($candidate.fullname)`""
        $dupecount += 1

        if ($delete)
        {
            # -LiteralPath stops characters such as [ and ] in a file name from
            # being interpreted as wildcards.
            if ($noprompt)
            {
                Remove-Item -LiteralPath $candidate.FullName            # delete without prompting
            }
            else
            {
                Remove-Item -LiteralPath $candidate.FullName -Confirm   # prompt for confirmation
            }

            # Count (and report) the deletion only if the file is really gone;
            # the user may have declined or the delete may have failed.
            if (-not (Test-Path -LiteralPath $candidate.FullName))
            {
                if ($noprompt)
                {
                    write-host "Deleted duplicate: " -f red -nonewline
                    write-host "`"$($candidate.fullname).`""
                }
                $filesdeleted += 1
                $bytesrec += $candidate.Length
            }
        }

        $files[$c] = $null   # clear the slot so the file is not checked/matched again
    }
}

# Summary: files checked, duplicates found and deleted, bytes recovered, elapsed time.
write-host ""
write-host "Number of Files checked: $($files.count)."
write-host "Number of duplicates found: $dupecount."
Write-host "Number of duplicates deleted: $filesdeleted."
write-host "$bytesrec bytes recovered."
write-host ""
write-host "Time to run: $(([datetime]::now)-$starttime|select hours, minutes, seconds, milliseconds)"
write-host ""