PoshCode Archive  Artifact Content

Artifact 806fca1c45e32086645d7563affac48b9890610803dcda178b7e4f7993993f62:

  • File finddupe.ps1 — part of check-in [e4e92dbecf] at 2018-06-10 13:13:13 on branch trunk — Find duplicates in <directories/files> comparing size then SHA-512 hash of files. Usage: finddupe.ps1 <directory/file #1> <directory/file #2> ... <directory/file #N> [-delete] [-noprompt] [-recurse] ; -delete will prompt to delete duplicates. -delete and -noprompt together will delete duplicates without prompting. -recurse looks in all subdirectories of all listed directories. The first file in a duplicate pair will not be deleted ever. (user: James Gentile size: 6121)

# encoding: utf-8
# api: powershell
# title: finddupe.ps1
# description: Find duplicates in <directories/files> comparing size then SHA-512 hash of files. Usage: finddupe.ps1 <directory/file #1> <directory/file #2> ... <directory/file #N> [-delete] [-noprompt] [-recurse] ; -delete will prompt to delete duplicates. -delete and -noprompt together will delete duplicates without prompting. -recurse looks in all subdirectories of all listed directories. The first file in a duplicate pair will not be deleted ever.
# version: 0.1
# type: function
# author: James Gentile
# license: CC0
# function: Get-SHA512
# x-poshcode-id: 2617
# x-derived-from-id: 3328
# x-archived: 2017-04-30T12:38:04
# x-published: 2011-04-16T11:55:00
#
#
# Get-SHA512: compute the SHA-512 digest of a file's contents.
#
# Parameters:
#   $file - [System.IO.FileInfo] naming the file to hash. A path string is also
#           accepted because PowerShell converts it to FileInfo on binding.
#           Omitting the argument throws a usage message.
# Returns:
#   [string] upper-case hexadecimal digest, two characters per hash byte
#   (128 characters for SHA-512).
# Errors:
#   An exception raised while opening or hashing the file is re-thrown to the
#   caller (trap + break) after the file stream has been closed.
function Get-SHA512([System.IO.FileInfo] $file = $(throw 'Usage: Get-SHA512 [System.IO.FileInfo]'))
{
	## We have to be sure that we close the file stream if any exceptions are thrown.
	## A trap covers every statement in its scope regardless of where it is written,
	## so it is declared first to make the clean-up path obvious.
	trap
	{
		if ($null -ne $stream)
		{
			$stream.Close();
		}
		break;   # re-throw to the caller instead of continuing with a partial result
	}

	$stream = $null;
	$hashAlgorithm = new-object System.Security.Cryptography.SHA512CryptoServiceProvider
	$stream = $file.OpenRead();
	$hashByteArray = $hashAlgorithm.ComputeHash($stream);
	$stream.Close();

	## BitConverter renders the bytes as "AB-0C-..." with every byte zero-padded to
	## two upper-case hex digits; dropping the dashes leaves the plain hex digest.
	return [System.BitConverter]::ToString($hashByteArray).Replace('-', '');
}

# Remember when the run began; the elapsed time is reported at the end of the script.
$starttime = Get-Date

# Banner shown on every invocation.
write-host "FindDupe.ps1 - find and optionally delete duplicates. FindDupe.ps1 -help or FindDupe.ps1 -h for usage options."

# Running totals for the end-of-run summary.
$matches      = 0   # duplicate pairs found
$filesdeleted = 0   # duplicates actually removed from disk
$bytesrec     = 0   # total size in bytes of the removed duplicates


# Help request: if -help or -h appears anywhere on the command line, print the
# usage text and stop; every other argument is ignored in that case.
if ($args -contains "-help" -or $args -contains "-h")
{
	""
	"Usage:"
	"       PS>.\FindDupe.ps1 <directory/file #1> <directory/file #2> ... <directory/file #N> [-delete] [-noprompt] [-recurse] [-hidden] [-help]"
	"Options:"
	"       -recurse recurses through all subdirectories of any specified directories."
	"       -delete prompts to delete duplicates (but not originals.)"
	"       -delete with -noprompt deletes duplicates without prompts (but again not originals.)"
	"       -hidden checks hidden files, default is to ignore hidden files."
	"       -help displays this usage option data, and ignores all other arguments."
	""
	"Examples:"
	"          PS>.\finddupe.ps1 c:\data d:\finance -recurse"
	"          PS>.\finddupe.ps1 d: -recurse -delete -noprompt"
	"          PS>.\finddupe.ps1 c:\users\alice\pictures\ -recurse -delete"
	exit   # nothing else runs once help has been shown
}


# Build the list of candidate files. Every argument that is not one of
# FindDupe.ps1's own switches is handed to dir as a path; -recurse and -hidden
# on the command line turn on dir's -recurse and -force respectively, and
# containers (directories) are filtered out so that only files remain.

$finddupeSwitches = "-delete", "-noprompt", "-recurse", "-hidden"
$searchPaths      = $args | ?{ $finddupeSwitches -notcontains $_ }
$wantRecurse      = [bool]($args -eq "-recurse")
$wantHidden       = [bool]($args -eq "-hidden")

$files = (dir $searchPaths -recurse:$wantRecurse -force:$wantHidden | ?{$_.psiscontainer -eq $false})


# A duplicate needs a partner: with fewer than two files there is nothing to compare.
if ($files.count -lt 2)
{
	"Need at least two files to check.`a"
	exit
}

# Pairwise duplicate scan over $files.
#
# Each remaining file in turn becomes the "original" ($filecheck) and is compared
# with every later file: first by size, and only when the sizes agree by SHA-512.
# Hashes are cached on the file objects as a SHA512 note property so that no file
# is hashed twice. Slots in $files are set to $null once a file has been used as
# an original or identified as a duplicate, so it is never matched again.
#
# With -delete, the later file of a matching pair is removed (after a
# confirmation prompt unless -noprompt is also given); the original is never
# deleted. Paths are passed with -LiteralPath so names containing wildcard
# characters such as [ or ] are deleted, and checked, as the exact file found.

$deleteRequested = [bool]($args -eq "-delete")     # loop-invariant, evaluated once
$deleteSilently  = [bool]($args -eq "-noprompt")

for ($i = 0; $i -lt $files.count; $i++)  # Cycle thru all files
{
	if ($null -eq $files[$i]) {continue}  # already identified as a duplicate, so do next file

	$filecheck = $files[$i]  	      # keep the file object for comparison
	$files[$i] = $null	              # clear the slot so it is not matched against itself

	for ($c = $i + 1; $c -lt $files.count; $c++)  # compare against every later file
	{
		if ($null -eq $files[$c]) {continue}  # $null = file was already checked/matched.

		# The same file listed twice (e.g. overlapping arguments) is not a duplicate.
		if ($filecheck.fullname -eq $files[$c].fullname) {$files[$c] = $null; continue}

		# Different sizes can never be duplicates; skip the expensive hash.
		if ($filecheck.length -ne $files[$c].length) {continue}

		if ($null -eq $filecheck.SHA512)         # if SHA512 is not already computed, compute it
		{
			$SHA512 = (get-SHA512 $filecheck.fullname)
			$filecheck = $filecheck | add-member -name SHA512 -membertype noteproperty -value $SHA512 -passthru
		}
		if ($null -eq $files[$c].SHA512)         # resulting in no file being SHA512'ed twice.
		{
			$SHA512 = (get-SHA512 $files[$c].fullname)
			$files[$c] = $files[$c] | add-member -name SHA512 -membertype noteproperty -value $SHA512 -passthru
		}

		# Size already matched; only an identical SHA512 makes it a duplicate.
		if ($filecheck.SHA512 -ne $files[$c].SHA512) {continue}

		$dupePath = $files[$c].fullname

		write-host "Size and SHA512 match: " -fore red -nonewline
		write-host "`"$($filecheck.fullname)`" and `"$($dupePath)`""

		$matches += 1			# Number of matches ++

		if ($deleteRequested)           # user asked for duplicates to be deleted
		{
			if ($deleteSilently)
			{
				remove-item -literalpath $dupePath            # delete without prompting
			}
			else
			{
				remove-item -literalpath $dupePath -confirm   # prompt for confirmation
			}

			# Only count (and announce) the deletion if the file is really gone:
			# the user may have declined the prompt or the delete may have failed.
			if (-not (test-path -literalpath $dupePath))
			{
				$filesdeleted += 1		# update records
				$bytesrec += $files[$c].length

				if ($deleteSilently)
				{
					write-host "Deleted duplicate: " -f red -nonewline
					write-host "`"$($dupePath).`""
				}
			}
		}

		$files[$c] = $null		# erase file object so it is not checked/matched again.
	}	# And loop to next inner loop file
}		# And loop to next file in outer/original loop
# End-of-run report: files examined, duplicates found/deleted, bytes recovered,
# then the elapsed time since $starttime, each section set off by a blank line.
$summaryLines = @(
	"",
	"Number of Files checked: $($files.count).",
	"Number of duplicates found: $matches.",
	"Number of duplicates deleted: $filesdeleted.",
	"$bytesrec bytes recovered.",
	""
)
foreach ($summaryLine in $summaryLines)
{
	write-host $summaryLine
}

# Elapsed time, reduced to its hours/minutes/seconds/milliseconds components.
$elapsed = ([datetime]::now) - $starttime | select hours, minutes, seconds, milliseconds
write-host "Time to run: $elapsed"
write-host ""