# encoding: ascii
# api: csharp
# title: Findup
# description: Findup – Find Duplicates, and optionall delete them. C# version of the powershell script FindDupe.ps1, C# version is probably 100 times faster though.
# version: 1.0
# type: class
# author: James Gentile
# license: CC0
# x-poshcode-id: 2326
# x-derived-from-id: 2332
# x-archived: 2010-10-30T04:55:29
#
# Findup.exe -help for usage information and options.
#
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Security.Cryptography;
using System.Runtime.InteropServices;
using Microsoft.Win32;
using System.IO;
namespace Findup
{
public class FileInfoExt
{
public FileInfoExt(FileInfo fi)
{
FI = fi;
// Checked = false; // Set if the file has already been checked.
// string SHA512_1st1K; // SHA512 hash of first 1K bytes.
// string SHA512_All; // SHA512 hash of complete file.
}
public FileInfo FI { get; private set; }
public bool Checked { get; set; }
public string SHA512_1st1K { get; set; }
public string SHA512_All { get; set; }
}
class Recurse // Return FileInfoExt list of files matching filenames, file specifications (IE: *.*), and in directories in pathRec
{
public void Recursive(string[] pathRec, string searchPattern, Boolean recursiveFlag, List<FileInfoExt> returnList)
{
foreach (string d in pathRec)
{
Recursive(d, searchPattern, recursiveFlag, returnList);
}
return;
}
public void Recursive(string pathRec, string searchPattern, Boolean recursiveFlag, List<FileInfoExt> returnList)
{
if (File.Exists(pathRec))
{
try
{
returnList.Add(new FileInfoExt(new FileInfo(pathRec)));
}
catch (Exception e)
{
Console.WriteLine("Add file error: " + e.Message);
}
}
else if (Directory.Exists(pathRec))
{
try
{
DirectoryInfo Dir = new DirectoryInfo(pathRec);
returnList.AddRange(Dir.GetFiles(searchPattern).Select(s => new FileInfoExt(s)));
}
catch (Exception e)
{
Console.WriteLine("Add files from Directory error: " +e.Message);
}
if (recursiveFlag == true)
{
try
{
foreach (string d in (Directory.GetDirectories(pathRec)))
{
Recursive(d, searchPattern, recursiveFlag, returnList);
}
}
catch (Exception e)
{
Console.WriteLine("Add Directory error: " + e.Message);
}
}
}
else
{
string filePart = Path.GetFileName(pathRec);
string dirPart = Path.GetDirectoryName(pathRec);
if (filePart.IndexOfAny(new char[] { '?', '*' }) >= 0)
{
if ((dirPart == null) || (dirPart == ""))
dirPart = Directory.GetCurrentDirectory();
if (Directory.Exists(dirPart))
{
Recursive(dirPart, filePart, recursiveFlag, returnList);
}
else
{
Console.WriteLine("Invalid file path, directory path, file specification, or program option specified: " + pathRec);
}
}
else
{
Console.WriteLine("Invalid file path, directory path, file specification, or program option specified: " + pathRec);
}
}
return;
}
}
class Program
{
public static void Main(string[] args)
{
Console.WriteLine("Findup.exe v1.0 - use -help for usage information. Created in 2010 by James Gentile.");
Console.WriteLine(" ");
string[] paths = new string[0];
System.Boolean recurse = false;
System.Boolean delete = false;
System.Boolean noprompt = false;
List<FileInfoExt> fs = new List<FileInfoExt>();
long bytesInDupes = 0; // bytes in all the duplicates
long numOfDupes = 0; // number of duplicate files found.
long bytesRec = 0; // number of bytes recovered.
long delFiles = 0; // number of files deleted.
int c = 0;
int i = 0;
string deleteConfirm;
for (i = 0; i < args.Length; i++)
{
if ((System.String.Compare(args[i],"-help",true) == 0) || (System.String.Compare(args[i],"-h",true) == 0))
{
Console.WriteLine("Usage: findup.exe <file/directory #1> <file/directory #2> ... <file/directory #N> [-recurse] [-delete] [-noprompt]");
Console.WriteLine(" ");
Console.WriteLine("Options: -help - displays this help infomration.");
Console.WriteLine(" -recurse - recurses through subdirectories.");
Console.WriteLine(" -delete - deletes duplicates with confirmation prompt.");
Console.WriteLine(" -noprompt - when used with -delete option, deletes files without confirmation prompt.");
Console.WriteLine(" ");
Console.WriteLine("Examples: findup.exe c:\\finances -recurse");
Console.WriteLine(" findup.exe c:\\users\\alice\\plan.txt d:\\data -recurse -delete -noprompt");
Console.WriteLine(" ");
return;
}
if (System.String.Compare(args[i],"-recurse",true) == 0)
{
recurse = true;
continue;
}
if (System.String.Compare(args[i],"-delete",true) == 0)
{
delete = true;
continue;
}
if (System.String.Compare(args[i],"-noprompt",true) == 0)
{
noprompt = true;
continue;
}
Array.Resize(ref paths, paths.Length + 1);
paths[c] = args[i];
c++;
}
if (paths.Length == 0)
{
Console.WriteLine("No files specified, try findup.exe -help");
return;
}
Recurse recurseMe = new Recurse();
recurseMe.Recursive(paths, "*.*", recurse, fs);
if (fs.Count < 2)
{
Console.WriteLine("Findup.exe needs at least 2 files to compare. try findup.exe -help");
return;
}
for (i = 0; i < fs.Count; i++)
{
if (fs[i].Checked == true) // If file was already matched, then skip to next.
continue;
for (c = i+1; c < fs.Count; c++)
{
if (fs[c].Checked == true) // skip already matched inner loop files.
continue;
if (fs[i].FI.Length != fs[c].FI.Length) // If file size matches, then check hash.
continue;
if (fs[i].FI.FullName == fs[c].FI.FullName) // don't count the same file as a match.
continue;
if (fs[i].SHA512_1st1K == null) // check/hash first 1K first.
fs[i].SHA512_1st1K = ComputeInitialHash(fs[i].FI.FullName);
if (fs[c].SHA512_1st1K == null)
fs[c].SHA512_1st1K = ComputeInitialHash(fs[c].FI.FullName);
if (fs[i].SHA512_1st1K != fs[c].SHA512_1st1K) // if the 1st 1K has the same hash..
continue;
if (fs[i].SHA512_1st1K == null) // if hash error, then skip to next file.
continue;
if (fs[i].FI.Length > 1024) // skip hashing the file again if < 1024 bytes.
{
if (fs[i].SHA512_All == null) // check/hash the rest of the files.
fs[i].SHA512_All = ComputeFullHash(fs[i].FI.FullName);
if (fs[c].SHA512_All == null)
fs[c].SHA512_All = ComputeFullHash(fs[c].FI.FullName);
if (fs[i].SHA512_All != fs[c].SHA512_All)
continue;
if (fs[i].SHA512_All == null) // check for hash fail before declairing a duplicate.
continue;
}
Console.WriteLine(" Match: " + fs[i].FI.FullName);
Console.WriteLine(" with: " + fs[c].FI.FullName);
fs[c].Checked = true; // do not check or match against this file again.
numOfDupes++; // increase count of matches.
bytesInDupes += fs[c].FI.Length; // accumulate number of bytes in duplicates.
if (delete != true) // if delete is specified, try to delete the duplicate file.
continue;
if (noprompt == false)
{
Console.Write("Delete the duplicate file <Y/n>?");
deleteConfirm = Console.ReadLine();
if ((deleteConfirm[0] != 'Y') && (deleteConfirm[0] != 'y'))
continue;
}
try
{
File.Delete(fs[c].FI.FullName);
Console.WriteLine("Deleted: " + fs[c].FI.FullName);
bytesRec += fs[c].FI.Length;
delFiles++;
}
catch (Exception e)
{
Console.WriteLine("File delete error: " + e.Message);
}
}
}
Console.WriteLine(" ");
Console.WriteLine("Files checked: " + fs.Count);
Console.WriteLine("Duplicate files: " + numOfDupes);
Console.WriteLine("Bytes in duplicate files: " + bytesInDupes);
Console.WriteLine("Duplicates deleted: " + delFiles);
Console.WriteLine("Bytes recovered: " + bytesRec);
return;
}
private static readonly byte[] readBuf = new byte[1024];
private static string ComputeInitialHash(string path)
{
try
{
using (var stream = File.OpenRead(path))
{
var length = stream.Read(readBuf, 0, readBuf.Length);
var hash = SHA512.Create().ComputeHash(readBuf, 0, length);
return BitConverter.ToString(hash);
}
}
catch (Exception e)
{
Console.WriteLine("Hash Error: " + e.Message);
return (null);
}
}
private static string ComputeFullHash(string path)
{
try
{
using (var stream = File.OpenRead(path))
{
return BitConverter.ToString(SHA512.Create().ComputeHash(stream));
}
}
catch (Exception e)
{
Console.WriteLine("Hash error: " + e.Message);
return (null);
}
}
}
}