PoshCode Archive  Artifact [902ff640d5]

Artifact 902ff640d54e7fa86b475dbce629d401cf0138646423cd44583f53b293c6cf21:

  • File Findup.ps1 — part of check-in [059a09a112] at 2018-06-10 13:24:10 on branch trunk — Findup – Find duplicates C# version. Compares files sizes and SHA512 hashes to identify duplicates. New regex Include/Exclude feature. Should be compiled with Visual Studio 11 (beta as of now), as older Visual Studio C# compilers seem to have a bug that causes crashes on long file names. (user: James Gentile size: 18537)

# encoding: ascii
# api: csharp
# title: Findup
# description: Findup – Find duplicates C# version. Compares files sizes and SHA512 hashes to identify duplicates. New regex Include/Exclude feature. Should be compiled with Visual Studio 11 (beta as of now), as older Visual Studio C# compilers seem to have a bug that causes crashes on long file names.
# version: 2.0
# type: class
# author: James Gentile
# license: CC0
# x-poshcode-id: 3342
# x-derived-from-id: 3349
# x-archived: 2012-04-14T01:43:44
# x-published: 2012-04-11T14:02:00
#
#
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Security.Cryptography;
using System.Runtime.InteropServices;
using Microsoft.Win32;
using System.IO;
using System.Text.RegularExpressions;



namespace Findup
{
    public class FileLengthComparer : IComparer<FileInfo>
    {
        public int Compare(FileInfo x, FileInfo y)
        {
            return (x.Length.CompareTo(y.Length));
        }
    }
    
    class Findup
    {                  
        public static Dictionary<string, List<string>> optionspaths = new Dictionary<string, List<string>>
            { {"-x", new List<string>()},{"-i",new List<string>()},{"-xf",new List<string>()},{"-if",new List<string>()},
            {"-xd",new List<string>()},{"-id",new List<string>()},{"-paths",new List<string>()} };
        public static Dictionary<string, Boolean> optionsbools = new Dictionary<string, bool> { { "-recurse", false }, { "-regex", false }, { "-noerr", false }, {"-delete",false} };
        public static long numOfDupes = 0;                                 // number of duplicate files found.
        public static long dupeBytes = 0;                                  // number of bytes in duplicates.
        public static long bytesrecovered = 0;                             // Bytes recovered from deleting dupes.
        public static long deletedfiles = 0;                               // Number of deleted dupes.
        public static string delconfirm;        

        public static void Main(string[] args)
        {            
            Console.WriteLine("Findup.exe v2.0 - By James Gentile - JamesRaymondGentile@gmail.com - 2012.");
            Console.WriteLine(" ");                        
            string optionspathskey = "-paths";        
            List<FileInfo> files = new List<FileInfo>();            
            int i = 0;            

            for (i = 0; i < args.Length; i++)
            {
                string argitem=args[i].ToLower();

                if ((System.String.Compare(argitem, "-help", true) == 0) || (System.String.Compare(argitem, "-h", true) == 0))
                {
                    Console.WriteLine("Usage:    findup.exe <file/directory #1..#N> [-recurse] [-noerr] [-x/-i/-xd/-id/-xf/-if] <files/directories/regex> [-regex] [-delete]");
                    Console.WriteLine(" ");
                    Console.WriteLine("Options:  -help     - displays this help message.");
                    Console.WriteLine("          -recurse  - recurses through subdirectories when directories or file specifications (e.g. *.txt) are specified.");                    
                    Console.WriteLine("          -noerr    - discards error messages.");
                    Console.WriteLine("          -delete   - delete each duplicate file with confirmation.");
                    Console.WriteLine("          -x        - eXcludes if full file path starts with (or RegEx matches if -regex supplied) one of the items following this switch until another switch is used.");
                    Console.WriteLine("          -i        - include if full file path starts with (or Regex matches if -regex supplied) one of the items following this switch until another switch is used.");
                    Console.WriteLine("          -xd       - eXcludes all directories (using RegEx if -regex supplied) including subdirs following this switch until another switch is used.");
                    Console.WriteLine("          -id       - Include only directories (using RegEx if -regex supplied) including subdirs following this switch until another switch is used.");
                    Console.WriteLine("          -xf       - eXcludes all files (using RegEx if -regex supplied) following this switch until another switch is used.");
                    Console.WriteLine("          -if       - Include only files (using RegEx if -regex supplied) following this switch until another switch is used.");
                    Console.WriteLine("          -regex    - Use Regex notation for exclude (-x) and include (-i) option.");
                    Console.WriteLine("          -paths    - not needed unless you want to specify files/dirs after an include/exclude without using another non-exclude/non-include option.");
                    Console.WriteLine(" ");
                    Console.WriteLine("Examples: findup.exe c:\\finances -recurse -noerr");
                    Console.WriteLine("                     - Find dupes in c:\\finance.");
                    Console.WriteLine("                     - recurse all subdirs of c:\\finance.");
                    Console.WriteLine("                     - suppress error messages.");
                    Console.WriteLine("          findup.exe c:\\users\\alice\\plan.txt d:\\data -recurse -x d:\\data\\webpics");
                    Console.WriteLine("                     - Find dupes in c:\\users\\alice\\plan.txt, d:\\data");
                    Console.WriteLine("                     - recurse subdirs in d:\\data.");
                    Console.WriteLine("                     - exclude any files in d:\\data\\webpics and subdirs.");
                    Console.WriteLine("          findup.exe c:\\data *.txt c:\\reports\\quarter.doc -x \"(jim)\" -regex");
                    Console.WriteLine("                     - Find dupes in c:\\data, *.txt in current directory and c:\\reports\\quarter.doc");
                    Console.WriteLine("                     - exclude any file with 'jim' in the name as specified by the Regex item \"(jim)\"");
                    Console.WriteLine("          findup.exe c:\\data *.txt c:\\reports\\bobsmithquarter.doc -x \"[bf]\" -i \"(smith)\" -regex");
                    Console.WriteLine("                     - Find dupes in c:\\data, *.txt in current directory and c:\\reports\\bobsmithquarter.doc");
                    Console.WriteLine("                     - Include only files with 'smith' and exclude any file with letters b or f in the name as specified by the Regex items \"[bf]\",\"(smith)\"");
                    Console.WriteLine("Note:     Exclude takes precedence over Include.");
                    Console.WriteLine("          -xd,-id,-xf,-if are useful if for instance you want to apply a RegEx to only file names but not directory names or vice versa.");
                    Console.WriteLine("          if for instance you wanted all files that contained the letter \"d\" on your D: drive but didn't want the d:\\ to cause all files on the d:\\ ");
                    Console.WriteLine("          drive to be included, you would specify:");
                    Console.WriteLine("          findup.exe d:\\ -recurse -noerr -regex -if \"[d]\"  ");
                    return;
                }
                if (optionsbools.ContainsKey(argitem))
                {
                    optionsbools[argitem] = true;
                    optionspathskey = "-paths";
                    continue;
                }                
                if (optionspaths.ContainsKey(argitem))
                {
                    optionspathskey = argitem;
                    continue;
                }                
                optionspaths[optionspathskey].Add(argitem);                                    
            }
            if (optionspaths["-paths"].Count == 0)
            {
                WriteErr("No files, file specifications, or directories specified. Try findup.exe -help. Assuming current directory.");
                optionspaths["-paths"].Add(".");
            }
            Console.Write("Getting file info and sorting file list...");
            getFiles(optionspaths["-paths"], "*.*", optionsbools["-recurse"], files);
                         
            if (files.Count < 2)
            {
                WriteErr("\nFindup.exe needs at least 2 files to compare. Try findup.exe -help");
                return;
            }

            files.Sort(new FileLengthComparer());
            Console.WriteLine("Completed!");  

            Console.Write("Building dictionary of file sizes, SHA512 hashes and full paths...");

            var SizeHashName = new Dictionary<long, Dictionary<string,List<string>>>();            

            for (i = 0; i < (files.Count - 1); i++)
            {
                if (files[i].Length != files[i + 1].Length) { continue; }
                var breakout = false;
                while (true)
                {
                    var _SHA512 = SHA512.Create();
                    try
                    {                       
                        using (var fstream = File.OpenRead(files[i].FullName))
                        {                    
                            _SHA512.ComputeHash(fstream);
                        }
                        System.Text.Encoding enc = System.Text.Encoding.ASCII;
                        string SHA512string = enc.GetString(_SHA512.Hash);

                        if (!SizeHashName.ContainsKey(files[i].Length))                        
                            SizeHashName.Add(files[i].Length, new Dictionary<string,List<string>>());                                             
                        if (!SizeHashName[files[i].Length].ContainsKey(SHA512string))                        
                            SizeHashName[files[i].Length][SHA512string] = new List<string>() {};                        
                        SizeHashName[files[i].Length][SHA512string].Add(files[i].FullName);
                    }
                    catch (Exception e)
                    {
                        WriteErr("Hash error: " + e.Message);                     
                    }

                    if (breakout == true) {break;}
                    i++;
                    if (i == (files.Count - 1)) { breakout = true; continue; }
                    breakout = (files[i].Length != files[i + 1].Length);
                }
            }

            Console.WriteLine("Completed!");

            foreach (var SizeGroup in SizeHashName)
            {
                foreach (var HashGroup in SizeGroup.Value)
                {
                    var SGK = (long)SizeGroup.Key;
                    var HGVC = (int)HashGroup.Value.Count;
                    if (HGVC > 1)
                    {                        
                        Console.WriteLine("{0:N0} Duplicate files. {1:N0} Bytes each. {2:N0} Bytes total : ", HGVC, SGK, SGK*HGVC);
                        foreach (var FileName in HashGroup.Value)
                        {
                            Console.WriteLine(FileName);
                            numOfDupes++;
                            if (optionsbools["-delete"])
                                if (DeleteDupe(FileName)) { bytesrecovered += SGK; }
                        }
                        dupeBytes += SGK * HGVC;
                    }
                }                
            }

            Console.WriteLine("\n ");
            Console.WriteLine("Files checked      : {0:N0}", files.Count);
            Console.WriteLine("Duplicate files    : {0:N0}", numOfDupes);
            Console.WriteLine("Duplicate bytes    : {0:N0}", dupeBytes);
            Console.WriteLine("Deleted duplicates : {0:N0}", deletedfiles);
            Console.WriteLine("Bytes recovered    : {0:N0}", bytesrecovered);
            return;                                                                             // Exit after displaying statistics.
        }               

        private static void WriteErr(string Str)
        {
            if (!optionsbools["-noerr"])
                Console.WriteLine(Str);
        }

        private static Boolean DeleteDupe(string FileName)
        {
            Console.Write("Delete this file <y/N> <ENTER>?");
            delconfirm = Console.ReadLine();
            if ((delconfirm[0] == 'Y') || (delconfirm[0] == 'y'))
            {
                try
                {
                    File.Delete(FileName);
                    Console.WriteLine("File Successfully deleted.");                    
                    deletedfiles++;
                    return true;
                }
                catch (Exception e)
                {
                    Console.WriteLine("File could not be deleted: " + e.Message);                    
                }
            }
            return false;
        }


        private static Boolean CheckAll(string full)
        {
            if (!CheckWorker(full, optionspaths["-x"]))
                return false;
            if ((optionspaths["-i"].Count > 0) == CheckWorker(full, optionspaths["-i"]))
                return false;

            var filePart = Path.GetFileName(full);
            var dirPart = Path.GetDirectoryName(full);

            if (!CheckWorker(filePart, optionspaths["-xf"]))
                return false;
            if (!CheckWorker(dirPart, optionspaths["-xd"]))
                return false;            
            if ((optionspaths["-if"].Count > 0) == CheckWorker(filePart, optionspaths["-if"]))
                return false;
            if ((optionspaths["-id"].Count > 0) == CheckWorker(dirPart, optionspaths["-id"]))
                return false;
            return true;
        }

        private static Boolean CheckWorker(string full, List<string> pathsitems)
        {
            foreach (var x in pathsitems)
            {
                if (optionsbools["-regex"])
                {
                    try
                    {
                        Regex rgx = new Regex(x);
                        if (rgx.IsMatch(full))
                            return false;
                    }
                    catch (Exception e)
                    {
                        WriteErr("Invalid regex used: " + x + " exception: " + e);
                    }
                }
                else { if (full.ToLower().StartsWith(x)) { return false; } }
            }
            return true;
        }        
        private static void getFiles(List<string> pathRec, string searchPattern, Boolean recursiveFlag, List<FileInfo> returnList)
        {

            foreach (string d in pathRec)
            {
                getFiles(d, searchPattern, recursiveFlag, returnList);
            }
            return;
        }

        private static void getFiles(string[] pathRec, string searchPattern, Boolean recursiveFlag, List<FileInfo> returnList)
        {

            foreach (string d in pathRec)
            {
                getFiles(d, searchPattern, recursiveFlag, returnList);
            }
            return;
        }

        private static void getFiles(string pathRec, string searchPattern, Boolean recursiveFlag, List<FileInfo> returnList)
        {

            string dirPart;
            string filePart;

            if (File.Exists(pathRec))
            {
                try
                {
                    FileInfo addf = (new FileInfo(pathRec));
                    if (((addf.Attributes & FileAttributes.ReparsePoint) == 0) && CheckAll(addf.FullName))
                        returnList.Add(addf);
                }
                catch (Exception e)
                {
                    WriteErr("Add file error: " + e.Message);
                }                
            }
            else if (Directory.Exists(pathRec))
            {
                try
                {
                    DirectoryInfo Dir = new DirectoryInfo(pathRec);
                    foreach (FileInfo addf in (Dir.GetFiles(searchPattern)))
                    {
                        if (((addf.Attributes & FileAttributes.ReparsePoint) == 0) && CheckAll(addf.FullName))
                            returnList.Add(addf);
                    }
                }
                catch (Exception e)
                {
                    WriteErr("Add files from Directory error: " + e.Message);
                }

                if (recursiveFlag)
                {
                    try
                    {
                            getFiles((Directory.GetDirectories(pathRec)), searchPattern, recursiveFlag, returnList);
                    }
                    catch (Exception e)
                    {
                        WriteErr("Add Directory error: " + e.Message);
                    }
                }                
            }
            else
            {
                try
                {
                    filePart = Path.GetFileName(pathRec);
                    dirPart = Path.GetDirectoryName(pathRec);
                }
                catch (Exception e)
                {
                    WriteErr("Parse error on: " + pathRec);
                    WriteErr(@"Make sure you don't end a directory with a \ when using quotes. The console arg parser doesn't accept that.");
                    WriteErr("Exception: " + e.Message);
                    return;
                }

                if (filePart.IndexOfAny(new char[] {'?','*'}) >= 0)
                {
                    if ((dirPart == null) || (dirPart == ""))
                        dirPart = Directory.GetCurrentDirectory();
                    if (Directory.Exists(dirPart))
                    {
                        getFiles(dirPart, filePart, recursiveFlag, returnList);
                        return;
                    }
                }
                WriteErr("Invalid file path, directory path, file specification, or program option specified: " + pathRec);                                                        
            }            
        }
    }
}