unit mainform;

{
Revision history:

1997 Jan 14  V0.0.0  First version
1997 Jan 16  V0.0.2  Allow timestamp instead of checksum
                     Use inherited file properties dialog box
1997 Jan 19  V1.0.0  Add icon display to file properties dialog box
                     Make timestamp the default method for speed and
                     integrity of file access date
1997 Jan 27  V1.0.2  Remove code timing the components of the compare phase
                     Store directory name separately from file name to save space
1997 Jan 29  V1.0.4  Clear directory list at end of compare phase
}

interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
  StdCtrls, TreeScan, ComCtrls, ExtCtrls;

type
  TfrmMain = class(TForm)
    TreeScanner1: TTreeScanner;
    StatusBar1: TStatusBar;
    GroupBox1: TGroupBox;
    lbxDrives: TListBox;
    Label1: TLabel;
    edtFileMask: TEdit;
    Label2: TLabel;
    btnSearch: TButton;
    GroupBox2: TGroupBox;
    lbxFiles: TListBox;
    Label3: TLabel;
    lblDuplicateBytes: TLabel;
    Label4: TLabel;
    lblDuplicateFiles: TLabel;
    edtRootDir: TEdit;
    Label7: TLabel;
    Timer1: TTimer;
    btnExit: TButton;
    chkCheckFileTimestamp: TCheckBox;
    procedure FormCreate(Sender: TObject);
    procedure FormDestroy(Sender: TObject);
    procedure btnCloseClick(Sender: TObject);
    procedure TreeScanner1FileFound(Sender: TObject);
    procedure FormCloseQuery(Sender: TObject; var CanClose: Boolean);
    procedure TreeScanner1DirectoryFound(Sender: TObject);
    procedure btnSearchClick(Sender: TObject);
    procedure lbxFilesDblClick(Sender: TObject);
    procedure Timer1Timer(Sender: TObject);
    procedure btnExitClick(Sender: TObject);
  private
    { Private declarations }
    file_list: TStringList;
    dir_list: TStringList;
    current_dir: integer;  // index of current directory in its list
    comparing: boolean;
    continue_compare: boolean;
    duplicate_bytes: integer;
    duplicate_files: integer;
    progress_bar: TProgressBar;
    procedure find_files_to_compare;
    procedure compare_files;
  public
    { Public declarations }
  end;

var
  frmMain: TfrmMain;

implementation

uses filprops;

{$R *.DFM}
{$R VERSION.RES}

type
  TChecksum = Longint;   // for rapid checksum on the first 512 bytes of a file

const
  same_as = 'same as ';

procedure TfrmMain.FormCreate(Sender: TObject);
begin
  // Create the list to hold the file sizes and names found from the
  // directory scan.  This must be a sorted list to keep same sized
  // files together.
  file_list := TStringList.Create;
  with file_list do
    begin
    Sorted := True;
    Duplicates := dupAccept;
    end;

  dir_list := TStringList.Create;
  dir_list.Sorted := False;

  progress_bar := TProgressBar.Create (Self);
  with progress_bar do
    begin
    Parent := StatusBar1;
    Left := 0;
    Top := 2;
    Width := StatusBar1.Panels[0].Width;
    Height := StatusBar1.Height - 2;
    Visible := False;
    end;

  comparing := False;
  Timer1.Enabled := True;
end;


procedure TfrmMain.FormDestroy(Sender: TObject);
begin
  progress_bar.Free;
  file_list.Free;
  dir_list.Free;
end;


procedure TfrmMain.Timer1Timer(Sender: TObject);
var
  drive_letter: char;      // A..Z
  drive_root: string;      // e.g. A:\
  drive_type: integer;     // fixed, removable etc.
  drive_label: array [0..64] of char;
  dw: DWORD;   // dummy variable, not used
begin
  Timer1.Enabled := False;
  // get listing of all available drives perhaps with the exception of drive A:
  // fill the drive list box with drive letter and volume label
  for drive_letter := 'A' to 'Z' do
    begin
    drive_root := drive_letter + ':\';
    drive_type := GetDriveType (PChar (drive_root));
    FillChar (drive_label, 64, 0);
    GetVolumeInformation (PChar (drive_root), drive_label, 64, nil, dw, dw, nil, 0);
    with lbxDrives.Items do
      begin
      case drive_type of
        DRIVE_FIXED, DRIVE_REMOVABLE,
        DRIVE_REMOTE, DRIVE_CDROM:
          Add (drive_letter + ':  ' + LowerCase (drive_label));
      end;
      // help the user by initially selecting the local fixed disks
      if drive_type = DRIVE_FIXED then
        lbxDrives.Selected [Count-1] := True;
      end;
    end;
end;


procedure TfrmMain.FormCloseQuery(Sender: TObject; var CanClose: Boolean);
// not that we are required to close
begin
  TreeScanner1.Continue := False;
  continue_compare := False;
end;


procedure TfrmMain.btnCloseClick(Sender: TObject);
begin
  Close;
end;


procedure TfrmMain.find_files_to_compare;
var
  i: integer;
  root_dir: string;
  drives: string;
begin
  // Find all the files in the tree that match the specification on all the
  // selected drives.  The files are sorted by size.
  lbxFiles.Clear;
  file_list.Clear;
  dir_list.Clear;
  root_dir := edtRootDir.Text;
  if root_dir = '' then root_dir := '\';
  if edtFileMask.Text = '' then edtFileMask.Text := '*.*';
  StatusBar1.Panels [0].Text := 'Directory scan phase';
  with lbxDrives do
    begin
    drives := '';
    for i := 0 to Items.Count - 1 do
      if Selected [i] then
        drives := drives + ' ' + Copy (Items.Strings [i], 1, 2);
    Caption := 'Find Duplicate Files on' + drives;
    for i := 0 to Items.Count - 1 do
      if Selected [i] then
        begin
        TreeScanner1.InitialDirectory := Copy (Items.Strings [i], 1, 2) + root_dir;
        TreeScanner1.FileMask := edtFileMask.Text;
        TreeScanner1.ScanTree;
        if not continue_compare then Exit;
        end;
    end;

  StatusBar1.Panels [0].Text := '';
  StatusBar1.Panels [1].Text := '';
end;


procedure TfrmMain.TreeScanner1DirectoryFound(Sender: TObject);
begin
  StatusBar1.Panels [1].Text := 'Scanning ' + TreeScanner1.FileFound + '...';
  dir_list.Add (Trim (TreeScanner1.FileFound));
  current_dir := dir_list.Count - 1;
end;


procedure TfrmMain.TreeScanner1FileFound(Sender: TObject);
var
  filename: String;
  f: TSearchRec;
begin
  filename := TreeScanner1.FileFound;
  // if we can find the file, add an entry to the file list with the size as a
  // numeric prefix (to allow sorting) and with the directory as an index into
  // the directory list (store the index as an integer, type-cast to a pointer)
  if FindFirst (filename, faAnyFile, f) = 0 then
    file_list.AddObject (Format ('%10d ', [f.size]) + ExtractFileName (filename),
                         Pointer (current_dir));  // integer type-cast as pointer
  FindClose (f);
end;


procedure TfrmMain.compare_files;
// This is the main procedure to compare all files found from the directory
// scan.  This is sorted by size, so we now look for blocks of files that
// have the same size.  For each of these blocks of multiples, create a list
// with the file initial checksum and the file name and pass that to an
// inner procedure.
//
// Measurements show that the critical routine here (in terms of execution time)
// is the Checksum function, hence its default replacement by a simple
// date and time check.
var
  items_done: integer;
  check_file_timestamp: boolean;   // true for proper checksum comparison

  function files_match (size: integer;  name1, name2: String): boolean;
  type
    PByte = ^byte;
  var
    same: boolean;
    handle1, handle2: THandle;
    mapping1, mapping2: THandle;
    base1, base2: Pointer;
    p1, p2: PByte;
    i: integer;
  begin
    if size = 0 then
      begin
      Inc (duplicate_files);
      Result := True;
      Exit;
      end;

    Result := False;
    handle1 := CreateFile (PChar(name1), GENERIC_READ, FILE_SHARE_READ, nil,
                           OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
    if handle1 <> INVALID_HANDLE_VALUE then
      begin
      handle2 := CreateFile (PChar(name2), GENERIC_READ, FILE_SHARE_READ, nil,
                             OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
      if handle2 <> INVALID_HANDLE_VALUE then
        begin
        mapping1 := CreateFileMapping (handle1, nil, PAGE_READONLY, 0, 0, nil);
        if mapping1 <> 0 then
          begin
          mapping2 := CreateFileMapping (handle2, nil, PAGE_READONLY, 0, 0, nil);
          if mapping2 <> 0 then
            begin
            base1 := MapViewOfFile (mapping1, FILE_MAP_READ, 0, 0, 0);
            if base1 <> nil then
              begin
              base2 := MapViewOfFile (mapping2, FILE_MAP_READ, 0, 0, 0);
              if base2 <> nil then
                begin
                same := True;
                p1 := PByte (base1);
                p2 := PByte (base2);
                i := 0;
                while same and (i < size) do
                  begin
                  same := p1^ = p2^;
                  Inc (i);
                  Inc (p1);
                  Inc (p2)
                  end;
                Result := same;
                if same then
                  begin
                  Inc (duplicate_bytes, size);
                  Inc (duplicate_files);
                  end;
                UnmapViewOfFile (base2);
                end;
              UnmapViewOfFile (base1);
              end;
            CloseHandle (mapping2);
            end;
          CloseHandle (mapping1);
          end;
        CloseHandle (handle2);
        end;
      CloseHandle (handle1);
      end;
  end;

  procedure update_compare_display;
  begin
    lblDuplicateBytes.Caption := Format ('%10.0n', [duplicate_bytes + 0.0]);
    lblDuplicateFiles.Caption := Format ('%10.0n', [duplicate_files + 0.0]);
    progress_bar.Position := items_done;
  end;

  procedure do_multiples (size: integer;  list: TStringList);
  var
    master_filename: string;
    old_filename: string;
    checksum: TChecksum;
    old_checksum: TChecksum;
    in_checksum_group: boolean;
    in_match_group: boolean;
    i: integer;
    s: String;
    space: integer;
    same_checksum: boolean;
    exact_match: boolean;
  begin
    old_filename := '';
    old_checksum := $12345678;
    in_checksum_group := False;
    in_match_group := False;
    for i := 0 to list.Count - 1 do
      begin
      s := Trim (list.Strings [i]);
      space := Pos (' ', s);
      checksum := StrToInt (Copy (s, 1, space-1));
      s := Trim (Copy (s, space, 999));
      same_checksum := checksum = old_checksum;

      if same_checksum and (not in_checksum_group)
        then master_filename := old_filename;

      old_checksum := checksum;
      old_filename := s;

      if same_checksum
        then exact_match := files_match (size, master_filename, s)
        else exact_match := False;

      in_checksum_group := same_checksum;

      if (not in_match_group) and exact_match then
        with lbxFiles.Items do
        begin
        Add (' ');
        Add (IntToStr (size) + ' byte files ....');
        Add ('              ' + master_filename);
        end;
      in_match_group := exact_match;

      if in_checksum_group and exact_match then
        with lbxFiles.Items do Add (same_as + s);
      end;

    update_compare_display;
  end;

  function do_checksum (size: integer;  filename: String): TChecksum;
  const
    bytes_per_sector = 512;
    max_dwords_in_checksum = bytes_per_sector div SizeOf (TChecksum);
  var
    checksum: TChecksum;
    handle: THandle;
    mapping: THandle;
    base: Pointer;
    dwords_to_checksum: integer;
    p: ^DWORD;
    dw: DWORD;
    search: TSearchRec;
  begin
    checksum := 0;

    if check_file_timestamp
    then
      begin
      if FindFirst (filename, faAnyFile, search)  = 0 then
        checksum := search.Time;
      end
    else
      begin
      handle := CreateFile (PChar(filename), GENERIC_READ, FILE_SHARE_READ, nil,
                           OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
      if handle <> INVALID_HANDLE_VALUE then
        begin
        mapping := CreateFileMapping (handle, nil, PAGE_READONLY, 0, 0, nil);
        if mapping <> 0 then
          begin
          base := MapViewOfFile (mapping, FILE_MAP_READ, 0, 0, 0);
          if base <> nil then
            begin
            p := base;
            dwords_to_checksum := size div SizeOf (DWORD);
            if dwords_to_checksum > max_dwords_in_checksum then
              dwords_to_checksum := max_dwords_in_checksum;
            for dw := 1 to dwords_to_checksum do
              begin
              checksum := checksum + p^;
              Inc (p);
              end;
            UnmapViewOfFile (base);
            end;
          CloseHandle (mapping);
          end;
        CloseHandle (handle);
        end;
      end;
    Result := checksum;
  end;

var
  current_size: integer;
  old_size: integer;
  old_filename: String;
  current_filename: String;
  item: integer;
  s: String;
  in_multiple: boolean;
  same_size: boolean;
  multiples_list: TStringList;
  chk: TChecksum;
  space: integer;
begin
  // scan up the tree finding all the files of duplicate size
  comparing := True;
  duplicate_bytes := 0;
  duplicate_files := 0;
  old_size := -1;
  old_filename := '';
  in_multiple := False;
  multiples_list := TStringList.Create;
  with multiples_list do
    begin
    Sorted := True;
    Duplicates := dupAccept;
    end;

  progress_bar.Max := file_list.Count - 1;
  progress_bar.Visible := True;
  items_done := 0;

  check_file_timestamp := chkCheckFileTimestamp.Checked;

  lbxFiles.Clear;
  // use a WHILE loop to scan the list rather than a DO
  // to allow for the comparison to be interrupted
  item := file_list.Count - 1;
  while (item >= 0) and continue_compare do
    begin
    Application.ProcessMessages;             // look for any button press
    s := Trim (file_list.Strings [item]);    // get the current size and filename
    space := Pos (' ', s);                   // split into components
    // retrieve the directory name from the stored list, the index into the
    // list is given by the integer, stored as a pointer in the Object field
    current_filename := dir_list.Strings [Integer (file_list.Objects [item])] +
                        Trim (Copy (s, space, 999));  // add the file name
    Delete (s, space, 999);
    current_size := StrToInt (s);            // the file size

    same_size := current_size = old_size;    // same as before ?
    if same_size and (not in_multiple) then  // start of a new set of files
      begin
      multiples_list.Clear;                  // clear out any existing stuff
      chk := do_checksum (old_size, old_filename);     // get the checksum and add to list
      multiples_list.Add (Format ('%12d ', [chk]) + ' ' + old_filename);
      // update the user on the progress of the comparison
      StatusBar1.Panels [1].Text := 'Comparing ' +
          Format ('%.0n', [current_size + 0.0]) + ' byte files ...';
      StatusBar1.Update;
      end;

    in_multiple := same_size;
    if in_multiple
    then
      // if in a list checksum this file and add it to the list
      begin
      chk := do_checksum (current_size, current_filename);
      multiples_list.Add (Format ('%12d ', [chk]) + ' ' + current_filename);
      end
    else
      // if not in a list, process any existing list and reset it by clearing out
      begin
      if multiples_list.Count <> 0 then
        begin
        do_multiples (old_size, multiples_list);
        multiples_list.Clear;
        end;
      end;

    old_size := current_size;
    old_filename := current_filename;
    Inc (items_done);
    file_list.Delete (item);       // give the memory back for this item
    Dec (item);
    end;

  // we might get here and still have a list of multiples pending
  // so process it, but there's no point in clearing out the list
  if multiples_list.Count <> 0 then
    do_multiples (old_size, multiples_list);

  // try and make the status text reflect the most recent user command
  if TreeScanner1.Continue
    then StatusBar1.Panels [1].Text := 'Scan complete'
    else StatusBar1.Panels [1].Text := 'Scan interrupted';
  if not continue_compare
    then StatusBar1.Panels [1].Text := 'Comparison interrupted';

  // clear up after this procedure
  progress_bar.Visible := False;
  progress_bar.Position := 0;
  multiples_list.Free;
  file_list.Clear;
  dir_list.Clear;
  comparing := False;
  btnSearch.Caption := 'Start Search';
end;

procedure TfrmMain.btnSearchClick(Sender: TObject);
begin
  if TreeScanner1.SearchInProgress
  then
    TreeScanner1.Continue := False
  else
    if comparing
    then
      continue_compare := False
    else
      begin
      btnSearch.Caption := 'Stop';
      continue_compare := True;
      find_files_to_compare;
      compare_files;
      end;
end;

procedure TfrmMain.lbxFilesDblClick(Sender: TObject);
var
  filename: string;
begin
  with lbxFiles do
    begin
    if SelCount = 0 then Exit;
    filename := Trim (Items.Strings [ItemIndex]);
    if Length (filename) = 0 then Exit;
    if filename [1] in ['0'..'9'] then Exit;
    if Pos (same_as, filename) = 1 then Delete (filename, 1, Length (same_as));
    frmFileProperties1.filename := filename;
    frmFileProperties1.ShowModal;
    end;
end;

procedure TfrmMain.btnExitClick(Sender: TObject);
begin
  Close;
end;

end.

