Skip to content

Instantly share code, notes, and snippets.

@deanebarker
Created September 13, 2024 20:25
Show Gist options
  • Save deanebarker/d7213a037f24b6038bcca8ccca5b58ba to your computer and use it in GitHub Desktop.
Save deanebarker/d7213a037f24b6038bcca8ccca5b58ba to your computer and use it in GitHub Desktop.
C# code to do a one-way file sync between a source and a target directory
// Script entry point: runs a one-way sync from the source directory to the target directory,
// archiving any target file right before it is overwritten or deleted, then writes the
// operation log to a timestamped JSON file.
void Main()
{
    var sm = new FileSystemSyncManager(
        source: @"C:\Users\deane\Dropbox\_sites\dbn\content",
        target: @"C:\Users\deane\Dropbox\Desktop\sync-test\target"
    );

    // This archives a file right before it's overwritten
    sm.BeforeOverwriteExistingFile = (source, target) =>
    {
        versionTargetFile(target, sm.TargetFiles.Path);
        return true;
    };

    // This archives a file right before it's deleted
    sm.BeforeDeleteFile = (fileToDelete) =>
    {
        versionTargetFile(fileToDelete, sm.TargetFiles.Path);
        return true;
    };

    sm.Sync();

    // "HH" (24-hour clock) is used instead of "hh" so that a run at 03:15 and a run at 15:15
    // on the same day do not produce the same log file name.
    File.WriteAllText(@$"C:\Users\deane\Dropbox\Desktop\sync-test\logs\{DateTime.Now:yyyy-MM-dd-HH-mm-ss}.txt", JsonSerializer.Serialize(sm.Log, new JsonSerializerOptions() { WriteIndented = true, PropertyNamingPolicy = System.Text.Json.JsonNamingPolicy.CamelCase }));
    Console.WriteLine(sm.Log);

    // This copies the target file to an archive folder before it disappears.
    // The archived copy should retain the same LastWriteTime, which is when it was
    // last changed in source.
    //   path:                full path of the target file about to be overwritten or deleted
    //   targetDirectoryPath: root of the target directory; stripped from the start of "path"
    //                        so the archive name reflects only the relative location
    void versionTargetFile(string path, string targetDirectoryPath)
    {
        if (File.Exists(path))
        {
            var relativePath = new String(path.RemoveMatchFromStart(targetDirectoryPath).ToArray());

            // Flatten the relative path into a single file name: separators become dots.
            // The timestamp uses the 24-hour "HH" specifier; with the 12-hour "hh" an archive
            // made twelve hours later would get the same name and File.Copy would throw.
            var versionFileName = string.Concat(
                DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss"),
                "-",
                relativePath.Replace("/", ".").Replace(@"\", ".").TrimStart('.')
            );

            File.Copy(path, Path.Combine(@"C:\Users\deane\Dropbox\Desktop\sync-test\archives", versionFileName));
        }
    }
}
// QUESTION: Do we delete empty directories? Yes... ?
// QUESTION: Are we syncing directories too, or just files in directories? As of right now, we only sync files, so empty directories are invisible to us...
/// <summary>
/// Performs a one-way sync from a source directory to a target directory: files that are
/// missing or different in target are copied from source, files that exist only in target
/// are deleted, and directories left empty in target are removed.
/// The file listings are captured once, in the constructor.
/// </summary>
public class FileSystemSyncManager
{
    /// <summary>Every operation performed by <see cref="Sync"/>, in the order it happened.</summary>
    public List<LogEntry> Log { get; private set; } = new();

    /// <summary>Snapshot of the files under the source directory.</summary>
    public FileRefCollection SourceFiles { get; set; }

    /// <summary>Snapshot of the files under the target directory.</summary>
    public FileRefCollection TargetFiles { get; set; }

    // Return false from any of these to cancel the operation

    /// <summary>Called with (sourcePath, targetPath) before a file that does not exist in target is copied.</summary>
    public Func<string, string, bool> BeforeWriteNewFile = (s, t) => { return true; };

    /// <summary>Called with (sourcePath, targetPath) before an existing target file is overwritten.</summary>
    public Func<string, string, bool> BeforeOverwriteExistingFile = (s, t) => { return true; };

    /// <summary>Called with the target path before a file that no longer exists in source is deleted.</summary>
    public Func<string, bool> BeforeDeleteFile = (s) => { return true; };

    /// <summary>
    /// Creates a sync manager and indexes the files under both directories.
    /// </summary>
    /// <param name="source">Directory to copy from. Must exist.</param>
    /// <param name="target">Directory to copy to. Must exist.</param>
    /// <exception cref="NullReferenceException">Either argument is null (exception type kept for existing callers).</exception>
    /// <exception cref="InvalidOperationException">Either directory does not exist.</exception>
    public FileSystemSyncManager(string source, string target)
    {
        if (source == null) throw new NullReferenceException($"{nameof(source)} cannot be null");
        if (target == null) throw new NullReferenceException($"{nameof(target)} cannot be null");
        if (!Directory.Exists(source)) throw new InvalidOperationException($"Source directory \"{source}\" does not exist");
        if (!Directory.Exists(target)) throw new InvalidOperationException($"Target directory \"{target}\" does not exist");

        SourceFiles = new FileRefCollection(source);
        TargetFiles = new FileRefCollection(target);
    }

    /// <summary>
    /// Runs the sync: copy new/changed files, delete files that only exist in target,
    /// then delete empty directories in target. Each step is recorded in <see cref="Log"/>.
    /// </summary>
    public void Sync()
    {
        // When examining the source, we compare both location and content, because we want to know
        // 1. What files don't exist in target? (location)
        // 2. What files have different content in target? (content)
        var inSourceNotTarget = SourceFiles.ByLocationAndContent.Keys
            .Except(TargetFiles.ByLocationAndContent.Keys)
            .Select(key => SourceFiles.ByLocationAndContent[key].RelativePath)
            .ToList();

        foreach (var relativePath in inSourceNotTarget)
        {
            var sourcePath = Path.Combine(SourceFiles.Path, relativePath);
            var targetPath = Path.Combine(TargetFiles.Path, relativePath);

            bool proceed;
            string operation;
            if (File.Exists(targetPath))
            {
                // We're overwriting an existing file
                proceed = BeforeOverwriteExistingFile(sourcePath, targetPath);
                operation = LogEntry.OverwriteFileLogLabel;
            }
            else
            {
                // We're copying a new file
                proceed = BeforeWriteNewFile(sourcePath, targetPath);
                operation = LogEntry.CopyFileLogLabel;
            }

            if (!proceed)
            {
                continue;
            }

            var targetDirectory = Path.GetDirectoryName(targetPath);
            if (!Directory.Exists(targetDirectory))
            {
                Directory.CreateDirectory(targetDirectory);
                // Log the directory that was created, not the file that is about to be copied into it
                Log.Add(new(LogEntry.CreateDirectoryLogLabel, targetDirectory));
            }

            File.Copy(sourcePath, targetPath, true);
            Log.Add(new(operation, targetPath));
        }

        // When examining the target, we just compare location, because we don't care about content changes (there shouldn't be any for files from source)
        var inTargetNotSource = TargetFiles.ByLocation.Keys
            .Except(SourceFiles.ByLocation.Keys)
            .Select(key => TargetFiles.ByLocation[key].RelativePath)
            .ToList();

        foreach (var relativePath in inTargetNotSource)
        {
            var targetPath = Path.Combine(TargetFiles.Path, relativePath);
            if (BeforeDeleteFile(targetPath))
            {
                File.Delete(targetPath);
                Log.Add(new(LogEntry.DeleteFileLogLabel, targetPath));
            }
        }

        // Delete empty directories
        // A child's full path always starts with its parent's full path, so sorting the paths in
        // descending ordinal order visits every child before its parent. That lets a parent that
        // only contained empty directories be removed in the same pass.
        var directoriesChildrenFirst = new DirectoryInfo(TargetFiles.Path)
            .GetDirectories("*", new EnumerationOptions() { RecurseSubdirectories = true })
            .Select(d => d.FullName)
            .OrderByDescending(d => d, StringComparer.Ordinal)
            .ToList();

        foreach (var directoryPath in directoriesChildrenFirst)
        {
            var directory = new DirectoryInfo(directoryPath);
            if (!directory.GetFiles().Any() && !directory.GetDirectories().Any())
            {
                Directory.Delete(directoryPath);
                Log.Add(new(LogEntry.DeleteDirectoryLogLabel, directoryPath));
            }
        }
    }
}
// This represents all the files (recursively) in a given directory path
/// <summary>
/// An index of every file found (recursively) under a directory, built once at construction.
/// </summary>
public class FileRefCollection
{
    /// <summary>The directory path this collection was built from, as passed to the constructor.</summary>
    public string Path { get; init; }

    /// <summary>Files keyed by their relative path.</summary>
    public Dictionary<string, FileRef> ByLocation { get; init; } = new();

    /// <summary>Files keyed by their <see cref="FileRef.Hash"/> (relative path plus content signature).</summary>
    public Dictionary<string, FileRef> ByLocationAndContent { get; init; } = new();

    /// <summary>
    /// Enumerates all files under <paramref name="path"/> and indexes each one twice.
    /// </summary>
    /// <param name="path">Root directory to scan.</param>
    public FileRefCollection(string path)
    {
        Path = path;

        var fileRefs = new List<FileRef>();
        foreach (var fileInfo in FileSystemUtils.GetAllFiles(path))
        {
            fileRefs.Add(new FileRef(fileInfo, path));
        }

        foreach (var fileRef in fileRefs)
        {
            // Two indexes over the same files: one by location only, one by location + content
            ByLocation[fileRef.RelativePath] = fileRef;
            ByLocationAndContent[fileRef.Hash] = fileRef;
        }
    }

    /// <summary>
    /// The individual file hashes, ordered by relative path and joined with newlines.
    /// It changes when the location or content signature of any file changes, so two
    /// collections with equal values describe the same file structure.
    /// </summary>
    public string Hash
    {
        get
        {
            var hashesInPathOrder = ByLocation
                .OrderBy(entry => entry.Value.RelativePath)
                .Select(entry => entry.Value.Hash);

            return string.Join(Environment.NewLine, hashesInPathOrder);
        }
    }
}
/// <summary>
/// A single file, identified by its path relative to a root directory plus a "content signature".
/// </summary>
public class FileRef
{
    // The "content signature" is simply a string that will change if the content of the file changes
    // How you decide this is up to you
    // This is exposed as a static field so you can re-implement it if you want
    // Example: you could create a hash of the actual bytes of the file
    public static Func<FileRef, string> GetContentSignature = (f) => { return f.File.LastWriteTime.ToFileTimeUtc().ToString(); };

    /// <summary>The lower-cased, backslash-separated path relative to the source or target directory.</summary>
    public string RelativePath { get; set; }

    /// <summary>The underlying file; kept so a custom <see cref="GetContentSignature"/> can use it.</summary>
    public FileInfo File { get; init; }

    /// <summary>
    /// Creates a reference to <paramref name="fileInfo"/> whose <see cref="RelativePath"/> is its
    /// full path with the leading segments of <paramref name="relativeTo"/> removed.
    /// </summary>
    /// <param name="fileInfo">The file being referenced.</param>
    /// <param name="relativeTo">The root directory the path is made relative to. May be a relative path.</param>
    public FileRef(FileInfo fileInfo, string relativeTo)
    {
        File = fileInfo; // Saved so you can use it in the GetContentSignature method if you want it

        // FileInfo.FullName is always absolute, so the root must be made absolute too; otherwise a
        // relative root never matches and the "relative" path would be the whole absolute path.
        var rootSegments = NormalizePath(Path.GetFullPath(relativeTo)).Split('\\');
        var fileSegments = NormalizePath(fileInfo.FullName).Split('\\');

        RelativePath = string.Join('\\', fileSegments.RemoveMatchFromStart(rootSegments));
    }

    // Lower-cases the path and converts forward slashes to backslashes so that paths can be
    // compared segment by segment. The invariant culture is used because the result becomes part
    // of a file name; a culture-sensitive lower-casing (e.g. Turkish "I" -> dotless "i") would
    // change the name.
    private string NormalizePath(string path)
    {
        return path.ToLowerInvariant().Replace('/', '\\');
    }

    /// <summary>The relative path followed by the content signature.</summary>
    public string Hash => string.Concat(RelativePath, GetContentSignature(this));
}
/// <summary>
/// One operation performed during a sync: what was done, to which path, and when.
/// </summary>
public record LogEntry
{
    // Labels used for the Operation value
    public static string OverwriteFileLogLabel = "overwrite-file";
    public static string CopyFileLogLabel = "copy-file";
    public static string DeleteFileLogLabel = "delete-file";
    public static string DeleteDirectoryLogLabel = "delete-directory";
    public static string CreateDirectoryLogLabel = "create-directory";

    /// <summary>The operation that was performed.</summary>
    public string Operation { get; init; }

    /// <summary>The path the operation was performed on.</summary>
    public string Path { get; init; }

    /// <summary>The local time at which this entry was created.</summary>
    public DateTime Timestamp { get; init; }

    /// <summary>
    /// Creates an entry stamped with the current local time.
    /// </summary>
    /// <param name="operation">The operation label.</param>
    /// <param name="path">The path that was operated on.</param>
    public LogEntry(string operation, string path)
    {
        (Timestamp, Operation, Path) = (DateTime.Now, operation, path);
    }

    /// <summary>Formats the entry as "timestamp: operation: path".</summary>
    public override string ToString() => string.Format("{0}: {1}: {2}", Timestamp, Operation, Path);
}
// These are from my DeaneBarker.Utils library
public static class LinqQueryExtensions
{
    /// <summary>
    /// Removes from the start of <paramref name="source"/> the leading elements that match
    /// <paramref name="prefix"/>, position by position. Matching stops at the first position
    /// where the two differ, so a partially matching prefix removes only the part that matched.
    /// </summary>
    /// <example>
    /// new[] { 1,2,3,4 }.ToList().RemoveMatchFromStart(new[] {1,2}); // Result is 3,4; 1,2 were removed from the start
    /// </example>
    /// <typeparam name="T">Element type; compared with the default equality comparer.</typeparam>
    /// <param name="source">The sequence to strip elements from.</param>
    /// <param name="prefix">The elements expected at the start of <paramref name="source"/>.</param>
    /// <returns>
    /// The remaining elements of <paramref name="source"/>. An empty sequence if
    /// <paramref name="prefix"/> is longer than <paramref name="source"/>;
    /// <paramref name="source"/> itself if <paramref name="prefix"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static IEnumerable<T> RemoveMatchFromStart<T>(this IEnumerable<T> source, IEnumerable<T> prefix)
    {
        if (source is null) throw new ArgumentNullException(nameof(source));
        if (prefix is null) throw new ArgumentNullException(nameof(prefix));

        // Materialize each sequence exactly once; they may be lazy and expensive to re-enumerate
        var prefixList = prefix.ToList();
        if (prefixList.Count == 0)
        {
            // There's nothing to check
            return source;
        }

        var sourceList = source.ToList();
        if (prefixList.Count > sourceList.Count)
        {
            // If the prefix is longer than the source, then it will negate ALL members by definition
            return Enumerable.Empty<T>();
        }

        // The default comparer handles null elements, unlike calling Equals on the element itself
        var comparer = EqualityComparer<T>.Default;
        var matched = 0;
        while (matched < prefixList.Count && comparer.Equals(sourceList[matched], prefixList[matched]))
        {
            matched++;
        }

        return sourceList.Skip(matched);
    }
}
public static class FileSystemUtils
{
    /// <summary>
    /// Returns every file, recursively, from the supplied path down.
    /// </summary>
    /// <param name="path">The directory to start from.</param>
    /// <param name="searchPattern">The file name pattern to match; defaults to "*.*".</param>
    /// <returns>A list with one <see cref="FileInfo"/> per matching file.</returns>
    public static List<FileInfo> GetAllFiles(string path, string searchPattern = "*.*")
    {
        var root = new DirectoryInfo(path);
        FileInfo[] matches = root.GetFiles(searchPattern, SearchOption.AllDirectories);
        return new List<FileInfo>(matches);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment