Created
September 13, 2024 20:25
-
-
Save deanebarker/d7213a037f24b6038bcca8ccca5b58ba to your computer and use it in GitHub Desktop.
C# code to do a one-way file sync between a source and a target directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Script entry point: runs a one-way sync from the source directory to the target
// directory, archiving any target file right before it is overwritten or deleted,
// then writes the operation log to a timestamped JSON file.
void Main()
{
    var sm = new FileSystemSyncManager(
        source: @"C:\Users\deane\Dropbox\_sites\dbn\content",
        target: @"C:\Users\deane\Dropbox\Desktop\sync-test\target"
    );

    // This archives a file right before it's overwritten
    sm.BeforeOverwriteExistingFile = (source, target) =>
    {
        versionTargetFile(target, sm.TargetFiles.Path);
        return true; // true = let the overwrite proceed
    };

    // This archives a file right before it's deleted
    sm.BeforeDeleteFile = (fileToDelete) =>
    {
        versionTargetFile(fileToDelete, sm.TargetFiles.Path);
        return true; // true = let the delete proceed
    };

    sm.Sync();

    // "HH" (24-hour clock) keeps the log file name unique across AM/PM; the 12-hour
    // "hh" specifier would give a morning run and an afternoon run the same name.
    File.WriteAllText(@$"C:\Users\deane\Dropbox\Desktop\sync-test\logs\{DateTime.Now:yyyy-MM-dd-HH-mm-ss}.txt", JsonSerializer.Serialize(sm.Log, new JsonSerializerOptions() { WriteIndented = true, PropertyNamingPolicy = System.Text.Json.JsonNamingPolicy.CamelCase }));
    Console.WriteLine(sm.Log);

    // This copies the target file to an archive folder before it disappears.
    // The archived file name is "<timestamp>-<relative path with separators turned into dots>".
    // NOTE(review): the original comment says the copy retains the same LastWriteTime
    // (i.e. when the file was last changed in source) - this relies on File.Copy
    // preserving the timestamp; confirm on the platform in use.
    void versionTargetFile(string path, string targetDirectoryPath)
    {
        if (File.Exists(path))
        {
            // Strip the target directory from the front of the path, character by character
            var relativePath = new String(path.RemoveMatchFromStart(targetDirectoryPath).ToArray());

            var versionFileName = string.Concat(
                // 24-hour clock so versions taken 12 hours apart cannot collide
                DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss"),
                "-",
                relativePath.Replace("/", ".").Replace(@"\", ".").TrimStart('.')
            );

            File.Copy(path, Path.Combine(@"C:\Users\deane\Dropbox\Desktop\sync-test\archives", versionFileName));
        }
    }
}
// QUESTION: Do we delete empty directories? Yes... ? | |
// QUESTION: Are we syncing directories too, or just files in directories? As of right now, we only sync files, so empty directories are invisible to us...
/// <summary>
/// Performs a one-way sync of files from a source directory to a target directory:
/// copies new/changed files, deletes target files that no longer exist in source,
/// and removes empty directories from the target. Every operation performed is
/// appended to <see cref="Log"/>.
/// </summary>
public class FileSystemSyncManager
{
    /// <summary>The operations performed so far, in the order they happened.</summary>
    public List<LogEntry> Log { get; private set; } = new();

    /// <summary>Index of the files under the source directory, built in the constructor.</summary>
    public FileRefCollection SourceFiles { get; set; }

    /// <summary>Index of the files under the target directory, built in the constructor.</summary>
    public FileRefCollection TargetFiles { get; set; }

    // Return false from any of these to cancel the operation
    public Func<string, string, bool> BeforeWriteNewFile = (s, t) => { return true; };
    public Func<string, string, bool> BeforeOverwriteExistingFile = (s, t) => { return true; };
    public Func<string, bool> BeforeDeleteFile = (s) => { return true; };

    /// <summary>
    /// Indexes both directories. Both must already exist.
    /// </summary>
    /// <param name="source">Directory to copy from.</param>
    /// <param name="target">Directory to bring in line with <paramref name="source"/>.</param>
    /// <exception cref="NullReferenceException">Either argument is null.</exception>
    /// <exception cref="InvalidOperationException">Either directory does not exist.</exception>
    public FileSystemSyncManager(string source, string target)
    {
        // NOTE(review): ArgumentNullException would be the conventional type here;
        // NullReferenceException is kept so existing callers see the same exception.
        if (source == null) throw new NullReferenceException($"{nameof(source)} cannot be null");
        if (target == null) throw new NullReferenceException($"{nameof(target)} cannot be null");
        if (!Directory.Exists(source)) throw new InvalidOperationException($"Source directory \"{source}\" does not exist");
        if (!Directory.Exists(target)) throw new InvalidOperationException($"Target directory \"{target}\" does not exist");

        SourceFiles = new FileRefCollection(source);
        TargetFiles = new FileRefCollection(target);
    }

    /// <summary>
    /// Runs the sync using the file indexes captured in <see cref="SourceFiles"/> and
    /// <see cref="TargetFiles"/>. The indexes are not rebuilt here.
    /// </summary>
    public void Sync()
    {
        CopyNewAndChangedFiles();
        DeleteOrphanedTargetFiles();
        DeleteEmptyTargetDirectories();
    }

    // Copies every source file whose location+content key is missing from the target.
    private void CopyNewAndChangedFiles()
    {
        // When examining the source, we compare both location and content, because we want to know
        // 1. What files don't exist in target? (location)
        // 2. What files have different content in target? (content)
        var inSourceNotTarget = SourceFiles.ByLocationAndContent.Keys
            .Except(TargetFiles.ByLocationAndContent.Keys)
            .Select(key => SourceFiles.ByLocationAndContent[key].RelativePath)
            .ToList();

        foreach (var relativePath in inSourceNotTarget)
        {
            var sourcePath = Path.Combine(SourceFiles.Path, relativePath);
            var targetPath = Path.Combine(TargetFiles.Path, relativePath);

            bool proceed;
            string operation;
            if (File.Exists(targetPath))
            {
                // We're overwriting an existing file
                proceed = BeforeOverwriteExistingFile(sourcePath, targetPath);
                operation = LogEntry.OverwriteFileLogLabel;
            }
            else
            {
                // We're copying a new file
                proceed = BeforeWriteNewFile(sourcePath, targetPath);
                operation = LogEntry.CopyFileLogLabel;
            }

            if (!proceed)
            {
                continue; // the callback cancelled this file
            }

            var targetDirectory = Path.GetDirectoryName(targetPath);
            if (!Directory.Exists(targetDirectory))
            {
                Directory.CreateDirectory(targetDirectory);
                // Log the directory that was created, not the file about to be copied into it
                Log.Add(new(LogEntry.CreateDirectoryLogLabel, targetDirectory));
            }

            File.Copy(sourcePath, targetPath, true);
            Log.Add(new(operation, targetPath));
        }
    }

    // Deletes every target file whose relative location does not exist in the source.
    private void DeleteOrphanedTargetFiles()
    {
        // When examining the target, we just compare location, because we don't care about content changes (there shouldn't be any for files from source)
        var inTargetNotSource = TargetFiles.ByLocation.Keys
            .Except(SourceFiles.ByLocation.Keys)
            .Select(key => TargetFiles.ByLocation[key].RelativePath)
            .ToList();

        foreach (var relativePath in inTargetNotSource)
        {
            var targetPath = Path.Combine(TargetFiles.Path, relativePath);
            if (BeforeDeleteFile(targetPath))
            {
                File.Delete(targetPath);
                Log.Add(new(LogEntry.DeleteFileLogLabel, targetPath));
            }
        }
    }

    // Removes every directory under the target that contains neither files nor subdirectories.
    private void DeleteEmptyTargetDirectories()
    {
        // We sort the full paths in descending order in the hopes that we delete children
        // before parents (a child's path starts with its parent's path, so it sorts after it)
        var directoriesByDepth = new DirectoryInfo(TargetFiles.Path)
            .GetDirectories(("*"), new EnumerationOptions() { RecurseSubdirectories = true })
            .Select(d => d.FullName)
            .OrderByDescending(d => d)
            .ToList();

        foreach (var directoryPath in directoriesByDepth)
        {
            var directory = new DirectoryInfo(directoryPath);
            if (!directory.GetFiles().Any() && !directory.GetDirectories().Any())
            {
                Directory.Delete(directoryPath);
                Log.Add(new(LogEntry.DeleteDirectoryLogLabel, directoryPath));
            }
        }
    }
}
/// <summary>
/// All the files found (recursively) under one directory path, indexed two ways:
/// by relative location only, and by location plus content signature.
/// </summary>
public class FileRefCollection
{
    /// <summary>The directory path this collection was built from.</summary>
    public string Path { get; init; }

    /// <summary>Files keyed by <c>FileRef.RelativePath</c>.</summary>
    public Dictionary<string, FileRef> ByLocation { get; init; } = new();

    /// <summary>Files keyed by <c>FileRef.Hash</c> (relative path + content signature).</summary>
    public Dictionary<string, FileRef> ByLocationAndContent { get; init; } = new();

    /// <summary>
    /// Scans <paramref name="path"/> and fills both indexes.
    /// </summary>
    /// <param name="path">Root directory to scan; relative paths are computed against it.</param>
    public FileRefCollection(string path)
    {
        Path = path;

        // First wrap every file on disk in a FileRef...
        var discovered = new List<FileRef>();
        foreach (var fileInfo in FileSystemUtils.GetAllFiles(path))
        {
            discovered.Add(new FileRef(fileInfo, path));
        }

        // ...then register each one under both of its keys
        foreach (var fileRef in discovered)
        {
            ByLocation[fileRef.RelativePath] = fileRef;
            ByLocationAndContent[fileRef.Hash] = fileRef;
        }
    }

    /// <summary>
    /// A consolidated signature: the individual file hashes, ordered by relative path
    /// and joined with newlines. It changes whenever the location or content signature
    /// of any file changes, so two collections with equal values hold the same set of
    /// relative paths and content signatures.
    /// </summary>
    public string Hash
    {
        get
        {
            var orderedHashes = ByLocation.Values
                .OrderBy(fileRef => fileRef.RelativePath)
                .Select(fileRef => fileRef.Hash);

            return string.Join(Environment.NewLine, orderedHashes);
        }
    }
}
/// <summary>
/// One file inside a synced directory, identified by its path relative to that directory.
/// </summary>
public class FileRef
{
    // The "content signature" is simply a string that will change if the content of the file changes
    // How you decide this is up to you
    // This is exposed as a static field so you can re-implement it if you want
    // Example: you could create a hash of the actual bytes of the file
    // The default uses the file's last write time.
    public static Func<FileRef, string> GetContentSignature = (f) => { return f.File.LastWriteTime.ToFileTimeUtc().ToString(); };

    /// <summary>
    /// The path relative to the source or target directory: lower-cased, with
    /// backslash separators.
    /// </summary>
    public string RelativePath { get; set; }

    /// <summary>The underlying file, kept so a custom <see cref="GetContentSignature"/> can use it.</summary>
    public FileInfo File { get; init; }

    /// <summary>
    /// Wraps <paramref name="fileInfo"/> and computes its path relative to <paramref name="relativeTo"/>.
    /// </summary>
    /// <param name="fileInfo">The file on disk.</param>
    /// <param name="relativeTo">The root directory the relative path is measured from.</param>
    public FileRef(FileInfo fileInfo, string relativeTo)
    {
        File = fileInfo; // Saved so you can use it in the GetContentSignature method if you want it

        // Compare segment by segment so that only whole directory names are stripped
        var rootSegments = NormalizePath(relativeTo).Split('\\');
        var fileSegments = NormalizePath(fileInfo.FullName).Split('\\');

        RelativePath = string.Join('\\', fileSegments.RemoveMatchFromStart(rootSegments));
    }

    // Lower-cases the path and converts forward slashes to backslashes.
    // Invariant casing is used because culture-sensitive casing can change path
    // characters (e.g. Turkish maps 'I' to a dotless 'ı'), producing a different path.
    private static string NormalizePath(string path)
    {
        return path.ToLowerInvariant().Replace('/', '\\');
    }

    /// <summary>Relative path followed by the content signature; changes if either changes.</summary>
    public string Hash => string.Concat(RelativePath, GetContentSignature(this));
}
/// <summary>
/// A single operation performed during a sync: what was done, to which path, and when.
/// </summary>
public record LogEntry
{
    // Labels used for the Operation value
    public static string OverwriteFileLogLabel = "overwrite-file";
    public static string CopyFileLogLabel = "copy-file";
    public static string DeleteFileLogLabel = "delete-file";
    public static string DeleteDirectoryLogLabel = "delete-directory";
    public static string CreateDirectoryLogLabel = "create-directory";

    /// <summary>What we did.</summary>
    public string Operation { get; init; }

    /// <summary>The path we did it on.</summary>
    public string Path { get; init; }

    /// <summary>When we did it (local time, captured at construction).</summary>
    public DateTime Timestamp { get; init; }

    /// <summary>
    /// Creates an entry stamped with the current local time.
    /// </summary>
    /// <param name="operation">The operation label.</param>
    /// <param name="path">The path the operation was applied to.</param>
    public LogEntry(string operation, string path)
    {
        (Timestamp, Operation, Path) = (DateTime.Now, operation, path);
    }

    /// <summary>Formats the entry as "timestamp: operation: path".</summary>
    public override string ToString() => $"{Timestamp}: {Operation}: {Path}";
}
// These are from my DeaneBarker.Utils library | |
public static class LinqQueryExtensions
{
    /// <summary>
    /// Removes from the start of <paramref name="source"/> the leading elements that match
    /// <paramref name="prefix"/>, position by position. Matching stops at the first
    /// difference, so a partially matching prefix removes only the elements that matched.
    /// </summary>
    /// <example>
    /// new[] { 1,2,3,4 }.ToList().RemoveMatchFromStart(new[] {1,2}); // Result is 3,4; 1,2 were removed from the start
    /// new[] { 1,2,3 }.RemoveMatchFromStart(new[] {1,9});            // Result is 2,3; only the leading 1 matched
    /// </example>
    /// <typeparam name="T">Element type; compared with the default equality comparer.</typeparam>
    /// <param name="source">The sequence to trim.</param>
    /// <param name="prefix">The elements to remove from the start of <paramref name="source"/>.</param>
    /// <returns>
    /// The remaining elements of <paramref name="source"/>; an empty sequence when
    /// <paramref name="prefix"/> is longer than <paramref name="source"/>.
    /// </returns>
    public static IEnumerable<T> RemoveMatchFromStart<T>(this IEnumerable<T> source, IEnumerable<T> prefix)
    {
        // Materialize each input exactly once so lazy sequences are not re-enumerated
        var sourceList = source.ToList();
        var prefixList = prefix.ToList();

        if (prefixList.Count > sourceList.Count)
        {
            // If the prefix is longer than the source, then it will negate ALL members by definition
            return Enumerable.Empty<T>();
        }

        if (prefixList.Count == 0)
        {
            // There's nothing to check
            return sourceList;
        }

        // The default comparer handles null elements, unlike calling element.Equals(...) directly
        var comparer = EqualityComparer<T>.Default;
        var matched = 0;
        while (matched < prefixList.Count && comparer.Equals(sourceList[matched], prefixList[matched]))
        {
            matched++;
        }

        return sourceList.Skip(matched);
    }
}
public static class FileSystemUtils
{
    /// <summary>
    /// Collects every file at or below the supplied directory, descending into all subdirectories.
    /// </summary>
    /// <param name="path">The directory to start from.</param>
    /// <param name="searchPattern">File name pattern to match; defaults to "*.*".</param>
    /// <returns>A list with one <see cref="FileInfo"/> per matching file.</returns>
    public static List<FileInfo> GetAllFiles(string path, string searchPattern = "*.*")
    {
        var root = new DirectoryInfo(path);
        FileInfo[] matches = root.GetFiles(searchPattern, SearchOption.AllDirectories);
        return new List<FileInfo>(matches);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment