在.NET中获取目录数据的最快方法

我正在使用文件同步服务来同步不同机器上两个文件夹之间的文件。我需要找到一种非常快速的方式来枚举一个目录,并从中获取以下信息:

  • 一个或多个数据结构,包含该目录中所有文件路径和子目录路径,以及每个文件或子目录的最后写入时间。
  • 对于当前目录下任何级别的每个子目录,与上述相同。

到目前为止,我已经想出了这个:

/// <summary>
/// Benchmark driver: enumerates drive C over and over and prints the number of
/// milliseconds each complete pass took.
/// </summary>
static void Main(string[] args)
{
    var files = new List<Tuple<string, DateTime>>();
    var directories = new List<Tuple<string, DateTime>>();
    var stopwatch = new Stopwatch();

    for (;;)
    {
        stopwatch.Start();

        // Retry once per second until a pass succeeds. You can assume for all intents and
        // purposes that drive C exists and is accessible, so this sleep is not expected to run.
        while (!CheckFolderRecursiveSingleThreaded("C:\\", out files, out directories))
        {
            Thread.Sleep(1000);
        }

        stopwatch.Stop();
        Console.WriteLine(stopwatch.ElapsedMilliseconds);
        stopwatch.Reset();

        // Do something with the information.
        Thread.Sleep(1000);
    }
}

/// <summary>
/// Recursively collects (full path, last write time in UTC) pairs for every file and
/// sub-directory below <paramref name="path"/> on the calling thread.
/// </summary>
/// <param name="path">Directory to enumerate.</param>
/// <param name="files">Receives the files found, or null when the method returns false.</param>
/// <param name="directories">Receives the directories found, or null when the method returns false.</param>
/// <returns>
/// true when <paramref name="path"/> itself could be enumerated; false when any exception was
/// thrown while doing so. Sub-directories whose own enumeration fails are still listed, but
/// their contents are left out.
/// </returns>
static bool CheckFolderRecursiveSingleThreaded(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
{
    files = null;
    directories = null;

    try
    {
        var root = new DirectoryInfo(path);

        var foundFiles = new List<Tuple<string, DateTime>>();
        foreach (FileInfo entry in root.GetFiles())
        {
            foundFiles.Add(new Tuple<string, DateTime>(entry.FullName, entry.LastWriteTimeUtc));
        }

        var foundDirectories = new List<Tuple<string, DateTime>>();
        foreach (DirectoryInfo child in root.GetDirectories())
        {
            // The ReparsePoint flag indicates a symbolic link; those are not listed or followed.
            if (child.Attributes.HasFlag(FileAttributes.ReparsePoint))
            {
                continue;
            }

            foundDirectories.Add(new Tuple<string, DateTime>(child.FullName, child.LastWriteTimeUtc));

            List<Tuple<string, DateTime>> nestedFiles;
            List<Tuple<string, DateTime>> nestedDirectories;
            if (!CheckFolderRecursiveSingleThreaded(child.FullName, out nestedFiles, out nestedDirectories))
            {
                continue;
            }

            foundFiles.AddRange(nestedFiles);
            foundDirectories.AddRange(nestedDirectories);
        }

        files = foundFiles;
        directories = foundDirectories;
        return true;
    }
    catch
    {
        return false;
    }
}

在性能方面,枚举我的整个 C:\ 驱动器大约需要 22 秒(无论是发行版还是调试版,均在未附加调试器的情况下运行),得到约 549,254 个文件和 83,235 个文件夹的列表。但它还能更快吗?我愿意接受任何建议,甚至是 MSVC++ 的建议。

编辑:借助 LINQ 的 AsParallel 进行多线程处理后,耗时约 12 秒(必须在 Release 模式下测试)。请注意,这里是对 C:\ 下的各个子文件夹并行处理,而递归调用仍然使用我上面提到的单线程实现,否则对所有文件夹都做并行处理将耗时很长!

 /// <summary>
 /// Enumerates <paramref name="path"/>, walking each of its immediate sub-directories in
 /// parallel. Every sub-tree is walked by CheckFolderRecursiveSingleThreaded on a worker.
 /// </summary>
 /// <param name="path">Directory to enumerate.</param>
 /// <param name="files">Receives (full path, last write time UTC) for every file found, or null on failure.</param>
 /// <param name="directories">Receives (full path, last write time UTC) for every directory found, or null on failure.</param>
 /// <returns>true on success; false when any exception was thrown (including one raised by a parallel worker).</returns>
 /// <remarks>The order of the entries in the result lists is not deterministic.</remarks>
 static bool CheckFolderParallelled(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
 {
     try
     {
         DirectoryInfo directoryInformation = new DirectoryInfo(path);

         List<Tuple<string, DateTime>> fileList = new List<Tuple<string, DateTime>>();
         foreach (FileInfo file in directoryInformation.GetFiles())
         {
             fileList.Add(new Tuple<string, DateTime>(file.FullName, file.LastWriteTimeUtc));
         }

         List<Tuple<string, DateTime>> directoryList = new List<Tuple<string, DateTime>>();

         // List<T> is not safe for concurrent writers, so every merge into the shared
         // result lists happens under this lock.
         object resultsLock = new object();

         directoryInformation.GetDirectories().AsParallel().ForAll(directory =>
         {
             // Check for the ReparsePoint flag, which will indicate a symbolic link.
             if (directory.Attributes.HasFlag(FileAttributes.ReparsePoint))
             {
                 return;
             }

             // Do all file-system work outside the lock; only the list merges are serialised.
             Tuple<string, DateTime> directoryEntry = new Tuple<string, DateTime>(directory.FullName, directory.LastWriteTimeUtc);
             List<Tuple<string, DateTime>> directoryFiles;
             List<Tuple<string, DateTime>> directoryFolders;
             bool walked = CheckFolderRecursiveSingleThreaded(directory.FullName, out directoryFiles, out directoryFolders);

             lock (resultsLock)
             {
                 directoryList.Add(directoryEntry);
                 if (walked)
                 {
                     fileList.AddRange(directoryFiles);
                     directoryList.AddRange(directoryFolders);
                 }
             }
         });

         files = fileList;
         directories = directoryList;
         return true;
     }
     catch
     {
         files = null;
         directories = null;
         return false;
     }
 }

编辑:采用阿列克谢所链接的、马克·格雷韦尔那个被采纳的答案中的解决方案后,仍然需要约 21 秒。这种非递归技术并不是最快的(维护这个 Queue 数据类型的开销,可能与在调用栈上压入和弹出该方法调用的开销一样昂贵):

 /// <summary>
 /// Enumerates <paramref name="path"/> and everything below it without recursion, using an
 /// explicit queue of directories that still have to be visited (breadth-first).
 /// </summary>
 /// <param name="path">Directory to enumerate.</param>
 /// <param name="files">Receives (full path, last write time UTC) for every file found, or null on failure.</param>
 /// <param name="directories">Receives (full path, last write time UTC) for every directory found, or null on failure.</param>
 /// <returns>
 /// true on success; false only when an exception escapes the per-directory handling.
 /// Directories that cannot be read are skipped silently.
 /// </returns>
 static bool CheckFolderNonRecursive(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
 {
     try
     {
         List<Tuple<string, DateTime>> fileList = new List<Tuple<string, DateTime>>();
         List<Tuple<string, DateTime>> directoryList = new List<Tuple<string, DateTime>>();

         // Only this thread touches the queue, so a plain Queue<T> is enough; it avoids the
         // synchronisation work of ConcurrentQueue<T> and of polling its Count property.
         Queue<DirectoryInfo> pendingSearches = new Queue<DirectoryInfo>();
         pendingSearches.Enqueue(new DirectoryInfo(path));

         while (pendingSearches.Count > 0)
         {
             DirectoryInfo pendingDirectory = pendingSearches.Dequeue();
             try
             {
                 foreach (FileInfo file in pendingDirectory.GetFiles())
                 {
                     fileList.Add(new Tuple<string, DateTime>(file.FullName, file.LastWriteTimeUtc));
                 }

                 foreach (DirectoryInfo directory in pendingDirectory.GetDirectories())
                 {
                     // Check for the ReparsePoint flag, which will indicate a symbolic link.
                     if (!directory.Attributes.HasFlag(FileAttributes.ReparsePoint))
                     {
                         directoryList.Add(new Tuple<string, DateTime>(directory.FullName, directory.LastWriteTimeUtc));
                         pendingSearches.Enqueue(directory);
                     }
                 }
             }
             catch
             {
                 // Deliberate best effort: ignore directories with no access rights and
                 // carry on with the remaining queue entries.
             }
         }

         files = fileList;
         directories = directoryList;
         return true;
     }
     catch
     {
         files = null;
         directories = null;
         return false;
     }
 }

编辑:这个问题并不局限于 .NET,因为 MSVC++ 的库(比如 boost)可能有更快的方式,但我还没有遇到更快的方法。如果有人能用 C++ 写出更快的 C 盘枚举器、获取相同的数据并胜过我的 C# 方法,那么首先值得称赞,其次我会非常有兴趣看一看,第三它会帮到很多人(不只是我自己)。我用 boost 做了尝试,直到发现下面的方法耗时约 200,000 毫秒,远远长于我上面发布的任何代码:

 #include "stdafx.h"
 #include <iostream>
 #include <Windows.h>
 #include <boost/filesystem.hpp>
 #include <boost/foreach.hpp>
 #include <boost/timer.hpp>

 namespace fs = boost::filesystem;

 bool IterateDirectory(const wchar_t *directory);

 /// Benchmark driver: walks drive C repeatedly and prints the elapsed time of each pass.
 int _tmain(int argc, _TCHAR* argv[])
 {
     boost::timer timer;
     while (true)
     {
         timer.restart();
         // L makes the literal wide, since IterateDirectory takes wchar_t.
         // R makes it a raw string literal, so the backslash is taken as-is and not as an escape.
         IterateDirectory(LR"(C:\)");
         std::cout << "Elapsed time: " << timer.elapsed() * 1000 << " ms" << std::endl;
         Sleep(1000);
     }
     return 0;
 }

 /// @brief Recursively visits every regular file and directory below @p directory.
 ///
 /// Takes wchar_t because path::c_str() yields wchar_t on Windows builds of Boost.Filesystem.
 /// All Boost calls use the error_code overloads, so an unreadable directory or entry is
 /// skipped instead of throwing. Each entry is classified from the directory entry's own
 /// symlink_status() rather than by calling is_regular_file()/is_directory() on the path,
 /// which queries the file system again for every entry. Because symlink_status() does not
 /// follow links, symbolic links are neither reported as directories nor descended into.
 ///
 /// @param directory Path of the directory to walk.
 /// @return true when @p directory could be opened for iteration, false otherwise.
 bool IterateDirectory(const wchar_t *directory)
 {
     boost::system::error_code ec;
     fs::directory_iterator it(directory, ec);
     if (ec)
     {
         // Missing, not a directory, or no access rights.
         return false;
     }

     const fs::directory_iterator end;
     while (it != end)
     {
         const fs::file_status status = it->symlink_status(ec);
         if (!ec)
         {
             if (fs::is_regular_file(status))
             {
                 //std::cout << it->path() << ", last write time: " << last_write_time(it->path()) << '.' << std::endl;
             }
             else if (fs::is_directory(status))
             {
                 //std::cout << it->path() << ", last write time: " << last_write_time(it->path()) << '.' << std::endl;
                 // A failure inside a sub-directory only skips that sub-directory.
                 IterateDirectory(it->path().c_str());
             }
         }

         it.increment(ec);
         if (ec)
         {
             // The rest of this directory cannot be read; keep what was visited so far.
             break;
         }
     }
     return true;
 }

编辑:通过 P/Invoke 调用 FindFirstFile 和 FindNextFile,遍历我的整个 C 盘大约需要 6 秒(感谢重复问题的链接以及山姆·Saffron 的答案)。但是……还能更快吗?

 [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
 public static extern IntPtr FindFirstFileW(string lpFileName, out WIN32_FIND_DATAW lpFindFileData);

 [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
 public static extern bool FindNextFile(IntPtr hFindFile, out WIN32_FIND_DATAW lpFindFileData);

 [DllImport("kernel32.dll")]
 public static extern bool FindClose(IntPtr hFindFile);

 /// <summary>Managed layout of the Win32 WIN32_FIND_DATAW structure filled in by the find functions.</summary>
 [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
 public struct WIN32_FIND_DATAW
 {
     public FileAttributes dwFileAttributes;
     internal System.Runtime.InteropServices.ComTypes.FILETIME ftCreationTime;
     internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastAccessTime;
     internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastWriteTime;
     public int nFileSizeHigh;
     public int nFileSizeLow;
     public int dwReserved0;
     public int dwReserved1;
     [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 260)]
     public string cFileName;
     [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 14)]
     public string cAlternateFileName;
 }

 /// <summary>Value FindFirstFileW returns when the search could not be started.</summary>
 static IntPtr INVALID_HANDLE_VALUE = new IntPtr(-1);

 /// <summary>
 /// Recursively enumerates <paramref name="path"/> through FindFirstFileW/FindNextFile and
 /// collects (full path, last write time UTC) for every file and directory below it.
 /// </summary>
 /// <param name="path">Directory to enumerate, with or without a trailing backslash.</param>
 /// <param name="files">Receives the files found, or null when the method returns false.</param>
 /// <param name="directories">Receives the directories found, or null when the method returns false.</param>
 /// <returns>
 /// true unless an exception was thrown. A directory whose search cannot be started
 /// (FindFirstFileW returns INVALID_HANDLE_VALUE) yields true with empty lists.
 /// </returns>
 static bool FindNextFilePInvokeRecursive(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
 {
     List<Tuple<string, DateTime>> fileList = new List<Tuple<string, DateTime>>();
     List<Tuple<string, DateTime>> directoryList = new List<Tuple<string, DateTime>>();
     WIN32_FIND_DATAW findData;
     IntPtr findHandle = INVALID_HANDLE_VALUE;

     try
     {
         // Normalise the separator once instead of re-testing it for every entry.
         string prefix = path.EndsWith("\\", StringComparison.Ordinal) ? path : path + "\\";

         findHandle = FindFirstFileW(prefix + "*", out findData);
         if (findHandle != INVALID_HANDLE_VALUE)
         {
             do
             {
                 // Skip the current-directory and parent-directory entries.
                 if (findData.cFileName == "." || findData.cFileName == "..")
                     continue;

                 string fullPath = prefix + findData.cFileName;

                 // Plain bitwise tests keep Enum.HasFlag out of the per-entry loop.
                 bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                 bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                 // Only descend into real directories: symbolic links could lead to repeated
                 // files and folders as well as infinite loops.
                 if (isDirectory && !isReparsePoint)
                 {
                     directoryList.Add(new Tuple<string, DateTime>(fullPath, findData.ftLastWriteTime.ToDateTime()));

                     List<Tuple<string, DateTime>> subDirectoryFileList;
                     List<Tuple<string, DateTime>> subDirectoryDirectoryList;
                     if (FindNextFilePInvokeRecursive(fullPath, out subDirectoryFileList, out subDirectoryDirectoryList))
                     {
                         fileList.AddRange(subDirectoryFileList);
                         directoryList.AddRange(subDirectoryDirectoryList);
                     }
                 }
                 else if (!isDirectory)
                 {
                     fileList.Add(new Tuple<string, DateTime>(fullPath, findData.ftLastWriteTime.ToDateTime()));
                 }
             }
             while (FindNextFile(findHandle, out findData));
         }
     }
     catch (Exception exception)
     {
         Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
         files = null;
         directories = null;
         return false;
     }
     finally
     {
         // Single release point for the search handle on every exit path.
         if (findHandle != INVALID_HANDLE_VALUE)
             FindClose(findHandle);
     }

     files = fileList;
     directories = directoryList;
     return true;
 }

 public static class FILETIMEExtensions
 {
     /// <summary>Converts a Win32 FILETIME (two 32-bit halves) to a UTC <see cref="DateTime"/>.</summary>
     /// <param name="filetime">The FILETIME value to convert.</param>
     /// <returns>The corresponding UTC date and time.</returns>
     public static DateTime ToDateTime(this System.Runtime.InteropServices.ComTypes.FILETIME filetime)
     {
         // dwLowDateTime is declared as a signed int. Going through uint zero-extends it;
         // converting it straight to a 64-bit type would sign-extend and shift the result
         // by 2^32 ticks whenever bit 31 of the low half is set.
         long fileTime = ((long)filetime.dwHighDateTime << 32) | (uint)filetime.dwLowDateTime;
         return DateTime.FromFileTimeUtc(fileTime);
     }
 }

编辑:是的,还可以更快。通过把目标文件夹各子目录的递归并行化,并使用上面的 FindNextFilePInvokeRecursive 方法,我把时间降到了 4 秒。也就是说,用 4 秒就能遍历我的整个 C 盘并拿到我需要的数据。我在进程监视器中看到,CPU 占用约 30%,磁盘占用最多只有 1%,这让我觉得有点奇怪,目前还不清楚原因,也许只是这种链表式的遍历方式使磁盘开销几乎可以忽略不计。理想情况下,它至少应该吃满 100% 的 CPU,但这可能取决于并行处理的子文件夹的数量和深度。不过,还能更快吗?

 /// <summary>
 /// Enumerates <paramref name="path"/> through FindFirstFileW/FindNextFile and then walks each
 /// of its immediate sub-directories in parallel with FindNextFilePInvokeRecursive.
 /// </summary>
 /// <param name="path">Directory to enumerate, with or without a trailing backslash.</param>
 /// <param name="files">Receives (full path, last write time UTC) for every file found, or null on failure.</param>
 /// <param name="directories">Receives (full path, last write time UTC) for every directory found, or null on failure.</param>
 /// <returns>true unless an exception was thrown (including one raised by a parallel worker).</returns>
 /// <remarks>The order of the entries contributed by the sub-directories is not deterministic.</remarks>
 static bool FindNextFilePInvokeRecursiveParalleled(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
 {
     List<Tuple<string, DateTime>> fileList = new List<Tuple<string, DateTime>>();
     List<Tuple<string, DateTime>> directoryList = new List<Tuple<string, DateTime>>();
     // List<T> is not safe for concurrent writers; all merges from workers take this lock.
     object resultsLock = new object();
     WIN32_FIND_DATAW findData;
     IntPtr findHandle = INVALID_HANDLE_VALUE;

     try
     {
         // Normalise the separator once instead of re-testing it for every entry.
         string prefix = path.EndsWith("\\", StringComparison.Ordinal) ? path : path + "\\";

         findHandle = FindFirstFileW(prefix + "*", out findData);
         if (findHandle != INVALID_HANDLE_VALUE)
         {
             do
             {
                 // Skip the current-directory and parent-directory entries.
                 if (findData.cFileName == "." || findData.cFileName == "..")
                     continue;

                 string fullPath = prefix + findData.cFileName;
                 bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                 bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                 // Only real directories are recorded: symbolic links could lead to repeated
                 // files and folders as well as infinite loops.
                 if (isDirectory && !isReparsePoint)
                 {
                     directoryList.Add(new Tuple<string, DateTime>(fullPath, findData.ftLastWriteTime.ToDateTime()));
                 }
                 else if (!isDirectory)
                 {
                     fileList.Add(new Tuple<string, DateTime>(fullPath, findData.ftLastWriteTime.ToDateTime()));
                 }
             }
             while (FindNextFile(findHandle, out findData));

             // Iterate over a snapshot: the workers append to directoryList, and a list must
             // not be modified while the parallel query is still reading from it.
             Tuple<string, DateTime>[] topLevelDirectories = directoryList.ToArray();

             topLevelDirectories.AsParallel().ForAll(x =>
             {
                 List<Tuple<string, DateTime>> subDirectoryFileList;
                 List<Tuple<string, DateTime>> subDirectoryDirectoryList;
                 if (FindNextFilePInvokeRecursive(x.Item1, out subDirectoryFileList, out subDirectoryDirectoryList))
                 {
                     lock (resultsLock)
                     {
                         fileList.AddRange(subDirectoryFileList);
                         directoryList.AddRange(subDirectoryDirectoryList);
                     }
                 }
             });
         }
     }
     catch (Exception exception)
     {
         Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
         files = null;
         directories = null;
         return false;
     }
     finally
     {
         // Single release point for the search handle on every exit path.
         if (findHandle != INVALID_HANDLE_VALUE)
             FindClose(findHandle);
     }

     files = fileList;
     directories = directoryList;
     return true;
 }

编辑:之前忘了在并行处理时加并发锁,不加的话可能会遇到异常。另外去掉了元组,改用符合我需要的 FileInformation / DirectoryInformation 类。这又省下了 0.5 秒。现在枚举我的 C: 驱动器只需 3.5 秒。

 [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
 public static extern IntPtr FindFirstFileW(string lpFileName, out WIN32_FIND_DATAW lpFindFileData);

 [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
 public static extern bool FindNextFile(IntPtr hFindFile, out WIN32_FIND_DATAW lpFindFileData);

 [DllImport("kernel32.dll")]
 public static extern bool FindClose(IntPtr hFindFile);

 /// <summary>Managed layout of the Win32 WIN32_FIND_DATAW structure filled in by the find functions.</summary>
 [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
 public struct WIN32_FIND_DATAW
 {
     public FileAttributes dwFileAttributes;
     internal System.Runtime.InteropServices.ComTypes.FILETIME ftCreationTime;
     internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastAccessTime;
     internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastWriteTime;
     public int nFileSizeHigh;
     public int nFileSizeLow;
     public int dwReserved0;
     public int dwReserved1;
     [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 260)]
     public string cFileName;
     [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 14)]
     public string cAlternateFileName;
 }

 /// <summary>Value FindFirstFileW returns when the search could not be started.</summary>
 static IntPtr INVALID_HANDLE_VALUE = new IntPtr(-1);

 /// <summary>
 /// Recursively enumerates <paramref name="path"/> through FindFirstFileW/FindNextFile on the
 /// calling thread and collects every file and directory below it.
 /// </summary>
 /// <param name="path">Directory to enumerate, with or without a trailing backslash.</param>
 /// <param name="files">Receives the files found, or null when the method returns false.</param>
 /// <param name="directories">Receives the directories found, or null when the method returns false.</param>
 /// <returns>
 /// true unless an exception was thrown. A directory whose search cannot be started
 /// (FindFirstFileW returns INVALID_HANDLE_VALUE) yields true with empty lists.
 /// </returns>
 static bool FindNextFilePInvokeRecursive(string path, out List<FileInformation> files, out List<DirectoryInformation> directories)
 {
     List<FileInformation> fileList = new List<FileInformation>();
     List<DirectoryInformation> directoryList = new List<DirectoryInformation>();
     WIN32_FIND_DATAW findData;
     IntPtr findHandle = INVALID_HANDLE_VALUE;

     try
     {
         // Normalise the separator once so a path that already ends in a backslash does not
         // produce a doubled separator in the search pattern or the result paths.
         string prefix = path.EndsWith(@"\", StringComparison.Ordinal) ? path : path + @"\";

         findHandle = FindFirstFileW(prefix + "*", out findData);
         if (findHandle != INVALID_HANDLE_VALUE)
         {
             do
             {
                 // Skip current directory and parent directory symbols that are returned.
                 if (findData.cFileName == "." || findData.cFileName == "..")
                     continue;

                 string fullPath = prefix + findData.cFileName;

                 // Plain bitwise tests keep Enum.HasFlag out of the per-entry loop.
                 bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                 bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                 // Only descend into real directories: symbolic links could lead to repeated
                 // files and folders as well as infinite loops.
                 if (isDirectory && !isReparsePoint)
                 {
                     directoryList.Add(new DirectoryInformation { FullPath = fullPath, LastWriteTime = findData.ftLastWriteTime.ToDateTime() });

                     List<FileInformation> subDirectoryFileList;
                     List<DirectoryInformation> subDirectoryDirectoryList;
                     if (FindNextFilePInvokeRecursive(fullPath, out subDirectoryFileList, out subDirectoryDirectoryList))
                     {
                         fileList.AddRange(subDirectoryFileList);
                         directoryList.AddRange(subDirectoryDirectoryList);
                     }
                 }
                 else if (!isDirectory)
                 {
                     fileList.Add(new FileInformation { FullPath = fullPath, LastWriteTime = findData.ftLastWriteTime.ToDateTime() });
                 }
             }
             while (FindNextFile(findHandle, out findData));
         }
     }
     catch (Exception exception)
     {
         Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
         files = null;
         directories = null;
         return false;
     }
     finally
     {
         // Single release point for the search handle on every exit path.
         if (findHandle != INVALID_HANDLE_VALUE)
             FindClose(findHandle);
     }

     files = fileList;
     directories = directoryList;
     return true;
 }

 /// <summary>
 /// Enumerates <paramref name="path"/> and then walks each of its immediate sub-directories in
 /// parallel with FindNextFilePInvokeRecursive, merging the results under a lock.
 /// </summary>
 /// <param name="path">Directory to enumerate, with or without a trailing backslash.</param>
 /// <param name="files">Receives the files found, or null when the method returns false.</param>
 /// <param name="directories">Receives the directories found, or null when the method returns false.</param>
 /// <returns>true unless an exception was thrown (including one raised by a parallel worker).</returns>
 /// <remarks>The order of the entries contributed by the sub-directories is not deterministic.</remarks>
 static bool FindNextFilePInvokeRecursiveParalleled(string path, out List<FileInformation> files, out List<DirectoryInformation> directories)
 {
     List<FileInformation> fileList = new List<FileInformation>();
     object fileListLock = new object();
     List<DirectoryInformation> directoryList = new List<DirectoryInformation>();
     object directoryListLock = new object();
     WIN32_FIND_DATAW findData;
     IntPtr findHandle = INVALID_HANDLE_VALUE;

     try
     {
         path = path.EndsWith(@"\", StringComparison.Ordinal) ? path : path + @"\";

         findHandle = FindFirstFileW(path + @"*", out findData);
         if (findHandle != INVALID_HANDLE_VALUE)
         {
             do
             {
                 // Skip current directory and parent directory symbols that are returned.
                 if (findData.cFileName == "." || findData.cFileName == "..")
                     continue;

                 string fullPath = path + findData.cFileName;
                 bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                 bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                 // Only real directories are recorded: symbolic links could lead to repeated
                 // files and folders as well as infinite loops.
                 if (isDirectory && !isReparsePoint)
                 {
                     directoryList.Add(new DirectoryInformation { FullPath = fullPath, LastWriteTime = findData.ftLastWriteTime.ToDateTime() });
                 }
                 else if (!isDirectory)
                 {
                     fileList.Add(new FileInformation { FullPath = fullPath, LastWriteTime = findData.ftLastWriteTime.ToDateTime() });
                 }
             }
             while (FindNextFile(findHandle, out findData));

             // Iterate over a snapshot: the workers append to directoryList, and the lock
             // below only guards the writers, not the parallel query reading its source.
             DirectoryInformation[] topLevelDirectories = directoryList.ToArray();

             topLevelDirectories.AsParallel().ForAll(x =>
             {
                 List<FileInformation> subDirectoryFileList;
                 List<DirectoryInformation> subDirectoryDirectoryList;
                 if (FindNextFilePInvokeRecursive(x.FullPath, out subDirectoryFileList, out subDirectoryDirectoryList))
                 {
                     lock (fileListLock)
                     {
                         fileList.AddRange(subDirectoryFileList);
                     }
                     lock (directoryListLock)
                     {
                         directoryList.AddRange(subDirectoryDirectoryList);
                     }
                 }
             });
         }
     }
     catch (Exception exception)
     {
         Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
         files = null;
         directories = null;
         return false;
     }
     finally
     {
         // Single release point for the search handle on every exit path.
         if (findHandle != INVALID_HANDLE_VALUE)
             FindClose(findHandle);
     }

     files = fileList;
     directories = directoryList;
     return true;
 }

 /// <summary>Full path and last write time of one file, as produced by the enumerators above.</summary>
 public class FileInformation
 {
     public string FullPath;
     public DateTime LastWriteTime;
 }

 /// <summary>Full path and last write time of one directory, as produced by the enumerators above.</summary>
 public class DirectoryInformation
 {
     public string FullPath;
     public DateTime LastWriteTime;
 }

编辑 :BK询问从FILETIME到DateTime的转换:

 public static class FILETIMEExtensions
 {
     /// <summary>Converts a Win32 FILETIME (two 32-bit halves) to a UTC <see cref="DateTime"/>.</summary>
     /// <param name="time">The FILETIME value to convert.</param>
     /// <returns>The corresponding UTC date and time.</returns>
     public static DateTime ToDateTime(this System.Runtime.InteropServices.ComTypes.FILETIME time)
     {
         // Both halves are declared as signed int. Converting a negative int directly to
         // ulong or long sign-extends it, which corrupts the upper 32 bits whenever bit 31 of
         // the low half is set. Going through uint zero-extends the low half instead.
         long fileTime = ((long)time.dwHighDateTime << 32) | (uint)time.dwLowDateTime;
         return DateTime.FromFileTimeUtc(fileTime);
     }
 }

使用LINQ和并行任务

 // Collect every file below dir (all sub-directories included) in a single call.
 var stuff = dir.GetFiles("*.*", System.IO.SearchOption.AllDirectories);

 // Option 1: process the files in parallel.
 Parallel.ForEach(stuff, file =>
 {
     // do things in parallel..
 });

 // Option 2: a parallel LINQ pipeline. p, k, f and a are placeholders for the caller's own
 // filter, sort key, projection and consumer.
 var q = stuff.AsParallel().Where(x => p(x)).OrderBy(x => k(x)).Select(x => f(x));
 foreach (var e in q)
     a(e);