Userland中断计时器访问,如通过KeQueryInterruptTime(或类似)

是否有一个“Nt”或类似的(即非内核模式驱动程序)函数,等价于KeQueryInterruptTime或类似的东西?似乎没有NtQueryInterruptTime这样的函数,至少我还没有找到它。

我想要的是某种相当准确和可靠的单调定时器(因此不是 QPC),它是合理有效的,并且没有溢出的32位计数器的意外,没有不必要的“智能”,没有时区或复杂的结构。

所以理想情况下,我想要一个像64位值timeGetTime 。 它甚至不必是同一个计时器
有从Vista开始的GetTickCount64 ,这将是可以接受的,但我不想打破XP的支持,只有这样一个愚蠢的原因。

按照这里所示读取0x7FFE0008处的四字…好吧,这确实可行…它证明了实际的内部计数器在XP下是64位的(而且这也是最快的方式),但是…我们就不讨论读取某个未知的、硬编码的内存位置是多么糟糕的hack了。

在调用一个被人为弱化(把64位计数器截断为32位)的高级API函数和直接读取原始内存地址之间,肯定还有某种折中的办法。

下面是GetTickCount()的一个线程安全包装器的例子,它将tick count值扩展到64位,并且相当于GetTickCount64()。

为避免漏掉计数器翻转,请务必在每49.7天内至少调用该函数几次。你甚至可以用一个专用线程,其唯一的目的就是调用这个函数,然后在无限循环中睡眠20天。

 ULONGLONG MyGetTickCount64(void) { static volatile LONGLONG Count = 0; LONGLONG curCount1, curCount2; LONGLONG tmp; curCount1 = InterlockedCompareExchange64(&Count, 0, 0); curCount2 = curCount1 & 0xFFFFFFFF00000000; curCount2 |= GetTickCount(); if ((ULONG)curCount2 < (ULONG)curCount1) { curCount2 += 0x100000000; } tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1); if (tmp == curCount1) { return curCount2; } else { return tmp; } } 

编辑 :这里是一个完整的应用程序,测试MyGetTickCount64()。

 // Compiled with Open Watcom C 1.9: wcl386.exe /we /wx /q gettick.c #include <windows.h> #include <stdio.h> #include <stdarg.h> #include <stdlib.h> // // The below code is an ugly implementation of InterlockedCompareExchange64() // that is apparently missing in Open Watcom C 1.9. // It must work with MSVC++ too, however. // UINT8 Cmpxchg8bData[] = { 0x55, // push ebp 0x89, 0xE5, // mov ebp, esp 0x57, // push edi 0x51, // push ecx 0x53, // push ebx 0x8B, 0x7D, 0x10, // mov edi, [ebp + 0x10] 0x8B, 0x07, // mov eax, [edi] 0x8B, 0x57, 0x04, // mov edx, [edi + 0x4] 0x8B, 0x7D, 0x0C, // mov edi, [ebp + 0xc] 0x8B, 0x1F, // mov ebx, [edi] 0x8B, 0x4F, 0x04, // mov ecx, [edi + 0x4] 0x8B, 0x7D, 0x08, // mov edi, [ebp + 0x8] 0xF0, // lock: 0x0F, 0xC7, 0x0F, // cmpxchg8b [edi] 0x5B, // pop ebx 0x59, // pop ecx 0x5F, // pop edi 0x5D, // pop ebp 0xC3 // ret }; LONGLONG (__cdecl *Cmpxchg8b)(LONGLONG volatile* Dest, LONGLONG* Exch, LONGLONG* Comp) = (LONGLONG (__cdecl *)(LONGLONG volatile*, LONGLONG*, LONGLONG*))Cmpxchg8bData; LONGLONG MyInterlockedCompareExchange64(LONGLONG volatile* Destination, LONGLONG Exchange, LONGLONG Comparand) { return Cmpxchg8b(Destination, &Exchange, &Comparand); } #ifdef InterlockedCompareExchange64 #undef InterlockedCompareExchange64 #endif #define InterlockedCompareExchange64(Destination, Exchange, Comparand) \ MyInterlockedCompareExchange64(Destination, Exchange, Comparand) // // This stuff makes a thread-safe printf(). // We don't want characters output by one thread to be mixed // with characters output by another. We want printf() to be // "atomic". // We use a critical section around vprintf() to achieve "atomicity". // static CRITICAL_SECTION PrintfCriticalSection; int ts_printf(const char* Format, ...) 
{ int count; va_list ap; EnterCriticalSection(&PrintfCriticalSection); va_start(ap, Format); count = vprintf(Format, ap); va_end(ap); LeaveCriticalSection(&PrintfCriticalSection); return count; } #define TICK_COUNT_10MS_INCREMENT 0x800000 // // This is the simulated tick counter. // Its low 32 bits are going to be returned by // our, simulated, GetTickCount(). // // TICK_COUNT_10MS_INCREMENT is what the counter is // incremented by every time. The value is so chosen // that the counter quickly overflows in its // low 32 bits. // static volatile LONGLONG SimulatedTickCount = 0; // // This is our simulated 32-bit GetTickCount() // that returns a count that often overflows. // ULONG SimulatedGetTickCount(void) { return (ULONG)SimulatedTickCount; } // // This thread function will increment the simulated tick counter // whose value's low 32 bits we'll be reading in SimulatedGetTickCount(). // DWORD WINAPI SimulatedTickThread(LPVOID lpParameter) { UNREFERENCED_PARAMETER(lpParameter); for (;;) { LONGLONG c; Sleep(10); // Get the counter value, add TICK_COUNT_10MS_INCREMENT to it and // store the result back. c = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0); InterlockedCompareExchange64(&SimulatedTickCount, c + TICK_COUNT_10MS_INCREMENT, c) != c); } return 0; } volatile LONG CountOfObserved32bitOverflows = 0; volatile LONG CountOfObservedUpdateRaces = 0; // // This prints statistics that includes the true 64-bit value of // SimulatedTickCount that we can't get from SimulatedGetTickCount() as it // returns only its lower 32 bits. // // The stats also include: // - the number of times that MyGetTickCount64() observes an overflow of // SimulatedGetTickCount() // - the number of times MyGetTickCount64() fails to update its internal // counter because of a concurrent update in another thread. 
// void PrintStats(void) { LONGLONG true64bitCounter = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0); ts_printf(" 0x%08X`%08X <- true 64-bit count; ovfs: ~%d; races: %d\n", (ULONG)(true64bitCounter >> 32), (ULONG)true64bitCounter, CountOfObserved32bitOverflows, CountOfObservedUpdateRaces); } // // This is our poor man's implementation of GetTickCount64() // on top of GetTickCount(). // // It's thread safe. // // When used with actual GetTickCount() instead of SimulatedGetTickCount() // it must be called at least a few times in 49.7 days to ensure that // it doesn't miss any overflows in GetTickCount()'s return value. // ULONGLONG MyGetTickCount64(void) { static volatile LONGLONG Count = 0; LONGLONG curCount1, curCount2; LONGLONG tmp; curCount1 = InterlockedCompareExchange64(&Count, 0, 0); curCount2 = curCount1 & 0xFFFFFFFF00000000; curCount2 |= SimulatedGetTickCount(); if ((ULONG)curCount2 < (ULONG)curCount1) { curCount2 += 0x100000000; InterlockedIncrement(&CountOfObserved32bitOverflows); } tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1); if (tmp != curCount1) { curCount2 = tmp; InterlockedIncrement(&CountOfObservedUpdateRaces); } return curCount2; } // // This is an error counter. If a thread that uses MyGetTickCount64() notices // any problem with what MyGetTickCount64() returns, it bumps up this error // counter and stops. If one of threads sees a non-zero value in this // counter due to an error in another thread, it stops as well. // volatile LONG Error = 0; // // This is a thread function that will be using MyGetTickCount64(), // validating its return value and printing some stats once in a while. // // This function is meant to execute concurrently in multiple threads // to create race conditions inside of MyGetTickCount64() and test it. 
// DWORD WINAPI TickUserThread(LPVOID lpParameter) { DWORD user = (DWORD)lpParameter; // thread number ULONGLONG ticks[4]; ticks[3] = ticks[2] = ticks[1] = MyGetTickCount64(); while (!Error) { ticks[0] = ticks[1]; ticks[1] = MyGetTickCount64(); // Every ~100 ms sleep a little (slightly lowers CPU load, to about 90%) if (ticks[1] > ticks[2] + TICK_COUNT_10MS_INCREMENT * 10L) { ticks[2] = ticks[1]; Sleep(1 + rand() % 20); } // Every ~1000 ms print the last value from MyGetTickCount64(). // Thread 1 also prints stats here. if (ticks[1] > ticks[3] + TICK_COUNT_10MS_INCREMENT * 100L) { ticks[3] = ticks[1]; ts_printf("%u:0x%08X`%08X\n", user, (ULONG)(ticks[1] >> 32), (ULONG)ticks[1]); if (user == 1) { PrintStats(); } } if (ticks[0] > ticks[1]) { ts_printf("%u:Non-monotonic tick counts: 0x%016llX > 0x%016llX!\n", user, ticks[0], ticks[1]); PrintStats(); InterlockedIncrement(&Error); return -1; } else if (ticks[0] + 0x100000000 <= ticks[1]) { ts_printf("%u:Too big tick count jump: 0x%016llX -> 0x%016llX!\n", user, ticks[0], ticks[1]); PrintStats(); InterlockedIncrement(&Error); return -1; } Sleep(0); // be nice, yield to other threads. } return 0; } // // This prints stats upon Ctrl+C and terminates the program. // BOOL WINAPI ConsoleEventHandler(DWORD Event) { if (Event == CTRL_C_EVENT) { PrintStats(); } return FALSE; } int main(void) { HANDLE simulatedTickThreadHandle; HANDLE tickUserThreadHandle; DWORD dummy; // This is for the missing InterlockedCompareExchange64() workaround. VirtualProtect(Cmpxchg8bData, sizeof(Cmpxchg8bData), PAGE_EXECUTE_READWRITE, &dummy); InitializeCriticalSection(&PrintfCriticalSection); if (!SetConsoleCtrlHandler(&ConsoleEventHandler, TRUE)) { ts_printf("SetConsoleCtrlHandler(&ConsoleEventHandler) failed with error 0x%X\n", GetLastError()); return -1; } // Start the tick simulator thread. 
simulatedTickThreadHandle = CreateThread(NULL, 0, &SimulatedTickThread, NULL, 0, NULL); if (simulatedTickThreadHandle == NULL) { ts_printf("CreateThread(&SimulatedTickThread) failed with error 0x%X\n", GetLastError()); return -1; } // Start one thread that'll be using MyGetTickCount64(). tickUserThreadHandle = CreateThread(NULL, 0, &TickUserThread, (LPVOID)2, 0, NULL); if (tickUserThreadHandle == NULL) { ts_printf("CreateThread(&TickUserThread) failed with error 0x%X\n", GetLastError()); return -1; } // The other thread using MyGetTickCount64() will be the main thread. TickUserThread((LPVOID)1); // // The app terminates upon any error condition detected in TickUserThread() // in any of the threads or by Ctrl+C. // return 0; } 

作为测试,我在一台有2个CPU的空闲Windows XP机器上运行这个测试应用程序超过5个小时(保持空闲是为了避免线程长时间得不到调度,从而漏掉每5秒发生一次的计数器溢出),它仍然运行良好。

以下是控制台的最新输出:

 2:0x00000E1B`C8800000 1:0x00000E1B`FA800000 0x00000E1B`FA800000 <- true 64-bit count; ovfs: ~3824; races: 110858 

如您所见, MyGetTickCount64()已经观察到3824 32位溢出,并且无法使用其第二个InterlockedCompareExchange64() 110858次更新Count的值。 所以,溢出确实发生了,最后一个数字表示变量实际上是由两个线程同时更新的。

您还可以看到,两个线程在TickUserThread()中从MyGetTickCount64()得到的64位tick值,其高32位没有任何丢失,并且非常接近SimulatedTickCount中实际的64位滴答计数(其低32位由SimulatedGetTickCount()返回)。由于线程调度和统计信息打印得不频繁,0x00000E1BC8800000看起来落后于0x00000E1BFA800000,正好落后100 * TICK_COUNT_10MS_INCREMENT,即1秒。当然,内部的实际差距要小得多。

现在说说InterlockedCompareExchange64()的可用性…有点奇怪的是,它的正式可用版本是从Windows Vista和Windows Server 2003开始的。而Server 2003实际上是由与Windows XP相同的代码库构建的。

但是这里最重要的是,这个函数是建立在Pentium的CMPXCHG8B指令之上的,而该指令自1998年或更早(1), (2)起就已可用。我可以在我的Windows XP(SP3)二进制文件中看到这条指令。它出现在ntkrnlpa.exe / ntoskrnl.exe(内核)和ntdll.dll(导出内核NtXxxx()函数的DLL)中。寻找0xF0, 0x0F, 0xC7的字节序列,然后反汇编那个位置的代码,确认这些字节不是碰巧出现的。

您可以通过CPUID指令(CPUID函数0x00000001和函数0x80000001的EDX位8)检查此指令的可用性,如果指令不存在,就拒绝运行而不是崩溃,不过如今您不太可能找到不支持该指令的机器。即使找到了,那台机器也不适合运行Windows XP,多半也不适合运行您的应用程序。

这里有另一种方法,是Alex的封装函数的一个变体,但只使用32位互锁操作。它实际上只返回一个60位的数字,但这仍然相当于大约三千六百万年。 🙂

它确实需要更频繁地调用,至少每三天一次。 这通常不是一个主要的缺点。

 /*
  * Builds the extended tick value: zone_count supplies the bits above
  * bit 27, the low 28 bits come from the current 32-bit tick.
  */
 static ULONGLONG MyTickCount64Compose(DWORD zone_count, DWORD tick32)
 {
     ULONGLONG wide = zone_count;

     wide <<= 28;
     wide += tick32 & 0x0FFFFFFF;
     return wide;
 }

 /*
  * Extends GetTickCount() beyond 32 bits using only 32-bit interlocked
  * operations.
  *
  * A static 32-bit counter tracks how many 2^28-tick "zones" have been
  * seen; its low four bits mirror the top four bits of the 32-bit tick at
  * the time it was last written. The result is that counter shifted left
  * by 28 plus the low 28 bits of the current tick (60 bits in total).
  *
  * Returns 0xFFFFFFFF when the current tick's zone is neither the stored
  * zone nor its immediate successor, i.e. the function was not called
  * often enough to follow the counter.
  */
 ULONGLONG MyTickCount64(void)
 {
     static volatile DWORD count = 0xFFFFFFFF;
     DWORD seen_count;
     DWORD now32;
     DWORD seen_zone;
     DWORD now_zone;

     /* Atomic read of the stored zone counter, then sample the tick. */
     seen_count = InterlockedCompareExchange(&count, 0, 0);
     now32 = GetTickCount();
     now_zone = now32 >> 28;

     if (seen_count == 0xFFFFFFFF)
     {
         /* count has never been written: seed it with the current zone. */
         InterlockedCompareExchange(&count, now_zone, 0xFFFFFFFF);
         return MyTickCount64Compose(now_zone, now32);
     }

     seen_zone = seen_count & 15;

     if (now_zone == seen_zone)
     {
         /* The top four bits of the 32-bit tick count haven't changed
            since count was last written. */
         return MyTickCount64Compose(seen_count, now32);
     }

     if (now_zone != seen_zone + 1 && !(now_zone == 0 && seen_zone == 15))
     {
         /* Oops, we weren't called often enough, we're stuck */
         return 0xFFFFFFFF;
     }

     /* The top four bits of the 32-bit tick count have been incremented
        since count was last written: advance the stored counter (unless
        another thread already did) and report the advanced value. */
     InterlockedCompareExchange(&count, seen_count + 1, seen_count);
     return MyTickCount64Compose(seen_count + 1, now32);
 }

感谢免费提供相关文献的Google图书,我想出了一个简单而快速的GetTickCount64实现,它在Vista之前的系统上也能很好地工作(而且无需读取硬编码的内存地址)。

它实际上就像调用映射到KiGetTickCount中断0x2A一样简单。 在GCC内联汇编中,这给出:

 /*
  * Returns a tick count obtained by raising software interrupt 0x2A.
  *
  * The asm statement has no inputs and no explicit clobbers; its single
  * output uses the GCC "=A" constraint and is stored in `ret`, which is
  * returned unchanged.
  *
  * NOTE(review): the accompanying text states that interrupt 0x2A maps to
  * KiGetTickCount and that the result carries only 46 significant bits
  * (an 18-bit right shift is applied) - confirm before relying on the
  * full 64-bit range.
  * NOTE(review): "=A" is presumably meant as the edx:eax register pair,
  * which is its meaning on 32-bit x86 only - confirm the build target.
  */
 static __inline__ __attribute__((always_inline)) unsigned long long get_tick_count64() { unsigned long long ret; __asm__ __volatile__ ("int $0x2a" : "=A"(ret) : : ); return ret; } 

由于KiGetTickCount工作方式,该函数可能最好叫GetTickCount46 ,因为它执行了18位的右移,返回了46位,而不是64位。尽管原来的Vista版本也是如此。

请注意,KiGetTickCount会破坏(clobber)edx。如果您打算自己实现一个更快的32位版本,这一点很重要(在这种情况下必须把edx加入clobber列表!)。