哪一个更快? 正则expression式或EndsWith?

什么会更快?

public String Roll() { Random rnd = new Random(); int roll = rnd.Next(1, 100000); if (Regex.IsMatch(roll.ToString(), @"(.)\1{1,}$")) { return "doubles"; } return "none"; } 

要么

 public String Roll() { Random rnd = new Random(); int roll = rnd.Next(1, 100000); if (roll.ToString().EndsWith("11") || roll.ToString().EndsWith("22") || roll.ToString().EndsWith("33") || roll.ToString().EndsWith("44") || roll.ToString().EndsWith("55") || roll.ToString().EndsWith("66") || roll.ToString().EndsWith("77") || roll.ToString().EndsWith("88") || roll.ToString().EndsWith("99") || roll.ToString().EndsWith("00")) { return "doubles"; } return "none"; } 

我目前正在使用一个真正长的if语句列表充满正则expression式来检查一个int是否以double,triple,quads,quints等结尾…所以我想知道哪一个更快之前,我改变它所有。

Solutions Collecting From Web of "哪一个更快? 正则expression式或EndsWith?"

在你的具体情况下,正则Regex实际上更快…但它可能是因为你使用EndsWith与许多OR和冗余的ToString() 。 如果你简化逻辑,简单的string操作可能会更快。


下面是文本处理的性能总结 – 从最快到最慢(10百万循环[首选/非首选32位] – 排名是基于两者中最快的排序):

  1. 大查找快速随机UInt (不计算在赏金):219/273毫秒 – 矿,从Evk的改进
  2. 大型查找优化随机:228/273毫秒 – Ivan Stoev的替代解决方案
  3. 大查找快速随机:238/294毫秒 – Evk的替代解决方案
  4. Large Lookup Parameterless Random:248/287 ms – Thomas Ayoub

    我想在这个解决方案上做一些笔记(基于下面的注释):

    1. 该解决方案向小数目(<100000)引入了0.0039%的偏见(参考文献: Eric Lippert的博客文章 ,由Lucas Trzesniewski撰写 )
    2. 由于它改变了使用Random (从Random.Next(int)Random.Next() )的方式,所以在测试时不会产生与其他人相同的序号(参考Michael Liu的评论 )测试本身。

    虽然测试不能用与这个方法完全相同的数字顺序来执行(如Phil1970所提到的),但我有两点要做:

    1. 有些人可能会有兴趣看看Random.Next()与Random.Next(int)的实现,以了解为什么即使使用相同的数字序列,该解决方案仍然会更快。
    2. 实际情况下使用Random 本身(大部分时间)不会假定数字序列是相同的(或可预测的) – 只是为了测试我们的方法,我们希望Random序列是相同的(对于公平单元测试目的)。 此方法的预期更快的结果不能完全从测试结果派生,而是通过查看Next() vs Next(int)实现。
  5. 大查寻:320/284毫秒 – Evk

  6. 最快的优化随机Modded:286/333毫秒伊万Stoev
  7. Lookup Optimized Modded:315/329 ms – Corak
  8. 优化改装:471/330 ms – Stian Standahl
  9. 优化Modded + Constant:472/337 – GjermundGrøneng
  10. 最快优化改装:345/340 ms – GjermundGrøneng
  11. 改装:496/370 ms – Corak +可能Alexei Levenkov
  12. 数字:537/408毫秒 – 阿洛伊斯·克劳斯
  13. 简单:1668/1176毫秒 – 我的
  14. HashSet包含:2138/1609 ms – Dandré
  15. 列表包含:3013/2465毫秒 – 另一个矿山
  16. 编译正则表达式:8956/7675 ms – Radin Gospodinov
  17. 正则表达式:15032/16640毫秒 – OP的解决方案1
  18. EndsWith:24763/20702毫秒 – OP的解决方案2

这里是我的简单测试用例:

 Random rnd = new Random(10000); FastRandom fastRnd = new FastRandom(10000); //OP's Solution 2 public String RollRegex() { int roll = rnd.Next(1, 100000); if (Regex.IsMatch(roll.ToString(), @"(.)\1{1,}$")) { return "doubles"; } else { return "none"; } } //Radin Gospodinov's Solution Regex optionRegex = new Regex(@"(.)\1{1,}$", RegexOptions.Compiled); public String RollOptionRegex() { int roll = rnd.Next(1, 100000); string rollString = roll.ToString(); if (optionRegex.IsMatch(roll.ToString())) { return "doubles"; } else { return "none"; } } //OP's Solution 1 public String RollEndsWith() { int roll = rnd.Next(1, 100000); if (roll.ToString().EndsWith("11") || roll.ToString().EndsWith("22") || roll.ToString().EndsWith("33") || roll.ToString().EndsWith("44") || roll.ToString().EndsWith("55") || roll.ToString().EndsWith("66") || roll.ToString().EndsWith("77") || roll.ToString().EndsWith("88") || roll.ToString().EndsWith("99") || roll.ToString().EndsWith("00")) { return "doubles"; } else { return "none"; } } //My Solution public String RollSimple() { int roll = rnd.Next(1, 100000); string rollString = roll.ToString(); return roll > 10 && rollString[rollString.Length - 1] == rollString[rollString.Length - 2] ? "doubles" : "none"; } //My Other Solution List<string> doubles = new List<string>() { "00", "11", "22", "33", "44", "55", "66", "77", "88", "99" }; public String RollAnotherSimple() { int roll = rnd.Next(1, 100000); string rollString = roll.ToString(); return rollString.Length > 1 && doubles.Contains(rollString.Substring(rollString.Length - 2)) ? "doubles" : "none"; } //Dandré's Solution HashSet<string> doublesHashset = new HashSet<string>() { "00", "11", "22", "33", "44", "55", "66", "77", "88", "99" }; public String RollSimpleHashSet() { int roll = rnd.Next(1, 100000); string rollString = roll.ToString(); return rollString.Length > 1 && doublesHashset.Contains(rollString.Substring(rollString.Length - 2)) ? "doubles" : "none"; } //Corak's Solution - hinted by Alexei Levenkov too public string RollModded() { int roll = rnd.Next(1, 100000); return roll % 100 % 11 == 0 ? "doubles" : "none"; } //Stian Standahl optimizes modded solution public string RollOptimizedModded() { return rnd.Next(1, 100000) % 100 % 11 == 0 ? "doubles" : "none"; } //Gjermund Grøneng's method with constant addition private const string CONST_DOUBLES = "doubles"; private const string CONST_NONE = "none"; public string RollOptimizedModdedConst() { return rnd.Next(1, 100000) % 100 % 11 == 0 ? CONST_DOUBLES : CONST_NONE; } //Gjermund Grøneng's method after optimizing the Random (The fastest!) public string FastestOptimizedModded() { return (rnd.Next(99999) + 1) % 100 % 11 == 0 ? CONST_DOUBLES : CONST_NONE; } //Corak's Solution, added on Gjermund Grøneng's private readonly string[] Lookup = { "doubles", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none" }; public string RollLookupOptimizedModded() { return Lookup[(rnd.Next(99999) + 1) % 100 % 11]; } //Evk's Solution, large Lookup private string[] LargeLookup; private void InitLargeLookup() { LargeLookup = new string[100000]; for (int i = 0; i < 100000; i++) { LargeLookup[i] = i % 100 % 11 == 0 ? "doubles" : "none"; } } public string RollLargeLookup() { return LargeLookup[rnd.Next(99999) + 1]; } //Thomas Ayoub's Solution public string RollLargeLookupParameterlessRandom() { return LargeLookup[rnd.Next() % 100000]; } //Alois Kraus's Solution public string RollNumbers() { int roll = rnd.Next(1, 100000); int lastDigit = roll % 10; int secondLastDigit = (roll / 10) % 10; if (lastDigit == secondLastDigit) { return "doubles"; } else { return "none"; } } //Ivan Stoev's Solution public string FastestOptimizedRandomModded() { return ((int)(rnd.Next() * (99999.0 / int.MaxValue)) + 1) % 100 % 11 == 0 ? CONST_DOUBLES : CONST_NONE; } //Ivan Stoev's Alternate Solution public string RollLargeLookupOptimizedRandom() { return LargeLookup[(int)(rnd.Next() * (99999.0 / int.MaxValue))]; } //Evk's Solution using FastRandom public string RollLargeLookupFastRandom() { return LargeLookup[fastRnd.Next(99999) + 1]; } //My Own Test, using FastRandom + NextUInt public string RollLargeLookupFastRandomUInt() { return LargeLookup[fastRnd.NextUInt() % 99999 + 1]; } 

额外的FastRandom类:

 //FastRandom's part used for the testing public class FastRandom { // The +1 ensures NextDouble doesn't generate 1.0 const double REAL_UNIT_INT = 1.0 / ((double)int.MaxValue + 1.0); const double REAL_UNIT_UINT = 1.0 / ((double)uint.MaxValue + 1.0); const uint Y = 842502087, Z = 3579807591, W = 273326509; uint x, y, z, w; #region Constructors /// <summary> /// Initialises a new instance using time dependent seed. /// </summary> public FastRandom() { // Initialise using the system tick count. Reinitialise((int)Environment.TickCount); } /// <summary> /// Initialises a new instance using an int value as seed. /// This constructor signature is provided to maintain compatibility with /// System.Random /// </summary> public FastRandom(int seed) { Reinitialise(seed); } #endregion #region Public Methods [Reinitialisation] /// <summary> /// Reinitialises using an int value as a seed. /// </summary> /// <param name="seed"></param> public void Reinitialise(int seed) { // The only stipulation stated for the xorshift RNG is that at least one of // the seeds x,y,z,w is non-zero. We fulfill that requirement by only allowing // resetting of the x seed x = (uint)seed; y = Y; z = Z; w = W; } #endregion #region Public Methods [System.Random functionally equivalent methods] /// <summary> /// Generates a random int over the range 0 to int.MaxValue-1. /// MaxValue is not generated in order to remain functionally equivalent to System.Random.Next(). /// This does slightly eat into some of the performance gain over System.Random, but not much. /// For better performance see: /// /// Call NextInt() for an int over the range 0 to int.MaxValue. /// /// Call NextUInt() and cast the result to an int to generate an int over the full Int32 value range /// including negative values. /// </summary> /// <returns></returns> public int Next() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); // Handle the special case where the value int.MaxValue is generated. This is outside of // the range of permitted values, so we therefore call Next() to try again. uint rtn = w & 0x7FFFFFFF; if (rtn == 0x7FFFFFFF) return Next(); return (int)rtn; } /// <summary> /// Generates a random int over the range 0 to upperBound-1, and not including upperBound. /// </summary> /// <param name="upperBound"></param> /// <returns></returns> public int Next(int upperBound) { if (upperBound < 0) throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=0"); uint t = (x ^ (x << 11)); x = y; y = z; z = w; // The explicit int cast before the first multiplication gives better performance. // See comments in NextDouble. return (int)((REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * upperBound); } /// <summary> /// Generates a random int over the range lowerBound to upperBound-1, and not including upperBound. /// upperBound must be >= lowerBound. lowerBound may be negative. /// </summary> /// <param name="lowerBound"></param> /// <param name="upperBound"></param> /// <returns></returns> public int Next(int lowerBound, int upperBound) { if (lowerBound > upperBound) throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=lowerBound"); uint t = (x ^ (x << 11)); x = y; y = z; z = w; // The explicit int cast before the first multiplication gives better performance. // See comments in NextDouble. int range = upperBound - lowerBound; if (range < 0) { // If range is <0 then an overflow has occured and must resort to using long integer arithmetic instead (slower). // We also must use all 32 bits of precision, instead of the normal 31, which again is slower. return lowerBound + (int)((REAL_UNIT_UINT * (double)(w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))) * (double)((long)upperBound - (long)lowerBound)); } // 31 bits of precision will suffice if range<=int.MaxValue. This allows us to cast to an int and gain // a little more performance. return lowerBound + (int)((REAL_UNIT_INT * (double)(int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * (double)range); } /// <summary> /// Generates a random double. Values returned are from 0.0 up to but not including 1.0. /// </summary> /// <returns></returns> public double NextDouble() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; // Here we can gain a 2x speed improvement by generating a value that can be cast to // an int instead of the more easily available uint. If we then explicitly cast to an // int the compiler will then cast the int to a double to perform the multiplication, // this final cast is a lot faster than casting from a uint to a double. The extra cast // to an int is very fast (the allocated bits remain the same) and so the overall effect // of the extra cast is a significant performance improvement. // // Also note that the loss of one bit of precision is equivalent to what occurs within // System.Random. return (REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))); } /// <summary> /// Fills the provided byte array with random bytes. /// This method is functionally equivalent to System.Random.NextBytes(). /// </summary> /// <param name="buffer"></param> public void NextBytes(byte[] buffer) { // Fill up the bulk of the buffer in chunks of 4 bytes at a time. uint x = this.x, y = this.y, z = this.z, w = this.w; int i = 0; uint t; for (int bound = buffer.Length - 3; i < bound; ) { // Generate 4 bytes. // Increased performance is achieved by generating 4 random bytes per loop. // Also note that no mask needs to be applied to zero out the higher order bytes before // casting because the cast ignores thos bytes. Thanks to Stefan Troschьtz for pointing this out. t = (x ^ (x << 11)); x = y; y = z; z = w; w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); buffer[i++] = (byte)w; buffer[i++] = (byte)(w >> 8); buffer[i++] = (byte)(w >> 16); buffer[i++] = (byte)(w >> 24); } // Fill up any remaining bytes in the buffer. if (i < buffer.Length) { // Generate 4 bytes. t = (x ^ (x << 11)); x = y; y = z; z = w; w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); buffer[i++] = (byte)w; if (i < buffer.Length) { buffer[i++] = (byte)(w >> 8); if (i < buffer.Length) { buffer[i++] = (byte)(w >> 16); if (i < buffer.Length) { buffer[i] = (byte)(w >> 24); } } } } this.x = x; this.y = y; this.z = z; this.w = w; } // /// <summary> // /// A version of NextBytes that uses a pointer to set 4 bytes of the byte buffer in one operation // /// thus providing a nice speedup. The loop is also partially unrolled to allow out-of-order-execution, // /// this results in about a x2 speedup on an AMD Athlon. Thus performance may vary wildly on different CPUs // /// depending on the number of execution units available. // /// // /// Another significant speedup is obtained by setting the 4 bytes by indexing pDWord (eg pDWord[i++]=w) // /// instead of adjusting it dereferencing it (eg *pDWord++=w). // /// // /// Note that this routine requires the unsafe compilation flag to be specified and so is commented out by default. // /// </summary> // /// <param name="buffer"></param> // public unsafe void NextBytesUnsafe(byte[] buffer) // { // if(buffer.Length % 8 != 0) // throw new ArgumentException("Buffer length must be divisible by 8", "buffer"); // // uint x=this.x, y=this.y, z=this.z, w=this.w; // // fixed(byte* pByte0 = buffer) // { // uint* pDWord = (uint*)pByte0; // for(int i=0, len=buffer.Length>>2; i < len; i+=2) // { // uint t=(x^(x<<11)); // x=y; y=z; z=w; // pDWord[i] = w = (w^(w>>19))^(t^(t>>8)); // // t=(x^(x<<11)); // x=y; y=z; z=w; // pDWord[i+1] = w = (w^(w>>19))^(t^(t>>8)); // } // } // // this.x=x; this.y=y; this.z=z; this.w=w; // } #endregion #region Public Methods [Methods not present on System.Random] /// <summary> /// Generates a uint. Values returned are over the full range of a uint, /// uint.MinValue to uint.MaxValue, inclusive. /// /// This is the fastest method for generating a single random number because the underlying /// random number generator algorithm generates 32 random bits that can be cast directly to /// a uint. /// </summary> /// <returns></returns> public uint NextUInt() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))); } /// <summary> /// Generates a random int over the range 0 to int.MaxValue, inclusive. /// This method differs from Next() only in that the range is 0 to int.MaxValue /// and not 0 to int.MaxValue-1. /// /// The slight difference in range means this method is slightly faster than Next() /// but is not functionally equivalent to System.Random.Next(). /// </summary> /// <returns></returns> public int NextInt() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; return (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))); } // Buffer 32 bits in bitBuffer, return 1 at a time, keep track of how many have been returned // with bitBufferIdx. uint bitBuffer; uint bitMask = 1; /// <summary> /// Generates a single random bit. /// This method's performance is improved by generating 32 bits in one operation and storing them /// ready for future calls. /// </summary> /// <returns></returns> public bool NextBool() { if (bitMask == 1) { // Generate 32 more bits. uint t = (x ^ (x << 11)); x = y; y = z; z = w; bitBuffer = w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); // Reset the bitMask that tells us which bit to read next. bitMask = 0x80000000; return (bitBuffer & bitMask) == 0; } return (bitBuffer & (bitMask >>= 1)) == 0; } #endregion } 

测试场景:

 public delegate string RollDelegate(); private void Test() { List<string> rollMethodNames = new List<string>(){ "Large Lookup Fast Random UInt", "Large Lookup Fast Random", "Large Lookup Optimized Random", "Fastest Optimized Random Modded", "Numbers", "Large Lookup Parameterless Random", "Large Lookup", "Lookup Optimized Modded", "Fastest Optimized Modded", "Optimized Modded Const", "Optimized Modded", "Modded", "Simple", "Another simple with HashSet", "Another Simple", "Option (Compiled) Regex", "Regex", "EndsWith", }; List<RollDelegate> rollMethods = new List<RollDelegate>{ RollLargeLookupFastRandomUInt, RollLargeLookupFastRandom, RollLargeLookupOptimizedRandom, FastestOptimizedRandomModded, RollNumbers, RollLargeLookupParameterlessRandom, RollLargeLookup, RollLookupOptimizedModded, FastestOptimizedModded, RollOptimizedModdedConst, RollOptimizedModded, RollModded, RollSimple, RollSimpleHashSet, RollAnotherSimple, RollOptionRegex, RollRegex, RollEndsWith }; int trial = 10000000; InitLargeLookup(); for (int k = 0; k < rollMethods.Count; ++k) { rnd = new Random(10000); fastRnd = new FastRandom(10000); logBox.GetTimeLapse(); for (int i = 0; i < trial; ++i) rollMethods[k](); logBox.WriteTimedLogLine(rollMethodNames[k] + ": " + logBox.GetTimeLapse()); } } 

结果(首选32位):

 [2016-05-30 08:20:54.056 UTC] Large Lookup Fast Random UInt: 219 ms [2016-05-30 08:20:54.296 UTC] Large Lookup Fast Random: 238 ms [2016-05-30 08:20:54.524 UTC] Large Lookup Optimized Random: 228 ms [2016-05-30 08:20:54.810 UTC] Fastest Optimized Random Modded: 286 ms [2016-05-30 08:20:55.347 UTC] Numbers: 537 ms [2016-05-30 08:20:55.596 UTC] Large Lookup Parameterless Random: 248 ms [2016-05-30 08:20:55.916 UTC] Large Lookup: 320 ms [2016-05-30 08:20:56.231 UTC] Lookup Optimized Modded: 315 ms [2016-05-30 08:20:56.577 UTC] Fastest Optimized Modded: 345 ms [2016-05-30 08:20:57.049 UTC] Optimized Modded Const: 472 ms [2016-05-30 08:20:57.521 UTC] Optimized Modded: 471 ms [2016-05-30 08:20:58.017 UTC] Modded: 496 ms [2016-05-30 08:20:59.685 UTC] Simple: 1668 ms [2016-05-30 08:21:01.824 UTC] Another simple with HashSet: 2138 ms [2016-05-30 08:21:04.837 UTC] Another Simple: 3013 ms [2016-05-30 08:21:13.794 UTC] Option (Compiled) Regex: 8956 ms [2016-05-30 08:21:28.827 UTC] Regex: 15032 ms [2016-05-30 08:21:53.589 UTC] EndsWith: 24763 ms 

结果(Non Prefer 32-Bit):

 [2016-05-30 08:16:00.934 UTC] Large Lookup Fast Random UInt: 273 ms [2016-05-30 08:16:01.230 UTC] Large Lookup Fast Random: 294 ms [2016-05-30 08:16:01.503 UTC] Large Lookup Optimized Random: 273 ms [2016-05-30 08:16:01.837 UTC] Fastest Optimized Random Modded: 333 ms [2016-05-30 08:16:02.245 UTC] Numbers: 408 ms [2016-05-30 08:16:02.532 UTC] Large Lookup Parameterless Random: 287 ms [2016-05-30 08:16:02.816 UTC] Large Lookup: 284 ms [2016-05-30 08:16:03.145 UTC] Lookup Optimized Modded: 329 ms [2016-05-30 08:16:03.486 UTC] Fastest Optimized Modded: 340 ms [2016-05-30 08:16:03.824 UTC] Optimized Modded Const: 337 ms [2016-05-30 08:16:04.154 UTC] Optimized Modded: 330 ms [2016-05-30 08:16:04.524 UTC] Modded: 370 ms [2016-05-30 08:16:05.700 UTC] Simple: 1176 ms [2016-05-30 08:16:07.309 UTC] Another simple with HashSet: 1609 ms [2016-05-30 08:16:09.774 UTC] Another Simple: 2465 ms [2016-05-30 08:16:17.450 UTC] Option (Compiled) Regex: 7675 ms [2016-05-30 08:16:34.090 UTC] Regex: 16640 ms [2016-05-30 08:16:54.793 UTC] EndsWith: 20702 ms 

和图片:

在这里输入图像说明

@StianStandahls朋友在这里。 这个解决方案最快! 这与@Ians答案中以前最快的例子是一样的,但是随机生成器在这里被优化。

 private const string CONST_DOUBLES = "doubles"; private const string CONST_NONE = "none"; public string FastestOptimizedModded() { return (rnd.Next(99999)+1) % 100 % 11 == 0 ? CONST_DOUBLES : CONST_NONE; } 

至于最好的表现,我相信@Ian已经很好的涵盖了。 所有学分都归他。

在Q / A中没有回答我的满意的一件事是,在这种情况下,Regex's胜过EndsWith。 我觉得有必要解释这种差异,所以人们认识到在哪种情况下哪种解决方案可能会更好地工作。

以。。结束

EndsWith的功能基本上是按顺序对部分字符串进行“比较”。 像这样的东西:

 bool EndsWith(string haystack, string needle) { bool equal = haystack.Length >= needle.Length; for (int i=0; i<needle.Length && equal; ++i) { equal = s[i] == needle[needle.Length - haystack.Length + i]; } return equal; } 

代码非常简单, 我们只需要第一个字符,看它是否匹配,然后是下一个等,直到我们到达字符串的末尾。

正则表达式

正则表达式的工作方式不同。 考虑在一个非常大的草垛寻找针“foofoo”。 显而易见的实现是在第一个字符处,检查它是否是“f”,移到下一个字符等,直到到达字符串的末尾。 但是,我们可以做得更好:

仔细看看这个任务。 如果我们先查看字符串的第5个字符,并注意它不是“o”(最后一个字符),我们可以立即跳到第11个字符,再次检查它是否是“o”。 那样的话,在最好的情况下,我们会得到一个很好的改进,比原来的因子6的代码好,在最坏的情况下,性能也是一样的。

另外请注意,正则表达式可以通过“或”等变得更加复杂。如果我们只需要查看尾随字符,则进行正向扫描不再有意义。

这就是为什么正则表达式通常使用NFA编译为DFA的原因。 这里有一个很棒的在线工具: http ://hackingoff.com/compilers/regular-expression-to-nfa-dfa,它显示了这个样子(简单的正则表达式)。

在内部,你可以让.NET使用Reflection.Emit来编译正则表达式,而当你使用正则表达式时,你实际上会评估这个经过优化的编译状态机( RegexOptions.Compiled )。

你可能最终会得到这样的结果:

 bool Matches(string haystack) { char _1; int index = 0; // match (.) state0: if (index >= haystack.Length) { goto stateFail; } _1 = haystack[index]; state = 1; ++index; goto state1; // match \1{1} state1: if (index >= haystack.Length) { goto stateFail; } if (_1 == haystack[index]) { ++index; goto state2; } goto stateFail; // match \1{2,*}$ -- usually optimized away because it always succeeds state1: if (index >= haystack.Length) { goto stateSuccess; } if (_1 == haystack[index]) { ++index; goto state2; } goto stateSuccess; stateSuccess: return true; stateFail: return false; } 

那么什么更快?

那么,这取决于。 从表达式中确定NFA / DFA,编译程序和每次调用查找程序并对其进行评估都会产生开销。 对于非常简单的情况, EndsWith击败了正则表达式。 在这种情况下,' EndsWith '中的'OR'使其比正则表达式慢。

另一方面,正则表达式通常是您多次使用的东西,这意味着您只需要编译一次,只需查看每个调用即可。

由于在这个时候主题已经转移到Random方法微优化,我将集中在LargeLookup实现。

首先, RollLargeLookupParameterlessRandom解决方案除了偏见还有另外一个问题。 所有其他实现检查范围[1,99999]( 包括端点)中的随机数字,即总计99999个数字,而% 100000产生范围[0,999999](即总数100000个数字)。

因此,请更正此问题,并同时通过删除添加操作来优化RollLargeLookup实现:

 private string[] LargeLookup; private void InitLargeLookup() { LargeLookup = new string[99999]; for (int i = 0; i < LargeLookup.Length; i++) { LargeLookup[i] = (i + 1) % 100 % 11 == 0 ? "doubles" : "none"; } } public string RollLargeLookup() { return LargeLookup[rnd.Next(99999)]; } public string RollLargeLookupParameterlessRandom() { return LargeLookup[rnd.Next() % 99999]; } 

现在,我们是否可以进一步优化RollLargeLookupParameterlessRandom实现,同时消除RollLargeLookupParameterlessRandom的偏见问题并使其与其他实现兼容? 事实证明,我们可以。 为了做到这一点,我们需要知道Random.Next(maxValue)实现是这样的:

 return (int)((Next() * (1.0 / int.MaxValue)) * maxValue); 

请注意, 1.0 / int.MaxValue是在编译时评估的常量。 这个想法是用maxValue取代1.0 (在我们的例子中也是常数99999),从而消除了一个乘法。 所以最终的功能是:

 public string RollLargeLookupOptimizedRandom() { return LargeLookup[(int)(rnd.Next() * (99999.0 / int.MaxValue))]; } 

有趣的是,这不仅修复了RollLargeLookupParameterlessRandom问题,而且更快了一点。

实际上,这个优化可以应用于任何其他解决方案,所以最快的非查找实现将是:

 public string FastestOptimizedRandomModded() { return ((int)(rnd.Next() * (99999.0 / int.MaxValue)) + 1) % 100 % 11 == 0 ? CONST_DOUBLES : CONST_NONE; } 

但在显示性能测试之前,请证明结果与Random.Next(maxValue)实现兼容:

 for (int n = 0; n < int.MaxValue; n++) { var n1 = (int)((n * (1.0 / int.MaxValue)) * 99999); var n2 = (int)(n * (99999.0 / int.MaxValue)); Debug.Assert(n1 == n2); } 

最后,我的基准:

64操作系统,发布版本,首选32位=真

 Large Lookup Optimized Random: 149 ms Large Lookup Parameterless Random: 159 ms Large Lookup: 179 ms Lookup Optimized Modded: 231 ms Fastest Optimized Random Modded: 219 ms Fastest Optimized Modded: 251 ms Optimized Modded Const: 412 ms Optimized Modded: 416 ms Modded: 419 ms Simple: 1343 ms Another simple with HashSet: 1805 ms Another Simple: 2690 ms Option (Compiled) Regex: 8538 ms Regex: 14861 ms EndsWith: 39117 ms 

64操作系统,发布构建,首选32位=假

 Large Lookup Optimized Random: 121 ms Large Lookup Parameterless Random: 126 ms Large Lookup: 156 ms Lookup Optimized Modded: 168 ms Fastest Optimized Random Modded: 154 ms Fastest Optimized Modded: 186 ms Optimized Modded Const: 178 ms Optimized Modded: 180 ms Modded: 202 ms Simple: 795 ms Another simple with HashSet: 1287 ms Another Simple: 2178 ms Option (Compiled) Regex: 7246 ms Regex: 17090 ms EndsWith: 36554 ms 

如果对所有可能的值预生成整个查找表,那么可以排除更多的性能。 这将以最快的方法避免两个模块划分,所以会更快一些:

 private string[] LargeLookup; private void Init() { LargeLookup = new string[100000]; for (int i = 0; i < 100000; i++) { LargeLookup[i] = i%100%11 == 0 ? "doubles" : "none"; } } 

而这个方法本身就是:

 public string RollLargeLookup() { return LargeLookup[rnd.Next(99999) + 1]; } 

虽然看起来有点收敛 – 这种方法经常使用。 例如,最快速的扑克牌手评估器预先生成了数千个条目(具有非常聪明的技巧)的庞大阵列,然后在这个阵列上进行几个简单的查找来评估一个扑克手的优势。

通过使用可选的随机数发生器,您甚至可以更快地完成任务。 例如,如果用此 FastRandom类实现(基于xorshift算法)替换System.Random,它将快两倍。

If implement both large lookup table and FastRandom – on my computer it shows 100ms vs 220ms of RollLookupOptimizedModded.

Here is the source code of FastRandom class mentioned in my link above:

 public class FastRandom { // The +1 ensures NextDouble doesn't generate 1.0 const double REAL_UNIT_INT = 1.0 / ((double)int.MaxValue + 1.0); const double REAL_UNIT_UINT = 1.0 / ((double)uint.MaxValue + 1.0); const uint Y = 842502087, Z = 3579807591, W = 273326509; uint x, y, z, w; #region Constructors /// <summary> /// Initialises a new instance using time dependent seed. /// </summary> public FastRandom() { // Initialise using the system tick count. Reinitialise((int)Environment.TickCount); } /// <summary> /// Initialises a new instance using an int value as seed. /// This constructor signature is provided to maintain compatibility with /// System.Random /// </summary> public FastRandom(int seed) { Reinitialise(seed); } #endregion #region Public Methods [Reinitialisation] /// <summary> /// Reinitialises using an int value as a seed. /// </summary> /// <param name="seed"></param> public void Reinitialise(int seed) { // The only stipulation stated for the xorshift RNG is that at least one of // the seeds x,y,z,w is non-zero. We fulfill that requirement by only allowing // resetting of the x seed x = (uint)seed; y = Y; z = Z; w = W; } #endregion #region Public Methods [System.Random functionally equivalent methods] /// <summary> /// Generates a random int over the range 0 to int.MaxValue-1. /// MaxValue is not generated in order to remain functionally equivalent to System.Random.Next(). /// This does slightly eat into some of the performance gain over System.Random, but not much. /// For better performance see: /// /// Call NextInt() for an int over the range 0 to int.MaxValue. /// /// Call NextUInt() and cast the result to an int to generate an int over the full Int32 value range /// including negative values. /// </summary> /// <returns></returns> public int Next() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); // Handle the special case where the value int.MaxValue is generated. This is outside of // the range of permitted values, so we therefore call Next() to try again. uint rtn = w & 0x7FFFFFFF; if (rtn == 0x7FFFFFFF) return Next(); return (int)rtn; } /// <summary> /// Generates a random int over the range 0 to upperBound-1, and not including upperBound. /// </summary> /// <param name="upperBound"></param> /// <returns></returns> public int Next(int upperBound) { if (upperBound < 0) throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=0"); uint t = (x ^ (x << 11)); x = y; y = z; z = w; // The explicit int cast before the first multiplication gives better performance. // See comments in NextDouble. return (int)((REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * upperBound); } /// <summary> /// Generates a random int over the range lowerBound to upperBound-1, and not including upperBound. /// upperBound must be >= lowerBound. lowerBound may be negative. /// </summary> /// <param name="lowerBound"></param> /// <param name="upperBound"></param> /// <returns></returns> public int Next(int lowerBound, int upperBound) { if (lowerBound > upperBound) throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=lowerBound"); uint t = (x ^ (x << 11)); x = y; y = z; z = w; // The explicit int cast before the first multiplication gives better performance. // See comments in NextDouble. int range = upperBound - lowerBound; if (range < 0) { // If range is <0 then an overflow has occured and must resort to using long integer arithmetic instead (slower). // We also must use all 32 bits of precision, instead of the normal 31, which again is slower. return lowerBound + (int)((REAL_UNIT_UINT * (double)(w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))) * (double)((long)upperBound - (long)lowerBound)); } // 31 bits of precision will suffice if range<=int.MaxValue. This allows us to cast to an int and gain // a little more performance. return lowerBound + (int)((REAL_UNIT_INT * (double)(int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * (double)range); } /// <summary> /// Generates a random double. Values returned are from 0.0 up to but not including 1.0. /// </summary> /// <returns></returns> public double NextDouble() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; // Here we can gain a 2x speed improvement by generating a value that can be cast to // an int instead of the more easily available uint. If we then explicitly cast to an // int the compiler will then cast the int to a double to perform the multiplication, // this final cast is a lot faster than casting from a uint to a double. The extra cast // to an int is very fast (the allocated bits remain the same) and so the overall effect // of the extra cast is a significant performance improvement. // // Also note that the loss of one bit of precision is equivalent to what occurs within // System.Random. return (REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))); } /// <summary> /// Fills the provided byte array with random bytes. /// This method is functionally equivalent to System.Random.NextBytes(). /// </summary> /// <param name="buffer"></param> public void NextBytes(byte[] buffer) { // Fill up the bulk of the buffer in chunks of 4 bytes at a time. uint x = this.x, y = this.y, z = this.z, w = this.w; int i = 0; uint t; for (int bound = buffer.Length - 3; i < bound;) { // Generate 4 bytes. // Increased performance is achieved by generating 4 random bytes per loop. // Also note that no mask needs to be applied to zero out the higher order bytes before // casting because the cast ignores thos bytes. Thanks to Stefan Troschьtz for pointing this out. t = (x ^ (x << 11)); x = y; y = z; z = w; w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); buffer[i++] = (byte)w; buffer[i++] = (byte)(w >> 8); buffer[i++] = (byte)(w >> 16); buffer[i++] = (byte)(w >> 24); } // Fill up any remaining bytes in the buffer. if (i < buffer.Length) { // Generate 4 bytes. t = (x ^ (x << 11)); x = y; y = z; z = w; w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); buffer[i++] = (byte)w; if (i < buffer.Length) { buffer[i++] = (byte)(w >> 8); if (i < buffer.Length) { buffer[i++] = (byte)(w >> 16); if (i < buffer.Length) { buffer[i] = (byte)(w >> 24); } } } } this.x = x; this.y = y; this.z = z; this.w = w; } // /// <summary> // /// A version of NextBytes that uses a pointer to set 4 bytes of the byte buffer in one operation // /// thus providing a nice speedup. The loop is also partially unrolled to allow out-of-order-execution, // /// this results in about a x2 speedup on an AMD Athlon. Thus performance may vary wildly on different CPUs // /// depending on the number of execution units available. // /// // /// Another significant speedup is obtained by setting the 4 bytes by indexing pDWord (eg pDWord[i++]=w) // /// instead of adjusting it dereferencing it (eg *pDWord++=w). // /// // /// Note that this routine requires the unsafe compilation flag to be specified and so is commented out by default. // /// </summary> // /// <param name="buffer"></param> // public unsafe void NextBytesUnsafe(byte[] buffer) // { // if(buffer.Length % 8 != 0) // throw new ArgumentException("Buffer length must be divisible by 8", "buffer"); // // uint x=this.x, y=this.y, z=this.z, w=this.w; // // fixed(byte* pByte0 = buffer) // { // uint* pDWord = (uint*)pByte0; // for(int i=0, len=buffer.Length>>2; i < len; i+=2) // { // uint t=(x^(x<<11)); // x=y; y=z; z=w; // pDWord[i] = w = (w^(w>>19))^(t^(t>>8)); // // t=(x^(x<<11)); // x=y; y=z; z=w; // pDWord[i+1] = w = (w^(w>>19))^(t^(t>>8)); // } // } // // this.x=x; this.y=y; this.z=z; this.w=w; // } #endregion #region Public Methods [Methods not present on System.Random] /// <summary> /// Generates a uint. Values returned are over the full range of a uint, /// uint.MinValue to uint.MaxValue, inclusive. /// /// This is the fastest method for generating a single random number because the underlying /// random number generator algorithm generates 32 random bits that can be cast directly to /// a uint. /// </summary> /// <returns></returns> public uint NextUInt() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))); } /// <summary> /// Generates a random int over the range 0 to int.MaxValue, inclusive. /// This method differs from Next() only in that the range is 0 to int.MaxValue /// and not 0 to int.MaxValue-1. /// /// The slight difference in range means this method is slightly faster than Next() /// but is not functionally equivalent to System.Random.Next(). /// </summary> /// <returns></returns> public int NextInt() { uint t = (x ^ (x << 11)); x = y; y = z; z = w; return (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))); } // Buffer 32 bits in bitBuffer, return 1 at a time, keep track of how many have been returned // with bitBufferIdx. uint bitBuffer; uint bitMask = 1; /// <summary> /// Generates a single random bit. /// This method's performance is improved by generating 32 bits in one operation and storing them /// ready for future calls. /// </summary> /// <returns></returns> public bool NextBool() { if (bitMask == 1) { // Generate 32 more bits. uint t = (x ^ (x << 11)); x = y; y = z; z = w; bitBuffer = w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); // Reset the bitMask that tells us which bit to read next. bitMask = 0x80000000; return (bitBuffer & bitMask) == 0; } return (bitBuffer & (bitMask >>= 1)) == 0; } #endregion } 

Then you need to initialize it together with your Random:

 Random rnd = new Random(10000); FastRandom fastRnd = new FastRandom(10000); 

And method becomes:

 public string RollLargeLookup() { return LargeLookup[fastRnd.Next(99999) + 1]; } 

As several others have already pointed out string comparisons for numbers are not efficient.

  public static String RollNumbers() { int roll = rnd.Next(1, 100000); int lastDigit = roll % 10; int secondLastDigit = (roll / 10) % 10; if( lastDigit == secondLastDigit ) { return "doubles"; } else { return "none"; } } 

That will run on my machine in 50ms vs the 1200ms of the original approach. Most time is spent on allocating many small temporary objects. If you can you should get rid of strings in the first place. If that is your hot code path it can help to convert your data structures into something which is more expensive to create but very cheap to query. Lookup tables which have been shown here are a good start.

If you look closely to the LargeLookup implementation you will find that most of its good performance is because it does cheat by not using a string as key but it uses the inital random number with some calculations as index. If you try my solution it will most likely run faster because lookup tables tend to have bad cache coherency which makes memory accesses more expensive.

The fastest that I was able to achieve is optimizing the use of Random with the large lookup method:

 return LargeLookup[rnd.Next() % 100000]; 

And it runs 20% faster than the original since it avoid a division (look at Next() code vs Next(int maxValue) ).


Looking for real fairness IMHO , I changed a bit the way the method where tested.

TL; DR; here's the dasboard:

 |-----------------Name---------------|--Avg--|--Min--|---Max---| |------------------------------------|-------|-------|---------| |RollLargeLookup | 108| 122| 110,2| |RollLookupOptimizedModded | 141| 156| 145,5| |RollOptimizedModdedConst | 156| 159| 156,7| |RollOptimizedModded | 158| 163| 159,8| |RollNumbers | 197| 214| 200,9| |RollSimple | 1 242| 1 304| 1 260,8| |RollSimpleHashSet | 1 635| 1 774| 1 664,6| |RollAnotherSimple | 2 544| 2 732| 2 603,2| |RollOptionRegex | 9 137| 9 605| 9 300,6| |RollRegex | 17 510| 18 873| 17 959 | |RollEndsWith | 20 725| 22 001| 21 196,1| 

I changed a few points:

  • Pre-computed the numbers to test so each method were tested with the same set of numbers (taking out the random generation war and the biais I introduced);
  • Runned each method 10 times in a random order;
  • Introduced a parameter in each function;
  • Removed the dupes.

I created a class MethodToTest:

 public class MethodToTest { public delegate string RollDelegate(int number); public RollDelegate MethodDelegate { get; set; } public List<long> timeSpent { get; set; } public MethodToTest() { timeSpent = new List<long>(); } public string TimeStats() { return string.Format("Min: {0}ms, Max: {1}ms, Avg: {2}ms", timeSpent.Min(), timeSpent.Max(), timeSpent.Average()); } } 

Here's the main content:

 private static void Test() { List<MethodToTest> methodList = new List<MethodToTest> { new MethodToTest{ MethodDelegate = RollNumbers}, new MethodToTest{ MethodDelegate = RollLargeLookup}, new MethodToTest{ MethodDelegate = RollLookupOptimizedModded}, new MethodToTest{ MethodDelegate = RollOptimizedModdedConst}, new MethodToTest{ MethodDelegate = RollOptimizedModded}, new MethodToTest{ MethodDelegate = RollSimple}, new MethodToTest{ MethodDelegate = RollSimpleHashSet}, new MethodToTest{ MethodDelegate = RollAnotherSimple}, new MethodToTest{ MethodDelegate = RollOptionRegex}, new MethodToTest{ MethodDelegate = RollRegex}, new MethodToTest{ MethodDelegate = RollEndsWith}, }; InitLargeLookup(); Stopwatch s = new Stopwatch(); Random rnd = new Random(); List<int> Randoms = new List<int>(); const int trial = 10000000; const int numberOfLoop = 10; for (int j = 0; j < numberOfLoop; j++) { Console.Out.WriteLine("Loop: " + j); Randoms.Clear(); for (int i = 0; i < trial; ++i) Randoms.Add(rnd.Next(1, 100000)); // Shuffle order foreach (MethodToTest method in methodList.OrderBy(m => new Random().Next())) { s.Restart(); for (int i = 0; i < trial; ++i) method.MethodDelegate(Randoms[i]); method.timeSpent.Add(s.ElapsedMilliseconds); Console.Out.WriteLine("\tMethod: " +method.MethodDelegate.Method.Name); } } File.WriteAllLines(@"C:\Users\me\Desktop\out.txt", methodList.OrderBy(m => m.timeSpent.Average()).Select(method => method.MethodDelegate.Method.Name + ": " + method.TimeStats())); } 

这里是功能:

 //OP's Solution 2 public static String RollRegex(int number) { return Regex.IsMatch(number.ToString(), @"(.)\1{1,}$") ? "doubles" : "none"; } //Radin Gospodinov's Solution static readonly Regex OptionRegex = new Regex(@"(.)\1{1,}$", RegexOptions.Compiled); public static String RollOptionRegex(int number) { return OptionRegex.IsMatch(number.ToString()) ? "doubles" : "none"; } //OP's Solution 1 public static String RollEndsWith(int number) { if (number.ToString().EndsWith("11") || number.ToString().EndsWith("22") || number.ToString().EndsWith("33") || number.ToString().EndsWith("44") || number.ToString().EndsWith("55") || number.ToString().EndsWith("66") || number.ToString().EndsWith("77") || number.ToString().EndsWith("88") || number.ToString().EndsWith("99") || number.ToString().EndsWith("00")) { return "doubles"; } return "none"; } //Ian's Solution public static String RollSimple(int number) { string rollString = number.ToString(); return number > 10 && rollString[rollString.Length - 1] == rollString[rollString.Length - 2] ? "doubles" : "none"; } //Ian's Other Solution static List<string> doubles = new List<string>() { "00", "11", "22", "33", "44", "55", "66", "77", "88", "99" }; public static String RollAnotherSimple(int number) { string rollString = number.ToString(); return rollString.Length > 1 && doubles.Contains(rollString.Substring(rollString.Length - 2)) ? "doubles" : "none"; } //Dandré's Solution static HashSet<string> doublesHashset = new HashSet<string>() { "00", "11", "22", "33", "44", "55", "66", "77", "88", "99" }; public static String RollSimpleHashSet(int number) { string rollString = number.ToString(); return rollString.Length > 1 && doublesHashset.Contains(rollString.Substring(rollString.Length - 2)) ? "doubles" : "none"; } //Stian Standahl optimizes modded solution public static string RollOptimizedModded(int number) { return number % 100 % 11 == 0 ? "doubles" : "none"; } //Gjermund Grøneng's method with constant addition private const string CONST_DOUBLES = "doubles"; private const string CONST_NONE = "none"; public static string RollOptimizedModdedConst(int number) { return number % 100 % 11 == 0 ? CONST_DOUBLES : CONST_NONE; } //Corak's Solution, added on Gjermund Grøneng's private static readonly string[] Lookup = { "doubles", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none" }; public static string RollLookupOptimizedModded(int number) { return Lookup[number % 100 % 11]; } //Evk's Solution, large Lookup private static string[] LargeLookup; private static void InitLargeLookup() { LargeLookup = new string[100000]; for (int i = 0; i < 100000; i++) { LargeLookup[i] = i % 100 % 11 == 0 ? "doubles" : "none"; } } public static string RollLargeLookup(int number) { return LargeLookup[number]; } //Alois Kraus's Solution public static string RollNumbers(int number) { int lastDigit = number % 10; int secondLastDigit = (number / 10) % 10; return lastDigit == secondLastDigit ? "doubles" : "none"; }