Fast prime-generators with python

Euler project „Project Euler“ http://projecteuler.net inspired some coders to think about prime generation in python. Even if the primes are already known, the pure python implementations are interesting to get information about fast computation models for big number arrays.

Das „Project Euler“ http://projecteuler.net hat eine Reihe Programmierer inspiriert, sich mit der Frage der Erzeugung von Primzahlen auseinanderzusetzen. Das ist in erster Linie ein akademisches Unterfangen, weil die entsprechenden Primzahlen (<10**10 u.Ä.) natürlich bereits bekannt sind. Trotzdem eine spannende Aufgabe, fand ich, und habe eine Routine programmiert, von der ich hoffte, sie könne im Vergleich mithalten, indem sie durch Deduction die Anzahl der Kalkulationen drückt. Hat nur bedingt geklappt.

Hier ist der Sieger-Code von Robert William Hanks:


def rwh_primes(n):
# http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Returns a list of primes &lt; n """
sieve = [True] * n
for i in xrange(3,int(n**0.5)+1,2):
  if sieve[i]:
  sieve[i*i::2*i]=[False]*((n-i*i-1)/(2*i)+1)
return [2] + [i for i in xrange(3,n,2) if sieve[i]]


def rwh_primes1(n):
# http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Returns a list of primes &lt; n """ 
sieve = [True] * (n/2) 
for i in xrange(3,int(n**0.5)+1,2): 
  if sieve[i/2]: sieve[i*i/2::i] = [False] * ((n-i*i-1)/(2*i)+1)
return [2] + [2*i+1 for i in xrange(1,n/2) if sieve[i]] 


def rwh_primes2(n): 
# http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 
""" Input n&gt;=6, Returns a list of primes, 2 &lt;= p &lt; n """ 
correction = (n%6&gt;1)
n = {0:n,1:n-1,2:n+4,3:n+3,4:n+2,5:n+1}[n%6]
sieve = [True] * (n/3)
sieve[0] = False
for i in xrange(int(n**0.5)/3+1):
  if sieve[i]:
    k=3*i+1|1
  sieve[ ((k*k)/3) ::2*k]=[False]*((n/6-(k*k)/6-1)/k+1)
  sieve[(k*k+4*k-2*k*(i&amp;1))/3::2*k]=[False]*((n/6-(k*k+4*k-2*k*(i&amp;1))/6-1)/k+1)
return [2,3] + [3*i+1|1 for i in xrange(1,n/3-correction) if sieve[i]]

Link zur stackoverflow-Seite

However – meine Lösung ist auf einem MBPro15/i7 als schnellste hinter RWHs Lösungen 1 und 2 dennoch ziemlich gut weggekommen:


def primeSieveSeq(MAX_Int):
    if MAX_Int > 5*10**8:
        import ctypes
        int16Array = ctypes.c_ushort * (MAX_Int >> 1)
        sieve = int16Array()
        #print 'uses ctypes "unsigned short int Array"'
    else:
        sieve = (MAX_Int >> 1) * [False]
        #print 'uses python list() of long long int'
    if MAX_Int < 10**8:
        sieve[4::3] = [True]*((MAX_Int - 8)/6+1)
        sieve[12::5] = [True]*((MAX_Int - 24)/10+1)
    r = [2, 3, 5]
    n = 0
    for i in xrange(int(MAX_Int**0.5)/30+1):
        n += 3
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
        n += 2
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
        n += 1
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
        n += 2
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
        n += 1
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
        n += 2
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
        n += 3
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
        n += 1
        if not sieve[n]:
            n2 = (n << 1) + 1
            r.append(n2)
            n2q = (n2**2) >> 1
            sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1)
    if MAX_Int < 10**8:
        return [2, 3, 5]+[(p << 1) + 1 for p in [n for n in xrange(3, MAX_Int >> 1) if not sieve[n]]]
    n = n >> 1
    try:
        for i in xrange((MAX_Int-2*n)/30 + 1):
            n += 3
            if not sieve[n]:
                r.append((n << 1) + 1)
            n += 2
            if not sieve[n]:
                r.append((n << 1) + 1)
            n += 1
            if not sieve[n]:
                r.append((n << 1) + 1)
            n += 2
            if not sieve[n]:
                r.append((n << 1) + 1)
            n += 1
            if not sieve[n]:
                r.append((n << 1) + 1)
            n += 2
            if not sieve[n]:
                r.append((n << 1) + 1)
            n += 3
            if not sieve[n]:
                r.append((n << 1) + 1)
            n += 1
            if not sieve[n]:
                r.append((n << 1) + 1)
    except:
        pass
    return r

Auf jeden Fall bleibt festzuhalten, dass in python der Fill mit Array Access über Indexranges array[start:stop:step] = [True]*(((MAX_Int >> 1) – n2q – 1) / n2 + 1) jeder Schleifenkontrolle in Bezug auf processing-speed deutlich überlegen ist.