Eventually i needed some prime numbers to play around with Bloom filtering.
The Python – script (see below) will produce primes below 2**x base number by stepping backward using the step parameter.
python primes.py <number of file> <2**x exponent x> <step> <number of primes to produce>
if the <number of file> parameter is a character output to a file will be omitted and the primes will print to stdout instead.
<number of primes to produce> defaults to 10
I tested the performance of a single threaded and multi-threaded approach on some PC of various age. The outcome was interesting:
On one hand the script performance benefits very much from improvements of CPU architecture. Nonetheless clock speed and number of real cores seem to be of highest importance.
#! /usr/bin/python
# procuces scripts < 2**x by testing in a by
import os
import sys
import subprocess
import threading
import pprint
import time
cnt = totcnt = 0
erg = []
max_concurrentThreads = 32
thread_limiter = threading.Semaphore(value=max_concurrentThreads)
# check 4 gmpy
try:
# raise
from gmpy import mpz
gmpy_ready = True
print "Message: Using gmpy/GMP mpz.is_prime instead of openSSL prime. (up to 10times faster)"
except:
gmpy_ready = False
print """Message:
If you try to produce many or really large primes,
consider installing GMP ( http://gmplib.org )
and gmpy/gmpy2 ( http:http://www.aleax.it/gmpy.html )"""
class checkr(threading.Thread):
def __init__(self, num):
threading.Thread.__init__(self)
self.num = num
def run(self):
global erg, cnt
thread_limiter.acquire()
prm = check(self.num)
if prm:
erg.append(prm)
cnt += 1
thread_limiter.release()
def check(num):
r = None
if gmpy_ready:
try:
if mpz(num).is_prime() == 1:
r = num
except:
print '#', num, '#'
else:
e = subprocess.Popen(['/usr/bin/openSSL', 'prime', num], stdout=subprocess.PIPE)
if e.communicate()[0][-9:-7] == "is":
r = num
return r
def longnumget(start, step):
start = str(start+step)
step = 2*step
lenstep = 2*len(str(step))
def numSeperator(numAsString, lenstep):
formatter = '%%0%ii' % lenstep
frst, secnd = numAsString[:-lenstep], start[-lenstep:]
diff = int(secnd)-step
secnd = formatter % diff
r = frst + secnd
if diff < 0:
r = numSeperator(numAsString, lenstep+1)
return r
while True:
start = numSeperator(start, lenstep)
while start[-1] == '5':
start = numSeperator(start, lenstep)
yield start
def shortnumget(start, step):
start = start+step
step = 2*step
while True:
start -= step
while str(start)[-1] == '5':
start -= step
yield str(start)
if __name__ == '__main__':
if len(sys.argv) == 5:
p, seqNum, base2exp, step, count = map(str, sys.argv)
if count:
count = int(count) or 10
step = abs(int(step))
if step % 2 == 0:
print("Step must be odd!")
sys.exit()
if int(base2exp) < 4000:
numget = shortnumget
else:
numget = longnumget
print "custom string-cut partitional substraction"
if gmpy_ready:
startnum = mpz('0b1'+int(base2exp)*'0', 0)
else:
startnum = eval('2**'+base2exp)
nums = numget(startnum, step)
sTime = time.time()
while cnt <= count:
# uncomment the next 2 lines to test of the single threaded approach
# check(nums.next())
# totcnt += 1
#
# uncomment next 5 lines to test the threaded approach
startNewThread = threading.active_count() < max_concurrentThreads
if startNewThread:
t = checkr(nums.next())
t.start()
totcnt += 1
print 'Time elapsed:', time.time() - sTime, 'seconds'
print totcnt, 'numbers checked for prime'
print len(erg), 'primes found (', round(100.0*len(erg)/totcnt, 1), '%)'
try:
seqNum = int(seqNum)
with file('primes_%s_%sK_%s.txt' % (seqNum, count/1000, base2exp), 'wb') as outFle:
for i in xrange(len(erg)):
print >> outFle, '2**%s-%s' % (base2exp, startnum - int(erg[i])), erg[i], hex(int(erg[i]))
except Exception as e:
pp = pprint.PrettyPrinter()
pp.pprint(erg)