Monthly Archives: September 2013

Speed-comparison of two different designs of a prime-number-generator

Eventually i needed some prime numbers to play around with Bloom filtering.
The Python – script (see below) will produce primes below 2**x base number by stepping backward using the step parameter.
python primes.py <number of file> <2**x exponent x> <step> <number of primes to produce>

if the <number of file> parameter is a character output to a file will be omitted and the primes will print to stdout instead.

<number of primes to produce> defaults to 10

I tested the performance of a single threaded and multi-threaded approach on some PC of various age. The outcome was interesting:

primesSpeed

On one hand the script performance benefits very much from improvements of CPU architecture. Nonetheless clock speed and number of real cores seem to be of highest importance.


#! /usr/bin/python
# procuces scripts < 2**x by testing in a by 
import os
import sys
import subprocess
import threading
import pprint
import time

cnt = totcnt = 0
erg = []
max_concurrentThreads = 32
thread_limiter = threading.Semaphore(value=max_concurrentThreads)

# check 4 gmpy 
try:
  # raise
  from gmpy import mpz
  gmpy_ready = True
  print "Message: Using gmpy/GMP mpz.is_prime instead of openSSL prime. (up to 10times faster)"
except:
  gmpy_ready = False
  print """Message: 
  If you try to produce many or really large primes, 
  consider installing GMP ( http://gmplib.org ) 
  and gmpy/gmpy2 ( http:http://www.aleax.it/gmpy.html )"""

class checkr(threading.Thread):

  def __init__(self, num):
    threading.Thread.__init__(self)
    self.num = num

  def run(self):
    global erg, cnt
    thread_limiter.acquire()
    prm = check(self.num)
    if prm:
      erg.append(prm)
      cnt += 1
    thread_limiter.release()

def check(num):
  r = None
  if gmpy_ready:
    try:
      if mpz(num).is_prime() == 1:
        r = num
    except:
      print '#', num, '#'
  else:
    e = subprocess.Popen(['/usr/bin/openSSL', 'prime', num], stdout=subprocess.PIPE)
    if e.communicate()[0][-9:-7] == "is":
      r = num
  return r

def longnumget(start, step):
  start = str(start+step)
  step = 2*step
  lenstep = 2*len(str(step))
  def numSeperator(numAsString, lenstep):
    formatter = '%%0%ii' % lenstep
    frst, secnd = numAsString[:-lenstep], start[-lenstep:]
    diff = int(secnd)-step
    secnd = formatter % diff
    r = frst + secnd
    if diff < 0:
      r = numSeperator(numAsString, lenstep+1)
    return r
  while True:
    start = numSeperator(start, lenstep)
    while start[-1] == '5':
      start = numSeperator(start, lenstep)
    yield start

def shortnumget(start, step):
  start = start+step
  step = 2*step
  while True:
    start -= step
    while str(start)[-1] == '5':
      start -= step
    yield str(start)

if __name__ == '__main__':
  if len(sys.argv) == 5:
    p, seqNum, base2exp, step, count = map(str, sys.argv)
    if count:
      count = int(count) or 10
    step = abs(int(step))
    if step % 2 == 0:
      print("Step must be odd!")
      sys.exit()
    if int(base2exp) < 4000:
      numget = shortnumget
    else:
      numget = longnumget
      print "custom string-cut partitional substraction"
    if gmpy_ready:
      startnum = mpz('0b1'+int(base2exp)*'0', 0)
    else:
      startnum = eval('2**'+base2exp)
    nums = numget(startnum, step)
    sTime = time.time()
    while cnt <= count:
      # uncomment the next 2 lines to test of the single threaded approach
      # check(nums.next())
      # totcnt += 1
      # 
      # uncomment next 5 lines to test the threaded approach 
      startNewThread = threading.active_count() < max_concurrentThreads
      if startNewThread:
        t = checkr(nums.next())
        t.start()
        totcnt += 1

    print 'Time elapsed:', time.time() - sTime, 'seconds'
    print totcnt, 'numbers checked for prime' 
    print len(erg), 'primes found (', round(100.0*len(erg)/totcnt, 1), '%)'

    try:
      seqNum = int(seqNum)
      with file('primes_%s_%sK_%s.txt' % (seqNum, count/1000, base2exp), 'wb') as outFle:
        for i in xrange(len(erg)):
          print >> outFle, '2**%s-%s' % (base2exp, startnum - int(erg[i])), erg[i], hex(int(erg[i]))
    except Exception as e:
      pp = pprint.PrettyPrinter()
      pp.pprint(erg)