Project Euler 153(6)

最初に戻って、素数ガウス整数の素因数を格納するときに、2つ配列を作ってそれぞれに実部と虚部を格納していましたが、よく考えるとそれぞれは10000以下なので、16ビットごとにそれを格納して一つの整数にすればよいですね。
そうすると、それでも250MBくらい、50分かかりました。

from itertools import *
from math import sqrt

def sieve(max_n):
    L = max_n / 100
    a = [ True ] * L
    for p in takewhile(lambda n: n * n < L,
                    (n for n in xrange(2, L) if a[n])):
        for k in xrange(p * 2, L, p):
            a[k] = False
    primes = [ n for n in xrange(2, L) if a[n] ]
    
    for offs in xrange(L, max_n + 1, L):
        a = [ True ] * L
        for p in takewhile(lambda n: n * n < offs + L, primes):
            for m in xrange((offs + p - 1) / p * p, offs + L, p):
                a[m-offs] = False
        primes.extend(k + offs for k in xrange(L)
                                if a[k] and k + offs <= max_n)
    return primes

def gen_pow(n, e):
    m = 1
    yield 1
    for k in xrange(e):
        m *= n
        yield m

def prime_pow_divisors(p, e):
    if p % 4 == 3:
        return ((p ** (e + 1) - 1) / (p - 1),)
    elif p % 4 == 1:
        w = fac_g[(p-5)/6]
        z = (w & 65535) + (w >> 16) * 1j
        a = ((p ** (e + 1) - 1) / (p - 1),)
        for e1 in xrange(1, e + 1):
            w = z ** e1 * (p ** (e - e1 + 1) - 1) / (p - 1)
            a = a + (w, w.conjugate())
        return a
    else:
        return (2 ** (e + 1) - 1, (2 ** e - 1) * (1 + 1j))

def mul_divs(divs1, divs2):
    return [ z1 * z2 for z1 in divs1 for z2 in divs2 ]

def gen_divisors1(n, k0 = 0):
    yield (1, (1,))
    if k0 < len(primes) and n >= primes[k0]:
        g_primes = takewhile(lambda p: p <= n, islice(primes, k0, None))
        for k, p in ((k, p) for k, p in enumerate(g_primes, k0) if p % 4 != 3):
            for e in takewhile(lambda e: p ** e <= n, count(1)):
                divs1 = prime_pow_divisors(p, e)
                for m, divs2 in gen_divisors1(n / p ** e, k + 1):
                    yield p ** e * m, mul_divs(divs1, divs2)

def sum_divisors((n, divs)):
    def s(z):
        if z.imag == 0:
            return int(abs(z.real))
        elif z.real == 0:
            return int(abs(z.imag))
        else:
            return abs(int(z.real)) + abs(int(z.imag))
    
    return sum(s(z) for z in divs) * sum_divisors3(N / n)

def sum_divisors3(n, k0 = 0):
    s = 1
    if k0 < len(primes) and n >= primes[k0]:
        g_primes = takewhile(lambda p: p <= n, islice(primes, k0, None))
        for k, p in ((k, p) for k, p in enumerate(g_primes, k0) if p % 4 == 3):
            for e in takewhile(lambda e: p ** e <= n, count(1)):
                m = p ** e
                s += (m * p - 1) / (p - 1) * sum_divisors3(n / m, k + 1)
    return s

def fac_gauss(N):
    M = int(sqrt(N))
    fac_g = [ 0 ] * (N / 6 + 1)
    for a in xrange(2, M + 1):
        for b in xrange(1, a):
            n = a * a + b * b
            if n <= N and (n % 12 == 1 or n % 12 == 5):
                fac_g[(n-5)/6] = a + (b << 16)
    return fac_g

N = 10 ** 8
primes = sieve(N)
fac_g = fac_gauss(N)
print sum(imap(sum_divisors, gen_divisors1(N)))