Python Random Number Prediction and RSA Factorization
Challenge 1: Reversing Python's Mersenne Twister
The vulnerable code snippet reveals a critical security flaw:
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad
import random
flag = b'WKCTF{}'
pad_flag = pad(flag, 16)
key = random.randbytes(16)
cipher = AES.new(key, AES.MODE_ECB)
print(cipher.encrypt(pad_flag))
with open('random.txt', 'w') as f:
for i in range(2496):
f.write(str(random.getrandbits(8)) + '\n')
The getrandbits(8) function extracts the high 8 bits from cosnecutive 32-bit outputs of the underlying PRNG. This means each output corresponds to the top byte of a getrandbits(32) call.
Internal Mechanics of Python's Random Module
When examining getrandbits(64), it becomes clear that 64-bit outputs are simply two consecutive 32-bit values concatenated, with the later value appearing first in the result.
import random
from Crypto.Util.number import *
random.seed(1)
print(hex(random.getrandbits(32))[2:]) # 2265b1f5
print(hex(random.getrandbits(32))[2:]) # 91b7584a
random.seed(1)
print(hex(random.getrandbits(64))[2:]) # 91b7584a2265b1f5
Similarly, randbytes(4) produces little-endian formatted output from the underlying 32-bit generator, while randbytes(8) is simply two 4-byte blocks concatenated in normal order.
Constructing the State Recovery Matrix
The relationship between the original 624-element state (X), the transformation matrix (T), and the observed 19968-byte leak (Z) follows: XT = Z, where all operations occur in GF(2).
To construct T, we set X to unit vectors representing each bit position and observe the corresponding outputs:
from sage.all import *
from random import Random
from tqdm import *
prng = Random()
length = 19968
def myState():
state = [0] * 624
i = 0
while i < length:
ind = i // 32
expont = i % 32
state[ind] = 1 << (31 - expont)
s = (3, tuple(state + [0]), None)
yield s
state[ind] = 0
i += 1
def getRow():
rng = Random()
gs = myState()
for i in range(length):
s = next(gs)
rng.setstate(s)
data = []
for i in range(length // 8):
data.extend(list(bin(rng.getrandbits(8))[2:].zfill(8)))
data = [int(i) for i in data]
row = vector(GF(2), data)
yield row
def buildBox():
b = matrix(GF(2), length, length)
rg = getRow()
for i in tqdm(range(length)):
b[i] = next(rg)
return b
Reconstructing the Internal State
Once T is constructed, solving XT = Z recovers the bit representation, which we then parse into 624 32-bit state elements:
def recoverState(T, leak):
x = T.solve_left(leak)
x = ''.join([str(i) for i in x.list()])
state = []
for i in range(624):
tmp = int(x[i * 32:(i + 1) * 32], 2)
state.append(tmp)
return state
Since the matrix T may not be full rank, the recovered state has two possible values for the first element. We determine the correct one by testing against the known leaked data:
def backfirst(state):
high = 0x80000000
low = 0x7fffffff
mask = 0x9908b0df
tmp = state[623] ^ state[396]
if tmp & high == high:
tmp ^= mask
tmp <<= 1
tmp |= 1
else:
tmp <<= 1
return (1 << 32 - 1) | tmp & low, tmp & low
With the complete 624-element state, we can predict all future outputs. To recover the original 16-byte AES key, we backtrack from the state:
from random import Random
from Crypto.Cipher import AES
from extend_mt19937_predictor import ExtendMT19937Predictor
state = [2922114156, 2886276701, 1168768544, ...] # Recovered state
predictor = ExtendMT19937Predictor()
for i in range(624):
predictor.setrandbits(state[i], 32)
# Get the 16-byte key (little-endian)
key = predictor.backtrack_getrandbits(16 * 8).to_bytes(16, 'little')
# Decrypt the flag
c = b'a\x93\xdc\xc3\x90\x0cK\xfa\xfb\x1c\x05$y\x16:\xfc\xf3+\xf8+%\xfe\xf9\x86\xa3\x17i+ab\xca\xb6\xcd\r\xa5\x94\xeavm\xdeo\xa7\xdf\xa9D\n\x02\xa3'
aes = AES.new(key, AES.MODE_ECB)
print(aes.decrypt(c))
# WKCTF{3f2af637b773613c18d27694f20d98fd}
Alternative Backtracking Implementation
For environments without ExtendMT19937Predictor, manual backtracking works as follows:
def inverse_right(res, shift, bits=32):
tmp = res
for i in range(bits // shift):
tmp = res ^ tmp >> shift
return tmp
def inverse_left_values(res, shift, mask, bits=32):
tmp = res
for i in range(bits // shift):
tmp = res ^ tmp << shift & mask
return tmp
def invert_temper(m):
m = inverse_right(m, 18)
m = inverse_left_values(m, 15, 4022730752)
m = inverse_left_values(m, 7, 2636928640)
m = inverse_right(m, 11)
return m
def recover_state(out):
return [invert_temper(i) for i in out]
Challenge 2: RSA Factorization via Mathematical Insight
The challenge providse RSA parameters with a specially constructed modulus:
from Crypto.Util.number import *
from sympy import *
table = string.ascii_letters + string.digits + "@?!*"
def myprime():
num = 0
for i in tqdm(permutations(table), total=factorial(len(table))):
temp = "".join(list(i))
if("flag" in temp or "FLAG" in temp or "f14G" in temp or "7!@9" in temp):
num += 1
return nextprime(num)
m = bytes_to_long(flag)
n = myprime() * getPrime(300)
c = pow(m, 65537, n)
The key observation is that the prime p is constructed from permutations containing forbidden substrings. The formula involves:
- 63! × 4 - 60! × 4 + 57!
This simplifies to:
- 1982608315404440064116146708361898137544773690227268628106279599612729753600000000000000 × 4
- minus 8320987112741390144276341183223364380754172606361245952449277696409600000000000000 × 4
- plus 40526919504877216755680601905432322134980384796226602145184481280000000000000
from Crypto.Util.number import *
from sympy import *
calc = 1982608315404440064116146708361898137544773690227268628106279599612729753600000000000000 * 4 - 8320987112741390144276341183223364380754172606361245952449277696409600000000000000 * 4 + 40526919504877216755680601905432322134980384796226602145184481280000000000000
p = nextprime(calc)
n = 10179374723747373757354331803486491859701644330006662145185130847839571647703918266478112837755004588085165750997749893646933873398734236153637724985137304539453062753420396973717
c = 1388132475577742501308652898326761622837921103707698682051295277382930035244575886211234081534946870195081797116999020335515058810721612290772127889245497723680133813796299680596
q = n // p
phi = (p - 1) * (q - 1)
d = inverse(65537, phi)
print(long_to_bytes(pow(c, d, n)))
The mathematical construction of the prime p allows us to factor n and decrypt the flag.