Extracting and Quantizing PyTorch Model Parameters and Activations

Extracting Model Parameters

import os
import torch

os.makedirs('weights', exist_ok=True)
# map_location='cpu' lets a checkpoint that was saved on a GPU load on a
# CPU-only machine. NOTE(review): if your torch version supports it, also
# pass weights_only=True — torch.load unpickles arbitrary objects otherwise.
model.load_state_dict(torch.load('model_weights.pth', map_location='cpu'))

# Dump every parameter/buffer in the state dict twice: a human-readable
# .txt and a raw .bin (float32 on-disk layout, inspectable with `od -t f4`).
for item_counter, (param_name, param_tensor) in enumerate(model.state_dict().items()):
    print(f"{param_name}: {param_tensor.shape}")
    # .detach().cpu() makes .numpy() safe for CUDA tensors and for tensors
    # that participate in autograd; plain .numpy() raises in both cases.
    param_array = param_tensor.detach().cpu().numpy()
    with open(f'weights/{item_counter}-{param_name}.txt', 'w') as txt_file:
        print(param_array, file=txt_file)
    param_array.tofile(f'weights/{item_counter}-{param_name}.bin')

Binary files can be inspected with od -t f4 <filename>.

Capturing Intermediate Activations

import torch
import re
import os

os.makedirs('activations', exist_ok=True)
hook_id = 0

def capture_activations(module, input_data, output_data):
    global hook_id
    mod_label = str(module).replace(' ', '').replace('\n', '')[:200]
    
    if isinstance(input_data, tuple):
        for idx, tensor_in in enumerate(input_data):
            if torch.is_tensor(tensor_in):
                tensor_in.numpy().tofile(f'activations/{hook_id}-{mod_label}-in-{idx}.bin')
    elif torch.is_tensor(input_data):
        input_data.numpy().tofile(f'activations/{hook_id}-{mod_label}-in.bin')
    
    output_data.numpy().tofile(f'activations/{hook_id}-{mod_label}-out.bin')
    hook_id += 1

def attach_hooks(network):
    """Register the activation-dumping hook on every module in *network*."""
    # modules() yields the same modules as named_modules(); we never used
    # the names, so iterate the modules directly.
    for submodule in network.modules():
        submodule.register_forward_hook(capture_activations)


attach_hooks(model)

Symmetric 8-Bit Quantization

Quantizing Weights

import numpy as np
import os

bit_depth = 8
quant_max = 2**(bit_depth - 1) - 1  # 127: symmetric signed int8 range


def quantize_weight_files(weights_dir='weights', q_max=quant_max):
    """Symmetrically quantize every exported weight tensor to int8.

    For each weight .bin in *weights_dir* this writes two siblings:
      ``<name>-q.bin`` — the int8 values
      ``<name>-s.bin`` — a single float32 scale (dequantize: int8 * scale)
    """
    # state_dict keys such as 'fc.weight' produce files like
    # '0-fc.weight.bin', so match the plain 'weight.bin' suffix rather
    # than '-weight.bin' (which only matched a bare top-level 'weight').
    weight_bins = [f for f in os.listdir(weights_dir) if f.endswith('weight.bin')]
    for file_name in weight_bins:
        arr_float = np.fromfile(os.path.join(weights_dir, file_name), dtype=np.float32)
        peak = np.max(np.abs(arr_float)) if arr_float.size else 0.0
        # An all-zero tensor would make the scale 0 and divide by zero below;
        # any positive scale maps it to all-zero int8, so fall back to 1.0.
        q_scale = peak / q_max if peak > 0 else 1.0
        arr_int8 = np.round(arr_float / q_scale).astype(np.int8)

        base, ext = os.path.splitext(file_name)
        arr_int8.tofile(os.path.join(weights_dir, f'{base}-q{ext}'))
        np.array([q_scale], dtype=np.float32).tofile(os.path.join(weights_dir, f'{base}-s{ext}'))


# Only run automatically when the extraction step has produced the directory.
if os.path.isdir('weights'):
    quantize_weight_files()

Quantizing Activations

import numpy as np
import os
import re

# Same symmetric int8 range as the weight pass, redefined here so this
# snippet also runs standalone.
quant_max = 2**7 - 1


def quantize_activation_files(act_dir='activations', q_max=quant_max):
    """Quantize captured Linear/Conv activations in *act_dir* to int8.

    For each matching activation .bin, writes ``<name>-q.bin`` (int8) and
    ``<name>-s.bin`` (single float32 scale; dequantize: int8 * scale).
    """
    # Inputs are written as '-in.bin' (single tensor) or '-in-<idx>.bin'
    # (tuple); outputs as '-out.bin'. The original regex silently skipped
    # the single-tensor '-in.bin' form.
    act_pattern = re.compile(r'-(?:in|out)(?:-\d+)?\.bin$')
    for fname in os.listdir(act_dir):
        if not act_pattern.search(fname):
            continue
        # Only quantize the layer types where int8 inference is applied.
        if '-Linear' not in fname and '-Conv' not in fname:
            continue

        data_fp32 = np.fromfile(os.path.join(act_dir, fname), dtype=np.float32)
        max_val = np.max(np.abs(data_fp32)) if data_fp32.size else 0.0
        # Guard against all-zero activations (e.g. post-ReLU): scale 0 would
        # divide by zero; scale 1.0 still maps them to all-zero int8.
        scale_factor = max_val / q_max if max_val > 0 else 1.0
        data_int8 = np.round(data_fp32 / scale_factor).astype(np.int8)

        root, extension = os.path.splitext(fname)
        data_int8.tofile(os.path.join(act_dir, f'{root}-q{extension}'))
        np.array([scale_factor], dtype=np.float32).tofile(os.path.join(act_dir, f'{root}-s{extension}'))


# Only run automatically when the capture step has produced the directory.
if os.path.isdir('activations'):
    quantize_activation_files()

Tags: pytorch Model Inspection Quantization Neural Networks python

Posted on Thu, 14 May 2026 22:53:31 +0000 by Stu