Extracting Model Parameters
import os
import torch
# Dump every entry of the model's state dict into the 'weights' directory in
# two forms: a human-readable text rendering (.txt) and the raw element bytes
# (.bin). `model` must already be constructed before this snippet runs.
os.makedirs('weights', exist_ok=True)
model.load_state_dict(torch.load('model_weights.pth'))

# enumerate() supplies the running index that prefixes each file name, so the
# files on disk keep the state dict's iteration order.
for item_counter, (param_name, param_tensor) in enumerate(model.state_dict().items()):
    print(f"{param_name}: {param_tensor.shape}")

    # Tensor.numpy() only accepts CPU tensors that do not require grad, so
    # detach and move to the CPU first (both are no-ops for a plain CPU
    # tensor). Convert once and reuse the array for both output files.
    param_array = param_tensor.detach().cpu().numpy()

    with open(f'weights/{item_counter}-{param_name}.txt', 'w') as txt_file:
        # NOTE: this is NumPy's default print format, which abbreviates large
        # arrays with "..."; the .bin file below holds the complete data.
        print(param_array, file=txt_file)

    # tofile() writes the elements back to back in the array's own dtype,
    # with no header and no shape information.
    param_array.tofile(f'weights/{item_counter}-{param_name}.bin')
The binary files can be inspected with `od -t f4 <filename>`, which prints their contents as 32-bit floats.
Capturing Intermediate Activations
import torch
import re
import os
os.makedirs('activations', exist_ok=True)

# Running index of hook invocations. It prefixes every file name so dumps
# from successive module calls never overwrite each other.
hook_id = 0


def _dump_tensor(tensor, path):
    """Write the raw element bytes of *tensor* to *path*."""
    # Tensor.numpy() refuses tensors that require grad or that live on a
    # non-CPU device. Activations captured during an ordinary forward pass
    # usually require grad, so detach and copy to the CPU first.
    tensor.detach().cpu().numpy().tofile(path)


def capture_activations(module, input_data, output_data):
    """Forward hook that dumps a module's input and output tensors to disk.

    Files are written to the 'activations' directory and named
    ``{hook_id}-{label}-in-{idx}.bin`` for each tensor in an input tuple,
    ``{hook_id}-{label}-in.bin`` for a bare tensor input, and
    ``{hook_id}-{label}-out.bin`` for a tensor output. When the module
    returns a tuple or list, each tensor in it is written to
    ``{hook_id}-{label}-out-{idx}.bin``. Non-tensor entries are skipped.

    The data is written in each tensor's own dtype with no header.

    Args:
        module: The module whose forward call triggered the hook.
        input_data: The inputs handed to the hook (a tuple, or a tensor).
        output_data: The value returned by the module's forward call.
    """
    global hook_id

    # The label is the module's repr with spaces and newlines removed, capped
    # at 200 characters to keep the file name short.
    mod_label = str(module).replace(' ', '').replace('\n', '')[:200]
    prefix = f'activations/{hook_id}-{mod_label}'

    if isinstance(input_data, tuple):
        for idx, tensor_in in enumerate(input_data):
            if torch.is_tensor(tensor_in):
                _dump_tensor(tensor_in, f'{prefix}-in-{idx}.bin')
    elif torch.is_tensor(input_data):
        _dump_tensor(input_data, f'{prefix}-in.bin')

    if torch.is_tensor(output_data):
        _dump_tensor(output_data, f'{prefix}-out.bin')
    elif isinstance(output_data, (tuple, list)):
        # Some modules return several values; dump every tensor among them
        # instead of failing on the container.
        for idx, tensor_out in enumerate(output_data):
            if torch.is_tensor(tensor_out):
                _dump_tensor(tensor_out, f'{prefix}-out-{idx}.bin')

    hook_id += 1
def attach_hooks(network):
    """Register ``capture_activations`` as a forward hook on every module.

    The hook is attached to each module yielded by ``network.modules()``,
    which includes ``network`` itself and any container modules.

    Args:
        network: The module whose sub-modules should be instrumented.

    Returns:
        A list with the handle returned by each ``register_forward_hook``
        call. Call ``.remove()`` on a handle to detach that hook again.
    """
    return [
        layer.register_forward_hook(capture_activations)
        for layer in network.modules()
    ]


# Keep the handles so the hooks can be removed once the activations have
# been captured.
hook_handles = attach_hooks(model)
Symmetric 8-Bit Quantization
Quantizing Weights
import numpy as np
import os
# Symmetric quantization of every dumped weight file: each tensor is scaled
# so its largest magnitude maps to quant_max, rounded, and stored as int8
# alongside the scale needed to recover the floating-point values.
bit_depth = 8
# Largest magnitude of the symmetric signed range (127 for 8 bits).
# NOTE: the output dtype below is fixed to int8, so bit_depth must not
# exceed 8.
quant_max = 2**(bit_depth - 1) - 1

weight_bins = [f for f in os.listdir('weights') if f.endswith('-weight.bin')]
for file_name in weight_bins:
    # The dumps carry no dtype information; they are read as float32.
    arr_float = np.fromfile(f'weights/{file_name}', dtype=np.float32)

    # np.max raises on an empty array, so treat an empty file as all zeros.
    peak = np.max(np.abs(arr_float)) if arr_float.size else 0.0

    # An all-zero tensor would give a scale of 0 and a division by zero
    # (NaN, then garbage int8 values). Any non-zero scale represents zeros
    # exactly, so fall back to 1.0.
    q_scale = peak / quant_max if peak > 0 else 1.0

    arr_int8 = np.round(arr_float / q_scale).astype(np.int8)

    # '<name>-weight.bin' -> '<name>-weight-q.bin' (quantized values) and
    # '<name>-weight-s.bin' (the single float32 scale).
    base, ext = os.path.splitext(file_name)
    arr_int8.tofile(f'weights/{base}-q{ext}')
    np.array([q_scale], dtype=np.float32).tofile(f'weights/{base}-s{ext}')
Quantizing Activations
import numpy as np
import os
import re
# Symmetric int8 quantization of the dumped activations of Linear and Conv
# modules. Each file gets its own scale, derived from its largest magnitude.

# Defined here as well so this snippet runs on its own; the values match the
# weight-quantization step (8 bits -> 127).
bit_depth = 8
quant_max = 2**(bit_depth - 1) - 1

act_files = os.listdir('activations')
# Only the raw dumps are selected: '-in-<idx>.bin' and '-out.bin'. The
# '-q.bin' / '-s.bin' files written below do not match either pattern, so
# re-running the snippet does not quantize its own output.
input_act_files = [f for f in act_files if re.search(r'-in-\d+\.bin$', f)]
output_act_files = [f for f in act_files if f.endswith('-out.bin')]
all_act_files = input_act_files + output_act_files

for fname in all_act_files:
    # The file name embeds the module's repr after the hook index, so this
    # keeps only modules whose repr starts with 'Linear' or 'Conv'.
    if '-Linear' not in fname and '-Conv' not in fname:
        continue

    # The dumps carry no dtype information; they are read as float32.
    data_fp32 = np.fromfile(f'activations/{fname}', dtype=np.float32)

    # np.max raises on an empty array, so treat an empty file as all zeros.
    max_val = np.max(np.abs(data_fp32)) if data_fp32.size else 0.0

    # An all-zero activation would give a scale of 0 and a division by zero
    # (NaN, then garbage int8 values). Any non-zero scale represents zeros
    # exactly, so fall back to 1.0.
    scale_factor = max_val / quant_max if max_val > 0 else 1.0

    data_int8 = np.round(data_fp32 / scale_factor).astype(np.int8)

    # '<name>.bin' -> '<name>-q.bin' (quantized values) and '<name>-s.bin'
    # (the single float32 scale).
    root, extension = os.path.splitext(fname)
    data_int8.tofile(f'activations/{root}-q{extension}')
    np.array([scale_factor], dtype=np.float32).tofile(f'activations/{root}-s{extension}')