Data Loading with PyTorch
PyTorch provides torch.utils.data.DataLoader as its primary interface for efficient data loading. Paired with a Dataset, it enables batched, shuffled, and parallelized data access without loading the entire dataset into memory at once.
import torch
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, features, labels):
        # Convert raw arrays to tensors once, up front
        self.features = torch.FloatTensor(features)
        self.labels = torch.LongTensor(labels)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        # Return a single (feature, label) pair; DataLoader handles batching
        return self.features[idx], self.labels[idx]
# Create dataset and dataloader
dataset = CustomDataset(X_data, y_data)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)
Key DataLoader parameters:
- batch_size: Number of samples per batch.
- shuffle: Randomize sample order each epoch.
- num_workers: Number of subprocesses for data loading (0 = load in the main process).
- drop_last: Discard the incomplete final batch if the dataset size isn't divisible by the batch size.
- collate_fn: Custom function to merge individual samples into a batch (see the sketch below).
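A custom collate_fn is needed when samples have varying shapes, since the default collation cannot stack them into one tensor. A minimal sketch, assuming each sample is a variable-length 1-D tensor; the name pad_collate and the padding value 0 are illustrative, not part of the API:

from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # batch is a list of (sequence, label) pairs of varying lengths
    sequences, labels = zip(*batch)
    # Pad every sequence to the length of the longest one in the batch
    padded = pad_sequence(sequences, batch_first=True, padding_value=0)
    return padded, torch.stack(labels)

loader = DataLoader(dataset, batch_size=32, collate_fn=pad_collate)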
Defining Neural Networks
Networks can be built using either nn.Sequential for simple stacks or custom nn.Module subclasses for complex architectures.
Sequential approach:
import torch.nn as nn
from collections import OrderedDict
model = nn.Sequential(OrderedDict([
    ('linear1', nn.Linear(784, 128)),
    ('relu1', nn.ReLU()),
    ('linear2', nn.Linear(128, 10))
]))
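A quick sanity check of the output shape; the batch size of 64 is arbitrary:

import torch

x = torch.randn(64, 784)   # a dummy batch of 64 flattened 28x28 inputs
logits = model(x)
print(logits.shape)        # torch.Size([64, 10])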
Custom module approach:
class FeedForwardNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # ReLU after the hidden layer; the output layer stays linear
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

net = FeedForwardNet(784, 256, 10)
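To verify the model's size, trainable parameters can be counted directly:

num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(f'{num_params:,} trainable parameters')  # 784*256 + 256 + 256*10 + 10 = 203,530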
Training Loop Implementation
Training involves defining a loss function and an optimizer, then iterating through data batches:
model.train()  # enable training-mode behavior (dropout, batch norm updates)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 10  # example value
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        optimizer.zero_grad()               # clear gradients from the previous step
        outputs = model(inputs)             # forward pass
        loss = criterion(outputs, targets)
        loss.backward()                     # backpropagate
        optimizer.step()                    # update parameters
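To train on a GPU when one is available, the model and each batch must live on the same device. A minimal sketch of that pattern:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

for inputs, targets in train_loader:
    # Move each batch to the same device as the model
    inputs, targets = inputs.to(device), targets.to(device)
    # ... forward/backward pass as above ...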
Validation and Metrics
During evaluation, gradient computation should be disabled to save memory and compute:
model.eval()  # disable training-mode behavior (dropout, batch norm updates)
total_correct = 0
total_samples = 0

with torch.no_grad():
    for inputs, targets in val_loader:
        outputs = model(inputs)
        predictions = outputs.argmax(dim=1)  # class with the highest logit
        total_correct += (predictions == targets).sum().item()
        total_samples += targets.size(0)

accuracy = total_correct / total_samples
Model Persistence
Save only model parameters (recommended):
# Save
torch.save(model.state_dict(), 'model_weights.pth')
# Load
model = FeedForwardNet(784, 256, 10)
model.load_state_dict(torch.load('model_weights.pth'))
model.eval()
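To resume training rather than just run inference, a common pattern is to save a checkpoint dictionary that also captures the optimizer state and the current epoch; the key names here are conventional, not required:

# Save a full training checkpoint
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, 'checkpoint.pth')

# Restore and continue training
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1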
Alternatively, save the entire model; this is less flexible because it pickles the full object, so the class definition must be importable at load time:
torch.save(model, 'full_model.pth')
loaded_model = torch.load('full_model.pth')
Early Stopping Implementation
Track validation loss to prevent overfitting. The loop below calls a compute_validation_loss helper; a minimal sketch of one, reusing criterion from the training section:
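def compute_validation_loss(model, val_loader):
    # Average the per-batch loss over the validation set
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            total_loss += criterion(outputs, targets).item()
    model.train()  # restore training mode for the next epoch
    return total_loss / len(val_loader)

The early-stopping loop itself: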
best_val_loss = float('inf')
patience_counter = 0
patience_limit = 5  # epochs to wait without improvement (example value)

for epoch in range(max_epochs):
    # ... training step ...

    # Validation
    val_loss = compute_validation_loss(model, val_loader)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')  # checkpoint the best model
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience_limit:
            break
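After the loop exits, restore the best checkpoint rather than keeping the final (possibly overfit) weights:

model.load_state_dict(torch.load('best_model.pth'))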