Problem Overview
Online community comment systems often face the challenge of filtering offensive or inappropriate content. This implementation demonstrates how to build a text classification system using Naive Bayes to automatically identify malicious comments. The classifier categorizes input into two classes: acceptable (0) and offensive (1).
The approach involves converting text documents into numerical feature vectors, then computing conditional probabilities based on these vectors to build an effective classifier.
Converting Text to Feature Vectors
The first step requires transforming raw text into a format suitable for mathematical operations. Each document gets converted into a binary vector where each position corresponds to a unique word from the vocabulary. A value of 1 indicates the word exists in the document, while 0 indicates absence.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
def load_training_data():
    """Return the sample corpus of tokenized comments and their labels.

    Returns:
        A ``(comments, labels)`` pair. ``comments`` is a list of token
        lists; ``labels`` is a parallel list in which 1 marks an offensive
        comment and 0 an acceptable one.
    """
    # Each sample keeps a comment next to its label (1: offensive,
    # 0: acceptable) so the two cannot drift out of alignment.
    labelled_samples = [
        (['my', 'dog', 'has', 'flea', 'problem', 'help', 'please'], 0),
        (['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'], 1),
        (['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'], 0),
        (['stop', 'posting', 'stupid', 'worthless', 'garbage'], 1),
        (['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'], 0),
        (['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'], 1),
    ]
    comments = [tokens for tokens, _ in labelled_samples]
    labels = [flag for _, flag in labelled_samples]
    return comments, labels
def build_vocabulary(comments):
    """Collect every distinct word that appears in ``comments``.

    Args:
        comments: Iterable of tokenized comments (each an iterable of
            hashable words).

    Returns:
        A list containing each distinct word exactly once, in the order
        the words are first encountered. The order is deterministic, so
        the same corpus always produces the same vocabulary and therefore
        the same feature-vector layout from run to run.
    """
    # dict keys are unique and preserve insertion order, which gives a
    # reproducible de-duplication. Iterating a set of strings instead
    # would yield an order that can change between interpreter runs
    # because of string hash randomization.
    return list(dict.fromkeys(word for comment in comments for word in comment))
def convert_to_vector(vocabulary, comment):
    """Convert a comment into a binary presence vector over ``vocabulary``.

    Args:
        vocabulary: Sequence of known words; position ``i`` of the result
            corresponds to ``vocabulary[i]``.
        comment: Iterable of words to encode.

    Returns:
        A list of ``len(vocabulary)`` integers, 1 where the vocabulary word
        occurs in ``comment`` and 0 elsewhere. Words missing from the
        vocabulary are reported with a printed warning and otherwise
        ignored.
    """
    # Build the word -> index map once so each lookup is O(1) instead of
    # scanning the vocabulary list twice per word. setdefault keeps the
    # first index for a duplicated vocabulary entry, matching list.index.
    position_of = {}
    for position, term in enumerate(vocabulary):
        position_of.setdefault(term, position)

    vector = [0] * len(vocabulary)
    for word in comment:
        position = position_of.get(word)
        if position is None:
            print(f"Warning: '{word}' not found in vocabulary")
        else:
            vector[position] = 1
    return vector
Testing the vector conversion:
>>> import spamfilter
>>> sample_comments, sample_labels = spamfilter.load_training_data()
>>> word_list = spamfilter.build_vocabulary(sample_comments)
>>> word_list
['cute', 'love', 'help', 'garbage', 'quit', 'I', 'stop', 'is', 'park',
'flea', 'dalmation', 'licks', 'food', 'not', 'him', 'buying', 'posting',
'has', 'worthless', 'ate', 'to', 'maybe', 'please', 'dog', 'how', 'stupid',
'so', 'take', 'mr', 'problem', 'steak', 'my']
>>> spamfilter.convert_to_vector(word_list, sample_comments[0])
[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1]
Training the Naive Bayes Classifier
With feature vectors prepared, the next phase computes probabilities from the training data. The classifier calculates the probability of each class and the conditional probability of each word given each class.
def train_classifier(feature_matrix, category_labels):
    """Estimate Naive Bayes parameters from binary feature vectors.

    Args:
        feature_matrix: Sequence of per-document feature vectors, all of
            the same length (one entry per vocabulary word).
        category_labels: Sequence of labels parallel to ``feature_matrix``;
            a label equal to 1 means offensive, anything else is treated
            as acceptable.

    Returns:
        A tuple ``(log_probs_acceptable, log_probs_offensive, prior)``:
        the two arrays hold the log of each word's smoothed conditional
        probability for the respective class, and ``prior`` is the
        fraction of documents labelled offensive.
    """
    doc_total = len(feature_matrix)
    vocab_size = len(feature_matrix[0])

    # Prior probability of the offensive class.
    prior_offensive = sum(category_labels) / float(doc_total)

    # Slot 0 accumulates the acceptable class, slot 1 the offensive class.
    # Word counts start at one and the denominators at 2.0 (smoothing), so
    # a word never seen in a class does not get a zero probability.
    word_tallies = [np.ones(vocab_size), np.ones(vocab_size)]
    token_totals = [2.0, 2.0]

    for row_idx, row in enumerate(feature_matrix):
        slot = 1 if category_labels[row_idx] == 1 else 0
        word_tallies[slot] += row
        token_totals[slot] += sum(row)

    # Work in log space so later products of many small probabilities
    # become sums and do not underflow.
    log_probs_offensive = np.log(word_tallies[1] / token_totals[1])
    log_probs_acceptable = np.log(word_tallies[0] / token_totals[0])
    return log_probs_acceptable, log_probs_offensive, prior_offensive
Examining the internal calculations:
>>> num_docs = len(train_matrix)
>>> num_words = len(train_matrix[0])
>>> np.ones(num_words)
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
>>> for i in range(num_docs):
... if labels[i] == 1:
... offensive_counts += train_matrix[i]
... offensive_denom += sum(train_matrix[i])
... else:
... acceptable_counts += train_matrix[i]
... acceptable_denom += sum(train_matrix[i])
>>> acceptable_counts
array([2., 2., 2., 1., 1., 2., 2., 2., 1., 2., 2., 2., 1., 1., 3., 1.,
1., 2., 1., 2., 2., 1., 2., 2., 2., 1., 2., 1., 2., 2., 2., 4.])
>>> acceptable_denom
26.0
Classification and Testing
The trained classifier can now predict the category of new comments by computing the posterior probability for each class.
def predict_category(comment_vector, prob_acceptable, prob_offensive, prob_class_offensive):
    """Pick the class with the larger log-posterior score for a comment.

    Args:
        comment_vector: Feature vector of the comment to classify.
        prob_acceptable: Per-word log probabilities for the acceptable class.
        prob_offensive: Per-word log probabilities for the offensive class.
        prob_class_offensive: Prior probability of the offensive class.

    Returns:
        1 when the offensive score is strictly greater than the acceptable
        score, otherwise 0 (ties resolve to acceptable).
    """
    # Each score is the sum of the log-likelihoods of the words present
    # plus the log of the class prior.
    offensive_total = sum(comment_vector * prob_offensive) + np.log(prob_class_offensive)
    acceptable_total = sum(comment_vector * prob_acceptable) + np.log(1.0 - prob_class_offensive)

    if offensive_total > acceptable_total:
        return 1
    return 0
def evaluate_classifier():
    """Train on the sample corpus and print predictions for two comments.

    Builds the vocabulary and training matrix from the sample data, trains
    the classifier, then classifies one positive and one offensive test
    comment, printing each result.
    """
    corpus, corpus_labels = load_training_data()
    vocabulary = build_vocabulary(corpus)

    # One feature vector per training comment.
    feature_rows = [convert_to_vector(vocabulary, entry) for entry in corpus]

    log_p_acceptable, log_p_offensive, prior_offensive = train_classifier(
        np.array(feature_rows),
        np.array(corpus_labels),
    )

    probes = (
        ['love', 'my', 'dalmation'],  # test case 1: positive comment
        ['stupid', 'garbage'],        # test case 2: offensive comment
    )
    for test_comment in probes:
        test_vector = np.array(convert_to_vector(vocabulary, test_comment))
        prediction = predict_category(
            test_vector, log_p_acceptable, log_p_offensive, prior_offensive
        )
        print(f"{test_comment} classified as: {prediction}")
Running the test:
>>> spamfilter.evaluate_classifier()
['love', 'my', 'dalmation'] classified as: 0
['stupid', 'garbage'] classified as: 1
Key Implementation Details
Two modifications improve the classifier's reliability:
Laplace Smoothing: Initializing word counts to 1 instead of 0 prevents zero probabilities that would make the entire product zero. The denominator gets initialized to 2 to account for this adjustment.
Logarithmic Probabilities: Multiplying many small probabilities causes numerical underflow. Taking the logarithm of probabilities transforms multiplication into addition: log(P(A)*P(B)) = log(P(A)) + log(P(B)), preserving the relative ordering of probabilities while avoiding underflow issues.
The classifier successfully distinguishes between acceptable comments about pets and offensive comments containing insulting language.