Deep Learning-Based Segmentation and Recognition of Oracle Bone Inscriptions: A Computer Vision Framework

Problem Context and Image Preprocessing Pipeline

Oracle bone inscriptions represent critical archaeological data, yet their digitization faces challenges from material degradation, scanning artifacts, and complex background textures. The primary task involves developing robust computational methods for automated character extraction and identification from high-resolution rubbings.

Multi-Stage Preprocessing Architecture

The preprocessing workflow employs a cascade of transformations to isolate meaningful glyph regions:

import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

def load_source_images(directory_path):
    """Collect every readable ``*.jpg`` image found directly in a directory.

    Files that OpenCV cannot decode (``cv2.imread`` returns ``None``) are
    left out of the result.

    Args:
        directory_path: Folder to scan (non-recursively) for ``*.jpg`` files.

    Returns:
        A list of ``(stem, image)`` tuples, where ``stem`` is the file name
        without its extension and ``image`` is whatever ``cv2.imread``
        returned for that file.
    """
    decoded = (
        (jpg_file.stem, cv2.imread(str(jpg_file)))
        for jpg_file in Path(directory_path).glob("*.jpg")
    )
    # Keep only the files that were actually decoded.
    return [(stem, pixels) for stem, pixels in decoded if pixels is not None]

def adaptive_preprocessing(image_stack):
    """Turn colour frames into binarized masks, one per input frame.

    Every frame goes through grayscale conversion, bilateral filtering,
    CLAHE contrast enhancement and Sauvola thresholding, in that order.

    Args:
        image_stack: Iterable of ``(identifier, frame)`` pairs. Each frame is
            handed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        A list of ``(identifier, binary_mask)`` pairs in input order.
    """

    def binarize(frame):
        """Run the full enhancement chain on a single frame."""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Edge-preserving smoothing keeps stroke outlines crisp.
        smoothed = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)

        # Local (tile-based) histogram equalization.
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        contrasted = equalizer.apply(smoothed)

        # Sauvola variant of the Niblack local threshold.
        return cv2.ximgproc.niBlackThreshold(
            contrasted,
            maxValue=255,
            type=cv2.THRESH_BINARY,
            blockSize=31,
            k=0.2,
            binarizationMethod=cv2.ximgproc.BINARIZATION_SAUVOLA,
        )

    return [(identifier, binarize(frame)) for identifier, frame in image_stack]

def morphological_cleanup(binary_images):
    """Despeckle binary masks and close small gaps inside glyph strokes.

    Args:
        binary_images: Iterable of ``(img_id, binary_map)`` pairs.

    Returns:
        A list of ``(img_id, cleaned_map)`` pairs in input order, where each
        map has been opened once and then closed twice with a 3x3 elliptical
        structuring element.
    """
    ellipse_3x3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

    def despeckle_and_fill(mask):
        """Open (remove small speckles), then close (fill intraglyph gaps)."""
        without_specks = cv2.morphologyEx(
            mask, cv2.MORPH_OPEN, ellipse_3x3, iterations=1
        )
        return cv2.morphologyEx(
            without_specks, cv2.MORPH_CLOSE, ellipse_3x3, iterations=2
        )

    return [(img_id, despeckle_and_fill(mask)) for img_id, mask in binary_images]

Feature Engineering for Interference Discrimination

Three primary interference patterns require specialized detection:

  1. Stochastic Dot Noise: Small, isolated pixel clusters that are close to circular (low eccentricity)
  2. Artificial Scratches: Linear structures with consistent orientation and aspect ratio
  3. Material Texture: Periodic patterns requiring frequency domain analysis
from skimage import measure, morphology
from scipy import fftpack

def interference_detection(cleaned_images, dot_max_area=50, dot_max_eccentricity=0.3,
                           line_min_area=500, line_min_eccentricity=0.95):
    """Classify connected components of each mask as interference patterns.

    Every 8-connected component is measured with ``measure.regionprops`` and
    tagged with at most one label:

    * ``'dot_noise'``: small and nearly circular
      (``area < dot_max_area`` and ``eccentricity < dot_max_eccentricity``).
    * ``'linear_artifact'``: large and strongly elongated
      (``area > line_min_area`` and ``eccentricity > line_min_eccentricity``).

    The previous linear-artifact rule required ``solidity < 0.6`` together
    with ``extent > 0.8``. Because a region's convex hull always fits inside
    its bounding box, solidity can never be smaller than extent, so that rule
    never matched anything. Elongation is used instead, which is also
    independent of the scratch orientation.

    Args:
        cleaned_images: Iterable of ``(img_id, mask)`` pairs.
        dot_max_area: Exclusive upper area bound (pixels) for dot noise.
        dot_max_eccentricity: Exclusive upper eccentricity bound for dot noise.
        line_min_area: Exclusive lower area bound (pixels) for linear artifacts.
        line_min_eccentricity: Exclusive lower eccentricity bound for linear
            artifacts.

    Returns:
        Dict mapping each ``img_id`` to a list of ``(kind, region)`` tuples,
        where ``kind`` is ``'dot_noise'`` or ``'linear_artifact'`` and
        ``region`` is the corresponding regionprops object. Components that
        match neither rule are omitted.
    """
    interference_metrics = {}

    for img_id, clean_mask in cleaned_images:
        # connectivity=2 -> diagonal neighbours belong to the same component
        labeled_regions = measure.label(clean_mask, connectivity=2)

        noise_indicators = []
        for region in measure.regionprops(labeled_regions):
            area = region.area
            eccentricity = region.eccentricity

            if area < dot_max_area and eccentricity < dot_max_eccentricity:
                noise_indicators.append(('dot_noise', region))
            elif area > line_min_area and eccentricity > line_min_eccentricity:
                noise_indicators.append(('linear_artifact', region))

        interference_metrics[img_id] = noise_indicators

    return interference_metrics

Convolutional Segmentation Network Architecture

The segmentation model employs an encoder-decoder topology with skip connections to preserve glyph structural integrity.

Network Design Specifications

import tensorflow as tf
from tensorflow.keras import layers, Model

def build_glyph_segmenter(input_shape=(512, 512, 1)):
    """Assemble and compile a small U-Net-style glyph segmentation model.

    The encoder has three convolutional stages (64, 128, 256 filters) with
    2x2 max pooling between them; the decoder mirrors the first two stages
    with transposed convolutions and concatenated skip connections. A 1x1
    sigmoid convolution produces the single-channel output mask.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 1e-4), binary
        cross-entropy loss, and accuracy, precision and recall metrics.
    """

    def conv3x3(width):
        """3x3 same-padded ReLU convolution with ``width`` filters."""
        return layers.Conv2D(width, (3, 3), activation='relu', padding='same')

    def encoder_stage(tensor, width):
        """Conv -> batch norm -> conv."""
        tensor = conv3x3(width)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return conv3x3(width)(tensor)

    def decoder_stage(tensor, skip, width):
        """Upsample by 2, concatenate the skip tensor, then two convs."""
        upsampled = layers.Conv2DTranspose(
            width, (2, 2), strides=(2, 2), padding='same'
        )(tensor)
        merged = layers.concatenate([skip, upsampled], axis=3)
        merged = conv3x3(width)(merged)
        return conv3x3(width)(merged)

    image_in = layers.Input(shape=input_shape)

    # Contracting path
    enc_full = encoder_stage(image_in, 64)
    enc_half = encoder_stage(layers.MaxPooling2D((2, 2))(enc_full), 128)
    deepest = encoder_stage(layers.MaxPooling2D((2, 2))(enc_half), 256)

    # Expanding path
    dec_half = decoder_stage(deepest, enc_half, 128)
    dec_full = decoder_stage(dec_half, enc_full, 64)

    mask_out = layers.Conv2D(1, (1, 1), activation='sigmoid')(dec_full)

    segmenter = Model(inputs=image_in, outputs=mask_out)
    segmenter.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
        ],
    )
    return segmenter

Training Strategy with Data Augmentation

def generate_augmented_samples(image_array, mask_array, batch_size=16):
    """Yield an endless stream of jointly augmented ``(images, masks)`` batches.

    Images and masks are stacked along the last axis so that one random
    transform is applied to both, then split apart again: the first channel
    of each augmented batch is yielded as the image and the remaining
    channels as the mask.

    Args:
        image_array: Image samples; the split assumes a single channel on the
            last axis (TODO confirm against callers).
        mask_array: Mask samples, concatenated after the image channel.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``(image_batch, mask_batch)`` tuples.
    """
    augmenter = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.2,
        horizontal_flip=False,
        fill_mode='nearest',
    )

    # Stack so image and mask always receive the same transform.
    stacked = np.concatenate([image_array, mask_array], axis=-1)
    batch_stream = augmenter.flow(stacked, batch_size=batch_size, seed=42)

    while True:
        augmented = next(batch_stream)
        images, masks = augmented[..., :1], augmented[..., 1:]
        yield images, masks

Evaluation Metrics and Validation Framework

Comprehensive assessment requires multi-dimensional metrics beyond conventional accuracy:

def calculate_glyph_metrics(ground_truth, predictions, threshold=0.5):
    """Compute pixel-level and character-level segmentation metrics.

    Pixel metrics compare the thresholded prediction with the ground truth
    element-wise. Character metrics label connected components in both masks
    and, for each ground-truth component, keep the best Intersection over
    Union (IoU) achieved by any predicted component.

    Args:
        ground_truth: Array whose foreground pixels equal 1 and background
            pixels equal 0.
        predictions: Array of scores with the same shape as ``ground_truth``.
        threshold: Scores strictly greater than this count as foreground.

    Returns:
        Dict with keys ``'pixel_precision'``, ``'pixel_recall'``,
        ``'pixel_f1'``, ``'character_accuracy'`` (fraction of ground-truth
        components whose best IoU exceeds 0.5) and ``'mean_iou'``. When the
        ground truth contains no components, the two character-level values
        are ``0.0`` rather than NaN.
    """
    binary_predictions = (predictions > threshold).astype(np.uint8)

    # Pixel-wise metrics
    true_positives = np.sum((ground_truth == 1) & (binary_predictions == 1))
    false_positives = np.sum((ground_truth == 0) & (binary_predictions == 1))
    false_negatives = np.sum((ground_truth == 1) & (binary_predictions == 0))

    # The 1e-7 terms guard against division by zero on empty masks.
    pixel_precision = true_positives / (true_positives + false_positives + 1e-7)
    pixel_recall = true_positives / (true_positives + false_negatives + 1e-7)
    pixel_f1 = 2 * (pixel_precision * pixel_recall) / (pixel_precision + pixel_recall + 1e-7)

    # Character-level metrics using connected components
    gt_labels = measure.label(ground_truth)
    pred_labels = measure.label(binary_predictions)

    pred_areas = {
        region.label: region.area for region in measure.regionprops(pred_labels)
    }

    iou_scores = []
    for gt_region in measure.regionprops(gt_labels):
        # Only predicted components that share pixels with this glyph can
        # have a non-zero IoU, so look just at the labels found under it
        # instead of comparing against every predicted component.
        covered = pred_labels[gt_labels == gt_region.label]
        hit_labels, hit_counts = np.unique(covered[covered > 0], return_counts=True)

        max_iou = 0
        for pred_label, intersection in zip(hit_labels, hit_counts):
            union = gt_region.area + pred_areas[int(pred_label)] - intersection
            max_iou = max(max_iou, intersection / (union + 1e-7))
        iou_scores.append(max_iou)

    if iou_scores:
        character_accuracy = np.mean([score > 0.5 for score in iou_scores])
        mean_iou = np.mean(iou_scores)
    else:
        # np.mean([]) would emit a RuntimeWarning and return NaN.
        character_accuracy = 0.0
        mean_iou = 0.0

    return {
        'pixel_precision': pixel_precision,
        'pixel_recall': pixel_recall,
        'pixel_f1': pixel_f1,
        'character_accuracy': character_accuracy,
        'mean_iou': mean_iou
    }

Character Recognition with Hybrid Neural Architecture

The recognition phase uses a residual convolutional network for feature extraction, followed by a fully connected classification head that assigns each glyph to a character class.

Feature Extraction and Classification Pipeline

def construct_recognition_network(num_classes=1500, input_shape=(64, 64, 1)):
    """Assemble and compile a ResNet-style oracle bone character classifier.

    The network is a strided 7x7 stem with max pooling, three residual units
    (64, 128 and 256 filters), global average pooling, a 512-unit dense
    layer with dropout, and a softmax output.

    Args:
        num_classes: Number of output classes (width of the softmax layer).
        input_shape: Shape of one input sample, passed to ``layers.Input``.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 0.001),
        categorical cross-entropy loss, and accuracy plus top-5 accuracy
        metrics.
    """

    def residual_block(features, filters, kernel_size=3):
        """Two-convolution residual unit with an optional 1x1 projection."""
        main_path = layers.Conv2D(filters, kernel_size, padding='same')(features)
        main_path = layers.BatchNormalization()(main_path)
        main_path = layers.ReLU()(main_path)

        main_path = layers.Conv2D(filters, kernel_size, padding='same')(main_path)
        main_path = layers.BatchNormalization()(main_path)

        # Project the identity branch only when the channel count changes.
        skip_path = features
        if features.shape[-1] != filters:
            skip_path = layers.Conv2D(filters, (1, 1), padding='same')(skip_path)
            skip_path = layers.BatchNormalization()(skip_path)

        summed = layers.Add()([main_path, skip_path])
        return layers.ReLU()(summed)

    glyph_in = layers.Input(shape=input_shape)

    # Stem: strided convolution followed by overlapping max pooling
    features = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same')(glyph_in)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)
    features = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(features)

    for width in (64, 128, 256):
        features = residual_block(features, width)

    # Classification head
    pooled = layers.GlobalAveragePooling2D()(features)
    hidden = layers.Dense(512, activation='relu')(pooled)
    hidden = layers.Dropout(0.5)(hidden)
    class_probs = layers.Dense(num_classes, activation='softmax')(hidden)

    recognition_model = Model(inputs=glyph_in, outputs=class_probs)
    recognition_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=5)],
    )
    return recognition_model

Inference and Result Generation

def execute_full_pipeline(test_directory, segmentation_model, recognition_model, output_path):
    """Segment and recognize characters in every ``*.jpg`` of a directory.

    For each image the function binarizes it with ``adaptive_preprocessing``,
    predicts a segmentation mask, extracts the external contours of the
    thresholded mask, and classifies every bounding box whose width and
    height both lie strictly between 20 and 200 pixels.

    Args:
        test_directory: Folder scanned (non-recursively) for ``*.jpg`` files.
        segmentation_model: Object with a Keras-style ``predict`` returning a
            mask indexed as ``[batch, row, col, channel]``.
        recognition_model: Object with a Keras-style ``predict`` returning
            per-class scores for a 64x64 single-channel crop.
        output_path: Destination of the JSON export.

    Returns:
        A list with one dict per readable image, holding ``'image_id'``,
        ``'character_count'`` and ``'detections'`` (each detection has
        ``'bbox'`` as ``(x, y, w, h)``, ``'class_id'`` and ``'confidence'``).
        Images are processed in sorted path order. The same structure is
        written to ``output_path`` as JSON.
    """
    import json

    results_catalog = []

    # glob() order depends on the filesystem; sort for reproducible output.
    for image_path in sorted(Path(test_directory).glob("*.jpg")):
        source_image = cv2.imread(str(image_path))
        if source_image is None:
            # Unreadable or corrupt file: skip it instead of crashing in
            # cvtColor, mirroring what load_source_images does.
            continue

        preprocessed = adaptive_preprocessing([(image_path.stem, source_image)])[0][1]

        # NOTE(review): the binarized image is fed to the segmenter at its
        # native size and 0-255 range - confirm this matches training.
        input_tensor = np.expand_dims(preprocessed, axis=[0, -1])
        segmentation_mask = segmentation_model.predict(input_tensor, verbose=0)

        # Extract character regions
        binary_mask = (segmentation_mask[0, ..., 0] > 0.5).astype(np.uint8) * 255
        contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        character_predictions = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)

            # Discard boxes that are too small or too large to be a glyph.
            if not (20 < w < 200 and 20 < h < 200):
                continue

            char_roi = preprocessed[y:y+h, x:x+w]
            char_roi = cv2.resize(char_roi, (64, 64))
            char_roi = char_roi.astype(np.float32) / 255.0

            recognition_input = np.expand_dims(char_roi, axis=[0, -1])
            prediction_scores = recognition_model.predict(recognition_input, verbose=0)

            # np.argmax yields a NumPy integer, which json.dump cannot
            # serialize; convert to built-in types before storing.
            predicted_class = int(np.argmax(prediction_scores))
            confidence = float(prediction_scores[0, predicted_class])

            character_predictions.append({
                'bbox': (int(x), int(y), int(w), int(h)),
                'class_id': predicted_class,
                'confidence': confidence
            })

        results_catalog.append({
            'image_id': image_path.stem,
            'character_count': len(character_predictions),
            'detections': character_predictions
        })

    # Export structured results
    with open(output_path, 'w') as export_file:
        json.dump(results_catalog, export_file, indent=2)

    return results_catalog

Handling Variant Character Forms

Oracle bone script exhibits significant glyph variations requiring specialized handling:

def build_variant_aware_loss(margin=0.5):
    """Create a triplet margin loss over concatenated embeddings.

    Args:
        margin: Constant added to the positive-minus-negative distance gap
            before clamping at zero.

    Returns:
        A function ``variant_loss(y_true, y_pred)``. It treats ``y_pred`` as
        three blocks laid side by side along the last axis (anchor, positive,
        negative), each ``y_pred.shape[-1] // 3`` wide, and returns the
        batch mean of the hinged squared-distance gap. ``y_true`` is ignored.
    """

    def variant_loss(y_true, y_pred):
        width = y_pred.shape[-1] // 3
        anchor = y_pred[:, :width]
        positive = y_pred[:, width:2 * width]
        negative = y_pred[:, 2 * width:]

        def squared_distance(other):
            """Squared Euclidean distance from the anchor, per sample."""
            return tf.reduce_sum(tf.square(anchor - other), axis=1)

        gap = squared_distance(positive) - squared_distance(negative) + margin
        hinged = tf.maximum(gap, 0.0)
        return tf.reduce_mean(hinged)

    return variant_loss

This framework provides a comprehensive solution for oracle bone inscription analysis, addressing preprocessing, segmentation, and recognition challenges through modern deep learning techniques while maintaining computational efficiency and result interpretability.

# Example execution workflow
if __name__ == "__main__":
    # Rebuild the segmentation network and restore its trained weights
    segmenter = build_glyph_segmenter()
    segmenter.load_weights('oracle_segmentation_weights.h5')

    # Same for the classifier; the class count is passed explicitly here
    recognizer = construct_recognition_network(num_classes=1200)
    recognizer.load_weights('oracle_recognition_weights.h5')

    # Run every image under ./Test through the pipeline and export JSON
    final_results = execute_full_pipeline(
        './Test',
        segmenter,
        recognizer,
        './recognition_results.json',
    )

Posted on Tue, 09 Jun 2026 16:29:31 +0000 by Michael_zz