Implementing mRMR Feature Selection in MATLAB

Core Implementation Framework

1. Data Preprocessing Module
% Data standardization (Z-score)
function normalized_data = standardize_features(input_data)
    mean_vals = mean(input_data, 1);
    std_devs = std(input_data, 0, 1);
    normalized_data = (input_data - mean_vals) ./ std_devs;
end

% Discretization process (for continuous features)
function discretized_data = convert_to_discrete(input_data, bin_count)
    discretized_data = zeros(size(input_data));
    for feature_idx = 1:size(input_data, 2)
        bin_edges = linspace(min(input_data(:,feature_idx)), max(input_data(:,feature_idx)), bin_count+1);
        discretized_data(:,feature_idx) = discretize(input_data(:,feature_idx), bin_edges);
    end
end

2. Mutual Information Calculation Core Functions
function mi_value = calculate_mutual_info(feature_vector, target_variable)
    % Discrete feature mutual information calculation (based on histogram)
    sample_count = size(feature_vector, 1);
    [joint_distribution, edges_x, edges_y] = histcounts2(feature_vector, target_variable);
    P_joint = joint_distribution / sample_count;
    P_x = histcounts(feature_vector, edges_x) / sample_count;
    P_y = histcounts(target_variable, edges_y) / sample_count;
    
    % Avoid zero probability issues
    P_joint(P_joint < eps) = eps;
    P_x(P_x < eps) = eps;
    P_y(P_y < eps) = eps;
    
    mi_value = sum(sum(P_joint .* log2(P_joint ./ (P_x' * P_y))));
end

% Conditional mutual information calculation (Monte Carlo approximation)
function cond_mi_value = calculate_conditional_mi(feature_vector, target_variable, condition_set)
    sample_count = size(feature_vector, 1);
    iterations = 100; % Monte Carlo sampling iterations
    mi_sum = 0;
    for i = 1:iterations
        random_indices = randperm(sample_count, iterations);
        sampled_features = feature_vector(random_indices, :);
        sampled_targets = target_variable(random_indices);
        sampled_conditions = condition_set(random_indices, :);
        cond_mi_value = cond_mi_value + calculate_mutual_info(sampled_features, sampled_targets) ...
                     - calculate_mutual_info(sampled_conditions, sampled_targets);
    end
    cond_mi_value = cond_mi_value / iterations;
end

3. mRMR Feature Selection Main Program
function [selected_features, relevance_scores] = apply_mrmr_selection(feature_matrix, target_vector, num_features)
    % Input parameters:
    % feature_matrix: n×p feature matrix
    % target_vector: n×1 target variable
    % num_features: number of features to select
    
    [sample_count, feature_count] = size(feature_matrix);
    selected_features = [];
    remaining_features = 1:feature_count;
    
    % Calculate initial mutual information
    mi_values = arrayfun(@(i) calculate_mutual_info(feature_matrix(:,i), target_vector), 1:feature_count);
    
    % Select first feature
    [~, max_idx] = max(mi_values);
    selected_features = [selected_features, max_idx];
    remaining_features(remaining_features == max_idx) = [];
    
    % Iteratively select subsequent features
    for selection_round = 2:num_features
        best_score = -inf;
        best_candidate = 0;
        
        for candidate_idx = remaining_features
            % Calculate conditional mutual information
            cond_mi_sum = 0;
            for selected_idx = selected_features
                cond_mi_sum = cond_mi_sum + calculate_conditional_mi(feature_matrix(:,candidate_idx), target_vector, feature_matrix(:,selected_idx));
            end
            avg_cond_mi = cond_mi_sum / length(selected_features);
            
            % Calculate mRMR score
            current_score = calculate_mutual_info(feature_matrix(:,candidate_idx), target_vector) - avg_cond_mi;
            
            if current_score > best_score
                best_score = current_score;
                best_candidate = candidate_idx;
            end
        end
        
        selected_features = [selected_features, best_candidate];
        remaining_features(remaining_features == best_candidate) = [];
    end
    
    relevance_scores = calculate_mutual_info(feature_matrix, target_vector);
end

Optimization Techniques

1. Computational Efficiency Improvements
  • Parallel computing acceleration: Utilize MATLAB Parallel Computing Toolbox to accelerate conditional mutual information calculations

    parfor candidate_idx = 1:length(remaining_features)
        % Parallel computation of scores for each candidate feature
    end
    
    
  • Approximation algorithms: Use k-nearest neighbors (k-NN) instead of exact probability estimation

    function mi_value = mi_knn_approximation(feature_vector, target_vector, k_neighbors)
        sample_count = size(feature_vector, 1);
        mi_sum = 0;
        for i = 1:sample_count
            % Use k-NN to estimate joint distribution
            [neighbor_indices] = knnsearch(feature_vector, feature_vector(i,:), 'K', k_neighbors+1);
            joint_counts = histcounts2(feature_vector(neighbor_indices(2:end),:), target_vector(neighbor_indices(2:end)));
            feature_counts = histcounts(feature_vector(i,:), linspace(min(feature_vector(:,1)), max(feature_vector(:,1)), k_neighbors));
            target_counts = histcounts(target_vector(i), linspace(min(target_vector), max(target_vector), k_neighbors));
            mi_sum = mi_sum + log2(sum(joint_counts(:)) / (sum(feature_counts)*sum(target_counts)));
        end
        mi_value = mi_sum / sample_count;
    end
    
    
2. High-Dimensional Data Processing
  • Feature pre-screening: First use chi-square test to select top 50% of features

    function preselected_indices = chi2_preselection(feature_matrix, target_vector, selection_ratio)
        h_test = chi2gof(feature_matrix, 'Expected', mean(feature_matrix,1), 'Alpha', 0.05);
        [~, ranking] = sort(h_test.p, 'descend');
        preselected_indices = ranking(1:round(selection_ratio*size(feature_matrix,2)));
    end
    
    
  • Block processing: Divide feature matrix into blocks (100 features per block)

    block_size = 100;
    num_blocks = ceil(feature_count / block_size);
    for block_num = 1:num_blocks
        start_idx = (block_num-1)*block_size + 1;
        end_idx = min(block_num*block_size, feature_count);
        % Process each feature block
    end
    
    

Typical Application Examples

1. Gene Expression Data Analysis
% Load data
load('gene_expression_data.mat');
feature_data = gene_data(:,2:end); % Remove sample ID column
target_labels = gene_data(:,1);     % Disease labels

% Data preprocessing
normalized_features = standardize_features(feature_data);
discrete_features = convert_to_discrete(normalized_features, 10);

% mRMR feature selection
[selected_indices, importance_scores] = apply_mrmr_selection(discrete_features, target_labels, 20);

% Result visualization
bar(importance_scores(selected_indices));
xlabel('Feature Index'); ylabel('Mutual Information Value');
title('mRMR Feature Importance Ranking');

2. Image Texture Feature Selection
% Extract Local Binary Pattern (LBP) features
texture_features = extractLBPFeatures(im2single(image_collection));

% Apply mRMR dimensionality reduction
[selected_indices, ~] = apply_mrmr_selection(texture_features, class_labels, 50);

% Use SVM for classification
classification_model = fitcsvm(texture_features(:,selected_indices), class_labels);

Recommended Toolboxes

  1. Statistics and Machine Learning Toolbox: Contains the fscmrmr function for fast mRMR computation with parlalel computing support

  2. Deep Learning Toolbox:

    • Combines deep feature extraction with feature selection

    • Example code:

      network_layers = [imageInputLayer([28 28 1])
               convolution2dLayer(3,16,'Padding','same')
               reluLayer
               maxPooling2dLayer(2,'Stride',2)
               fullyConnectedLayer(10)
               classificationLayer];
      
      
  3. Image Processing Toolbox: Provides HOG, LBP and other feature extraction functions with automatic image feature dimensionality reduction

For practical applications, it's recommended to use the built-in fscmrmr function for rapid validation before customizing the optimization algorithm based on specific requirements. For extremely large-scale datasets, consider combining Hadoop/Matlab Parallel Server for distributed computing implementation.

Tags: feature selection mRMR algorithm MATLAB mutual information dimensionality reduction

Posted on Tue, 19 May 2026 15:21:56 +0000 by danieliser