Comprehensive Guide to String Operations and Algorithms

Strings are fundamental data structures that store sequences of characters. In C++, strings are zero-indexed and their length can be obtained using len = s.size(). Strings can also be implemented as character arrays with len = strlen(s).

Basic String Operations

Insretion

C++ strings support various insertion methods:

string s = "abcd";
char ch = 'e';
s += ch;  // s becomes "abcde"

string s = "abcd", ch = "wxyz";
s.insert(2, ch);  // Insert at position 2, s becomes "abwxyzcd"
s.insert(2, ch, 1, 3);  // Insert substring of ch, s becomes "abxyzcd"
char ch1 = 'x';
s.insert(2, 3, ch1);  // Insert 3 copies of ch1, s becomes "abxxxcd"

Deletion

string s = "abcdefg";
s.erase(2, 4);  // Delete substring starting at position 2 with length 4, s becomes "abg"

Replacement

string s = "abcdefg", ch = "xyz";
s.replace(2, 4, ch);  // Replace substring with ch, s becomes "abxyzg"

String Algorithms

Hashing

Hash tables map values to indices using a hash function. For integers, we use a prime modulus and handle collisions with chaining:

#include<bits/stdc++.h>
using namespace std;
const int maxn = 1e5 + 10;
int head[maxn];
int A, p, tot = 0;
struct Node {
    int nxt, val;
};
Node h[maxn];

bool isPrime(int x) {
    if (x == 1) return false;
    if (x == 2) return true;
    if (x % 2 == 0) return false;
    for (int j = 3; j <= sqrt(x); j += 2)
        if (x % j == 0) return false;
    return true;
}

void insert(int x) {
    int k = (x % p + p) % p;
    h[++tot].val = x;
    h[tot].nxt = head[k];
    head[k] = tot;
}

bool find(int x) {
    for (int j = head[x % p]; j != -1; j = h[j].nxt)
        if (h[j].val == x) return true;
    return false;
}

int main() {
    scanf("%d", &A);
    memset(head, -1, sizeof(head));
    for (p = A + 1; ; p++)
        if (isPrime(p)) break;
    for (int i = 1; i <= A; i++) {
        int val;
        scanf("%d", &val);
        insert(val);
    }
    return 0;
}

String Hashing

String hashing converts strings to numeric values using a base (typically 131) and handles large numbers with unsigned 64-bit integers:

#include<bits/stdc++.h>
using namespace std;
typedef unsigned long long ull;
const int maxn = 50050;
int mod = 50021;
struct HashNode {
    int nxt;
    ull val;
};
HashNode edge[maxn];
int p = 131, n;
int head[maxn], tot = 0;

ull computeHash(string s) {
    ull num = 0;
    for (int j = 0; j < s.size(); j++)
        num = num * p + (ull)(s[j] - 'a' + 1);
    return num;
}

int hashIndex(ull x) {
    return (x % mod + mod) % mod;
}

bool insertHash(ull x) {
    int k = hashIndex(x);
    for (int j = head[k]; j != -1; j = edge[j].nxt)
        if (edge[j].val == x) return true;
    edge[++tot].val = x;
    edge[tot].nxt = head[k];
    head[k] = tot;
    return false;
}

int main() {
    scanf("%d", &n);
    memset(head, -1, sizeof(head));
    for (int i = 1; i <= n; i++) {
        string x;
        cin >> x;
        ull res = computeHash(x);
        if (insertHash(res))
            printf("%d\n", i);
    }
    return 0;
}

KMP Pattern Matching

KMP algorithm finds pattern occurrences in text with O(n+m) complexity:

void computeNext(string pattern, vector<int>& nxt) {
    int m = pattern.size();
    nxt[1] = 0;
    int j = 0;
    for (int i = 2; i <= m; i++) {
        while (j > 0 && pattern[i-1] != pattern[j])
            j = nxt[j];
        if (pattern[i-1] == pattern[j])
            j++;
        nxt[i] = j;
    }
}

void kmpSearch(string text, string pattern) {
    int n = text.size(), m = pattern.size();
    vector<int> nxt(m+1);
    computeNext(pattern, nxt);
    
    int j = 0, ans = 0;
    for (int i = 1; i <= n; i++) {
        while (j > 0 && (j == m || text[i-1] != pattern[j]))
            j = nxt[j];
        if (text[i-1] == pattern[j])
            j++;
        if (j == m) {
            ans++;
            printf("%d\n", i - m + 1);
        }
    }
    printf("%d\n", ans);
}

Minimum Representation

Finds the lexicographically smallest rotation of a string:

int findMinRepresentation(string s) {
    int n = s.size();
    s += s;  // Create doubled string
    int i = 0, j = 1, k = 0;
    
    while (i < n && j < n && k < n) {
        if (s[i+k] == s[j+k])
            k++;
        else {
            if (s[i+k] > s[j+k])
                i = i + k + 1;
            else
                j = j + k + 1;
            if (i == j) i++;
            k = 0;
        }
    }
    return min(i, j);
}

Trie Tree

A trie enables efficient string storage and retrieval:

const int ALPHABET = 26;
const int MAXN = 1000005;

int trie[MAXN][ALPHABET];
bool isEnd[MAXN];
int tot = 1;

void insert(string s) {
    int p = 1;
    for (int i = 0; i < s.size(); i++) {
        int ch = s[i] - 'a';
        if (trie[p][ch] == 0)
            trie[p][ch] = ++tot;
        p = trie[p][ch];
    }
    isEnd[p] = true;
}

bool search(string s) {
    int p = 1;
    for (int i = 0; i < s.size(); i++) {
        int ch = s[i] - 'a';
        if (trie[p][ch] == 0)
            return false;
        p = trie[p][ch];
    }
    return isEnd[p];
}

01 Trie

A binary trie for maximum XOR queries:

const int MAX_BITS = 32;
const int MAXN = 1000005;

int trie[MAXN][2];
int tot = 1;

void insert(int x) {
    int p = 1;
    for (int i = MAX_BITS; i >= 1; i--) {
        int bit = (x >> (i-1)) & 1;
        if (trie[p][bit] == 0)
            trie[p][bit] = ++tot;
        p = trie[p][bit];
    }
}

int query(int x) {
    int p = 1, result = 0;
    for (int i = MAX_BITS; i >= 1; i--) {
        int bit = (x >> (i-1)) & 1;
        if (trie[p][bit ^ 1] != 0) {
            result += (1 << (i-1));
            p = trie[p][bit ^ 1];
        } else {
            p = trie[p][bit];
        }
    }
    return result;
}

AC Automaton

Combines trie structure with KMP for multi-pattern matching:

const int ALPHABET = 26;
const int MAXN = 1000005;

int trie[MAXN][ALPHABET];
int fail[MAXN];
int flag[MAXN];
int tot = 1;

void insert(string s) {
    int p = 0;
    for (int i = 0; i < s.size(); i++) {
        int ch = s[i] - 'a';
        if (trie[p][ch] == 0)
            trie[p][ch] = ++tot;
        p = trie[p][ch];
    }
    flag[p]++;
}

void buildFail() {
    queue<int> q;
    for (int i = 0; i < ALPHABET; i++)
        if (trie[0][i])
            q.push(trie[0][i]), fail[trie[0][i]] = 0;
    
    while (!q.empty()) {
        int p = q.front(); q.pop();
        for (int i = 0; i < ALPHABET; i++) {
            if (trie[p][i] == 0)
                trie[p][i] = trie[fail[p]][i];
            else {
                fail[trie[p][i]] = trie[fail[p]][i];
                q.push(trie[p][i]);
            }
        }
    }
}

Suffix Arrays and Automata

Advanced string indexing structures for pattern matching and substring analysis.

Tags: string-algorithms KMP hash Trie ac-automaton

Posted on Sat, 27 Jun 2026 17:47:12 +0000 by healthnut