Strings are fundamental data structures that store sequences of characters. In C++, strings are zero-indexed and their length can be obtained using len = s.size(). Strings can also be implemented as character arrays with len = strlen(s).
Basic String Operations
Insretion
C++ strings support various insertion methods:
string s = "abcd";
char ch = 'e';
s += ch; // s becomes "abcde"
string s = "abcd", ch = "wxyz";
s.insert(2, ch); // Insert at position 2, s becomes "abwxyzcd"
s.insert(2, ch, 1, 3); // Insert substring of ch, s becomes "abxyzcd"
char ch1 = 'x';
s.insert(2, 3, ch1); // Insert 3 copies of ch1, s becomes "abxxxcd"
Deletion
string s = "abcdefg";
s.erase(2, 4); // Delete substring starting at position 2 with length 4, s becomes "abg"
Replacement
string s = "abcdefg", ch = "xyz";
s.replace(2, 4, ch); // Replace substring with ch, s becomes "abxyzg"
String Algorithms
Hashing
Hash tables map values to indices using a hash function. For integers, we use a prime modulus and handle collisions with chaining:
#include<bits/stdc++.h>
using namespace std;
const int maxn = 1e5 + 10;
int head[maxn];
int A, p, tot = 0;
struct Node {
int nxt, val;
};
Node h[maxn];
bool isPrime(int x) {
if (x == 1) return false;
if (x == 2) return true;
if (x % 2 == 0) return false;
for (int j = 3; j <= sqrt(x); j += 2)
if (x % j == 0) return false;
return true;
}
void insert(int x) {
int k = (x % p + p) % p;
h[++tot].val = x;
h[tot].nxt = head[k];
head[k] = tot;
}
bool find(int x) {
for (int j = head[x % p]; j != -1; j = h[j].nxt)
if (h[j].val == x) return true;
return false;
}
int main() {
scanf("%d", &A);
memset(head, -1, sizeof(head));
for (p = A + 1; ; p++)
if (isPrime(p)) break;
for (int i = 1; i <= A; i++) {
int val;
scanf("%d", &val);
insert(val);
}
return 0;
}
String Hashing
String hashing converts strings to numeric values using a base (typically 131) and handles large numbers with unsigned 64-bit integers:
#include<bits/stdc++.h>
using namespace std;
typedef unsigned long long ull;
const int maxn = 50050;
int mod = 50021;
struct HashNode {
int nxt;
ull val;
};
HashNode edge[maxn];
int p = 131, n;
int head[maxn], tot = 0;
ull computeHash(string s) {
ull num = 0;
for (int j = 0; j < s.size(); j++)
num = num * p + (ull)(s[j] - 'a' + 1);
return num;
}
int hashIndex(ull x) {
return (x % mod + mod) % mod;
}
bool insertHash(ull x) {
int k = hashIndex(x);
for (int j = head[k]; j != -1; j = edge[j].nxt)
if (edge[j].val == x) return true;
edge[++tot].val = x;
edge[tot].nxt = head[k];
head[k] = tot;
return false;
}
int main() {
scanf("%d", &n);
memset(head, -1, sizeof(head));
for (int i = 1; i <= n; i++) {
string x;
cin >> x;
ull res = computeHash(x);
if (insertHash(res))
printf("%d\n", i);
}
return 0;
}
KMP Pattern Matching
KMP algorithm finds pattern occurrences in text with O(n+m) complexity:
void computeNext(string pattern, vector<int>& nxt) {
int m = pattern.size();
nxt[1] = 0;
int j = 0;
for (int i = 2; i <= m; i++) {
while (j > 0 && pattern[i-1] != pattern[j])
j = nxt[j];
if (pattern[i-1] == pattern[j])
j++;
nxt[i] = j;
}
}
void kmpSearch(string text, string pattern) {
int n = text.size(), m = pattern.size();
vector<int> nxt(m+1);
computeNext(pattern, nxt);
int j = 0, ans = 0;
for (int i = 1; i <= n; i++) {
while (j > 0 && (j == m || text[i-1] != pattern[j]))
j = nxt[j];
if (text[i-1] == pattern[j])
j++;
if (j == m) {
ans++;
printf("%d\n", i - m + 1);
}
}
printf("%d\n", ans);
}
Minimum Representation
Finds the lexicographically smallest rotation of a string:
int findMinRepresentation(string s) {
int n = s.size();
s += s; // Create doubled string
int i = 0, j = 1, k = 0;
while (i < n && j < n && k < n) {
if (s[i+k] == s[j+k])
k++;
else {
if (s[i+k] > s[j+k])
i = i + k + 1;
else
j = j + k + 1;
if (i == j) i++;
k = 0;
}
}
return min(i, j);
}
Trie Tree
A trie enables efficient string storage and retrieval:
const int ALPHABET = 26;
const int MAXN = 1000005;
int trie[MAXN][ALPHABET];
bool isEnd[MAXN];
int tot = 1;
void insert(string s) {
int p = 1;
for (int i = 0; i < s.size(); i++) {
int ch = s[i] - 'a';
if (trie[p][ch] == 0)
trie[p][ch] = ++tot;
p = trie[p][ch];
}
isEnd[p] = true;
}
bool search(string s) {
int p = 1;
for (int i = 0; i < s.size(); i++) {
int ch = s[i] - 'a';
if (trie[p][ch] == 0)
return false;
p = trie[p][ch];
}
return isEnd[p];
}
01 Trie
A binary trie for maximum XOR queries:
const int MAX_BITS = 32;
const int MAXN = 1000005;
int trie[MAXN][2];
int tot = 1;
void insert(int x) {
int p = 1;
for (int i = MAX_BITS; i >= 1; i--) {
int bit = (x >> (i-1)) & 1;
if (trie[p][bit] == 0)
trie[p][bit] = ++tot;
p = trie[p][bit];
}
}
int query(int x) {
int p = 1, result = 0;
for (int i = MAX_BITS; i >= 1; i--) {
int bit = (x >> (i-1)) & 1;
if (trie[p][bit ^ 1] != 0) {
result += (1 << (i-1));
p = trie[p][bit ^ 1];
} else {
p = trie[p][bit];
}
}
return result;
}
AC Automaton
Combines trie structure with KMP for multi-pattern matching:
const int ALPHABET = 26;
const int MAXN = 1000005;
int trie[MAXN][ALPHABET];
int fail[MAXN];
int flag[MAXN];
int tot = 1;
void insert(string s) {
int p = 0;
for (int i = 0; i < s.size(); i++) {
int ch = s[i] - 'a';
if (trie[p][ch] == 0)
trie[p][ch] = ++tot;
p = trie[p][ch];
}
flag[p]++;
}
void buildFail() {
queue<int> q;
for (int i = 0; i < ALPHABET; i++)
if (trie[0][i])
q.push(trie[0][i]), fail[trie[0][i]] = 0;
while (!q.empty()) {
int p = q.front(); q.pop();
for (int i = 0; i < ALPHABET; i++) {
if (trie[p][i] == 0)
trie[p][i] = trie[fail[p]][i];
else {
fail[trie[p][i]] = trie[fail[p]][i];
q.push(trie[p][i]);
}
}
}
}
Suffix Arrays and Automata
Advanced string indexing structures for pattern matching and substring analysis.