DP optimization often feels like an arcane art. The question arises: can anyone actually devise such solutions during a programming contest? (Perhaps I'm still learning this skill.)
The core ideas I've encountered fall into these categories:
When transitioning from state i to i+1, the number of states that change is small, so we can inherit values directly to save computation. Only a limited number of states contribute meaningfully to the answer. By exploiting certain properties, we can eliminate redundant states to maintain acceptable time complexity.
Problem 1: Brave CHAIN (ABC176F)
A straightforward O(n³) DP solution exists: let f[i][a][b] represent the maximum score achievable when processing the first 3×i+2 cards while preserving the pair (a, b). There are three types of transitions:
No replacement: f[i][a][b] = f[i-1][a][b] + [c == d == e] Replace one card: f[i][a][c] = f[i-1][a][b] + [b == d == e] Replace two cards: f[i][c][d] = f[i-1][a][b] + [a == b == e]
Observation: not many states actually get affected by these transitions. When multiple states are affected, it's only because c == d == e holds true. This suggests the set of affected states remains small.
Let's analyze which states f[i][a][b] get updated by each transition type:
For the first type, every state f[i][a][b] depends on whether [c == d == e] is true. Since c, d, and e are fixed values, we can maintain a single global additive offset instead of updating all states individually.
For the second type, only states in column c are affected. If [b == d == e] holds, the source state is deterministic. Otherwise, for each fixed a, we update f[i][a][c] to the maximum value from that row—simply maintain row-wise maximums.
For the third type, if [a == b == e] holds, both sides are determined, allowing direct transition. Otherwise, all states should update to the global maximum, which we can maintain as a single running value alongside the table.
Implementation
#include <bits/stdc++.h>
using namespace std;
// Upper bound on card values (assumed to fit in [1, MAXV) — they are used
// directly as dp indices); index 0 of each dp dimension is reserved.
const int MAXV = 2005;
// dp[a][b]: best score while holding the pair (a, b), stored relative to a
// running global bonus (see bonusCount in main). dp[a][0] caches the row
// maximum over b, and dp[0][0] caches the global maximum.
// cards[1..3N] holds the input sequence.
int N, dp[MAXV][MAXV], cards[MAXV * 3];
// Chmax helper: raise `target` to `value` when that is an improvement.
inline void updateMax(int& target, int value) {
    target = std::max(target, value);
}
// A pending dp write: raise the state for the held pair (x, y) to `val`.
// Writes are queued so that all dp reads in a round happen before any
// write from the same round becomes visible.
struct Transition {
int x, y, val;
};
void applyUpdate(int a, int b, int w) {
updateMax(dp[a][b], w);
updateMax(dp[a][0], w);
updateMax(dp[0][0], w);
}
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
cin >> N;
// -inf marks unreachable (a, b) pairs; dp values are stored relative to
// bonusCount (declared below).
memset(dp, -0x3f, sizeof(dp));
// Count of rounds where all three new cards were equal. Every state gains
// this bonus implicitly, so dp keeps values with it subtracted; it is
// added back when printing the answer.
int bonusCount = 0;
for (int i = 1; i <= 3 * N; i++) {
cin >> cards[i];
}
// Initially we hold cards[1] and cards[2]; store both orders of the pair.
applyUpdate(cards[1], cards[2], 0);
applyUpdate(cards[2], cards[1], 0);
for (int i = 1; i < N; i++) {
queue<Transition> pending;
// Round i reveals three new cards c, d, e.
int c = cards[3 * i];
int d = cards[3 * i + 1];
int e = cards[3 * i + 2];
int currentBonus = 0;
// If the three new cards match, every state may discard them for +1;
// record that globally instead of touching each dp entry.
if (c == d && d == e) {
currentBonus++;
bonusCount++;
}
// Keep one held card `a` and one new card; discard the other held card
// plus two of the new ones (+1 when the three discarded cards match,
// which pins the discarded held card to a specific value).
// Transitions involving column c
for (int a = 1; a <= N; a++) {
if (d == e) {
pending.push({a, c, dp[a][d] + 1});
}
pending.push({a, c, dp[a][0]});
if (c == e) {
pending.push({a, d, dp[a][c] + 1});
}
pending.push({a, d, dp[a][0]});
if (d == c) {
pending.push({a, e, dp[a][d] + 1});
}
pending.push({a, e, dp[a][0]});
}
// Keep two new cards; discard both held cards and the third new card
// (+1 requires a == b == third, hence the dp[x][x] reads).
// Transitions between columns c, d, e
pending.push({c, d, dp[e][e] + 1});
pending.push({c, d, dp[0][0]});
pending.push({c, e, dp[d][d] + 1});
pending.push({c, e, dp[0][0]});
pending.push({d, e, dp[c][c] + 1});
pending.push({d, e, dp[0][0]});
// Apply all queued writes only after every read above, so transitions
// within one round never observe each other's results.
while (!pending.empty()) {
Transition cur = pending.front();
pending.pop();
// Subtract this round's bonus so dp stays relative to bonusCount.
int a = cur.x, b = cur.y, w = cur.val - currentBonus;
applyUpdate(a, b, w);
applyUpdate(b, a, w);
}
}
int answer = 0;
for (int a = 1; a <= N; a++) {
for (int b = 1; b <= N; b++) {
// The final card cards[3N] scores one extra point when it matches the
// held pair.
updateMax(answer, dp[a][b] + (a == b && b == cards[N * 3]));
}
}
cout << answer + bonusCount;
return 0;
}
Problem 2: Shik and Travel (AGC007E)
This is a remarkable problem that demonstrates sophisticated DP optimization!
First, note that the answer exhibits monotonicity, allowing us to binary search on the value V and convert the problem into a decision problem.
Key observation: each edge can only be traversed twice. This means when entering a subtree, you must visit all nodes within before leaving that subtree. When processing a subtree, we complete one child subtree, travel from one leaf to another leaf in the second child, finish processing that subtree, and then exit.
We only need to track two distances: from node u to the first leaf, and from the last leaf back to u. Define f[u][a][b] as whether there's a valid processing order with cost ≤ V, where the distance from u to the first leaf is a, and the distance from the last leaf to u is b.
The state transition becomes:
f[u][a][b] = f[left][a - da][i - da] & f[right][j - db][b - db] & [i + j ≤ V]
Further optimization requires noticing a subtle but crucial property: if a₁ ≤ a₂ and b₁ ≤ b₂, then the state (a₂, b₂) is dominated by (a₁, b₁) — anything achievable from the former is achievable from the latter at no greater cost. We can safely prune such dominated states, keeping only the Pareto-optimal ones.
For any node u, the remaining useful states satisfy that a increases monotonically while b decreases monotonically. This allows us to merge information from two child subtrees using a two-pointer technique.
What's the resulting time complexity? Let sz[u] denote the number of useful states stored at node u. Each transition produces at most 2 × min(sz[left], sz[right]) new states. This behavior is equivalent to a small-to-large merge (DSU on tree), yielding O(n log n) overall complexity.
Implementation
#include <bits/stdc++.h>
#define ll long long
#define pii pair<ll, ll>
using namespace std;
const int MAXN = 200005;
// Number of tree nodes; node 1 is the root.
int nodeCount;
// child[u][0/1]: the two children of u (0 = slot unused; leaves have none).
int child[MAXN][2];
// edgeDist[u][0/1]: weight of the edge from u down to the matching child.
ll edgeDist[MAXN][2];
// Per-node candidate lists of (distance to first leaf, distance from last
// leaf) pairs; leftStates/rightStates are scratch, merged is the pruned
// frontier kept after solve(u).
vector<pii> leftStates[MAXN], rightStates[MAXN], merged[MAXN];
// Current binary-search threshold V being tested by feasible().
ll limitValue;
// Release a vector's storage immediately (plain clear() keeps capacity).
void clearVec(vector<pii>& v) {
    vector<pii> scratch;
    scratch.swap(v);
}
void combineStates(int u) {
int j = 0;
vector<pii> temp;
for (size_t i = 0; i < leftStates[u].size(); i++) {
while (j < (int)rightStates[u].size() &&
(rightStates[u][j].first < leftStates[u][i].first ||
(rightStates[u][j].first == leftStates[u][i].first &&
rightStates[u][j].second < leftStates[u][i].second))) {
temp.push_back(rightStates[u][j++]);
}
temp.push_back(leftStates[u][i]);
}
while (j < (int)rightStates[u].size()) {
temp.push_back(rightStates[u][j++]);
}
if (temp.empty()) return;
int lastIdx = 0;
merged[u].push_back(temp[0]);
for (size_t i = 1; i < temp.size(); i++) {
if (temp[i].second < merged[u][lastIdx].second) {
lastIdx++;
merged[u].push_back(temp[i]);
}
}
}
// Bottom-up feasibility DP under the current limitValue. After the call,
// merged[u] holds pruned pairs (distance from u to the first leaf visited,
// distance from the last leaf back to u) over all valid traversals of u's
// subtree, with first non-decreasing and second strictly decreasing.
void solve(int u) {
if (!child[u][1]) {
// No second child recorded: u is a leaf; the tour starts and ends here
// at zero cost.
merged[u].push_back({0LL, 0LL});
return;
}
solve(child[u][0]);
solve(child[u][1]);
int left = child[u][0];
int right = child[u][1];
int ptr = 0;
// Both child edges lie on the leaf-to-leaf hop that crosses between the
// two subtrees, so their weights are added to that hop's cost.
ll sum = edgeDist[u][0] + edgeDist[u][1];
// Pass 1: enter via the left subtree, exit via the right. As i advances,
// merged[left][i].second decreases, so the feasibility condition only
// loosens and ptr moves forward monotonically (two-pointer technique).
for (int i = 0; i < (int)merged[left].size(); i++) {
while (ptr < (int)merged[right].size() &&
merged[left][i].second + merged[right][ptr].first + sum <= limitValue) {
ptr++;
}
if (ptr > 0) {
// merged[right][ptr - 1] is the feasible partner with the smallest
// exit distance (second component).
leftStates[u].push_back({
merged[left][i].first + edgeDist[u][0],
merged[right][ptr - 1].second + edgeDist[u][1]
});
}
}
// Pass 2: symmetric order — enter via the right subtree, exit via the
// left; note the edge weights are attached in swapped roles.
swap(left, right);
ptr = 0;
for (int i = 0; i < (int)merged[left].size(); i++) {
while (ptr < (int)merged[right].size() &&
merged[left][i].second + merged[right][ptr].first + sum <= limitValue) {
ptr++;
}
if (ptr > 0) {
rightStates[u].push_back({
merged[left][i].first + edgeDist[u][1],
merged[right][ptr - 1].second + edgeDist[u][0]
});
}
}
// Fuse both candidate lists into merged[u] with dominance pruning, then
// free the scratch lists to keep memory proportional to useful states.
combineStates(u);
clearVec(leftStates[u]);
clearVec(rightStates[u]);
}
// Decision procedure for the binary search: can the tree be traversed so
// that every leaf-to-leaf hop costs at most V?
bool feasible(ll V) {
    // Drop all state lists left over from the previous check.
    for (int node = 1; node <= nodeCount; node++) {
        for (auto* lists : {leftStates, rightStates, merged}) {
            clearVec(lists[node]);
        }
    }
    limitValue = V;
    solve(1);
    // The root is feasible iff it retains at least one valid state.
    return merged[1].size() > 0;
}
int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cin >> nodeCount;
    // Read parent/weight pairs for nodes 2..n and attach each node in its
    // parent's first free slot. The slot must be chosen by checking slot 0's
    // occupancy: node indices are >= 2, so child[parent][0] == 0 means "no
    // child yet". (Reusing child[parent][1] as an insertion counter — as the
    // previous version did — corrupts the second child's stored index.)
    for (int i = 2; i <= nodeCount; i++) {
        int parent, weight;
        cin >> parent >> weight;
        int slot = child[parent][0] ? 1 : 0;
        child[parent][slot] = i;
        edgeDist[parent][slot] = weight;
    }
    // Binary search the smallest V for which a valid traversal exists; the
    // upper bound 3e10 safely exceeds any single leaf-to-leaf distance.
    ll lo = 0, hi = 3e10, result = 3e10;
    while (lo <= hi) {
        ll mid = (lo + hi) >> 1;
        if (feasible(mid)) {
            hi = mid - 1;
            result = mid;
        } else {
            lo = mid + 1;
        }
    }
    cout << result << "\n";
    return 0;
}
The techniques demonstrated here—selective state inheritance, dominance pruning with Pareto-optimal states, and small-to-large merging—illustrate that unconventional DP optimization often relies on identifying structural properties that limit the effective state space rather than applying generic algorithmic tricks.