The core pipeline for resolving slider-based graphical verification challenges relies on capturing a static interface region, isolating the draggable element from the background track, extracting structural features through edge detection, and correlating spatial offsets via template matching. Final displacement values must be calibrated against platform-specific rendering offsets.
Core Processing Workflow
- Region Acquisition: Capture a bounded screen area containing both the puzzle piece and the target groove.
- ROI Segmentation: Split the grayscale frame into two subsets: the foreground fragment and the residual background matrix.
- Feature Extraction: Apply Gaussian smoothing to suppress texture noise, followed by Canny thresholding to isolate high-contrast boundaries.
- Spatial Correlation: Execute normalized cross-correlation (TM_CCOEFF_NORMED) to locate the optimal alignment coordinate.
- Input Simulation: Translate the calculated offset into mouse events, incorporating micro-adjustments to mimic human interaction patterns.
Reference Implementation (Basic)
import pyautogui as pg
import cv2
import time
# Calibration constant for specific DOM layouts
LAYOUT_OFFSET = 10


def execute_captcha_solve():
    """Capture the slider region, estimate the gap offset, and drag the handle.

    The captured frame is split into a left strip of ``piece_width`` columns
    (the puzzle piece) and the remaining columns (the track).  Both parts are
    blurred, reduced to Canny edges, and correlated with
    ``cv2.TM_CCOEFF_NORMED``; the best match position determines the drag.

    Returns:
        The horizontal drag distance in pixels: the matched offset measured
        from the left edge of the capture, plus ``LAYOUT_OFFSET``.

    Raises:
        OSError: If the saved capture cannot be read back from disk.
    """
    time.sleep(3)

    # Define capture boundary: (x, y, width, height)
    capture_rect = (790, 391, 320, 200)
    region_img = pg.screenshot(region=capture_rect)
    region_img.save("capture_region.png")

    # Load as monochrome for consistent edge analysis
    raw_frame = cv2.imread("capture_region.png", cv2.IMREAD_GRAYSCALE)
    if raw_frame is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("capture_region.png could not be read")
    h, w = raw_frame.shape[:2]

    # Segment puzzle piece and background track
    piece_width = 50
    piece_roi = raw_frame[0:h, 0:piece_width]
    bg_roi = raw_frame[0:h, piece_width:w]

    # Preprocessing reduces high-frequency noise
    blurred_piece = cv2.GaussianBlur(piece_roi, (5, 5), 0)
    blurred_bg = cv2.GaussianBlur(bg_roi, (5, 5), 0)

    # Boundary highlighting
    piece_edges = cv2.Canny(blurred_piece, 100, 200)
    bg_edges = cv2.Canny(blurred_bg, 100, 200)

    # Template correlation to find overlap position
    match_result = cv2.matchTemplate(bg_edges, piece_edges, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_location = cv2.minMaxLoc(match_result)

    # max_location is relative to bg_roi, which starts at column piece_width
    # of the full frame, so adding piece_width yields full-frame coordinates.
    calculated_offset = max_location[0] + piece_width
    actual_drag_distance = calculated_offset + LAYOUT_OFFSET

    # Simulate precise mouse interaction
    target_button_pos = (821, 633)
    pg.moveTo(target_button_pos, duration=0.3)
    time.sleep(0.1)
    pg.mouseDown()
    try:
        pg.moveRel(actual_drag_distance, 0, duration=0.3, tween=pg.easeInOutQuad)
        pg.moveRel(-5, 0, duration=0.1, tween=pg.easeInOutQuad)  # Micro-adjustment
        pg.moveRel(5, 0, duration=0.1, tween=pg.easeInOutQuad)
    finally:
        # Release even if a move raises, so the button is never left held down.
        pg.mouseUp()

    # Visualization: convert to BGR so the red outline is actually red (on a
    # single-channel image only the first colour component, 0, would be used).
    debug_img = cv2.cvtColor(raw_frame, cv2.COLOR_GRAY2BGR)
    cv2.rectangle(
        debug_img,
        (calculated_offset, 0),
        (calculated_offset + piece_width, h),
        (0, 0, 255),
        1,
    )
    cv2.imshow("Processed Regions", debug_img)
    cv2.waitKey(0)
    return actual_drag_distance


if __name__ == "__main__":
    execute_captcha_solve()
Auxiliary Utilities
Coordinate Mapping Helper: Captures the active cursor position after a brief initialization delay.
import pyautogui as pg
import time
# Wait five seconds so the pointer can be parked over the element of interest.
time.sleep(5)

# Read the pointer location once and split it into its two components.
cursor = pg.position()
x, y = cursor[0], cursor[1]
print(f"Target coordinates: x={x}, y={y}")
Drag Simulation Routine: Standardized motion control with ease-in/ease-out easing and release handling.
import pyautogui as pg
import time
# Horizontal distance, in pixels, of the main drag movement.
TARGET_DISTANCE = 192

# Initial delay before the pointer is moved.
time.sleep(3)

# Move to the fixed start position and press the button.
pg.moveTo(821, 633, duration=0.3)
time.sleep(0.1)
pg.mouseDown()
try:
    pg.moveRel(TARGET_DISTANCE, 0, duration=0.3, tween=pg.easeInOutQuad)
    # NOTE(review): only a -5 px correction follows, so the net travel is
    # TARGET_DISTANCE - 5; the reference routine adds a +5 px move as well.
    # Confirm which behaviour is intended.
    pg.moveRel(-5, 0, duration=0.1, tween=pg.easeInOutQuad)
finally:
    # Release even if a move raises, so the button is never left held down.
    pg.mouseUp()
Interactive Region Selector: Waits for a keyboard trigger, then records two click points, computes the bounding dimensions, and exports the ROI.
import pyautogui
import cv2
from pynput.keyboard import Key, Listener as KListener, Controller as KController
from pynput.mouse import Button, Listener as MListener, Controller as MController
# Key whose release stops the keyboard listener (see handle_key_release).
TRIGGER_KEY = Key.ctrl_l
# Flat list of captured press coordinates: [x1, y1, x2, y2].
recorded_coords = []


def handle_mouse_click(x, y, button, pressed):
    """Record the position of each button press until two points are stored.

    Args:
        x: Horizontal pointer coordinate reported with the event.
        y: Vertical pointer coordinate reported with the event.
        button: The mouse button involved (not inspected).
        pressed: True for a press event, False for a release event.

    Returns:
        False on a release once two points have been recorded, which stops
        the listener this callback is attached to; None otherwise so the
        listener keeps running.
    """
    if pressed:
        # Ignore presses beyond the second point so the list stays at 4 values.
        if len(recorded_coords) < 4:
            recorded_coords.extend([x, y])
        return None
    # Stopping on the very first release would leave only one point recorded;
    # wait until both points are in before ending the listener.
    if len(recorded_coords) >= 4:
        return False
    return None
def handle_key_release(key):
    """Stop the keyboard listener when the trigger key is released.

    Args:
        key: The key reported by the release event.

    Returns:
        False when ``key`` equals ``TRIGGER_KEY``; None for any other key.
    """
    is_trigger = key == TRIGGER_KEY
    return False if is_trigger else None
if __name__ == "__main__":
    # Block until handle_key_release stops the keyboard listener.
    with KListener(on_release=handle_key_release) as k_listener:
        k_listener.join()

    print("Awaiting two click points...")
    # Block until handle_mouse_click stops the mouse listener.
    with MListener(on_click=handle_mouse_click) as m_listener:
        m_listener.join()

    if len(recorded_coords) != 4:
        raise ValueError(
            f"Expected two click points, got {len(recorded_coords) // 2}."
        )
    # Coordinates may arrive as floats; the screenshot region needs integers.
    x1, y1, x2, y2 = (int(round(value)) for value in recorded_coords)

    cap_w = abs(x1 - x2)
    cap_h = abs(y1 - y2)
    if cap_w < 2 or cap_h < 2:
        raise ValueError("Invalid selection region.")

    # Normalise so the region starts at the top-left corner regardless of the
    # order in which the two points were clicked.
    start_x = min(x1, x2)
    start_y = min(y1, y2)

    snapshot = pyautogui.screenshot(region=[start_x, start_y, cap_w, cap_h])
    # JPEG has no alpha channel; drop it in case the capture is RGBA.
    snapshot.convert("RGB").save("selected_area.jpg")
Optimized Implementation (Advanced)
This version introduces multi-threshold evaluation loops and a statistical fallback mechanism to maintain robustness across varying image quality levels. Processing latency typically falls below 25ms.
import cv2
import time
from collections import Counter
# perf_counter is a monotonic high-resolution clock, suited to short intervals.
START_TIME = time.perf_counter()

raw_frame = cv2.imread("capture_region.png", cv2.IMREAD_GRAYSCALE)
if raw_frame is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("capture_region.png could not be read")
h, w = raw_frame.shape[:2]

# Configuration presets: (lower_threshold, upper_threshold)
THRESHOLD_PRESETS = [
    (200, 400), (150, 250), (100, 200),  # Standard
    (10, 150), (80, 150), (10, 80),      # Low contrast
    (10, 30),                            # Ultra-low contrast
]

# Left strip of piece_width columns is the piece; the rest is the track.
piece_width = 50
piece_roi = raw_frame[0:h, 0:piece_width]
bg_roi = raw_frame[0:h, piece_width:w]

# Blur once; only the Canny thresholds vary between presets.
blurred_piece = cv2.GaussianBlur(piece_roi, (5, 5), 1)
blurred_bg = cv2.GaussianBlur(bg_roi, (5, 5), 1)

confidence_scores = []
candidate_offsets = []
for lower, upper in THRESHOLD_PRESETS:
    piece_edge = cv2.Canny(blurred_piece, lower, upper)
    bg_edge = cv2.Canny(blurred_bg, lower, upper)
    match_data = cv2.matchTemplate(bg_edge, piece_edge, cv2.TM_CCOEFF_NORMED)
    _, conf, _, loc = cv2.minMaxLoc(match_data)
    confidence_scores.append(conf)
    # loc is relative to bg_roi; add piece_width for full-frame coordinates.
    candidate_offsets.append(loc[0] + piece_width)

# Evaluate primary result: the offset from the highest-scoring preset.
primary_conf = max(confidence_scores)
primary_idx = confidence_scores.index(primary_conf)
final_offset = candidate_offsets[primary_idx]

# Fallback aggregation when correlation drops below threshold: take the
# offset that the largest number of presets agree on.
if primary_conf < 0.15:
    coord_freq = Counter(candidate_offsets)
    final_offset = coord_freq.most_common(1)[0][0]

elapsed_ms = round((time.perf_counter() - START_TIME) * 1000, 2)
print(f"Peak Confidence: {primary_conf:.4f} | Final Offset: {final_offset} | Latency: {elapsed_ms}ms")

# Debug visualization: convert to BGR so the red outline is actually red (on
# a single-channel image only the first colour component, 0, would be used).
draw_canvas = cv2.cvtColor(raw_frame, cv2.COLOR_GRAY2BGR)
# final_offset is the left edge of the matched region in full-frame columns.
cv2.rectangle(
    draw_canvas,
    (int(final_offset), 0),
    (int(final_offset + piece_width), h),
    (0, 0, 255),
    2,
)
cv2.imshow("Primary Frame", raw_frame)
cv2.imshow("Debug Overlay", draw_canvas)
cv2.waitKey(0)
Parameter tuning should prioritize gradient sensitivity over pixel intensity. Edge thresholds may require adjustment based on background complexity and lighting conditions within the target application window.