Python Web Scraping Fundamentals: Request Handling and Network Operations

GET Requests with Dictionary Parameters

When making GET requests with query parameters, we can construct URLs dynamically using dictionaries:


import urllib.request
import urllib.parse
import string

def get_params():
    """Send a GET request whose query string is built from a dictionary.

    The parameters are serialized with ``urllib.parse.urlencode``, appended
    to the base URL, passed through ``urllib.parse.quote`` and fetched with
    ``urllib.request.urlopen``. The response body is decoded as UTF-8 and
    printed to stdout.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    base_url = "http://www.baidu.com/s"

    params = {
        "query": "中文",
        "user": "zhang",
        "action": "search",
    }

    encoded_params = urllib.parse.urlencode(params)
    full_url = f"{base_url}?{encoded_params}"

    # Percent-encode any non-ASCII characters still present in the URL.
    # Every printable ASCII character is marked safe, so the "%XX" escapes
    # already produced by urlencode are left untouched (no double encoding).
    safe_url = urllib.parse.quote(full_url, safe=string.printable)

    # Use the response as a context manager so the underlying connection is
    # closed even if reading or decoding raises.
    with urllib.request.urlopen(safe_url) as response:
        content = response.read().decode("utf-8")
    print(content)

# Run the GET example immediately when this snippet is executed.
get_params()

URL Ecnoding Techniques

Handling special characters in URLs:

  • For Chinese characters or special symbols: urllib.parse.quote(url, safe=string.printable)
  • For dictionary parameter encoding: urllib.parse.urlencode(params)

POST Request Implemantation

Basic structure for sending POST requests:


import urllib.request
import urllib.parse

def send_post():
    """Send a form-encoded POST request and print the decoded response body.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    url = "http://example.com/submit"

    # Passing ``data`` makes urlopen issue a POST instead of a GET; the
    # payload must be bytes, hence the encode() after urlencode().
    data = urllib.parse.urlencode({"key": "value"}).encode()

    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(url, data=data) as response:
        body = response.read().decode()
    print(body)

# Run the POST example immediately when this snippet is executed.
send_post()

Request Header Management

Setting and modifying request headers:


import urllib.request

def custom_headers():
    """Send a request with a custom User-Agent and print header information.

    Prints the response headers returned by the server, followed by the
    User-Agent value stored on the outgoing request.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    url = "http://www.baidu.com"

    request = urllib.request.Request(url)
    request.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36",
    )

    # Close the connection as soon as the response headers have been printed.
    with urllib.request.urlopen(request) as response:
        print(response.headers)

    # add_header() stores the name via str.capitalize() (i.e. "User-agent"),
    # and get_header() does an exact-match lookup, so asking for
    # "User-Agent" would return None.
    print(request.get_header("User-agent"))

# Run the custom-header example immediately when this snippet is executed.
custom_headers()

Random User-Agent Rotation

Implementing user-agent rotation too avoid detection:


import urllib.request
import random

def random_user_agent():
    """Send a request using a randomly chosen User-Agent and print which one.

    One User-Agent string is picked with ``random.choice`` from a fixed list,
    attached to the request, and printed after the request has been sent.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    agents = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    ]

    url = "http://www.baidu.com"
    request = urllib.request.Request(url)
    request.add_header("User-Agent", random.choice(agents))

    # The response body is not needed; the context manager only ensures the
    # connection is closed after the request has been made.
    with urllib.request.urlopen(request):
        pass

    # add_header() stores the name via str.capitalize() (i.e. "User-agent"),
    # and get_header() does an exact-match lookup, so asking for
    # "User-Agent" would return None.
    print(request.get_header("User-agent"))

# Run the User-Agent rotation example immediately when this snippet is executed.
random_user_agent()

Proxy IP Implementation

Using proxy servers for request anonymization:


import urllib.request

def use_proxy():
    """Try to fetch a page through an HTTP proxy and report the outcome.

    Builds an opener around a ``ProxyHandler`` and opens the target URL with
    a 5-second timeout. Prints a success message if the request goes through,
    or the error if it fails; no exception is propagated to the caller.
    """
    proxy_handler = urllib.request.ProxyHandler({"http": "104.131.109.66:8080"})
    opener = urllib.request.build_opener(proxy_handler)

    try:
        response = opener.open("http://www.baidu.com", timeout=5)
    except Exception as e:
        # Deliberately broad: this is a best-effort probe, and an unreliable
        # proxy can fail in many ways (refused connection, timeout, malformed
        # reply). The failure is reported rather than swallowed.
        print(f"Proxy error: {e}")
    else:
        # Close the connection; only the fact that it opened matters here.
        with response:
            print("Success with proxy")

# Run the proxy example immediately when this snippet is executed.
use_proxy()

Authentication Handling

Accessing resources requiring basic authentication:


import urllib.request

def auth_access():
    """Open a URL protected by HTTP Basic authentication and print the status.

    Credentials are registered on a password manager, which is wrapped in an
    ``HTTPBasicAuthHandler`` and installed on a dedicated opener.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    # A realm of None registers the credentials as the default for this URL,
    # used when no realm-specific entry matches.
    password_mgr.add_password(None, "http://192.168.179.66", "admin", "admin123")

    auth_handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
    opener = urllib.request.build_opener(auth_handler)

    # Close the connection once the status code has been reported.
    with opener.open("http://192.168.179.66") as response:
        print(f"Response code: {response.getcode()}")

# Run the basic-authentication example immediately when this snippet is executed.
auth_access()

Tags: python web scraping HTTP Requests urllib Proxy Handling

Posted on Sat, 23 May 2026 17:18:26 +0000 by tj71587