To control user access frequency and prevent web scraping, you can implement rate limiting on your API endpoints. Django Rest Framework (DRF) provides mechanisms for this, either through custom implementations or built-in classes.
Custom Rate Limiter
A common scenario is to limit users to a specific number of requests within a given time frame, for example, three requests per minute.
import time
from rest_framework.throttling import BaseThrottle
# Module-level store of visit records, keyed by the value returned from
# get_ident(): {client_ident: [newest_timestamp, ..., oldest_timestamp]}.
# New timestamps are inserted at index 0 and expired ones are popped from the end.
VISIT_RECORD = {}
class VisitThrottle(BaseThrottle):
    """Limit each client to ``max_requests`` requests per ``window_seconds``.

    Clients are identified by ``self.get_ident(request)``. With the default
    class attributes this allows 3 requests per 60 seconds; subclasses can
    override ``max_requests`` / ``window_seconds`` to change the limit.
    """

    # Maximum number of requests allowed inside one sliding window.
    max_requests = 3
    # Length of the sliding window, in seconds.
    window_seconds = 60

    def __init__(self):
        # Timestamp list of the client seen by the latest allow_request() call;
        # wait() reads it to work out when the oldest entry expires.
        self.history = None

    def allow_request(self, request, view):
        """Return True if the request is within the limit, False otherwise.

        An allowed request is recorded in VISIT_RECORD as a side effect.
        """
        remote_addr = self.get_ident(request)
        current_time = time.time()

        # One lookup handles both first-time and returning clients.
        history = VISIT_RECORD.setdefault(remote_addr, [])
        self.history = history

        # Timestamps are stored newest-first, so expired ones sit at the end.
        cutoff = current_time - self.window_seconds
        while history and history[-1] < cutoff:
            history.pop()

        if len(history) < self.max_requests:
            history.insert(0, current_time)
            return True
        return False

    def wait(self):
        """Return the seconds left until the oldest recorded request expires.

        Returns 0 when there is no recorded history for the current client.
        """
        current_time = time.time()
        if not self.history:
            return 0
        # history[-1] is the oldest timestamp still inside the window.
        return self.window_seconds - (current_time - self.history[-1])
To use this custom throttle, add it to the throttle_classes attribute of your view:
from rest_framework.views import APIView
from django.http import JsonResponse
# Assuming models and md5 function are defined elsewhere
# from . import models
# from .utils import md5
class AuthView(APIView):
    """Login endpoint: open to everyone, but rate limited by VisitThrottle."""

    # No authentication or permission checks: this is the view that issues tokens.
    authentication_classes = []
    permission_classes = []
    throttle_classes = [VisitThrottle]

    def post(self, request, *args, **kwargs):
        """Validate the posted credentials and answer with a JSON status payload.

        The payload always carries ``code`` and ``msg``; ``token`` is added on
        a successful login.
        """
        payload = {'code': 1000, 'msg': None}
        try:
            username = request.data.get('username')
            password = request.data.get('password')
            # Real lookup (placeholder below keeps the example self-contained):
            # user = models.UserInfo.objects.filter(username=username, password=password).first()
            user = None
            if user:
                # Real token generation and persistence:
                # token = md5(username)
                # models.UserToken.objects.update_or_create(user=user, defaults={'token': token})
                token = "dummy_token"
                payload['token'] = token
            else:
                payload.update(code=1001, msg="Invalid username or password")
        except Exception as e:
            print(e)  # In production, use proper logging
            payload.update(code=1002, msg='An unexpected error occurred')
        return JsonResponse(payload)
When the rate limit is exceeded, DRF raises a Throttled exception.
DRF Source Code Analysis for Throttling
DRF's request dispatching process involves several steps, including checking throttle classes.
- dispatch method: The entry point for handling requests.
    def dispatch(self, request, *args, **kwargs):
        # ... request initialization ...
        try:
            self.initial(request, *args, **kwargs)
            # ... handler lookup and execution ...
        except Exception as exc:
            response = self.handle_exception(exc)
        # ... response finalization ...
        return self.response
- initial method: This method orchestrates preliminary tasks like authentication, permission checks, and throttle checks.
    def initial(self, request, *args, **kwargs):
        # ... content negotiation, versioning ...
        self.perform_authentication(request)
        self.check_permissions(request)
        self.check_throttles(request)  # Throttling check happens here
- check_throttles method: Iterates through the throttles defined for the view and calls their allow_request method.
    def check_throttles(self, request):
        for throttle in self.get_throttles():
            if not throttle.allow_request(request, self):
                self.throttled(request, throttle.wait())
- throttled method: If allow_request returns False, this method is called to raise a Throttled exception.
    def throttled(self, request, wait):
        raise exceptions.Throttled(wait)
- get_throttles method: Instantiates the throttle classes specified in self.throttle_classes.
    def get_throttles(self):
        return [throttle() for throttle in self.throttle_classes]
This shows why defining throttle_classes and implementing allow_request and wait methods are crucial for custom throttling.
Global Rate Limiter Configuration
Instead of configuring throttle_classes on each view, you can set global defaults in your settings.py.
# settings.py
REST_FRAMEWORK = {
    # ... other settings ...
    "DEFAULT_THROTTLE_CLASSES": [
        # Dotted path to the throttle class applied to every view by default.
        "api.utils.throttle.VisitThrottle",
    ],
    "DEFAULT_THROTTLE_RATES": {
        # Keyed by throttle scope; "3/m" means 3 requests per minute.
        # NOTE: the rate is looked up through a throttle's `scope` by
        # SimpleRateThrottle-style classes; a throttle that derives directly
        # from BaseThrottle has to enforce its own limit.
        "visit": "3/m",
    },
}
And your VisitThrottle class would look like this:
# api/utils/throttle.py
from rest_framework.throttling import BaseThrottle
import time
# Per-client request timestamps, newest first: {client_ident: [timestamp, ...]}.
VISIT_RECORD = {}
class VisitThrottle(BaseThrottle):
    """Allow each client (as identified by ``get_ident``) 3 requests per 60 seconds.

    The limit is enforced by this class itself; the ``scope`` attribute only
    names the matching entry in ``DEFAULT_THROTTLE_RATES``.
    """

    scope = 'visit'  # Same name as the key used in settings.py

    def __init__(self):
        # Timestamp list of the client seen by the latest allow_request() call.
        # wait() reads this, so it must exist before any request is throttled.
        self.history = None

    def allow_request(self, request, view):
        """Return True and record the visit if the client is under the limit."""
        remote_addr = self.get_ident(request)
        current_time = time.time()
        if remote_addr not in VISIT_RECORD:
            VISIT_RECORD[remote_addr] = [current_time]
            return True
        history = VISIT_RECORD.get(remote_addr)
        # Remember the list so wait() can compute the remaining time.
        self.history = history
        # In a real app, consider using a more robust cache like Redis
        # Timestamps are stored newest-first; drop those older than 60 seconds.
        while history and history[-1] < current_time - 60:
            history.pop()
        # Hard-coded to mirror the '3/m' rate in settings.py: this class does
        # not read DEFAULT_THROTTLE_RATES itself.
        if len(history) < 3:
            history.insert(0, current_time)
            return True
        return False

    def wait(self):
        """Return the seconds until the oldest recorded request expires (0 if none)."""
        current_time = time.time()
        if not self.history:
            return 0
        return 60 - (current_time - self.history[-1])
With global configuration, you don't need to specify throttle_classes in individual views unless you want to override the global setting.
Built-in Rate Limiting Classes
DRF provides built-in throttle classes that simplify implementation.
1. BaseThrottle
This is the abstract base class for all throttle classes. You must implement allow_request and optionally wait. The get_ident method is provided to identify the request source (e.g., IP address).
from rest_framework.throttling import BaseThrottle
class BaseThrottle(object):
    """Simplified sketch of the throttle base class: the hooks a throttle provides."""

    def allow_request(self, request, view):
        """Decide whether the request may proceed; every subclass must override this."""
        raise NotImplementedError('.allow_request() must be overridden')

    def get_ident(self, request):
        """Return the value used to identify the caller (REMOTE_ADDR in this sketch)."""
        meta = request.META
        # Read here only to show what the elided proxy handling works with.
        forwarded_for = meta.get('HTTP_X_FORWARDED_FOR')
        client_addr = meta.get('REMOTE_ADDR')
        # ... (implementation details based on proxies) ...
        return client_addr

    def wait(self):
        """Return the suggested number of seconds to wait; None means no suggestion."""
        return None
Our custom VisitThrottle example above inherited from BaseThrottle.
2. SimpleRateThrottle
This class simplifies rate limiting by using a cache and requiring only the implementation of get_cache_key and a scope attribute.
# Simplified excerpt of the class as it is defined inside rest_framework.throttling.
# SimpleRateThrottle is not imported here because the excerpt below defines it.
import time

from django.core.cache import cache as default_cache
from django.core.exceptions import ImproperlyConfigured
from rest_framework.settings import api_settings
from rest_framework.throttling import BaseThrottle


class SimpleRateThrottle(BaseThrottle):
    """Cache-backed throttle driven by a rate string such as ``'10/m'``.

    Subclasses provide ``get_cache_key`` and either a ``scope`` (looked up in
    ``DEFAULT_THROTTLE_RATES``) or an explicit ``rate`` attribute.
    """

    cache = default_cache
    timer = time.time
    cache_format = 'throttle_%(scope)s_%(ident)s'
    scope = None  # Must be defined
    THROTTLE_RATES = api_settings.DEFAULT_THROTTLE_RATES

    def __init__(self):
        # A subclass may set `rate` directly; otherwise resolve it from the scope.
        if not getattr(self, 'rate', None):
            self.rate = self.get_rate()
        self.num_requests, self.duration = self.parse_rate(self.rate)

    def get_cache_key(self, request, view):
        """Return a unique cache key for this client, or None to skip throttling.

        Must be overridden (e.g. to key on IP address or user ID).
        """
        raise NotImplementedError('.get_cache_key() must be overridden')

    def get_rate(self):
        """Return the rate string (e.g. ``'10/m'``) configured for ``self.scope``.

        Raises ImproperlyConfigured when no scope is set or the scope has no
        entry in THROTTLE_RATES.
        """
        if not getattr(self, 'scope', None):
            raise ImproperlyConfigured("You must set either `.scope` or `.rate`...")
        try:
            return self.THROTTLE_RATES[self.scope]
        except KeyError:
            raise ImproperlyConfigured(f"No default throttle rate set for '{self.scope}' scope")

    def parse_rate(self, rate):
        """Parse a rate string like ``'10/m'`` into ``(10, 60)``.

        Returns ``(None, None)`` when ``rate`` is None.
        """
        if rate is None:
            return (None, None)
        num, period = rate.split('/')
        num_requests = int(num)
        # Only the first letter of the period matters: 'm', 'min', 'minute' all work.
        duration_map = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}
        duration = duration_map[period[0]]
        return (num_requests, duration)

    def allow_request(self, request, view):
        """Return True if the request is within the configured rate."""
        if self.rate is None:
            return True
        self.key = self.get_cache_key(request, view)
        if self.key is None:
            # No key means this throttle does not apply to the request.
            return True
        self.history = self.cache.get(self.key, [])
        self.now = self.timer()
        # Remove old timestamps (history is newest-first, so they are at the end)
        while self.history and self.history[-1] <= self.now - self.duration:
            self.history.pop()
        if len(self.history) >= self.num_requests:
            return self.throttle_failure()
        return self.throttle_success()

    def throttle_success(self):
        """Record the current request in the cache and allow it."""
        self.history.insert(0, self.now)
        self.cache.set(self.key, self.history, self.duration)
        return True

    def throttle_failure(self):
        """Reject the request; nothing is recorded."""
        return False

    def wait(self):
        """Return the recommended number of seconds to wait, or None."""
        if self.history:
            remaining_duration = self.duration - (self.now - self.history[-1])
        else:
            remaining_duration = self.duration
        available_requests = self.num_requests - len(self.history) + 1
        if available_requests <= 0:
            return None
        return remaining_duration / float(available_requests)
To use SimpleRateThrottle, you need to:
- Define a scope attribute in your throttle class.
- Implement get_cache_key to generate a unique key for throttling (e.g., IP address, username).
- Configure DEFAULT_THROTTLE_RATES in settings.py using the defined scope.
Example using SimpleRateThrottle:
# api/utils/throttle.py
from rest_framework.throttling import SimpleRateThrottle
class VisitThrottle(SimpleRateThrottle):
    """Throttle clients by IP at the rate configured for the 'anon_user' scope.

    With the example settings that rate is '3/m' (3 requests per minute).
    """

    scope = 'anon_user'

    def get_cache_key(self, request, view):
        """Return a scope-prefixed cache key built from the client's IP address."""
        # Namespacing the key with cache_format keeps it from colliding with
        # other cache entries and with throttles that use a different scope.
        return self.cache_format % {
            'scope': self.scope,
            'ident': self.get_ident(request),
        }
class UserThrottle(SimpleRateThrottle):
    """Throttle logged-in users by username at the 'authenticated_user' rate.

    With the example settings that rate is '10/m' (10 requests per minute).
    """

    scope = 'authenticated_user'

    def get_cache_key(self, request, view):
        """Return a scope-prefixed cache key for the user, or None if anonymous."""
        if request.user and request.user.is_authenticated:
            # Namespacing the key with cache_format keeps a username from
            # colliding with other cache entries or other throttle scopes.
            return self.cache_format % {
                'scope': self.scope,
                'ident': request.user.username,
            }
        return None  # Do not throttle if not authenticated
And in settings.py:
# settings.py
REST_FRAMEWORK = {
    "DEFAULT_THROTTLE_CLASSES": [
        # Applied to every view unless the view sets its own throttle_classes.
        "api.utils.throttle.UserThrottle",
    ],
    "DEFAULT_THROTTLE_RATES": {
        # One entry per throttle scope.
        "anon_user": "3/m",  # scope of VisitThrottle: 3 requests per minute
        "authenticated_user": "10/m",  # scope of UserThrottle: 10 requests per minute
    },
}
In your views.py, you can override the global settings or use specific throttles:
# views.py
from rest_framework.views import APIView

# VisitThrottle lives in api/utils/throttle.py, the module named in settings.py.
from api.utils.throttle import VisitThrottle


class AuthView(APIView):
    """Login view that replaces the global throttle with the IP-based one."""

    # By default, UserThrottle (10/m) applies globally.
    # For this view that doesn't require login, we override to use VisitThrottle (3/m).
    throttle_classes = [VisitThrottle]
    # ... rest of the view ...
Summary of Usage
- Custom Throttle: Inherit from BaseThrottle and implement allow_request and wait.
- Simplified Throttle: Inherit from SimpleRateThrottle, implement get_cache_key, define scope, and configure DEFAULT_THROTTLE_RATES in settings.py.
Configuration Options:
- Global Configuration (settings.py):
    REST_FRAMEWORK = {
        "DEFAULT_THROTTLE_CLASSES": ['path.to.YourThrottle'],
        "DEFAULT_THROTTLE_RATES": {
            'scope_name': 'rate/period',  # e.g., '5/m' for 5 requests per minute
        }
    }
- Local Configuration (views.py):
    class YourView(APIView):
        throttle_classes = [SpecificThrottleClass]