Python Web Scraping for Animated Images Collection

Web Scraping Animated Images with Python

Automatically collecting animated images from websites can be useful when manual downloading is cumbersome, especially when websites restrict right-click functionality. This guide demonstrates how to create a Python script to extract GIFs from online sources.

We'll be scraping images from "FunnyGIFs", a humor-focused website featuring animated content.

Approach

  1. Fetch the HTML content of the target webpage
  2. Parse the HTML to locate URLs of animated images
  3. Download these images to local storage
  4. Implement pagination to scrape multiple pages

Implementation

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib.request
import time
import uuid
import os
import sys
import re
from bs4 import BeautifulSoup

def fetch_page(url, timeout=10):
    """
    Retrieve the raw HTML content from the specified URL.

    Args:
        url: Fully-qualified URL to fetch.
        timeout: Socket timeout in seconds (default 10) so a stalled
            server cannot hang the scraper indefinitely.

    Returns:
        The response body as bytes, or None if the request failed.
    """
    try:
        print(f"Fetching: {url}")
        # Context manager guarantees the connection is closed even if
        # read() raises; the original never closed the response.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.read()
    except Exception as e:
        # Best-effort: report the failure and return None so the caller
        # can skip this page instead of crashing the whole run.
        print(f"Error fetching page: {e}")
        return None

def extract_image_urls(html):
    """
    Extract image URLs and titles from a listing page.

    Args:
        html: Raw HTML (bytes or str) as returned by fetch_page, or None.

    Returns:
        A list of dicts with keys 'url' and 'name'; an empty list when
        the page could not be fetched or has an unexpected layout.
    """
    if not html:
        print('No content to process')
        return []

    image_list = []
    soup = BeautifulSoup(html, 'lxml')

    # Guard: an error page or redesigned layout may lack the "main"
    # container; the original crashed with AttributeError on None here.
    main_div = soup.find("div", {"class": "main"})
    if main_div is None:
        print('Unexpected page layout: no "main" container found')
        return image_list

    for item in main_div.find_all('div', {'class': 'item'}):
        # Items without a "text" div are advertisements -- skip them.
        text_div = item.find('div', {"class": "text"})
        if not text_div:
            continue
        img = text_div.find('img')
        title = item.find('h3')
        # Skip malformed items that are missing the image or the title.
        if img is None or title is None:
            continue
        image_list.append({
            'url': img.get('src'),
            'name': title.text.strip(),
        })

    return image_list

def save_image(image_name, image_url, category, page_number):
    """
    Download an image and save it under a dated directory tree.

    Args:
        image_name: Human-readable title, used as part of the filename.
        image_url: Direct URL of the image to download.
        category: Sub-folder name grouping related images.
        page_number: Page the image came from (used as a sub-folder).

    Returns:
        The local path the image was saved to.
    """
    # Directory layout: GIF_Collection/<YYYY-M-D>/<category>/<page>/
    current_time = time.localtime(time.time())
    date_folder = f"{current_time.tm_year}-{current_time.tm_mon}-{current_time.tm_mday}"
    image_dir = f"GIF_Collection/{date_folder}/{category}/{page_number}"

    # Sanitize the title so path separators or other filesystem-hostile
    # characters cannot escape image_dir or produce an invalid filename.
    safe_name = re.sub(r'[^\w\-]+', '_', image_name).strip('_') or 'image'

    # Derive the extension from the URL path only: a query string or
    # fragment ("....gif?size=big") would otherwise leak into the name.
    url_path = image_url.split('?')[0].split('#')[0]
    last_segment = url_path.rsplit('/', 1)[-1]
    file_extension = last_segment.rsplit('.', 1)[-1] if '.' in last_segment else 'gif'

    # uuid4 is purely random; uuid1 embeds the host MAC address and a
    # timestamp, which needlessly leaks machine information in filenames.
    unique_id = uuid.uuid4()
    file_path = f"{image_dir}/{safe_name}_{unique_id}.{file_extension}"

    # Create directories if they don't exist
    os.makedirs(image_dir, exist_ok=True)

    print(f"Saving image to: {file_path}")
    urllib.request.urlretrieve(image_url, file_path)
    print(f"Source URL: {image_url}")
    return file_path

def exit_program():
    """Announce shutdown and terminate the process with exit status 0."""
    # sys.exit raises SystemExit, so nothing after this call ever runs.
    print("Exiting scraper...")
    sys.exit(0)

def scrape_page(page_number):
    """
    Download every image found on one listing page.

    Args:
        page_number: Index used both to build the listing URL and to
            name the destination sub-folder.
    """
    target_url = f"http://www.funnygifs.com/animations/list_4_{page_number}.html"
    page_html = fetch_page(target_url)
    # Persist each extracted image under the fixed category folder.
    for entry in extract_image_urls(page_html):
        save_image(entry['name'], entry['url'], 'Funny_Animations', page_number)

def _prompt_page_count(first_prompt, retry_prompt, maximum):
    """
    Prompt until the user enters an integer in [1, maximum].

    Typing 'quit' (any case) exits the program via exit_program().

    Returns:
        The validated value as an int.
    """
    value = input(first_prompt)
    while not value.isdigit() or not (1 <= int(value) <= maximum):
        if value.lower() == 'quit':
            exit_program()
        print(f"Invalid input. Please enter a number between 1-{maximum}.")
        value = input(retry_prompt)
    return int(value)

if __name__ == '__main__':
    print("""
            *****************************************
            **    GIF Web Scraper Program          **
            **      Python Implementation          **
            *****************************************""")

    # First page to scrape (validated; passed as int -- the original
    # passed a str here but an int for every subsequent page).
    start_page = _prompt_page_count(
        "Enter page number to scrape (1-50), or 'quit' to exit: ",
        "Enter page number to scrape: ",
        50,
    )
    scrape_page(start_page)

    # How many MORE pages to fetch beyond the starting page.
    extra_pages = _prompt_page_count(
        "Enter number of additional pages to scrape (1-5000), or 'quit' to exit: ",
        "Enter number of additional pages to scrape: ",
        5000,
    )

    # Continue from the page after start_page; the original restarted at
    # page 1, re-downloading the page it had already scraped and ignoring
    # the user's chosen starting point.
    for page_num in range(start_page + 1, start_page + extra_pages + 1):
        scrape_page(page_num)

Sample Output


            *****************************************
            **    GIF Web Scraper Program          **
            **      Python Implementation          **
            *****************************************
Enter page number to scrape (1-50), or 'quit' to exit: 1
1
Fetching: http://www.funnygifs.com/animations/list_4_1.html
Saving image to: GIF_Collection/2023-5-10/Funny_Animations/1/Tough_choices_5f0fe8f6-09f8-11e7-9161-f8bc12753d1e.gif
Source URL: http://www.funnygifs.com/uploads/allimg/170206/10-1F206135ZHJ.gif
Saving image to: GIF_Collection/2023-5-10/Funny_Animations/1/That_will_end_badly_3fa9da88-09f8-11e7-9161-f8bc12753d1e.gif
Source URL: http://www.funnygifs.com/uploads/allimg/170206/10-1F206135H35U.gif
Saving image to: GIF_Collection/2023-5-10/Funny_Animations/1/Definitely_Indian_4064e60c-09f8-11e7-9161-f8bc12753d1e.gif
Source URL: http://www.funnygifs.com/uploads/allimg/170206/10-1F20613543c50.gif
Saving image to: GIF_Collection/2023-5-10/Funny_Animations/1/Genuine_work_face_414b4f52-09f8-11e7-9161-f8bc12753d1e.gif
Source URL: http://www.funnygifs.com/uploads/allimg/170206/10-1F206135250553.gif
Saving image to: GIF_Collection/2023-5-10/Funny_Animations/1/What_is_she_shaking_421afa86-09f8-11e7-9161-f8bc12753d1e.gif
Source URL: http://www.funnygifs.com/uploads/allimg/170206/10-1F20613493N03.gif
Enter number of additional pages to scrape (1-5000), or 'quit' to exit: quit
Exiting scraper...

After execution, the animated images will be saved in organized directories on your local system.

Tags: web scraping python Image Collection Animated GIFs Web Automation

Posted on Thu, 07 May 2026 09:14:37 +0000 by onlinegamesnz