This guide demonstrates how to wrap the YOLOv5 model in a lightweight Flask service that lets users upload an image or a short video, view the detections in the browser, and download the annotated result.
What the service provides
- Drag-and-drop or click-to-upload for images and MP4 videos.
- Real-time preview of the original and processed media.
- Click-to-zoom modal for closer inspection.
- One-click download of the processed file.
Project layout
project/
├── app.py
├── yolov5/ # cloned from Ultralytics repo
├── yolov5s.pt # pre-trained weights
├── static/ # auto-generated results
└── templates/
└── index.html
Back-end implementation (app.py)
import base64
import datetime
import os
import tempfile

import cv2
import numpy as np
import torch
from flask import Flask, request, jsonify, render_template
app = Flask(__name__)  # WSGI application instance served by Flask
net = None  # global model handle; populated by init_model() before any request
def init_model(repo_dir='yolov5', weights='yolov5s.pt'):
    """Load a YOLOv5 model from a local clone of the Ultralytics repo.

    Stores the loaded model in the module-level ``net`` handle used by
    :func:`annotate`.

    Args:
        repo_dir: Path to the local YOLOv5 repository clone.
        weights: Path to the ``.pt`` weights file to load.

    Calling ``init_model()`` with no arguments behaves exactly as before,
    so existing callers are unaffected.
    """
    global net
    # source='local' tells torch.hub to use the on-disk repo instead of
    # downloading from GitHub.
    net = torch.hub.load(repo_dir, 'custom',
                         path=weights,
                         source='local')
def annotate(img_bgr):
    """Detect objects in a BGR frame and return the annotated BGR frame.

    The global ``net`` model must have been loaded via init_model().
    """
    rgb = img_bgr[:, :, ::-1]        # YOLOv5 expects RGB channel order
    results = net(rgb)
    rendered = results.render()[0]   # first (only) annotated RGB image
    return rendered[:, :, ::-1]      # flip channels back to BGR for OpenCV
@app.route('/')
def home():
    """Serve the single-page front end (templates/index.html)."""
    return render_template('index.html')
@app.route('/infer_image', methods=['POST'])
def infer_image():
    """Run detection on an uploaded image.

    Expects a multipart form field named ``image``. Returns JSON with the
    annotated image as a base64-encoded PNG under the key ``img``, and
    also stores a timestamped copy under ``static/``.

    Returns HTTP 400 when no image is supplied or it cannot be decoded.
    """
    file = request.files.get('image')
    if file is None:
        return jsonify({'error': 'no image uploaded'}), 400
    buf = np.frombuffer(file.read(), np.uint8)
    img = cv2.imdecode(buf, cv2.IMREAD_COLOR)
    if img is None:
        # Corrupt or unsupported image data — imdecode returns None
        # instead of raising, so it must be checked explicitly.
        return jsonify({'error': 'could not decode image'}), 400
    vis = annotate(img)
    ok, enc = cv2.imencode('.png', vis)
    if not ok:
        return jsonify({'error': 'image encoding failed'}), 500
    b64 = base64.b64encode(enc.tobytes()).decode()
    # Persist a timestamped copy. basename() strips any directory parts a
    # crafted filename could smuggle in (path traversal).
    ts = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    name, ext = os.path.splitext(os.path.basename(file.filename))
    out_name = f'{name}_{ts}{ext or ".png"}'
    os.makedirs('static', exist_ok=True)
    cv2.imwrite(os.path.join('static', out_name), vis)
    return jsonify({'img': b64})
@app.route('/infer_video', methods=['POST'])
def infer_video():
    """Run detection on an uploaded video, frame by frame.

    Expects a multipart form field named ``video``. Writes the annotated
    video into ``static/`` and returns its filename as JSON under the
    key ``video``. Returns HTTP 400 when no video is supplied or it
    cannot be opened.
    """
    up_file = request.files.get('video')
    if up_file is None:
        return jsonify({'error': 'no video uploaded'}), 400
    # Unique temp file instead of a fixed 'tmp_upload.mp4': concurrent
    # uploads would otherwise clobber each other's data.
    fd, tmp_path = tempfile.mkstemp(suffix='.mp4')
    os.close(fd)
    cap = None
    vw = None
    try:
        up_file.save(tmp_path)
        cap = cv2.VideoCapture(tmp_path)
        if not cap.isOpened():
            return jsonify({'error': 'could not open video'}), 400
        # Some containers report 0 FPS; fall back to a sane default so the
        # VideoWriter does not produce a broken file.
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        ts = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        out_name = f'out_{ts}.mp4'
        os.makedirs('static', exist_ok=True)
        out_path = os.path.join('static', out_name)
        vw = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'avc1'),
                             fps, (w, h))
        if not vw.isOpened():
            # 'avc1' (H.264) is missing from many OpenCV builds and fails
            # silently; fall back to the widely bundled MPEG-4 codec.
            vw = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                 fps, (w, h))
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            vw.write(annotate(frame))
    finally:
        # Release handles and remove the temp upload even when inference
        # fails part-way through — the original leaked all three on error.
        if cap is not None:
            cap.release()
        if vw is not None:
            vw.release()
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return jsonify({'video': out_name})
# Load the model once at import time so WSGI servers (gunicorn etc.), not
# just the __main__ path below, get an initialized model.
init_model()
if __name__ == '__main__':
    # NOTE(review): debug=True enables the reloader and interactive
    # debugger — disable for any production deployment.
    app.run(debug=True)
Front-end (templates/index.html)
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>YOLOv5 Web Demo</title>
<style>
body{font-family:Arial;background:#f5f5f5;text-align:center;margin:0;padding:20px}
.pane{display:inline-block;vertical-align:top;margin:10px}
.pane img,.pane video{max-width:100%;border:1px solid #ccc}
#modal{display:none;position:fixed;z-index:9;left:0;top:0;width:100%;height:100%;
background:rgba(0,0,0,.8);cursor:pointer}
#modal img,#modal video{max-height:90vh;margin:auto;display:block}
</style>
</head>
<body>
<h1>YOLOv5 Object Detection</h1>
<div class="pane">
<h3>Image</h3>
<input type="file" id="imgFile" accept="image/*">
<button onclick="sendImg()">Detect</button>
<button onclick="dlImg()" style="display:none" id="dlImgBtn">Download</button>
<br>
<img id="origImg" style="max-width:300px;display:none">
<img id="procImg" style="max-width:300px;display:none">
</div>
<div class="pane">
<h3>Video</h3>
<input type="file" id="vidFile" accept="video/*">
<button onclick="sendVid()">Detect</button>
<button onclick="dlVid()" style="display:none" id="dlVidBtn">Download</button>
<br>
<video id="origVid" controls style="max-width:300px;display:none"></video>
<video id="procVid" controls style="max-width:300px;display:none"></video>
</div>
<div id="modal" onclick="this.style.display='none'"></div>
<script>
// Tiny query helper.
function $(sel){return document.querySelector(sel)}

// Show a cloned copy of the clicked element in the full-screen modal.
function showModal(el){
  const m=$('#modal');
  m.innerHTML='';
  m.appendChild(el.cloneNode(true));
  m.style.display='block';
}

// Upload the selected image, then display original + annotated versions.
function sendImg(){
  const f=$('#imgFile').files[0];
  if(!f) return;
  const fd=new FormData(); fd.append('image',f);
  fetch('/infer_image',{method:'POST',body:fd})
    .then(r=>{
      if(!r.ok) throw new Error('server returned '+r.status);
      return r.json();
    })
    .then(j=>{
      $('#origImg').src=URL.createObjectURL(f);
      $('#origImg').style.display='inline';
      $('#origImg').onclick=()=>showModal($('#origImg'));
      $('#procImg').src='data:image/png;base64,'+j.img;
      $('#procImg').style.display='inline';
      $('#procImg').onclick=()=>showModal($('#procImg'));
      $('#dlImgBtn').style.display='inline';
    })
    .catch(e=>alert('Image detection failed: '+e.message));
}

// Upload the selected video, then display original + annotated versions.
function sendVid(){
  const f=$('#vidFile').files[0];
  if(!f) return;
  const fd=new FormData(); fd.append('video',f);
  fetch('/infer_video',{method:'POST',body:fd})
    .then(r=>{
      if(!r.ok) throw new Error('server returned '+r.status);
      return r.json();
    })
    .then(j=>{
      $('#origVid').src=URL.createObjectURL(f);
      $('#origVid').style.display='inline';
      $('#procVid').src='/static/'+j.video;
      $('#procVid').style.display='inline';
      $('#dlVidBtn').style.display='inline';
    })
    .catch(e=>alert('Video detection failed: '+e.message));
}

// Trigger a browser download of the element's current src.
function dl(el,name){
  const a=document.createElement('a');
  a.href=el.src; a.download=name; a.click();
}
function dlImg(){ dl($('#procImg'),'detected.png'); }
function dlVid(){ dl($('#procVid'),'detected.mp4'); }
</script>
</body>
</html>
Running the service
1. Clone the official YOLOv5 repository into the project folder and place yolov5s.pt next to it.
2. Install the requirements: pip install flask torch torchvision opencv-python
3. Launch the server: python app.py
4. Open http://localhost:5000 in any modern browser.
The static/ directory will automatically store every processed file with a timestamp, so users can revisit or share links directly.