Epoll Implementation
The following C program demonstrates a high-performance TCP server using the epoll mechanism on Linux. It utilizes non-blocking I/O and edge-triggered notifications to handle multiple concurrent connections efficiently.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/epoll.h>
#define LISTEN_PORT 9000
#define MAX_EVENTS 32
#define BUF_LEN 2048
/*
 * Put a descriptor into non-blocking mode.
 *
 * The current file status flags are fetched first so that O_NONBLOCK is
 * added to them rather than replacing them.
 *
 * Returns -1 if the flags cannot be read; otherwise returns whatever
 * fcntl(F_SETFL) returns (-1 on failure).
 */
int configure_nonblocking(int sock_fd) {
    const int current_flags = fcntl(sock_fd, F_GETFL, 0);

    if (current_flags != -1) {
        return fcntl(sock_fd, F_SETFL, current_flags | O_NONBLOCK);
    }
    return -1;
}
/*
 * Stop watching a client and close its socket.
 *
 * The descriptor is removed from the epoll set BEFORE close(): once the
 * descriptor is closed, EPOLL_CTL_DEL on it can only fail with EBADF.
 */
static void drop_client(int epoll_fd, int client_fd) {
    if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client_fd, NULL) < 0) {
        perror("epoll_ctl: remove client");
    }
    close(client_fd);
}

/*
 * Send exactly `len` bytes from `buf` on a non-blocking socket.
 *
 * Short writes are continued and EINTR is retried. MSG_NOSIGNAL keeps a
 * peer that has already disconnected from killing the process with SIGPIPE.
 *
 * Returns 0 on success, -1 on failure with errno set by send(). EAGAIN /
 * EWOULDBLOCK is reported as a failure because this server keeps no
 * per-client output queue to retry from later.
 */
static int send_all(int fd, const char *buf, size_t len) {
    size_t sent = 0;

    while (sent < len) {
        ssize_t rc = send(fd, buf + sent, len - sent, MSG_NOSIGNAL);
        if (rc < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        sent += (size_t)rc;
    }
    return 0;
}

/*
 * Accept every connection currently queued on the listener and register
 * each one with epoll as a non-blocking, edge-triggered reader.
 */
static void accept_pending(int epoll_fd, int listener_fd) {
    for (;;) {
        int client_fd = accept(listener_fd, NULL, NULL);
        if (client_fd < 0) {
            if (errno == EINTR) {
                continue;
            }
            if (errno != EAGAIN && errno != EWOULDBLOCK) {
                perror("accept error");
            }
            return;
        }

        /* Edge-triggered reads drain until EAGAIN, so the socket must not block. */
        if (configure_nonblocking(client_fd) < 0) {
            perror("fcntl: client");
            close(client_fd);
            continue;
        }

        struct epoll_event ev;
        memset(&ev, 0, sizeof(ev));
        ev.events = EPOLLIN | EPOLLET;
        ev.data.fd = client_fd;
        if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev) < 0) {
            perror("epoll_ctl: client");
            close(client_fd);
            continue; /* do not announce a connection that was just closed */
        }
        printf("New connection established: fd %d\n", client_fd);
    }
}

/*
 * Drain all readable data from a client and echo it back.
 *
 * Reads until EAGAIN because the client is registered edge-triggered and
 * no further notification arrives for data that is already buffered.
 * The client is dropped on EOF, on a read error, or when the echo cannot
 * be sent in full.
 */
static void service_client(int epoll_fd, int client_fd) {
    char data_buf[BUF_LEN];

    for (;;) {
        ssize_t bytes_read = read(client_fd, data_buf, sizeof(data_buf));
        if (bytes_read < 0) {
            if (errno == EINTR) {
                continue;
            }
            if (errno != EAGAIN && errno != EWOULDBLOCK) {
                perror("read error");
                drop_client(epoll_fd, client_fd);
            }
            return;
        }
        if (bytes_read == 0) {
            printf("Client fd %d closed connection\n", client_fd);
            drop_client(epoll_fd, client_fd);
            return;
        }

        /* The payload is handled as raw bytes; no NUL terminator is appended,
         * so a full-buffer read cannot write past the end of data_buf. */
        printf("Received %zd bytes from fd %d\n", bytes_read, client_fd);
        if (send_all(client_fd, data_buf, (size_t)bytes_read) < 0) {
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                fprintf(stderr, "Client fd %d is not draining its socket; dropping connection\n", client_fd);
            } else {
                perror("write error");
            }
            drop_client(epoll_fd, client_fd);
            return;
        }
    }
}

/*
 * Epoll-based TCP echo server listening on LISTEN_PORT.
 *
 * Sets up a non-blocking listening socket, registers it with epoll
 * (level-triggered), and then dispatches events forever: listener events
 * accept new clients, client events read and echo data.
 *
 * Exits with EXIT_FAILURE if setup fails. The event loop only ends when
 * epoll_wait() fails with something other than EINTR, in which case the
 * descriptors are closed and EXIT_FAILURE is returned.
 */
int main(void) {
    int listener_sock, epoll_instance;
    struct sockaddr_in host_addr;
    struct epoll_event event, ready_events[MAX_EVENTS];

    listener_sock = socket(AF_INET, SOCK_STREAM, 0);
    if (listener_sock < 0) {
        perror("Failed to create socket");
        exit(EXIT_FAILURE);
    }

    /* Best effort: without it the server still works, but a quick restart may fail to bind. */
    int reuse = 1;
    if (setsockopt(listener_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        perror("setsockopt SO_REUSEADDR");
    }

    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_addr.s_addr = INADDR_ANY;
    host_addr.sin_port = htons(LISTEN_PORT);

    if (bind(listener_sock, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        perror("Bind failed");
        close(listener_sock);
        exit(EXIT_FAILURE);
    }
    if (listen(listener_sock, SOMAXCONN) < 0) {
        perror("Listen failed");
        close(listener_sock);
        exit(EXIT_FAILURE);
    }

    /* The accept loop runs until EAGAIN; a blocking listener would stall the whole server. */
    if (configure_nonblocking(listener_sock) < 0) {
        perror("fcntl: listener");
        close(listener_sock);
        exit(EXIT_FAILURE);
    }

    epoll_instance = epoll_create1(0);
    if (epoll_instance < 0) {
        perror("epoll_create1 failed");
        close(listener_sock);
        exit(EXIT_FAILURE);
    }

    memset(&event, 0, sizeof(event));
    event.events = EPOLLIN;
    event.data.fd = listener_sock;
    if (epoll_ctl(epoll_instance, EPOLL_CTL_ADD, listener_sock, &event) < 0) {
        perror("epoll_ctl: listener");
        close(epoll_instance);
        close(listener_sock);
        exit(EXIT_FAILURE);
    }

    printf("Epoll server running on port %d\n", LISTEN_PORT);

    while (1) {
        int event_count = epoll_wait(epoll_instance, ready_events, MAX_EVENTS, -1);
        if (event_count < 0) {
            if (errno == EINTR) {
                continue; /* interrupted by a signal; not a failure */
            }
            perror("epoll_wait error");
            break;
        }

        for (int i = 0; i < event_count; i++) {
            int ready_fd = ready_events[i].data.fd;
            if (ready_fd == listener_sock) {
                accept_pending(epoll_instance, listener_sock);
            } else {
                service_client(epoll_instance, ready_fd);
            }
        }
    }

    close(listener_sock);
    close(epoll_instance);
    return EXIT_FAILURE;
}
Select Implementation
This example illustrates a TCP server built using the standard POSIX select system call. It monitors multiple file descriptors within a single thread to manage client connections.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/select.h>
#include <arpa/inet.h>
#define SERV_PORT 8080
#define MAX_CONN 1024
#define MSG_SIZE 1024
/*
 * Flags for echoing data back to a client. Where the platform offers
 * MSG_NOSIGNAL it is used so that writing to a peer that has already
 * disconnected yields an error instead of a process-killing SIGPIPE.
 */
#ifdef MSG_NOSIGNAL
#define ECHO_SEND_FLAGS MSG_NOSIGNAL
#else
#define ECHO_SEND_FLAGS 0
#endif

/*
 * Send exactly `len` bytes from `buf` on a socket, continuing after short
 * writes and retrying when interrupted by a signal.
 *
 * Returns 0 on success, -1 on failure with errno set by send().
 */
static int write_fully(int fd, const char *buf, size_t len) {
    size_t done = 0;

    while (done < len) {
        ssize_t rc = send(fd, buf + done, len - done, ECHO_SEND_FLAGS);
        if (rc < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        done += (size_t)rc;
    }
    return 0;
}

/*
 * select()-based TCP echo server listening on SERV_PORT.
 *
 * Keeps up to MAX_CONN client descriptors in client_pool (-1 marks a free
 * slot) and mirrors them in master_set. Each loop iteration copies
 * master_set into work_set, waits in select(), accepts at most one new
 * client, and then reads from / echoes to every client marked ready.
 *
 * Exits with EXIT_FAILURE if setup fails. The loop only ends when select()
 * fails with something other than EINTR; all descriptors are then closed
 * and EXIT_FAILURE is returned.
 */
int main(void) {
    int main_sock, new_sock, active_sock;
    struct sockaddr_in serv_addr, cli_addr;
    socklen_t cli_len;
    char msg_buffer[MSG_SIZE];
    int client_pool[MAX_CONN];
    fd_set master_set, work_set;
    int max_fd_index;
    int i;

    main_sock = socket(AF_INET, SOCK_STREAM, 0);
    if (main_sock < 0) {
        perror("socket creation failed");
        exit(EXIT_FAILURE);
    }

    /* Best effort: without it the server still works, but a quick restart may fail to bind. */
    int opt_val = 1;
    if (setsockopt(main_sock, SOL_SOCKET, SO_REUSEADDR, &opt_val, sizeof(opt_val)) < 0) {
        perror("setsockopt SO_REUSEADDR");
    }

    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(SERV_PORT);
    serv_addr.sin_addr.s_addr = INADDR_ANY;

    if (bind(main_sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
        perror("bind failed");
        close(main_sock);
        exit(EXIT_FAILURE);
    }
    if (listen(main_sock, 10) < 0) {
        perror("listen failed");
        close(main_sock);
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < MAX_CONN; i++) {
        client_pool[i] = -1;
    }
    FD_ZERO(&master_set);
    FD_SET(main_sock, &master_set);
    max_fd_index = main_sock;

    printf("Select server listening on port %d\n", SERV_PORT);

    while (1) {
        /* select() overwrites the set it is given, so work on a copy. */
        work_set = master_set;
        int activity = select(max_fd_index + 1, &work_set, NULL, NULL, NULL);
        if (activity < 0) {
            if (errno == EINTR) {
                continue; /* interrupted by a signal; not a failure */
            }
            /* Any other error would repeat on every call; stop instead of spinning. */
            perror("select error");
            break;
        }

        if (FD_ISSET(main_sock, &work_set)) {
            cli_len = sizeof(cli_addr);
            new_sock = accept(main_sock, (struct sockaddr *)&cli_addr, &cli_len);
            if (new_sock < 0) {
                perror("accept failed");
                continue;
            }
            printf("Connection from %s:%d\n", inet_ntoa(cli_addr.sin_addr), ntohs(cli_addr.sin_port));

            /* FD_SET on a descriptor >= FD_SETSIZE is undefined behavior. */
            if (new_sock >= FD_SETSIZE) {
                fprintf(stderr, "fd %d exceeds FD_SETSIZE; rejecting client\n", new_sock);
                close(new_sock);
                continue;
            }

            for (i = 0; i < MAX_CONN; i++) {
                if (client_pool[i] == -1) {
                    client_pool[i] = new_sock;
                    break;
                }
            }
            if (i == MAX_CONN) {
                fprintf(stderr, "Max clients reached\n");
                close(new_sock);
                continue;
            }

            FD_SET(new_sock, &master_set);
            if (new_sock > max_fd_index) {
                max_fd_index = new_sock;
            }
            if (--activity <= 0) {
                continue;
            }
        }

        for (i = 0; i < MAX_CONN; i++) {
            active_sock = client_pool[i];
            if (active_sock == -1) {
                continue;
            }
            if (!FD_ISSET(active_sock, &work_set)) {
                continue;
            }

            /* Leave one byte free so the data can be NUL-terminated for printf("%s"). */
            ssize_t n = read(active_sock, msg_buffer, MSG_SIZE - 1);
            if (n < 0 && errno == EINTR) {
                continue; /* still readable; picked up by the next select() */
            }

            if (n <= 0) {
                if (n == 0) {
                    printf("Client fd %d disconnected\n", active_sock);
                } else {
                    perror("read error");
                }
                close(active_sock);
                FD_CLR(active_sock, &master_set);
                client_pool[i] = -1;
            } else {
                msg_buffer[n] = '\0';
                printf("Received from fd %d: %s", active_sock, msg_buffer);
                if (write_fully(active_sock, msg_buffer, (size_t)n) < 0) {
                    perror("write error");
                    close(active_sock);
                    FD_CLR(active_sock, &master_set);
                    client_pool[i] = -1;
                }
            }

            if (--activity <= 0) {
                break;
            }
        }
    }

    /* Reached only after a fatal select() error: release every descriptor. */
    for (i = 0; i < MAX_CONN; i++) {
        if (client_pool[i] != -1) {
            close(client_pool[i]);
        }
    }
    close(main_sock);
    return EXIT_FAILURE;
}
The Role of SO_REUSEADDR
In the implementations above, the socket option SO_REUSEADDR is set via setsockopt. This is critical for development and rapid server restarts. When a TCP connection is closed, the endpoint that initiated the close enters the TIME_WAIT state so that any delayed packets still in the network can be handled safely. This state persists for a fixed timeout (about 60 seconds on Linux, and up to a few minutes on other systems). If you attempt to restart the server immediately after stopping it, the bind() call can fail with "Address already in use" because the port is still associated with connections in TIME_WAIT. Enabling SO_REUSEADDR before calling bind() allows the new socket to bind to the same port even while those connections from the previous instance are still waiting in the kernel, thereby facilitating immediate restarts.
Technical Comparison: Select vs. Epoll
| Feature | Select | Epoll |
|---|---|---|
| Platform Support | Universal POSIX standard; available on nearly all Unix-like systems. | Linux-specific (introduced in kernel 2.6); not portable to Windows or BSD natively. |
| Monitoring Mechanism | Polling; iterates through the entire file descriptor set on every call. | Event-driven; the kernel maintains a list and notifies the user space only upon state changes. |
| Connection Limit | Limited by FD_SETSIZE (commonly 1024); increasing it requires recompilation. | Practically unlimited (bounded only by system RAM and file limits). |
| Performance Complexity | O(n); performance degrades linearly as the number of monitored connections increases. | O(1) with respect to the number of monitored connections; the cost of each epoll_wait call depends on the number of active events, not the total connections. |
| Memory Overhead | Requires copying the entire fd set from user space to kernel space on every call. | The interest list is kept inside the kernel and is updated only when fds are added or modified via epoll_ctl; each epoll_wait call copies out only the events that are ready. |
| Trigger Modes | Supports Level Triggered (LT) only. | Supports both Level Triggered (LT) and Edge Triggered (ET) for higher efficiency. |
| API Complexity | Simple interface, but repetitive setup required for each loop iteration. | More complex setup (epoll_create, epoll_ctl, epoll_wait), but efficient for long-running processes. |
| Ideal Use Case | Low-concurrency scenarios, cross-platform applications, or simple tools. | High-performance servers handling massive numbers of concurrent connections (C10k problem). |