Blocking and Non-Blocking I/O
1. Fundamental Concepts
Scenarios Causing Blockage
Reading from device or network files (e.g., terminal /dev/tty) often leads to blocking, unlike reading regular files which completes in a finite duration.
- Reading from a terminal without a newline character in the input buffer causes the read call to suspend the calling process.
- Reading from a network socket blocks if no data packets have arrived.
- The duration of blockage is indeterminate; the process remains suspended until data becomes available.
Writing to regular files typically returns promptly, whereas writing to terminals or network endpoints may encounter delays.
Blocking Definition
When a process invokes a blocking system call, the kernel places it into a sleep state. The CPU is then reassigned to execute other runnable processes. The blocked process only resumes execution when the awaited event occurs, such as the arrival of network data or the expiration of a sleep timer.
Process Execution States
Executing: The CPU operates within the process context. The program counter holds the current instruction address, and general registers store intermediate computational results while accessing the process address space.
Ready: The process requires no specific event and can execute immediately, but waits in a scheduling queue because the CPU is currently assigned to another task. The kernel scheduler allocates CPU time based on priority and time slices, dynamically adjusting these values to ensure fair execution opportunities and maintain responsive user interaction.
Important Note
Blocking and non-blocking are properties of the file descriptor or the underlying file, not of the read/write functions themselves. Terminal devices default to blocking mode, causing read operations on them to suspend the caller.
2. Blocking Read Operation
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
/*
 * Echo a single read() from standard input to standard output.
 *
 * Returns EXIT_FAILURE if the read or any write fails, 0 otherwise.
 */
int main(void) {
    char input_buf[512];

    /* On a descriptor in blocking mode this call sleeps until data (or
     * end-of-file) is available. */
    ssize_t bytes_read = read(STDIN_FILENO, input_buf, sizeof(input_buf));
    if (bytes_read < 0) {
        perror("Failed to read from stdin");
        return EXIT_FAILURE;
    }

    /* write() may transfer fewer bytes than requested, so keep going until
     * the whole chunk has been echoed or an error is reported. */
    ssize_t total_written = 0;
    while (total_written < bytes_read) {
        ssize_t bytes_written = write(STDOUT_FILENO, input_buf + total_written,
                                      (size_t)(bytes_read - total_written));
        if (bytes_written <= 0) {
            perror("Failed to write to stdout");
            return EXIT_FAILURE;
        }
        total_written += bytes_written;
    }
    return 0;
}
/* This program reads from standard input and echoes the data. Because the terminal defaults to blocking, the read call waits indefinitely until a newline is entered. */
3. Non-Blocking Read Operation
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
/*
 * Demonstrate a non-blocking read: switch standard input to O_NONBLOCK,
 * wait one second, then attempt a single read and print whatever arrived.
 *
 * Returns 1 if the descriptor flags cannot be read or changed, 0 otherwise.
 */
int main(void) {
    int current_flags = fcntl(STDIN_FILENO, F_GETFL);
    if (current_flags < 0) {
        perror("fcntl(F_GETFL)");
        return 1;
    }
    if (fcntl(STDIN_FILENO, F_SETFL, current_flags | O_NONBLOCK) == -1) {
        perror("fcntl(F_SETFL)");
        return 1;
    }

    /* Give the user a moment to type something before the single attempt. */
    sleep(1);

    char small_buf[10] = {0};
    /* read() does not NUL-terminate, so keep one byte free for the
     * terminator that printf("%s") relies on. */
    ssize_t bytes_read = read(STDIN_FILENO, small_buf, sizeof(small_buf) - 1);
    if (bytes_read < 0) {
        /* With O_NONBLOCK set and nothing pending, read() fails right away
         * instead of sleeping; report it and print an empty line. */
        perror("read");
        bytes_read = 0;
    }
    small_buf[bytes_read] = '\0';
    printf("%s\n", small_buf);

    /* File status flags belong to the open file description, which may be
     * shared with other processes (e.g. the invoking shell), so put the
     * original flags back before exiting. */
    if (fcntl(STDIN_FILENO, F_SETFL, current_flags) == -1) {
        perror("fcntl(F_SETFL restore)");
        return 1;
    }
    return 0;
}
/* By setting the O_NONBLOCK flag on standard input, read returns immediately if no data is available, rather than suspending the process. */
Persistent Non-Blocking Read with Error Handling
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
/*
 * Open the controlling terminal in non-blocking mode and retry read() every
 * two seconds until it returns something, then echo that to standard output.
 *
 * Exits with EXIT_FAILURE on any open/read/write error other than the
 * "no data yet" condition.
 */
int main(void) {
    int tty_fd = open("/dev/tty", O_RDONLY | O_NONBLOCK);
    if (tty_fd < 0) {
        perror("open /dev/tty");
        exit(EXIT_FAILURE);
    }

    char data_buf[1024];
    for (;;) {
        ssize_t recv_len = read(tty_fd, data_buf, sizeof(data_buf));
        if (recv_len >= 0) {
            /* Data (or end-of-file, length 0) arrived: echo it and stop. */
            if (write(STDOUT_FILENO, data_buf, (size_t)recv_len) < 0) {
                perror("write error");
                exit(EXIT_FAILURE);
            }
            break;
        }

        /* errno is meaningful only because read() returned -1. "No data
         * yet" may be reported as EAGAIN or EWOULDBLOCK; anything else is a
         * genuine failure. */
        if (errno != EAGAIN && errno != EWOULDBLOCK) {
            perror("read error");
            exit(EXIT_FAILURE);
        }

        const char *hint = "Waiting for input...\n";
        if (write(STDOUT_FILENO, hint, strlen(hint)) < 0) {
            perror("write error");
            exit(EXIT_FAILURE);
        }
        sleep(2);
    }

    close(tty_fd);
    return 0;
}
/* When a non-blocking read encounters no data, it returns -1 and sets errno to EAGAIN (or EWOULDBLOCK). This distinguishes a genuine I/O failure from a temporary unavailability of data. */
4. Encapsulating Mode Switching Functions
// io_mode.h
// Helpers that switch a file descriptor between blocking and non-blocking I/O.
#ifndef IO_MODE_H
#define IO_MODE_H
/*
 * Add O_NONBLOCK to the file status flags of file_desc.
 * Returns -1 if either fcntl() call fails; any other value means success.
 */
int set_nonblocking(int file_desc);
/*
 * Remove O_NONBLOCK from the file status flags of file_desc.
 * Returns -1 if either fcntl() call fails; any other value means success.
 */
int set_blocking(int file_desc);
#endif /* IO_MODE_H */
// io_mode.c
#include <fcntl.h>
#include "io_mode.h"
/*
 * Put file_desc into non-blocking mode by adding O_NONBLOCK to its file
 * status flags, leaving every other flag untouched.
 *
 * Returns 0 on success, or -1 if either fcntl() call fails (errno is left as
 * set by fcntl).
 */
int set_nonblocking(int file_desc) {
    int flags = fcntl(file_desc, F_GETFL);
    if (flags < 0) {
        return -1;
    }
    /* POSIX only promises "a value other than -1" from a successful
     * F_SETFL, so map success to 0 rather than forwarding the raw result. */
    if (fcntl(file_desc, F_SETFL, flags | O_NONBLOCK) == -1) {
        return -1;
    }
    return 0;
}
/*
 * Put file_desc into blocking mode by clearing O_NONBLOCK from its file
 * status flags, leaving every other flag untouched.
 *
 * Returns 0 on success, or -1 if either fcntl() call fails (errno is left as
 * set by fcntl).
 */
int set_blocking(int file_desc) {
    int flags = fcntl(file_desc, F_GETFL);
    if (flags < 0) {
        return -1;
    }
    /* POSIX only promises "a value other than -1" from a successful
     * F_SETFL, so map success to 0 rather than forwarding the raw result. */
    if (fcntl(file_desc, F_SETFL, flags & ~O_NONBLOCK) == -1) {
        return -1;
    }
    return 0;
}
/* These utility functions abstract the fcntl operations, making it straightforward to toggle the blocking state of any file descriptor. */
5. Non-Blocking Read with Timeout Mechanism
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#define PROMPT_MSG "Please enter data: "
#define TIMEOUT_MSG "Operation timed out."

/* Retry policy: how many reads to attempt and how long to pause between them. */
enum { MAX_ATTEMPTS = 5, RETRY_DELAY_SEC = 2 };

/* Write len bytes to standard output, terminating the program if write() fails. */
static void write_or_die(const char *buf, size_t len) {
    if (write(STDOUT_FILENO, buf, len) < 0) {
        perror("write failure");
        exit(EXIT_FAILURE);
    }
}

/*
 * Read from the controlling terminal in non-blocking mode, retrying up to
 * MAX_ATTEMPTS times with RETRY_DELAY_SEC between attempts. Echoes the input
 * if any arrives, otherwise prints a timeout message.
 *
 * Exits with EXIT_FAILURE on open/read/write errors other than "no data yet".
 */
int main(void) {
    int tty_fd = open("/dev/tty", O_RDONLY | O_NONBLOCK);
    if (tty_fd < 0) {
        perror("open /dev/tty");
        exit(EXIT_FAILURE);
    }

    char data_buf[1024];
    ssize_t recv_len = -1;
    int attempt_count;
    for (attempt_count = 0; attempt_count < MAX_ATTEMPTS; attempt_count++) {
        recv_len = read(tty_fd, data_buf, sizeof(data_buf));
        if (recv_len >= 0) {
            /* Data, or end-of-file (0). errno is not set in either case,
             * so it must not be examined here. */
            break;
        }
        if (errno != EAGAIN && errno != EWOULDBLOCK) {
            perror("read failure");
            exit(EXIT_FAILURE);
        }
        write_or_die(PROMPT_MSG, strlen(PROMPT_MSG));
        sleep(RETRY_DELAY_SEC);
    }

    if (attempt_count == MAX_ATTEMPTS) {
        write_or_die(TIMEOUT_MSG, strlen(TIMEOUT_MSG));
    } else {
        write_or_die(data_buf, (size_t)recv_len);
    }

    close(tty_fd);
    return 0;
}
/* This approach uses an active polling loop to repeatedly check for input. Polling is inherently inefficient because it consumes CPU cycles during idle periods. Event-driven mechanisms like select, poll, and epoll offer superior efficiency by notifying the application only when data becomes available. */
I/O Multiplexing Overview
1. select
Headers required:
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
Function signature:
int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout);
Parameters:
- nfds: The highest file descriptor number plus one.
- readfds/writefds/exceptfds: Pointers to descriptor sets monitoring read, write, and exception conditions.
- timeout: Maximum wait duration.
- Returns the number of ready descriptors, 0 on timeout, or -1 on error.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
/*
 * Wait up to 10 seconds for standard input to become readable using
 * select(), then read one integer from it.
 *
 * Prints the resulting value (10 if nothing valid was read) together with
 * the seconds left in the timeout structure after select() returned.
 */
int main(void) {
    int val = 10;
    struct timeval wait_time;
    wait_time.tv_sec = 10;
    wait_time.tv_usec = 0;

    fd_set read_set;
    FD_ZERO(&read_set);
    FD_SET(STDIN_FILENO, &read_set);

    /* nfds is the highest monitored descriptor plus one. */
    int result = select(STDIN_FILENO + 1, &read_set, NULL, NULL, &wait_time);
    if (result < 0) {
        perror("select failed");
        exit(EXIT_FAILURE);
    } else if (result > 0) {
        /* Readable does not mean "contains an integer": keep the default
         * and say so if the conversion fails. */
        if (scanf("%d", &val) != 1) {
            fprintf(stderr, "no integer could be read; keeping default\n");
        }
    } else {
        fprintf(stderr, "select timed out\n");
    }

    /* time_t has no dedicated printf conversion, so cast to match %ld.
     * On Linux select() rewrites wait_time with the time not slept; other
     * systems may leave it unchanged. */
    printf("val = %d remaining_sec = %ld\n", val, (long)wait_time.tv_sec);
    return 0;
}
2. poll
Header required:
#include <poll.h>
Function signature:
int poll(struct pollfd *fds, nfds_t nfds, int timeout);
The pollfd structure:
/* One entry of the array handed to poll(): a descriptor and the events of interest. */
struct pollfd {
int fd; /* File descriptor to monitor */
short events; /* Requested events, set by the caller (e.g. POLLIN) */
short revents; /* Returned events, examined after poll() returns */
};
Parameters:
- fds: Array of pollfd structures.
- nfds: Number of elements in the array.
- timeout: Wait time in milliseconds. 0 returns immediately, negative values wait indefinitely.
- Returns the count of descriptors with non-zero revents, 0 on timeout, or -1 on error.
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define WAIT_SECONDS 5

/*
 * Wait up to WAIT_SECONDS for standard input to become readable using
 * poll(), read one integer from it if it does, and print the value
 * (10 when nothing was read).
 */
int main(void) {
    int val = 10;

    /* A single watched descriptor: standard input, readable events only. */
    struct pollfd watched = { .fd = STDIN_FILENO, .events = POLLIN };
    const int timeout_ms = WAIT_SECONDS * 1000;

    int ready = poll(&watched, 1, timeout_ms);
    if (ready < 0) {
        perror("poll error");
        exit(EXIT_FAILURE);
    }

    if (ready > 0) {
        /* Only consume input when the readable bit was actually reported. */
        if (watched.revents & POLLIN) {
            scanf("%d", &val);
        }
    } else {
        printf("poll timed out\n");
    }

    printf("val = %d\n", val);
    return 0;
}
Key Differences Between select and poll
- select modifies the passed timeval structure to reflect remaining time (on Linux), while poll leaves the timeout parameter unchanged.
- select requires the maximum descriptor number plus one and scans all descriptors from 0 up to that limit, resulting in linear scanning overhead.
- poll only monitors the specific descriptors provided in the array, avoiding unnecessary iteration.
3. epoll
epoll provides an advanced, highly scalable I/O multiplexing mechanism optimized for large numbers of concurrent connections, surpassing the performance limitations of both select and poll.