Capturing Network Traffic with Raw Sockets in Go
The most common library for capturing network traffic in Go is github.com/google/gopacket, which requires libpcap and therefore must be compiled with CGO enabled. To reduce environmental dependencies, we can use raw sockets to capture network traffic and then leverage gopacket's protocol parsing capabilities. This approach eliminates the need to implement protocol parsing ourselves while ensuring correctness, and it allows us to disable CGO compilation.
The Cilium project provides a test case for opening raw sockets:
// Both openRawSock and htons are available in
// https://github.com/cilium/ebpf/blob/master/example_sock_elf_test.go.
// MIT license.

// OpenRawSocket creates a non-blocking, close-on-exec AF_PACKET raw socket for
// all Ethernet protocols (ETH_P_ALL) and binds it to the interface with the
// given index.
//
// It returns the socket file descriptor. If creating or binding the socket
// fails, any opened descriptor is closed and 0 is returned with the error.
func OpenRawSocket(index int) (int, error) {
	// The protocol must be given in network byte order, both to socket(2)
	// and in the link-layer address passed to bind(2).
	proto := htons(syscall.ETH_P_ALL)

	fd, err := syscall.Socket(
		syscall.AF_PACKET,
		syscall.SOCK_RAW|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC,
		int(proto),
	)
	if err != nil {
		return 0, err
	}

	addr := &syscall.SockaddrLinklayer{Protocol: proto, Ifindex: index}
	if bindErr := syscall.Bind(fd, addr); bindErr != nil {
		syscall.Close(fd)
		return 0, bindErr
	}
	return fd, nil
}
// htons returns i with its two bytes laid out in network (big-endian) order,
// reinterpreted as a host-order uint16. It mirrors C's htons(3): on a
// big-endian host the value is unchanged, on a little-endian host the bytes
// are swapped.
func htons(i uint16) uint16 {
	var out uint16
	// View out's own storage as two bytes and store i into it big-endian.
	raw := (*[2]byte)(unsafe.Pointer(&out))
	binary.BigEndian.PutUint16(raw[:], i)
	return out
}
However, this example has a limitation: it can only capture traffic originating from the local host.
Capturing Bridged Non-Local Traffic
Using tcpdump, it's possible to capture forwarded traffic passing through network bridges. By analyzing tcpdump with strace, we can observe the system calls it makes:
root@localhost:~# strace -f tcpdump -i b_2_0 arp -nne
...
socket(AF_PACKET, SOCK_RAW, htons(0 /* ETH_P_??? */)) = 4
ioctl(4, SIOCGIFINDEX, {ifr_name="lo", ifr_ifindex=1}) = 0
ioctl(4, SIOCGIFHWADDR, {ifr_name="b_2_0", ifr_hwaddr={sa_family=ARPHRD_ETHER, sa_data=4e:59:d6:32:f6:42}}) = 0
newfstatat(AT_FDCWD, "/sys/class/net/b_2_0/wireless", 0x7ffdf063bc50, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/sys/class/net/b_2_0/dsa/tagging", O_RDONLY) = -1 ENOENT (No such file or directory)
ioctl(4, SIOCGIFINDEX, {ifr_name="b_2_0", ifr_ifindex=6053}) = 0
bind(4, {sa_family=AF_PACKET, sll_protocol=htons(0 /* ETH_P_??? */), sll_ifindex=if_nametoindex("b_2_0"), sll_hatype=ARPHRD_NETROM, sll_pkttype=PACKET_HOST, sll_halen=0}, 20) = 0
getsockopt(4, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
setsockopt(4, SOL_PACKET, PACKET_ADD_MEMBERSHIP, {mr_ifindex=if_nametoindex("b_2_0"), mr_type=PACKET_MR_PROMISC, mr_alen=0, mr_address=4e:59:d6:32:f6:42}, 16) = 0
getsockopt(4, SOL_SOCKET, SO_BPF_EXTENSIONS, [64], [4]) = 0
mmap(NULL, 266240, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fec47cbe000
We notice a setsockopt(PACKET_ADD_MEMBERSHIP) call with mr_type=PACKET_MR_PROMISC, which enables promiscuous mode at the socket level. The Suricata project's implementation contains similar code:
/* Start from an all-zero membership request. */
memset(&sock_params, 0, sizeof(sock_params));
/* Request promiscuous mode for the interface index taken from bind_address. */
sock_params.mr_type = PACKET_MR_PROMISC;
sock_params.mr_ifindex = bind_address.sll_ifindex;
r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
/* A negative return is logged and control jumps to the socket_err label. */
if (r < 0) {
SCLogError("%s: failed to set promisc mode: %s", devname, strerror(errno));
goto socket_err;
}
The Go implementation for setting socket-level promiscuous mode is as follows:
// Set socket level PROMISC mode: add a PACKET_MR_PROMISC membership for the
// interface identified by index via the PACKET_ADD_MEMBERSHIP socket option.
// This fragment assigns to an err variable declared by the enclosing function.
err = unix.SetsockoptPacketMreq(sock, syscall.SOL_PACKET, syscall.PACKET_ADD_MEMBERSHIP, &unix.PacketMreq{Type: unix.PACKET_MR_PROMISC, Ifindex: int32(index)})
if err != nil {
// Close the socket before reporting the failure to the caller.
syscall.Close(sock)
return 0, err
}
Capturing VLAN Traffic
Currently, we can only capture standard Ethernet traffic. To also capture VLAN IDs, we need to set the PACKET_AUXDATA option. According to the packet man page:
PACKET_AUXDATA (since Linux 2.6.21)
If this binary option is enabled, the packet socket passes
a metadata structure along with each packet in the
recvmsg(2) control field. The structure can be read with
cmsg(3). It is defined as
struct tpacket_auxdata {
__u32 tp_status;
__u32 tp_len; /* packet length */
__u32 tp_snaplen; /* captured length */
__u16 tp_mac;
__u16 tp_net;
__u16 tp_vlan_tci;
__u16 tp_vlan_tpid; /* Since Linux 3.14; earlier, these
were unused padding bytes */
};
The Go implementation is:
// Enable PACKET_AUXDATA option for VLAN: with the option set to 1 the packet
// socket passes a tpacket_auxdata structure (including tp_vlan_tci) in the
// recvmsg control data of each packet.
if err := syscall.SetsockoptInt(sock, syscall.SOL_PACKET, unix.PACKET_AUXDATA, 1); err != nil {
// Close the socket before reporting the failure to the caller.
syscall.Close(sock)
return 0, err
}
Complete OpenRawSocket Implementation
The complete implementation combining all these features is:
func OpenRawSocket(index int) (int, error) {
sock, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, int(htons(syscall.ETH_P_ALL)))
if err != nil {
return 0, err
}
// Enable PACKET_AUXDATA option for VLAN
if err := syscall.SetsockoptInt(sock, syscall.SOL_PACKET, unix.PACKET_AUXDATA, 1); err != nil {
syscall.Close(sock)
return 0, err
}
// Set socket level PROMISC mode
err = unix.SetsockoptPacketMreq(sock, syscall.SOL_PACKET, syscall.PACKET_ADD_MEMBERSHIP, &unix.PacketMreq{Type: unix.PACKET_MR_PROMISC, Ifindex: int32(index)})
if err != nil {
syscall.Close(sock)
return 0, err
}
sll := syscall.SockaddrLinklayer{Ifindex: index, Protocol: htons(syscall.ETH_P_ALL)}
if err := syscall.Bind(sock, &sll); err != nil {
syscall.Close(sock)
return 0, err
}
return sock, nil
}
Reading Data from File Descriptor
We can use select(2) to monitor the file descriptor and recvmsg(2) to read data, including VLAN tags. The implementation is as follows:
package pcap
import (
"context"
"syscall"
)
// fdWordBits reports how many descriptor bits each element of p.Bits holds.
//
// On Linux an fd_set always describes FD_SETSIZE (1024) descriptors, but the
// element type of syscall.FdSet.Bits depends on the architecture ([16]int64
// on 64-bit ports, [32]int32 on 32-bit ports). Deriving the width from the
// array length keeps the helpers below correct on both, instead of assuming
// 64-bit words.
func fdWordBits(p *syscall.FdSet) int { return 1024 / len(p.Bits) }

// FD_SET adds fd to the set p, like the C macro of the same name.
// fd must be in [0, 1024); a value of 1024 or more indexes past the end of
// p.Bits and panics.
func FD_SET(fd int, p *syscall.FdSet) {
	w := fdWordBits(p)
	p.Bits[fd/w] |= 1 << (uint(fd) % uint(w))
}

// FD_CLR removes fd from the set p. The same range restriction as FD_SET
// applies.
func FD_CLR(fd int, p *syscall.FdSet) {
	w := fdWordBits(p)
	p.Bits[fd/w] &^= 1 << (uint(fd) % uint(w))
}

// FD_ISSET reports whether fd is a member of the set p. The same range
// restriction as FD_SET applies.
func FD_ISSET(fd int, p *syscall.FdSet) bool {
	w := fdWordBits(p)
	return p.Bits[fd/w]&(1<<(uint(fd)%uint(w))) != 0
}

// FD_ZERO removes every descriptor from the set p.
func FD_ZERO(p *syscall.FdSet) {
	for i := range p.Bits {
		p.Bits[i] = 0
	}
}
// RecvmsgHandler is invoked by RecvmsgLoop after every recvmsg(2) attempt.
//
// buf and oob are the loop's reusable data and control buffers; n and oobn are
// the byte counts recvmsg reported for them. Both buffers are overwritten by
// the next read, so the handler must copy anything it wants to keep. err is
// the error returned by recvmsg, passed through unmodified. Returning a
// non-nil error stops the loop.
type RecvmsgHandler func(buf []byte, n int, oob []byte, oobn int, err error) error

// RecvmsgLoop polls sockfd with select(2) using a 100ms timeout and calls fn
// with the result of recvmsg(2) each time the descriptor becomes readable.
//
// The loop ends when:
//   - ctx is done, returning ctx.Err();
//   - fn returns a non-nil error, which is returned as is;
//   - select fails with anything other than EINTR, returning that error
//     (or ctx.Err() if ctx was cancelled in the meantime);
//   - sockfd cannot be used with select, returning syscall.EINVAL.
func RecvmsgLoop(ctx context.Context, sockfd int, fn RecvmsgHandler) error {
	// select(2) can only watch descriptors below FD_SETSIZE (1024 on Linux);
	// FD_SET would index outside the set for anything else.
	const fdSetSize = 1024
	if sockfd < 0 || sockfd >= fdSetSize {
		return syscall.EINVAL
	}

	buf := make([]byte, 1024*64)
	oob := make([]byte, syscall.CmsgSpace(1024))
	var readfds syscall.FdSet

	for {
		if err := ctx.Err(); err != nil {
			return err
		}

		// The set and the timeout are rebuilt on every iteration because they
		// are passed to select by pointer and may be modified by the call.
		FD_ZERO(&readfds)
		FD_SET(sockfd, &readfds)
		tv := syscall.Timeval{Sec: 0, Usec: 100000} // 100ms
		nfds, err := syscall.Select(sockfd+1, &readfds, nil, nil, &tv)
		if err != nil {
			if err == syscall.EINTR {
				// Interrupted by a signal: simply retry.
				continue
			}
			// Any other failure (e.g. a closed descriptor) would repeat on
			// every iteration, so stop instead of spinning. Prefer reporting
			// cancellation when the caller shut the loop down concurrently.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
			return err
		}
		if nfds <= 0 || !FD_ISSET(sockfd, &readfds) {
			continue
		}

		n, oobn, _, _, recvErr := syscall.Recvmsg(sockfd, buf, oob, 0)
		if err := fn(buf, n, oob, oobn, recvErr); err != nil {
			return err
		}
	}
}
The VLAN data parsing logic is as follows:
// decodeVlanIdByAuxData extracts the VLAN tag control information from the
// control messages of a packet received on a socket with PACKET_AUXDATA
// enabled.
//
// oob is the control-message data to parse, typically oob[:oobn] as reported
// by recvmsg. The function looks for a SOL_PACKET/PACKET_AUXDATA message that
// is large enough to hold struct tpacket_auxdata and whose status has
// TP_STATUS_VLAN_VALID set, and returns its tp_vlan_tci field.
//
// The returned value is the full 16-bit TCI; the VLAN ID is its low 12 bits
// (the upper bits carry priority and DEI). It returns (0, nil) when no valid
// VLAN information is present, and a non-nil error only when the control
// data cannot be parsed.
//
// NOTE(review): the fields are decoded as little-endian. The structure is
// presumably written by the kernel in host byte order, so this would need
// attention on big-endian hosts — confirm before using there.
func decodeVlanIdByAuxData(oob []byte) (uint16, error) {
	// Size of struct tpacket_auxdata: three __u32 fields followed by four
	// __u16 fields.
	const auxdataSize = 20

	msgs, err := syscall.ParseSocketControlMessage(oob)
	if err != nil {
		return 0, err
	}
	for _, m := range msgs {
		if m.Header.Level != syscall.SOL_PACKET || m.Header.Type != unix.PACKET_AUXDATA || len(m.Data) < auxdataSize {
			continue
		}
		status := binary.LittleEndian.Uint32(m.Data[0:4])  // tp_status
		tci := binary.LittleEndian.Uint16(m.Data[16:18])   // tp_vlan_tci
		if status&unix.TP_STATUS_VLAN_VALID != 0 {
			return tci, nil
		}
	}
	return 0, nil
}
Recent Updates
Recently, it was discovered that gopacket actually implements raw socket packet capture in pcapgo/capture.go. However, pcapgo uses blocking calls, which means each capture loop occupies an operating system thread. Therefore, when monitoring multiple network interfaces, working directly with file descriptors provides greater flexibility.
Using syscall.Syscall(unix.SYS_RECVMSG) instead of unix.Recvmsg to avoid sockaddr memory allocation is also a valuable optimization.
Conclusion
The code presented here has been tested in production environments with minor modifications to details and the use of epoll(2) for monitoring. Combined with sync.Pool and streamlined parsing logic, the performance meets our requirements.