mirror of
https://git.zx2c4.com/wireguard-go
synced 2025-10-12 03:50:09 +08:00
tun: implement UDP GSO/GRO for Linux
Implement UDP GSO and GRO for the Linux tun.Device, which is made
possible by virtio extensions in the kernel's TUN driver starting in
v6.2.
secnetperf, a QUIC benchmark utility from microsoft/msquic@8e1eb1a, is
used to demonstrate the effect of this commit between two Linux
computers with i5-12400 CPUs. There is roughly ~13us of round trip
latency between them. secnetperf was invoked with the following command
line options:
-stats:1 -exec:maxtput -test:tput -download:10000 -timed:1 -encrypt:0
The first result is from commit 2e0774f
without UDP GSO/GRO on the TUN.
[conn][0x55739a144980] STATS: EcnCapable=0 RTT=3973 us
SendTotalPackets=55859 SendSuspectedLostPackets=61
SendSpuriousLostPackets=59 SendCongestionCount=27
SendEcnCongestionCount=0 RecvTotalPackets=2779122
RecvReorderedPackets=0 RecvDroppedPackets=0
RecvDuplicatePackets=0 RecvDecryptionFailures=0
Result: 3654977571 bytes @ 2922821 kbps (10003.972 ms).
The second result is with UDP GSO/GRO on the TUN.
[conn][0x56493dfd09a0] STATS: EcnCapable=0 RTT=1216 us
SendTotalPackets=165033 SendSuspectedLostPackets=64
SendSpuriousLostPackets=61 SendCongestionCount=53
SendEcnCongestionCount=0 RecvTotalPackets=11845268
RecvReorderedPackets=25267 RecvDroppedPackets=0
RecvDuplicatePackets=0 RecvDecryptionFailures=0
Result: 15574671184 bytes @ 12458214 kbps (10001.222 ms).
Signed-off-by: Jordan Whited <jordan@tailscale.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
This commit is contained in:

committed by
Jason A. Donenfeld

parent
1cf89f5339
commit
d0bc03c707
@@ -38,6 +38,7 @@ type NativeTun struct {
|
||||
statusListenersShutdown chan struct{}
|
||||
batchSize int
|
||||
vnetHdr bool
|
||||
udpGSO bool
|
||||
|
||||
closeOnce sync.Once
|
||||
|
||||
@@ -48,9 +49,10 @@ type NativeTun struct {
|
||||
readOpMu sync.Mutex // readOpMu guards readBuff
|
||||
readBuff [virtioNetHdrLen + 65535]byte // if vnetHdr every read() is prefixed by virtioNetHdr
|
||||
|
||||
writeOpMu sync.Mutex // writeOpMu guards toWrite, tcp4GROTable, tcp6GROTable
|
||||
toWrite []int
|
||||
tcp4GROTable, tcp6GROTable *tcpGROTable
|
||||
writeOpMu sync.Mutex // writeOpMu guards toWrite, tcpGROTable
|
||||
toWrite []int
|
||||
tcpGROTable *tcpGROTable
|
||||
udpGROTable *udpGROTable
|
||||
}
|
||||
|
||||
func (tun *NativeTun) File() *os.File {
|
||||
@@ -333,8 +335,8 @@ func (tun *NativeTun) nameSlow() (string, error) {
|
||||
func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
|
||||
tun.writeOpMu.Lock()
|
||||
defer func() {
|
||||
tun.tcp4GROTable.reset()
|
||||
tun.tcp6GROTable.reset()
|
||||
tun.tcpGROTable.reset()
|
||||
tun.udpGROTable.reset()
|
||||
tun.writeOpMu.Unlock()
|
||||
}()
|
||||
var (
|
||||
@@ -343,7 +345,7 @@ func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
|
||||
)
|
||||
tun.toWrite = tun.toWrite[:0]
|
||||
if tun.vnetHdr {
|
||||
err := handleGRO(bufs, offset, tun.tcp4GROTable, tun.tcp6GROTable, &tun.toWrite)
|
||||
err := handleGRO(bufs, offset, tun.tcpGROTable, tun.udpGROTable, tun.udpGSO, &tun.toWrite)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@@ -394,37 +396,42 @@ func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, e
|
||||
sizes[0] = n
|
||||
return 1, nil
|
||||
}
|
||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
|
||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||
return 0, fmt.Errorf("unsupported virtio GSO type: %d", hdr.gsoType)
|
||||
}
|
||||
|
||||
ipVersion := in[0] >> 4
|
||||
switch ipVersion {
|
||||
case 4:
|
||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 {
|
||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
||||
}
|
||||
case 6:
|
||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
|
||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
||||
}
|
||||
default:
|
||||
return 0, fmt.Errorf("invalid ip header version: %d", ipVersion)
|
||||
}
|
||||
|
||||
if len(in) <= int(hdr.csumStart+12) {
|
||||
return 0, errors.New("packet is too short")
|
||||
}
|
||||
// Don't trust hdr.hdrLen from the kernel as it can be equal to the length
|
||||
// of the entire first packet when the kernel is handling it as part of a
|
||||
// FORWARD path. Instead, parse the TCP header length and add it onto
|
||||
// FORWARD path. Instead, parse the transport header length and add it onto
|
||||
// csumStart, which is synonymous for IP header length.
|
||||
tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4)
|
||||
if tcpHLen < 20 || tcpHLen > 60 {
|
||||
// A TCP header must be between 20 and 60 bytes in length.
|
||||
return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen)
|
||||
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_UDP_L4 {
|
||||
hdr.hdrLen = hdr.csumStart + 8
|
||||
} else {
|
||||
if len(in) <= int(hdr.csumStart+12) {
|
||||
return 0, errors.New("packet is too short")
|
||||
}
|
||||
|
||||
tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4)
|
||||
if tcpHLen < 20 || tcpHLen > 60 {
|
||||
// A TCP header must be between 20 and 60 bytes in length.
|
||||
return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen)
|
||||
}
|
||||
hdr.hdrLen = hdr.csumStart + tcpHLen
|
||||
}
|
||||
hdr.hdrLen = hdr.csumStart + tcpHLen
|
||||
|
||||
if len(in) < int(hdr.hdrLen) {
|
||||
return 0, fmt.Errorf("length of packet (%d) < virtioNetHdr.hdrLen (%d)", len(in), hdr.hdrLen)
|
||||
@@ -438,7 +445,7 @@ func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, e
|
||||
return 0, fmt.Errorf("end of checksum offset (%d) exceeds packet length (%d)", cSumAt+1, len(in))
|
||||
}
|
||||
|
||||
return tcpTSO(in, hdr, bufs, sizes, offset)
|
||||
return gsoSplit(in, hdr, bufs, sizes, offset, ipVersion == 6)
|
||||
}
|
||||
|
||||
func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) {
|
||||
@@ -497,7 +504,8 @@ func (tun *NativeTun) BatchSize() int {
|
||||
|
||||
const (
|
||||
// TODO: support TSO with ECN bits
|
||||
tunOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6
|
||||
tunTCPOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6
|
||||
tunUDPOffloads = unix.TUN_F_USO4 | unix.TUN_F_USO6
|
||||
)
|
||||
|
||||
func (tun *NativeTun) initFromFlags(name string) error {
|
||||
@@ -519,12 +527,17 @@ func (tun *NativeTun) initFromFlags(name string) error {
|
||||
}
|
||||
got := ifr.Uint16()
|
||||
if got&unix.IFF_VNET_HDR != 0 {
|
||||
err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunOffloads)
|
||||
// tunTCPOffloads were added in Linux v2.6. We require their support
|
||||
// if IFF_VNET_HDR is set.
|
||||
err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
tun.vnetHdr = true
|
||||
tun.batchSize = conn.IdealBatchSize
|
||||
// tunUDPOffloads were added in Linux v6.2. We do not return an
|
||||
// error if they are unsupported at runtime.
|
||||
tun.udpGSO = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads|tunUDPOffloads) == nil
|
||||
} else {
|
||||
tun.batchSize = 1
|
||||
}
|
||||
@@ -575,8 +588,8 @@ func CreateTUNFromFile(file *os.File, mtu int) (Device, error) {
|
||||
events: make(chan Event, 5),
|
||||
errors: make(chan error, 5),
|
||||
statusListenersShutdown: make(chan struct{}),
|
||||
tcp4GROTable: newTCPGROTable(),
|
||||
tcp6GROTable: newTCPGROTable(),
|
||||
tcpGROTable: newTCPGROTable(),
|
||||
udpGROTable: newUDPGROTable(),
|
||||
toWrite: make([]int, 0, conn.IdealBatchSize),
|
||||
}
|
||||
|
||||
@@ -628,12 +641,12 @@ func CreateUnmonitoredTUNFromFD(fd int) (Device, string, error) {
|
||||
}
|
||||
file := os.NewFile(uintptr(fd), "/dev/tun")
|
||||
tun := &NativeTun{
|
||||
tunFile: file,
|
||||
events: make(chan Event, 5),
|
||||
errors: make(chan error, 5),
|
||||
tcp4GROTable: newTCPGROTable(),
|
||||
tcp6GROTable: newTCPGROTable(),
|
||||
toWrite: make([]int, 0, conn.IdealBatchSize),
|
||||
tunFile: file,
|
||||
events: make(chan Event, 5),
|
||||
errors: make(chan error, 5),
|
||||
tcpGROTable: newTCPGROTable(),
|
||||
udpGROTable: newUDPGROTable(),
|
||||
toWrite: make([]int, 0, conn.IdealBatchSize),
|
||||
}
|
||||
name, err := tun.Name()
|
||||
if err != nil {
|
||||
|
Reference in New Issue
Block a user