mirror of
https://github.com/SagerNet/sing-tun.git
synced 2025-11-01 20:32:46 +08:00
Improve darwin tun performance
This commit is contained in:
648
internal/fdbased_darwin/endpoint.go
Normal file
648
internal/fdbased_darwin/endpoint.go
Normal file
@@ -0,0 +1,648 @@
|
|||||||
|
// Copyright 2018 The gVisor Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package fdbased provides the implementation of data-link layer endpoints
|
||||||
|
// backed by boundary-preserving file descriptors (e.g., TUN devices,
|
||||||
|
// seqpacket/datagram sockets).
|
||||||
|
//
|
||||||
|
// FD based endpoints can be used in the networking stack by calling New() to
|
||||||
|
// create a new endpoint, and then passing it as an argument to
|
||||||
|
// Stack.CreateNIC().
|
||||||
|
//
|
||||||
|
// FD based endpoints can use more than one file descriptor to read incoming
|
||||||
|
// packets. If there are more than one FDs specified and the underlying FD is an
|
||||||
|
// AF_PACKET then the endpoint will enable FANOUT mode on the socket so that the
|
||||||
|
// host kernel will consistently hash the packets to the sockets. This ensures
|
||||||
|
// that packets for the same TCP streams are not reordered.
|
||||||
|
//
|
||||||
|
// Similarly if more than one FD's are specified where the underlying FD is not
|
||||||
|
// AF_PACKET then it's the caller's responsibility to ensure that all inbound
|
||||||
|
// packets on the descriptors are consistently 5 tuple hashed to one of the
|
||||||
|
// descriptors to prevent TCP reordering.
|
||||||
|
//
|
||||||
|
// Since netstack today does not compute 5 tuple hashes for outgoing packets we
|
||||||
|
// only use the first FD to write outbound packets. Once 5 tuple hashes for
|
||||||
|
// all outbound packets are available we will make use of all underlying FD's to
|
||||||
|
// write outbound packets.
|
||||||
|
package fdbased
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/sagernet/gvisor/pkg/buffer"
|
||||||
|
"github.com/sagernet/gvisor/pkg/sync"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/header"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/stack"
|
||||||
|
"github.com/sagernet/sing-tun/internal/rawfile_darwin"
|
||||||
|
"github.com/sagernet/sing/common"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// linkDispatcher reads packets from the link FD and dispatches them to the
|
||||||
|
// NetworkDispatcher.
|
||||||
|
type linkDispatcher interface {
|
||||||
|
Stop()
|
||||||
|
dispatch() (bool, tcpip.Error)
|
||||||
|
release()
|
||||||
|
}
|
||||||
|
|
||||||
|
// PacketDispatchMode selects one of the supported methods of receiving and
// dispatching packets from the underlying FD.
type PacketDispatchMode int

// BatchSize is the number of packets to write in each syscall. It is 47
// because when GVisorGSO is in use then a single 65KB TCP segment can get
// split into 46 segments of 1420 bytes and a single 216 byte segment.
const BatchSize = 47

const (
	// Readv is the default dispatch mode. It is the least performant of the
	// dispatch options but is supported by all underlying FD types.
	Readv PacketDispatchMode = iota
)

// String returns the symbolic name of p, or a diagnostic string embedding the
// numeric value when p is not a known mode.
func (p PacketDispatchMode) String() string {
	if p == Readv {
		return "Readv"
	}
	return fmt.Sprintf("unknown packet dispatch mode '%d'", p)
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
_ stack.LinkEndpoint = (*endpoint)(nil)
|
||||||
|
_ stack.GSOEndpoint = (*endpoint)(nil)
|
||||||
|
)
|
||||||
|
|
||||||
|
// fdInfo describes one file descriptor used by an endpoint.
//
// +stateify savable
type fdInfo struct {
	// fd is the host file descriptor number.
	fd int

	// isSocket selects the write path in sendBatch: batched sends when
	// true, one writePacket call per packet when false. New currently sets
	// it to true for every FD.
	isSocket bool
}
|
||||||
|
|
||||||
|
// +stateify savable
|
||||||
|
type endpoint struct {
|
||||||
|
// fds is the set of file descriptors each identifying one inbound/outbound
|
||||||
|
// channel. The endpoint will dispatch from all inbound channels as well as
|
||||||
|
// hash outbound packets to specific channels based on the packet hash.
|
||||||
|
fds []fdInfo
|
||||||
|
|
||||||
|
// hdrSize specifies the link-layer header size. If set to 0, no header
|
||||||
|
// is added/removed; otherwise an ethernet header is used.
|
||||||
|
hdrSize int
|
||||||
|
|
||||||
|
// caps holds the endpoint capabilities.
|
||||||
|
caps stack.LinkEndpointCapabilities
|
||||||
|
|
||||||
|
// closed is a function to be called when the FD's peer (if any) closes
|
||||||
|
// its end of the communication pipe.
|
||||||
|
closed func(tcpip.Error) `state:"nosave"`
|
||||||
|
|
||||||
|
inboundDispatchers []linkDispatcher
|
||||||
|
|
||||||
|
mu endpointRWMutex `state:"nosave"`
|
||||||
|
// +checklocks:mu
|
||||||
|
dispatcher stack.NetworkDispatcher
|
||||||
|
|
||||||
|
// packetDispatchMode controls the packet dispatcher used by this
|
||||||
|
// endpoint.
|
||||||
|
packetDispatchMode PacketDispatchMode
|
||||||
|
|
||||||
|
// wg keeps track of running goroutines.
|
||||||
|
wg sync.WaitGroup `state:"nosave"`
|
||||||
|
|
||||||
|
// maxSyscallHeaderBytes has the same meaning as
|
||||||
|
// Options.MaxSyscallHeaderBytes.
|
||||||
|
maxSyscallHeaderBytes uintptr
|
||||||
|
|
||||||
|
// writevMaxIovs is the maximum number of iovecs that may be passed to
|
||||||
|
// rawfile.NonBlockingWriteIovec, as possibly limited by
|
||||||
|
// maxSyscallHeaderBytes. (No analogous limit is defined for
|
||||||
|
// rawfile.NonBlockingSendMMsg, since in that case the maximum number of
|
||||||
|
// iovecs also depends on the number of mmsghdrs. Instead, if sendBatch
|
||||||
|
// encounters a packet whose iovec count is limited by
|
||||||
|
// maxSyscallHeaderBytes, it falls back to writing the packet using writev
|
||||||
|
// via WritePacket.)
|
||||||
|
writevMaxIovs int
|
||||||
|
|
||||||
|
// addr is the address of the endpoint.
|
||||||
|
//
|
||||||
|
// +checklocks:mu
|
||||||
|
addr tcpip.LinkAddress
|
||||||
|
|
||||||
|
// mtu (maximum transmission unit) is the maximum size of a packet.
|
||||||
|
// +checklocks:mu
|
||||||
|
mtu uint32
|
||||||
|
|
||||||
|
batchSize int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Options specify the details about the fd-based endpoint to be created.
|
||||||
|
//
|
||||||
|
// +stateify savable
|
||||||
|
type Options struct {
|
||||||
|
// FDs is a set of FDs used to read/write packets.
|
||||||
|
FDs []int
|
||||||
|
|
||||||
|
// MTU is the mtu to use for this endpoint.
|
||||||
|
MTU uint32
|
||||||
|
|
||||||
|
// EthernetHeader if true, indicates that the endpoint should read/write
|
||||||
|
// ethernet frames instead of IP packets.
|
||||||
|
EthernetHeader bool
|
||||||
|
|
||||||
|
// ClosedFunc is a function to be called when an endpoint's peer (if
|
||||||
|
// any) closes its end of the communication pipe.
|
||||||
|
ClosedFunc func(tcpip.Error)
|
||||||
|
|
||||||
|
// Address is the link address for this endpoint. Only used if
|
||||||
|
// EthernetHeader is true.
|
||||||
|
Address tcpip.LinkAddress
|
||||||
|
|
||||||
|
// SaveRestore if true, indicates that this NIC capability set should
|
||||||
|
// include CapabilitySaveRestore
|
||||||
|
SaveRestore bool
|
||||||
|
|
||||||
|
// DisconnectOk if true, indicates that this NIC capability set should
|
||||||
|
// include CapabilityDisconnectOk.
|
||||||
|
DisconnectOk bool
|
||||||
|
|
||||||
|
// PacketDispatchMode specifies the type of inbound dispatcher to be
|
||||||
|
// used for this endpoint.
|
||||||
|
PacketDispatchMode PacketDispatchMode
|
||||||
|
|
||||||
|
// TXChecksumOffload if true, indicates that this endpoints capability
|
||||||
|
// set should include CapabilityTXChecksumOffload.
|
||||||
|
TXChecksumOffload bool
|
||||||
|
|
||||||
|
// RXChecksumOffload if true, indicates that this endpoints capability
|
||||||
|
// set should include CapabilityRXChecksumOffload.
|
||||||
|
RXChecksumOffload bool
|
||||||
|
|
||||||
|
// If MaxSyscallHeaderBytes is non-zero, it is the maximum number of bytes
|
||||||
|
// of struct iovec, msghdr, and mmsghdr that may be passed by each host
|
||||||
|
// system call.
|
||||||
|
MaxSyscallHeaderBytes int
|
||||||
|
|
||||||
|
// InterfaceIndex is the interface index of the underlying device.
|
||||||
|
InterfaceIndex int
|
||||||
|
|
||||||
|
// ProcessorsPerChannel is the number of goroutines used to handle packets
|
||||||
|
// from each FD.
|
||||||
|
ProcessorsPerChannel int
|
||||||
|
}
|
||||||
|
|
||||||
|
// New creates a new fd-based endpoint.
|
||||||
|
//
|
||||||
|
// Makes fd non-blocking, but does not take ownership of fd, which must remain
|
||||||
|
// open for the lifetime of the returned endpoint (until after the endpoint has
|
||||||
|
// stopped being using and Wait returns).
|
||||||
|
func New(opts *Options) (stack.LinkEndpoint, error) {
|
||||||
|
caps := stack.LinkEndpointCapabilities(0)
|
||||||
|
if opts.RXChecksumOffload {
|
||||||
|
caps |= stack.CapabilityRXChecksumOffload
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.TXChecksumOffload {
|
||||||
|
caps |= stack.CapabilityTXChecksumOffload
|
||||||
|
}
|
||||||
|
|
||||||
|
hdrSize := 0
|
||||||
|
if opts.EthernetHeader {
|
||||||
|
hdrSize = header.EthernetMinimumSize
|
||||||
|
caps |= stack.CapabilityResolutionRequired
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.SaveRestore {
|
||||||
|
caps |= stack.CapabilitySaveRestore
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.DisconnectOk {
|
||||||
|
caps |= stack.CapabilityDisconnectOk
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(opts.FDs) == 0 {
|
||||||
|
return nil, fmt.Errorf("opts.FD is empty, at least one FD must be specified")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.MaxSyscallHeaderBytes < 0 {
|
||||||
|
return nil, fmt.Errorf("opts.MaxSyscallHeaderBytes is negative")
|
||||||
|
}
|
||||||
|
|
||||||
|
e := &endpoint{
|
||||||
|
mtu: opts.MTU,
|
||||||
|
caps: caps,
|
||||||
|
closed: opts.ClosedFunc,
|
||||||
|
addr: opts.Address,
|
||||||
|
hdrSize: hdrSize,
|
||||||
|
packetDispatchMode: opts.PacketDispatchMode,
|
||||||
|
maxSyscallHeaderBytes: uintptr(opts.MaxSyscallHeaderBytes),
|
||||||
|
writevMaxIovs: rawfile.MaxIovs,
|
||||||
|
batchSize: int((512*1024)/(opts.MTU)) + 1,
|
||||||
|
}
|
||||||
|
if e.maxSyscallHeaderBytes != 0 {
|
||||||
|
if max := int(e.maxSyscallHeaderBytes / rawfile.SizeofIovec); max < e.writevMaxIovs {
|
||||||
|
e.writevMaxIovs = max
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create per channel dispatchers.
|
||||||
|
for _, fd := range opts.FDs {
|
||||||
|
if err := unix.SetNonblock(fd, true); err != nil {
|
||||||
|
return nil, fmt.Errorf("unix.SetNonblock(%v) failed: %v", fd, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
e.fds = append(e.fds, fdInfo{fd: fd, isSocket: true})
|
||||||
|
if opts.ProcessorsPerChannel == 0 {
|
||||||
|
opts.ProcessorsPerChannel = common.Max(1, runtime.GOMAXPROCS(0)/len(opts.FDs))
|
||||||
|
}
|
||||||
|
|
||||||
|
inboundDispatcher, err := newRecvMMsgDispatcher(fd, e, opts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("createInboundDispatcher(...) = %v", err)
|
||||||
|
}
|
||||||
|
e.inboundDispatchers = append(e.inboundDispatchers, inboundDispatcher)
|
||||||
|
}
|
||||||
|
|
||||||
|
return e, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func isSocketFD(fd int) (bool, error) {
|
||||||
|
var stat unix.Stat_t
|
||||||
|
if err := unix.Fstat(fd, &stat); err != nil {
|
||||||
|
return false, fmt.Errorf("unix.Fstat(%v,...) failed: %v", fd, err)
|
||||||
|
}
|
||||||
|
return (stat.Mode & unix.S_IFSOCK) == unix.S_IFSOCK, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attach launches the goroutine that reads packets from the file descriptor and
|
||||||
|
// dispatches them via the provided dispatcher. If one is already attached,
|
||||||
|
// then nothing happens.
|
||||||
|
//
|
||||||
|
// Attach implements stack.LinkEndpoint.Attach.
|
||||||
|
func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) {
|
||||||
|
e.mu.Lock()
|
||||||
|
|
||||||
|
// nil means the NIC is being removed.
|
||||||
|
if dispatcher == nil && e.dispatcher != nil {
|
||||||
|
for _, dispatcher := range e.inboundDispatchers {
|
||||||
|
dispatcher.Stop()
|
||||||
|
}
|
||||||
|
e.dispatcher = nil
|
||||||
|
// NOTE(gvisor.dev/issue/11456): Unlock e.mu before e.Wait().
|
||||||
|
e.mu.Unlock()
|
||||||
|
e.Wait()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
if dispatcher != nil && e.dispatcher == nil {
|
||||||
|
e.dispatcher = dispatcher
|
||||||
|
// Link endpoints are not savable. When transportation endpoints are
|
||||||
|
// saved, they stop sending outgoing packets and all incoming packets
|
||||||
|
// are rejected.
|
||||||
|
for i := range e.inboundDispatchers {
|
||||||
|
e.wg.Add(1)
|
||||||
|
go func(i int) { // S/R-SAFE: See above.
|
||||||
|
e.dispatchLoop(e.inboundDispatchers[i])
|
||||||
|
e.wg.Done()
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsAttached implements stack.LinkEndpoint.IsAttached.
|
||||||
|
func (e *endpoint) IsAttached() bool {
|
||||||
|
e.mu.RLock()
|
||||||
|
defer e.mu.RUnlock()
|
||||||
|
return e.dispatcher != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MTU implements stack.LinkEndpoint.MTU.
|
||||||
|
func (e *endpoint) MTU() uint32 {
|
||||||
|
e.mu.RLock()
|
||||||
|
defer e.mu.RUnlock()
|
||||||
|
return e.mtu
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetMTU implements stack.LinkEndpoint.SetMTU.
|
||||||
|
func (e *endpoint) SetMTU(mtu uint32) {
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
e.mtu = mtu
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capabilities implements stack.LinkEndpoint.Capabilities.
|
||||||
|
func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
|
||||||
|
return e.caps
|
||||||
|
}
|
||||||
|
|
||||||
|
// MaxHeaderLength returns the maximum size of the link-layer header.
|
||||||
|
func (e *endpoint) MaxHeaderLength() uint16 {
|
||||||
|
return uint16(e.hdrSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// LinkAddress returns the link address of this endpoint.
|
||||||
|
func (e *endpoint) LinkAddress() tcpip.LinkAddress {
|
||||||
|
e.mu.RLock()
|
||||||
|
defer e.mu.RUnlock()
|
||||||
|
return e.addr
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetLinkAddress implements stack.LinkEndpoint.SetLinkAddress.
|
||||||
|
func (e *endpoint) SetLinkAddress(addr tcpip.LinkAddress) {
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
e.addr = addr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait implements stack.LinkEndpoint.Wait. It waits for the endpoint to stop
|
||||||
|
// reading from its FD.
|
||||||
|
func (e *endpoint) Wait() {
|
||||||
|
e.wg.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddHeader implements stack.LinkEndpoint.AddHeader.
|
||||||
|
func (e *endpoint) AddHeader(pkt *stack.PacketBuffer) {
|
||||||
|
if e.hdrSize > 0 {
|
||||||
|
// Add ethernet header if needed.
|
||||||
|
eth := header.Ethernet(pkt.LinkHeader().Push(header.EthernetMinimumSize))
|
||||||
|
eth.Encode(&header.EthernetFields{
|
||||||
|
SrcAddr: pkt.EgressRoute.LocalLinkAddress,
|
||||||
|
DstAddr: pkt.EgressRoute.RemoteLinkAddress,
|
||||||
|
Type: pkt.NetworkProtocolNumber,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *endpoint) parseHeader(pkt *stack.PacketBuffer) (header.Ethernet, bool) {
|
||||||
|
if e.hdrSize <= 0 {
|
||||||
|
return nil, true
|
||||||
|
}
|
||||||
|
hdrBytes, ok := pkt.LinkHeader().Consume(e.hdrSize)
|
||||||
|
if !ok {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
hdr := header.Ethernet(hdrBytes)
|
||||||
|
pkt.NetworkProtocolNumber = hdr.Type()
|
||||||
|
return hdr, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseInboundHeader parses the link header of pkt and returns true if the
|
||||||
|
// header is well-formed and sent to this endpoint's MAC or the broadcast
|
||||||
|
// address.
|
||||||
|
func (e *endpoint) parseInboundHeader(pkt *stack.PacketBuffer, wantAddr tcpip.LinkAddress) bool {
|
||||||
|
hdr, ok := e.parseHeader(pkt)
|
||||||
|
if !ok || e.hdrSize <= 0 {
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
dstAddr := hdr.DestinationAddress()
|
||||||
|
// Per RFC 9542 2.1 on the least significant bit of the first octet of
|
||||||
|
// a MAC address: "If it is zero, the MAC address is unicast. If it is
|
||||||
|
// a one, the address is groupcast (multicast or broadcast)." Multicast
|
||||||
|
// and broadcast are the same thing to ethernet; they are both sent to
|
||||||
|
// everyone.
|
||||||
|
return dstAddr == wantAddr || byte(dstAddr[0])&0x01 == 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseHeader implements stack.LinkEndpoint.ParseHeader.
|
||||||
|
func (e *endpoint) ParseHeader(pkt *stack.PacketBuffer) bool {
|
||||||
|
_, ok := e.parseHeader(pkt)
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
packetHeader4 = []byte{0x00, 0x00, 0x00, unix.AF_INET}
|
||||||
|
packetHeader6 = []byte{0x00, 0x00, 0x00, unix.AF_INET6}
|
||||||
|
)
|
||||||
|
|
||||||
|
// writePacket writes outbound packets to the file descriptor. If it is not
|
||||||
|
// currently writable, the packet is dropped.
|
||||||
|
func (e *endpoint) writePacket(pkt *stack.PacketBuffer) tcpip.Error {
|
||||||
|
fdInfo := e.fds[pkt.Hash%uint32(len(e.fds))]
|
||||||
|
fd := fdInfo.fd
|
||||||
|
var vnetHdrBuf []byte
|
||||||
|
if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber {
|
||||||
|
vnetHdrBuf = packetHeader4
|
||||||
|
} else {
|
||||||
|
vnetHdrBuf = packetHeader6
|
||||||
|
}
|
||||||
|
views := pkt.AsSlices()
|
||||||
|
numIovecs := len(views)
|
||||||
|
if len(vnetHdrBuf) != 0 {
|
||||||
|
numIovecs++
|
||||||
|
}
|
||||||
|
if numIovecs > e.writevMaxIovs {
|
||||||
|
numIovecs = e.writevMaxIovs
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate small iovec arrays on the stack.
|
||||||
|
var iovecsArr [8]unix.Iovec
|
||||||
|
iovecs := iovecsArr[:0]
|
||||||
|
if numIovecs > len(iovecsArr) {
|
||||||
|
iovecs = make([]unix.Iovec, 0, numIovecs)
|
||||||
|
}
|
||||||
|
iovecs = rawfile.AppendIovecFromBytes(iovecs, vnetHdrBuf, numIovecs)
|
||||||
|
for _, v := range views {
|
||||||
|
iovecs = rawfile.AppendIovecFromBytes(iovecs, v, numIovecs)
|
||||||
|
}
|
||||||
|
if errno := rawfile.NonBlockingWriteIovec(fd, iovecs); errno != 0 {
|
||||||
|
return TranslateErrno(errno)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *endpoint) sendBatch(batchFDInfo fdInfo, pkts []*stack.PacketBuffer) (int, tcpip.Error) {
|
||||||
|
// Degrade to writePacket if underlying fd is not a socket.
|
||||||
|
if !batchFDInfo.isSocket {
|
||||||
|
var written int
|
||||||
|
var err tcpip.Error
|
||||||
|
for written < len(pkts) {
|
||||||
|
if err = e.writePacket(pkts[written]); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
written++
|
||||||
|
}
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send a batch of packets through batchFD.
|
||||||
|
batchFD := batchFDInfo.fd
|
||||||
|
mmsgHdrsStorage := make([]rawfile.MsgHdrX, 0, len(pkts))
|
||||||
|
packets := 0
|
||||||
|
for packets < len(pkts) {
|
||||||
|
mmsgHdrs := mmsgHdrsStorage
|
||||||
|
batch := pkts[packets:]
|
||||||
|
syscallHeaderBytes := uintptr(0)
|
||||||
|
for _, pkt := range batch {
|
||||||
|
var vnetHdrBuf []byte
|
||||||
|
if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber {
|
||||||
|
vnetHdrBuf = packetHeader4
|
||||||
|
} else {
|
||||||
|
vnetHdrBuf = packetHeader6
|
||||||
|
}
|
||||||
|
views, offset := pkt.AsViewList()
|
||||||
|
var skipped int
|
||||||
|
var view *buffer.View
|
||||||
|
for view = views.Front(); view != nil && offset >= view.Size(); view = view.Next() {
|
||||||
|
offset -= view.Size()
|
||||||
|
skipped++
|
||||||
|
}
|
||||||
|
|
||||||
|
// We've made it to the usable views.
|
||||||
|
numIovecs := views.Len() - skipped
|
||||||
|
if len(vnetHdrBuf) != 0 {
|
||||||
|
numIovecs++
|
||||||
|
}
|
||||||
|
if numIovecs > rawfile.MaxIovs {
|
||||||
|
numIovecs = rawfile.MaxIovs
|
||||||
|
}
|
||||||
|
if e.maxSyscallHeaderBytes != 0 {
|
||||||
|
syscallHeaderBytes += rawfile.SizeofMsgHdrX + uintptr(numIovecs)*rawfile.SizeofIovec
|
||||||
|
if syscallHeaderBytes > e.maxSyscallHeaderBytes {
|
||||||
|
// We can't fit this packet into this call to sendmmsg().
|
||||||
|
// We could potentially do so if we reduced numIovecs
|
||||||
|
// further, but this might incur considerable extra
|
||||||
|
// copying. Leave it to the next batch instead.
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can't easily allocate iovec arrays on the stack here since
|
||||||
|
// they will escape this loop iteration via mmsgHdrs.
|
||||||
|
iovecs := make([]unix.Iovec, 0, numIovecs)
|
||||||
|
iovecs = rawfile.AppendIovecFromBytes(iovecs, vnetHdrBuf, numIovecs)
|
||||||
|
// At most one slice has a non-zero offset.
|
||||||
|
iovecs = rawfile.AppendIovecFromBytes(iovecs, view.AsSlice()[offset:], numIovecs)
|
||||||
|
for view = view.Next(); view != nil; view = view.Next() {
|
||||||
|
iovecs = rawfile.AppendIovecFromBytes(iovecs, view.AsSlice(), numIovecs)
|
||||||
|
}
|
||||||
|
|
||||||
|
var mmsgHdr rawfile.MsgHdrX
|
||||||
|
mmsgHdr.Msg.Iov = &iovecs[0]
|
||||||
|
mmsgHdr.Msg.SetIovlen(len(iovecs))
|
||||||
|
// mmsgHdr.DataLen = uint32(len(iovecs))
|
||||||
|
mmsgHdrs = append(mmsgHdrs, mmsgHdr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(mmsgHdrs) == 0 {
|
||||||
|
// We can't fit batch[0] into a mmsghdr while staying under
|
||||||
|
// e.maxSyscallHeaderBytes. Use WritePacket, which will avoid the
|
||||||
|
// mmsghdr (by using writev) and re-buffer iovecs more aggressively
|
||||||
|
// if necessary (by using e.writevMaxIovs instead of
|
||||||
|
// rawfile.MaxIovs).
|
||||||
|
pkt := batch[0]
|
||||||
|
if err := e.writePacket(pkt); err != nil {
|
||||||
|
return packets, err
|
||||||
|
}
|
||||||
|
packets++
|
||||||
|
} else {
|
||||||
|
for len(mmsgHdrs) > 0 {
|
||||||
|
sent, errno := rawfile.NonBlockingSendMMsg(batchFD, mmsgHdrs)
|
||||||
|
if errno != 0 {
|
||||||
|
return packets, TranslateErrno(errno)
|
||||||
|
}
|
||||||
|
packets += sent
|
||||||
|
mmsgHdrs = mmsgHdrs[sent:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return packets, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WritePackets writes outbound packets to the underlying file descriptors. If
|
||||||
|
// one is not currently writable, the packet is dropped.
|
||||||
|
//
|
||||||
|
// Being a batch API, each packet in pkts should have the following
|
||||||
|
// fields populated:
|
||||||
|
// - pkt.EgressRoute
|
||||||
|
// - pkt.GSOOptions
|
||||||
|
// - pkt.NetworkProtocolNumber
|
||||||
|
func (e *endpoint) WritePackets(pkts stack.PacketBufferList) (int, tcpip.Error) {
|
||||||
|
// Preallocate to avoid repeated reallocation as we append to batch.
|
||||||
|
batch := make([]*stack.PacketBuffer, 0, e.batchSize)
|
||||||
|
batchFDInfo := fdInfo{fd: -1, isSocket: false}
|
||||||
|
sentPackets := 0
|
||||||
|
for _, pkt := range pkts.AsSlice() {
|
||||||
|
if len(batch) == 0 {
|
||||||
|
batchFDInfo = e.fds[pkt.Hash%uint32(len(e.fds))]
|
||||||
|
}
|
||||||
|
pktFDInfo := e.fds[pkt.Hash%uint32(len(e.fds))]
|
||||||
|
if sendNow := pktFDInfo != batchFDInfo; !sendNow {
|
||||||
|
batch = append(batch, pkt)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n, err := e.sendBatch(batchFDInfo, batch)
|
||||||
|
sentPackets += n
|
||||||
|
if err != nil {
|
||||||
|
return sentPackets, err
|
||||||
|
}
|
||||||
|
batch = batch[:0]
|
||||||
|
batch = append(batch, pkt)
|
||||||
|
batchFDInfo = pktFDInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(batch) != 0 {
|
||||||
|
n, err := e.sendBatch(batchFDInfo, batch)
|
||||||
|
sentPackets += n
|
||||||
|
if err != nil {
|
||||||
|
return sentPackets, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sentPackets, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// dispatchLoop reads packets from the file descriptor in a loop and dispatches
|
||||||
|
// them to the network stack.
|
||||||
|
func (e *endpoint) dispatchLoop(inboundDispatcher linkDispatcher) tcpip.Error {
|
||||||
|
for {
|
||||||
|
cont, err := inboundDispatcher.dispatch()
|
||||||
|
if err != nil || !cont {
|
||||||
|
if e.closed != nil {
|
||||||
|
e.closed(err)
|
||||||
|
}
|
||||||
|
inboundDispatcher.release()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GSOMaxSize implements stack.GSOEndpoint.
|
||||||
|
func (e *endpoint) GSOMaxSize() uint32 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// SupportedGSO implements stack.GSOEndpoint.
|
||||||
|
func (e *endpoint) SupportedGSO() stack.SupportedGSO {
|
||||||
|
return stack.GSONotSupported
|
||||||
|
}
|
||||||
|
|
||||||
|
// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
|
||||||
|
func (e *endpoint) ARPHardwareType() header.ARPHardwareType {
|
||||||
|
if e.hdrSize > 0 {
|
||||||
|
return header.ARPHardwareEther
|
||||||
|
}
|
||||||
|
return header.ARPHardwareNone
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close implements stack.LinkEndpoint.
|
||||||
|
func (e *endpoint) Close() {}
|
||||||
|
|
||||||
|
// SetOnCloseAction implements stack.LinkEndpoint.
|
||||||
|
func (*endpoint) SetOnCloseAction(func()) {}
|
||||||
96
internal/fdbased_darwin/endpoint_mutex.go
Normal file
96
internal/fdbased_darwin/endpoint_mutex.go
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
package fdbased
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
|
"github.com/sagernet/gvisor/pkg/sync"
|
||||||
|
"github.com/sagernet/gvisor/pkg/sync/locking"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RWMutex is sync.RWMutex with the correctness validator.
|
||||||
|
type endpointRWMutex struct {
|
||||||
|
mu sync.RWMutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// endpointlockNames is a list of user-friendly lock names. init passes it to
// locking.NewMutexClass after calling endpointinitLockNames, the hook meant
// to populate it.
var endpointlockNames []string

// endpointlockNameIndex is used as an index passed to NestedLock and
// NestedUnlock, referring to an index within endpointlockNames.
// Values are specified using the "consts" field of go_template_instance.
type endpointlockNameIndex int

// DO NOT REMOVE: The following function automatically replaced with lock index constants.
// LOCK_NAME_INDEX_CONSTANTS
const ()
|
||||||
|
|
||||||
|
// Lock locks m.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) Lock() {
|
||||||
|
locking.AddGLock(endpointprefixIndex, -1)
|
||||||
|
m.mu.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NestedLock locks m knowing that another lock of the same type is held.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) NestedLock(i endpointlockNameIndex) {
|
||||||
|
locking.AddGLock(endpointprefixIndex, int(i))
|
||||||
|
m.mu.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unlock unlocks m.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) Unlock() {
|
||||||
|
m.mu.Unlock()
|
||||||
|
locking.DelGLock(endpointprefixIndex, -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NestedUnlock unlocks m knowing that another lock of the same type is held.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) NestedUnlock(i endpointlockNameIndex) {
|
||||||
|
m.mu.Unlock()
|
||||||
|
locking.DelGLock(endpointprefixIndex, int(i))
|
||||||
|
}
|
||||||
|
|
||||||
|
// RLock locks m for reading.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) RLock() {
|
||||||
|
locking.AddGLock(endpointprefixIndex, -1)
|
||||||
|
m.mu.RLock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// RUnlock undoes a single RLock call.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) RUnlock() {
|
||||||
|
m.mu.RUnlock()
|
||||||
|
locking.DelGLock(endpointprefixIndex, -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RLockBypass locks m for reading without executing the validator.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) RLockBypass() {
|
||||||
|
m.mu.RLock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// RUnlockBypass undoes a single RLockBypass call.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) RUnlockBypass() {
|
||||||
|
m.mu.RUnlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// DowngradeLock atomically unlocks rw for writing and locks it for reading.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *endpointRWMutex) DowngradeLock() {
|
||||||
|
m.mu.DowngradeLock()
|
||||||
|
}
|
||||||
|
|
||||||
|
var endpointprefixIndex *locking.MutexClass
|
||||||
|
|
||||||
|
// DO NOT REMOVE: The following function is automatically replaced.
|
||||||
|
func endpointinitLockNames() {}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
endpointinitLockNames()
|
||||||
|
endpointprefixIndex = locking.NewMutexClass(reflect.TypeOf(endpointRWMutex{}), endpointlockNames)
|
||||||
|
}
|
||||||
54
internal/fdbased_darwin/errno.go
Normal file
54
internal/fdbased_darwin/errno.go
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
package fdbased
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TranslateErrno(e unix.Errno) tcpip.Error {
|
||||||
|
switch e {
|
||||||
|
case unix.EEXIST:
|
||||||
|
return &tcpip.ErrDuplicateAddress{}
|
||||||
|
case unix.ENETUNREACH:
|
||||||
|
return &tcpip.ErrHostUnreachable{}
|
||||||
|
case unix.EINVAL:
|
||||||
|
return &tcpip.ErrInvalidEndpointState{}
|
||||||
|
case unix.EALREADY:
|
||||||
|
return &tcpip.ErrAlreadyConnecting{}
|
||||||
|
case unix.EISCONN:
|
||||||
|
return &tcpip.ErrAlreadyConnected{}
|
||||||
|
case unix.EADDRINUSE:
|
||||||
|
return &tcpip.ErrPortInUse{}
|
||||||
|
case unix.EADDRNOTAVAIL:
|
||||||
|
return &tcpip.ErrBadLocalAddress{}
|
||||||
|
case unix.EPIPE:
|
||||||
|
return &tcpip.ErrClosedForSend{}
|
||||||
|
case unix.EWOULDBLOCK:
|
||||||
|
return &tcpip.ErrWouldBlock{}
|
||||||
|
case unix.ECONNREFUSED:
|
||||||
|
return &tcpip.ErrConnectionRefused{}
|
||||||
|
case unix.ETIMEDOUT:
|
||||||
|
return &tcpip.ErrTimeout{}
|
||||||
|
case unix.EINPROGRESS:
|
||||||
|
return &tcpip.ErrConnectStarted{}
|
||||||
|
case unix.EDESTADDRREQ:
|
||||||
|
return &tcpip.ErrDestinationRequired{}
|
||||||
|
case unix.ENOTSUP:
|
||||||
|
return &tcpip.ErrNotSupported{}
|
||||||
|
case unix.ENOTTY:
|
||||||
|
return &tcpip.ErrQueueSizeNotSupported{}
|
||||||
|
case unix.ENOTCONN:
|
||||||
|
return &tcpip.ErrNotConnected{}
|
||||||
|
case unix.ECONNRESET:
|
||||||
|
return &tcpip.ErrConnectionReset{}
|
||||||
|
case unix.ECONNABORTED:
|
||||||
|
return &tcpip.ErrConnectionAborted{}
|
||||||
|
case unix.EMSGSIZE:
|
||||||
|
return &tcpip.ErrMessageTooLong{}
|
||||||
|
case unix.ENOBUFS:
|
||||||
|
return &tcpip.ErrNoBufferSpace{}
|
||||||
|
default:
|
||||||
|
return &tcpip.ErrInvalidEndpointState{}
|
||||||
|
}
|
||||||
|
}
|
||||||
229
internal/fdbased_darwin/packet_dispatchers.go
Normal file
229
internal/fdbased_darwin/packet_dispatchers.go
Normal file
@@ -0,0 +1,229 @@
|
|||||||
|
// Copyright 2018 The gVisor Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fdbased
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/sagernet/gvisor/pkg/buffer"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/stack"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/stack/gro"
|
||||||
|
"github.com/sagernet/sing-tun/internal/rawfile_darwin"
|
||||||
|
"github.com/sagernet/sing-tun/internal/stopfd_darwin"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BufConfig describes the shape of the buffer used to read packets from the
// NIC as an ordered list of segment sizes.
var BufConfig = []int{4, 128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
|
||||||
|
|
||||||
|
// +stateify savable
|
||||||
|
type iovecBuffer struct {
|
||||||
|
// buffer is the actual buffer that holds the packet contents. Some contents
|
||||||
|
// are reused across calls to pullBuffer if number of requested bytes is
|
||||||
|
// smaller than the number of bytes allocated in the buffer.
|
||||||
|
views []*buffer.View
|
||||||
|
|
||||||
|
// iovecs are initialized with base pointers/len of the corresponding
|
||||||
|
// entries in the views defined above, except when GSO is enabled
|
||||||
|
// (skipsVnetHdr) then the first iovec points to a buffer for the vnet header
|
||||||
|
// which is stripped before the views are passed up the stack for further
|
||||||
|
// processing.
|
||||||
|
iovecs []unix.Iovec `state:"nosave"`
|
||||||
|
|
||||||
|
// sizes is an array of buffer sizes for the underlying views. sizes is
|
||||||
|
// immutable.
|
||||||
|
sizes []int
|
||||||
|
|
||||||
|
// pulledIndex is the index of the last []byte buffer pulled from the
|
||||||
|
// underlying buffer storage during a call to pullBuffers. It is -1
|
||||||
|
// if no buffer is pulled.
|
||||||
|
pulledIndex int
|
||||||
|
}
|
||||||
|
|
||||||
|
func newIovecBuffer(sizes []int) *iovecBuffer {
|
||||||
|
b := &iovecBuffer{
|
||||||
|
views: make([]*buffer.View, len(sizes)),
|
||||||
|
iovecs: make([]unix.Iovec, len(sizes)),
|
||||||
|
sizes: sizes,
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *iovecBuffer) nextIovecs() []unix.Iovec {
|
||||||
|
for i := range b.views {
|
||||||
|
if b.views[i] != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v := buffer.NewViewSize(b.sizes[i])
|
||||||
|
b.views[i] = v
|
||||||
|
b.iovecs[i] = unix.Iovec{Base: v.BasePtr()}
|
||||||
|
b.iovecs[i].SetLen(v.Size())
|
||||||
|
}
|
||||||
|
return b.iovecs
|
||||||
|
}
|
||||||
|
|
||||||
|
// pullBuffer extracts the enough underlying storage from b.buffer to hold n
|
||||||
|
// bytes. It removes this storage from b.buffer, returns a new buffer
|
||||||
|
// that holds the storage, and updates pulledIndex to indicate which part
|
||||||
|
// of b.buffer's storage must be reallocated during the next call to
|
||||||
|
// nextIovecs.
|
||||||
|
func (b *iovecBuffer) pullBuffer(n int) buffer.Buffer {
|
||||||
|
var views []*buffer.View
|
||||||
|
c := 0
|
||||||
|
// Remove the used views from the buffer.
|
||||||
|
for i, v := range b.views {
|
||||||
|
c += v.Size()
|
||||||
|
if c >= n {
|
||||||
|
b.views[i].CapLength(v.Size() - (c - n))
|
||||||
|
views = append(views, b.views[:i+1]...)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := range views {
|
||||||
|
b.views[i] = nil
|
||||||
|
}
|
||||||
|
pulled := buffer.Buffer{}
|
||||||
|
for _, v := range views {
|
||||||
|
pulled.Append(v)
|
||||||
|
}
|
||||||
|
pulled.Truncate(int64(n))
|
||||||
|
return pulled
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *iovecBuffer) release() {
|
||||||
|
for _, v := range b.views {
|
||||||
|
if v != nil {
|
||||||
|
v.Release()
|
||||||
|
v = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and
|
||||||
|
// dispatches them.
|
||||||
|
//
|
||||||
|
// +stateify savable
|
||||||
|
type recvMMsgDispatcher struct {
|
||||||
|
stopfd.StopFD
|
||||||
|
// fd is the file descriptor used to send and receive packets.
|
||||||
|
fd int
|
||||||
|
|
||||||
|
// e is the endpoint this dispatcher is attached to.
|
||||||
|
e *endpoint
|
||||||
|
|
||||||
|
// bufs is an array of iovec buffers that contain packet contents.
|
||||||
|
bufs []*iovecBuffer
|
||||||
|
|
||||||
|
// msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to
|
||||||
|
// reference an array of iovecs in the iovecs field defined above. This
|
||||||
|
// array is passed as the parameter to recvmmsg call to retrieve
|
||||||
|
// potentially more than 1 packet per unix.
|
||||||
|
msgHdrs []rawfile.MsgHdrX `state:"nosave"`
|
||||||
|
|
||||||
|
// pkts is reused to avoid allocations.
|
||||||
|
pkts stack.PacketBufferList
|
||||||
|
|
||||||
|
// gro coalesces incoming packets to increase throughput.
|
||||||
|
gro gro.GRO
|
||||||
|
|
||||||
|
// mgr is the processor goroutine manager.
|
||||||
|
mgr *processorManager
|
||||||
|
}
|
||||||
|
|
||||||
|
func newRecvMMsgDispatcher(fd int, e *endpoint, opts *Options) (linkDispatcher, error) {
|
||||||
|
stopFD, err := stopfd.New()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
batchSize := int((512*1024)/(opts.MTU)) + 1
|
||||||
|
d := &recvMMsgDispatcher{
|
||||||
|
StopFD: stopFD,
|
||||||
|
fd: fd,
|
||||||
|
e: e,
|
||||||
|
bufs: make([]*iovecBuffer, batchSize),
|
||||||
|
msgHdrs: make([]rawfile.MsgHdrX, batchSize),
|
||||||
|
}
|
||||||
|
bufConfig := []int{4, int(opts.MTU)}
|
||||||
|
for i := range d.bufs {
|
||||||
|
d.bufs[i] = newIovecBuffer(bufConfig)
|
||||||
|
}
|
||||||
|
d.gro.Init(false)
|
||||||
|
d.mgr = newProcessorManager(opts, e)
|
||||||
|
d.mgr.start()
|
||||||
|
|
||||||
|
return d, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *recvMMsgDispatcher) release() {
|
||||||
|
for _, iov := range d.bufs {
|
||||||
|
iov.release()
|
||||||
|
}
|
||||||
|
d.mgr.close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// recvMMsgDispatch reads more than one packet at a time from the file
|
||||||
|
// descriptor and dispatches it.
|
||||||
|
func (d *recvMMsgDispatcher) dispatch() (bool, tcpip.Error) {
|
||||||
|
// Fill message headers.
|
||||||
|
for k := range d.msgHdrs {
|
||||||
|
if d.msgHdrs[k].Msg.Iovlen > 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
iovecs := d.bufs[k].nextIovecs()
|
||||||
|
iovLen := len(iovecs)
|
||||||
|
d.msgHdrs[k].DataLen = 0
|
||||||
|
d.msgHdrs[k].Msg.Iov = &iovecs[0]
|
||||||
|
d.msgHdrs[k].Msg.SetIovlen(iovLen)
|
||||||
|
}
|
||||||
|
|
||||||
|
nMsgs, errno := rawfile.BlockingRecvMMsgUntilStopped(d.ReadFD, d.fd, d.msgHdrs)
|
||||||
|
if errno != 0 {
|
||||||
|
return false, TranslateErrno(errno)
|
||||||
|
}
|
||||||
|
if nMsgs == -1 {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process each of received packets.
|
||||||
|
|
||||||
|
d.e.mu.RLock()
|
||||||
|
addr := d.e.addr
|
||||||
|
dsp := d.e.dispatcher
|
||||||
|
d.e.mu.RUnlock()
|
||||||
|
|
||||||
|
d.gro.Dispatcher = dsp
|
||||||
|
defer d.pkts.Reset()
|
||||||
|
|
||||||
|
for k := 0; k < nMsgs; k++ {
|
||||||
|
n := int(d.msgHdrs[k].DataLen)
|
||||||
|
payload := d.bufs[k].pullBuffer(n)
|
||||||
|
payload.TrimFront(4)
|
||||||
|
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
|
||||||
|
Payload: payload,
|
||||||
|
})
|
||||||
|
d.pkts.PushBack(pkt)
|
||||||
|
|
||||||
|
// Mark that this iovec has been processed.
|
||||||
|
d.msgHdrs[k].Msg.Iovlen = 0
|
||||||
|
|
||||||
|
if d.e.parseInboundHeader(pkt, addr) {
|
||||||
|
pkt.RXChecksumValidated = d.e.caps&stack.CapabilityRXChecksumOffload != 0
|
||||||
|
d.mgr.queuePacket(pkt, d.e.hdrSize > 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.mgr.wakeReady()
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
64
internal/fdbased_darwin/processor_mutex.go
Normal file
64
internal/fdbased_darwin/processor_mutex.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package fdbased
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
|
"github.com/sagernet/gvisor/pkg/sync"
|
||||||
|
"github.com/sagernet/gvisor/pkg/sync/locking"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Mutex is sync.Mutex with the correctness validator.
|
||||||
|
type processorMutex struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
var processorprefixIndex *locking.MutexClass
|
||||||
|
|
||||||
|
// lockNames is a list of user-friendly lock names.
|
||||||
|
// Populated in init.
|
||||||
|
var processorlockNames []string
|
||||||
|
|
||||||
|
// lockNameIndex is used as an index passed to NestedLock and NestedUnlock,
|
||||||
|
// referring to an index within lockNames.
|
||||||
|
// Values are specified using the "consts" field of go_template_instance.
|
||||||
|
type processorlockNameIndex int
|
||||||
|
|
||||||
|
// DO NOT REMOVE: The following function automatically replaced with lock index constants.
|
||||||
|
// LOCK_NAME_INDEX_CONSTANTS
|
||||||
|
const ()
|
||||||
|
|
||||||
|
// Lock locks m.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *processorMutex) Lock() {
|
||||||
|
locking.AddGLock(processorprefixIndex, -1)
|
||||||
|
m.mu.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NestedLock locks m knowing that another lock of the same type is held.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *processorMutex) NestedLock(i processorlockNameIndex) {
|
||||||
|
locking.AddGLock(processorprefixIndex, int(i))
|
||||||
|
m.mu.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unlock unlocks m.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *processorMutex) Unlock() {
|
||||||
|
locking.DelGLock(processorprefixIndex, -1)
|
||||||
|
m.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NestedUnlock unlocks m knowing that another lock of the same type is held.
|
||||||
|
// +checklocksignore
|
||||||
|
func (m *processorMutex) NestedUnlock(i processorlockNameIndex) {
|
||||||
|
locking.DelGLock(processorprefixIndex, int(i))
|
||||||
|
m.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// DO NOT REMOVE: The following function is automatically replaced.
//
// In this instantiation it is empty, so processorlockNames stays nil.
func processorinitLockNames() {}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
processorinitLockNames()
|
||||||
|
processorprefixIndex = locking.NewMutexClass(reflect.TypeOf(processorMutex{}), processorlockNames)
|
||||||
|
}
|
||||||
275
internal/fdbased_darwin/processors.go
Normal file
275
internal/fdbased_darwin/processors.go
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
// Copyright 2024 The gVisor Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fdbased
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/binary"
|
||||||
|
|
||||||
|
"github.com/sagernet/gvisor/pkg/rand"
|
||||||
|
"github.com/sagernet/gvisor/pkg/sleep"
|
||||||
|
"github.com/sagernet/gvisor/pkg/sync"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/hash/jenkins"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/header"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/stack"
|
||||||
|
"github.com/sagernet/gvisor/pkg/tcpip/stack/gro"
|
||||||
|
)
|
||||||
|
|
||||||
|
// +stateify savable
|
||||||
|
type processor struct {
|
||||||
|
mu processorMutex `state:"nosave"`
|
||||||
|
// +checklocks:mu
|
||||||
|
pkts stack.PacketBufferList
|
||||||
|
|
||||||
|
e *endpoint
|
||||||
|
gro gro.GRO
|
||||||
|
sleeper sleep.Sleeper
|
||||||
|
packetWaker sleep.Waker
|
||||||
|
closeWaker sleep.Waker
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) start(wg *sync.WaitGroup) {
|
||||||
|
defer wg.Done()
|
||||||
|
defer p.sleeper.Done()
|
||||||
|
for {
|
||||||
|
switch w := p.sleeper.Fetch(true); {
|
||||||
|
case w == &p.packetWaker:
|
||||||
|
p.deliverPackets()
|
||||||
|
case w == &p.closeWaker:
|
||||||
|
p.mu.Lock()
|
||||||
|
p.pkts.Reset()
|
||||||
|
p.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) deliverPackets() {
|
||||||
|
p.e.mu.RLock()
|
||||||
|
p.gro.Dispatcher = p.e.dispatcher
|
||||||
|
p.e.mu.RUnlock()
|
||||||
|
if p.gro.Dispatcher == nil {
|
||||||
|
p.mu.Lock()
|
||||||
|
p.pkts.Reset()
|
||||||
|
p.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
p.mu.Lock()
|
||||||
|
for p.pkts.Len() > 0 {
|
||||||
|
pkt := p.pkts.PopFront()
|
||||||
|
p.mu.Unlock()
|
||||||
|
p.gro.Enqueue(pkt)
|
||||||
|
pkt.DecRef()
|
||||||
|
p.mu.Lock()
|
||||||
|
}
|
||||||
|
p.mu.Unlock()
|
||||||
|
p.gro.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// processorManager handles starting, closing, and queuing packets on processor
|
||||||
|
// goroutines.
|
||||||
|
//
|
||||||
|
// +stateify savable
|
||||||
|
type processorManager struct {
|
||||||
|
processors []processor
|
||||||
|
seed uint32
|
||||||
|
wg sync.WaitGroup `state:"nosave"`
|
||||||
|
e *endpoint
|
||||||
|
ready []bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// newProcessorManager creates a new processor manager.
|
||||||
|
func newProcessorManager(opts *Options, e *endpoint) *processorManager {
|
||||||
|
m := &processorManager{}
|
||||||
|
m.seed = rand.Uint32()
|
||||||
|
m.ready = make([]bool, opts.ProcessorsPerChannel)
|
||||||
|
m.processors = make([]processor, opts.ProcessorsPerChannel)
|
||||||
|
m.e = e
|
||||||
|
m.wg.Add(opts.ProcessorsPerChannel)
|
||||||
|
|
||||||
|
for i := range m.processors {
|
||||||
|
p := &m.processors[i]
|
||||||
|
p.sleeper.AddWaker(&p.packetWaker)
|
||||||
|
p.sleeper.AddWaker(&p.closeWaker)
|
||||||
|
p.gro.Init(false)
|
||||||
|
p.e = e
|
||||||
|
}
|
||||||
|
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
// start starts the processor goroutines if the processor manager is configured
|
||||||
|
// with more than one processor.
|
||||||
|
func (m *processorManager) start() {
|
||||||
|
for i := range m.processors {
|
||||||
|
p := &m.processors[i]
|
||||||
|
// Only start processor in a separate goroutine if we have multiple of them.
|
||||||
|
if len(m.processors) > 1 {
|
||||||
|
go p.start(&m.wg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// afterLoad is invoked by stateify.
|
||||||
|
func (m *processorManager) afterLoad(context.Context) {
|
||||||
|
m.wg.Add(len(m.processors))
|
||||||
|
m.start()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *processorManager) connectionHash(cid *connectionID) uint32 {
|
||||||
|
var payload [4]byte
|
||||||
|
binary.LittleEndian.PutUint16(payload[0:], cid.srcPort)
|
||||||
|
binary.LittleEndian.PutUint16(payload[2:], cid.dstPort)
|
||||||
|
|
||||||
|
h := jenkins.Sum32(m.seed)
|
||||||
|
h.Write(payload[:])
|
||||||
|
h.Write(cid.srcAddr)
|
||||||
|
h.Write(cid.dstAddr)
|
||||||
|
return h.Sum32()
|
||||||
|
}
|
||||||
|
|
||||||
|
// queuePacket queues a packet to be delivered to the appropriate processor.
|
||||||
|
func (m *processorManager) queuePacket(pkt *stack.PacketBuffer, hasEthHeader bool) {
|
||||||
|
var pIdx uint32
|
||||||
|
cid, nonConnectionPkt := tcpipConnectionID(pkt)
|
||||||
|
if !hasEthHeader {
|
||||||
|
if nonConnectionPkt {
|
||||||
|
// If there's no eth header this should be a standard tcpip packet. If
|
||||||
|
// it isn't the packet is invalid so drop it.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pkt.NetworkProtocolNumber = cid.proto
|
||||||
|
}
|
||||||
|
if len(m.processors) == 1 || nonConnectionPkt {
|
||||||
|
// If the packet is not associated with an active connection, use the
|
||||||
|
// first processor.
|
||||||
|
pIdx = 0
|
||||||
|
} else {
|
||||||
|
pIdx = m.connectionHash(&cid) % uint32(len(m.processors))
|
||||||
|
}
|
||||||
|
p := &m.processors[pIdx]
|
||||||
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
p.pkts.PushBack(pkt.IncRef())
|
||||||
|
m.ready[pIdx] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
type connectionID struct {
|
||||||
|
srcAddr, dstAddr []byte
|
||||||
|
srcPort, dstPort uint16
|
||||||
|
proto tcpip.NetworkProtocolNumber
|
||||||
|
}
|
||||||
|
|
||||||
|
// tcpipConnectionID returns a tcpip connection id tuple based on the data found
|
||||||
|
// in the packet. It returns true if the packet is not associated with an active
|
||||||
|
// connection (e.g ARP, NDP, etc). The method assumes link headers have already
|
||||||
|
// been processed if they were present.
|
||||||
|
func tcpipConnectionID(pkt *stack.PacketBuffer) (connectionID, bool) {
|
||||||
|
var cid connectionID
|
||||||
|
h, ok := pkt.Data().PullUp(1)
|
||||||
|
if !ok {
|
||||||
|
// Skip this packet.
|
||||||
|
return cid, true
|
||||||
|
}
|
||||||
|
|
||||||
|
const tcpSrcDstPortLen = 4
|
||||||
|
switch header.IPVersion(h) {
|
||||||
|
case header.IPv4Version:
|
||||||
|
hdrLen := header.IPv4(h).HeaderLength()
|
||||||
|
h, ok = pkt.Data().PullUp(int(hdrLen) + tcpSrcDstPortLen)
|
||||||
|
if !ok {
|
||||||
|
return cid, true
|
||||||
|
}
|
||||||
|
ipHdr := header.IPv4(h[:hdrLen])
|
||||||
|
tcpHdr := header.TCP(h[hdrLen:][:tcpSrcDstPortLen])
|
||||||
|
|
||||||
|
cid.srcAddr = ipHdr.SourceAddressSlice()
|
||||||
|
cid.dstAddr = ipHdr.DestinationAddressSlice()
|
||||||
|
// All fragment packets need to be processed by the same goroutine, so
|
||||||
|
// only record the TCP ports if this is not a fragment packet.
|
||||||
|
if ipHdr.IsValid(pkt.Data().Size()) && !ipHdr.More() && ipHdr.FragmentOffset() == 0 {
|
||||||
|
cid.srcPort = tcpHdr.SourcePort()
|
||||||
|
cid.dstPort = tcpHdr.DestinationPort()
|
||||||
|
}
|
||||||
|
cid.proto = header.IPv4ProtocolNumber
|
||||||
|
case header.IPv6Version:
|
||||||
|
h, ok = pkt.Data().PullUp(header.IPv6FixedHeaderSize + tcpSrcDstPortLen)
|
||||||
|
if !ok {
|
||||||
|
return cid, true
|
||||||
|
}
|
||||||
|
ipHdr := header.IPv6(h)
|
||||||
|
|
||||||
|
var tcpHdr header.TCP
|
||||||
|
if tcpip.TransportProtocolNumber(ipHdr.NextHeader()) == header.TCPProtocolNumber {
|
||||||
|
tcpHdr = header.TCP(h[header.IPv6FixedHeaderSize:][:tcpSrcDstPortLen])
|
||||||
|
} else {
|
||||||
|
// Slow path for IPv6 extension headers :(.
|
||||||
|
dataBuf := pkt.Data().ToBuffer()
|
||||||
|
dataBuf.TrimFront(header.IPv6MinimumSize)
|
||||||
|
it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(ipHdr.NextHeader()), dataBuf)
|
||||||
|
defer it.Release()
|
||||||
|
for {
|
||||||
|
hdr, done, err := it.Next()
|
||||||
|
if done || err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
hdr.Release()
|
||||||
|
}
|
||||||
|
h, ok = pkt.Data().PullUp(int(it.HeaderOffset()) + tcpSrcDstPortLen)
|
||||||
|
if !ok {
|
||||||
|
return cid, true
|
||||||
|
}
|
||||||
|
tcpHdr = header.TCP(h[it.HeaderOffset():][:tcpSrcDstPortLen])
|
||||||
|
}
|
||||||
|
cid.srcAddr = ipHdr.SourceAddressSlice()
|
||||||
|
cid.dstAddr = ipHdr.DestinationAddressSlice()
|
||||||
|
cid.srcPort = tcpHdr.SourcePort()
|
||||||
|
cid.dstPort = tcpHdr.DestinationPort()
|
||||||
|
cid.proto = header.IPv6ProtocolNumber
|
||||||
|
default:
|
||||||
|
return cid, true
|
||||||
|
}
|
||||||
|
return cid, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *processorManager) close() {
|
||||||
|
if len(m.processors) < 2 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for i := range m.processors {
|
||||||
|
p := &m.processors[i]
|
||||||
|
p.closeWaker.Assert()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// wakeReady wakes up all processors that have a packet queued. If there is only
|
||||||
|
// one processor, the method delivers the packet inline without waking a
|
||||||
|
// goroutine.
|
||||||
|
func (m *processorManager) wakeReady() {
|
||||||
|
for i, ready := range m.ready {
|
||||||
|
if !ready {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
p := &m.processors[i]
|
||||||
|
if len(m.processors) > 1 {
|
||||||
|
p.packetWaker.Assert()
|
||||||
|
} else {
|
||||||
|
p.deliverPackets()
|
||||||
|
}
|
||||||
|
m.ready[i] = false
|
||||||
|
}
|
||||||
|
}
|
||||||
188
internal/rawfile_darwin/rawfile.go
Normal file
188
internal/rawfile_darwin/rawfile.go
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
package rawfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SizeofIovec is the size of a unix.Iovec in bytes.
|
||||||
|
const SizeofIovec = unsafe.Sizeof(unix.Iovec{})
|
||||||
|
|
||||||
|
// MaxIovs is UIO_MAXIOV, the maximum number of iovecs that may be passed to a
|
||||||
|
// host system call in a single array.
|
||||||
|
const MaxIovs = 1024
|
||||||
|
|
||||||
|
// IovecFromBytes returns a unix.Iovec representing bs.
|
||||||
|
//
|
||||||
|
// Preconditions: len(bs) > 0.
|
||||||
|
func IovecFromBytes(bs []byte) unix.Iovec {
|
||||||
|
iov := unix.Iovec{
|
||||||
|
Base: &bs[0],
|
||||||
|
}
|
||||||
|
iov.SetLen(len(bs))
|
||||||
|
return iov
|
||||||
|
}
|
||||||
|
|
||||||
|
func bytesFromIovec(iov unix.Iovec) (bs []byte) {
|
||||||
|
sh := (*reflect.SliceHeader)(unsafe.Pointer(&bs))
|
||||||
|
sh.Data = uintptr(unsafe.Pointer(iov.Base))
|
||||||
|
sh.Len = int(iov.Len)
|
||||||
|
sh.Cap = int(iov.Len)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// AppendIovecFromBytes returns append(iovs, IovecFromBytes(bs)). If len(bs) ==
|
||||||
|
// 0, AppendIovecFromBytes returns iovs without modification. If len(iovs) >=
|
||||||
|
// max, AppendIovecFromBytes replaces the final iovec in iovs with one that
|
||||||
|
// also includes the contents of bs. Note that this implies that
|
||||||
|
// AppendIovecFromBytes is only usable when the returned iovec slice is used as
|
||||||
|
// the source of a write.
|
||||||
|
func AppendIovecFromBytes(iovs []unix.Iovec, bs []byte, max int) []unix.Iovec {
|
||||||
|
if len(bs) == 0 {
|
||||||
|
return iovs
|
||||||
|
}
|
||||||
|
if len(iovs) < max {
|
||||||
|
return append(iovs, IovecFromBytes(bs))
|
||||||
|
}
|
||||||
|
iovs[len(iovs)-1] = IovecFromBytes(append(bytesFromIovec(iovs[len(iovs)-1]), bs...))
|
||||||
|
return iovs
|
||||||
|
}
|
||||||
|
|
||||||
|
type MsgHdrX struct {
|
||||||
|
Msg unix.Msghdr
|
||||||
|
DataLen uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
func NonBlockingSendMMsg(fd int, msgHdrs []MsgHdrX) (int, unix.Errno) {
|
||||||
|
n, _, e := unix.RawSyscall6(unix.SYS_SENDMSG_X, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), unix.MSG_DONTWAIT, 0, 0)
|
||||||
|
return int(n), e
|
||||||
|
}
|
||||||
|
|
||||||
|
// SizeofMsgHdrX is the size of a MsgHdrX in bytes.
const SizeofMsgHdrX = unsafe.Sizeof(MsgHdrX{})
|
||||||
|
|
||||||
|
// NonBlockingWriteIovec writes iovec to a file descriptor in a single unix.
|
||||||
|
// It fails if partial data is written.
|
||||||
|
func NonBlockingWriteIovec(fd int, iovec []unix.Iovec) unix.Errno {
|
||||||
|
iovecLen := uintptr(len(iovec))
|
||||||
|
_, _, e := unix.RawSyscall(unix.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), iovecLen)
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
|
||||||
|
func BlockingReadvUntilStopped(efd int, fd int, iovecs []unix.Iovec) (int, unix.Errno) {
|
||||||
|
for {
|
||||||
|
n, _, e := unix.RawSyscall(unix.SYS_READV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs)))
|
||||||
|
if e == 0 {
|
||||||
|
return int(n), 0
|
||||||
|
}
|
||||||
|
if e != 0 && e != unix.EWOULDBLOCK {
|
||||||
|
return 0, e
|
||||||
|
}
|
||||||
|
stopped, e := BlockingPollUntilStopped(efd, fd, unix.POLLIN)
|
||||||
|
if stopped {
|
||||||
|
return -1, e
|
||||||
|
}
|
||||||
|
if e != 0 && e != unix.EINTR {
|
||||||
|
return 0, e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BlockingRecvMMsgUntilStopped(efd int, fd int, msgHdrs []MsgHdrX) (int, unix.Errno) {
|
||||||
|
for {
|
||||||
|
n, _, e := unix.RawSyscall6(unix.SYS_RECVMSG_X, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), unix.MSG_DONTWAIT, 0, 0)
|
||||||
|
if e == 0 {
|
||||||
|
return int(n), e
|
||||||
|
}
|
||||||
|
|
||||||
|
if e != 0 && e != unix.EWOULDBLOCK {
|
||||||
|
return 0, e
|
||||||
|
}
|
||||||
|
|
||||||
|
stopped, e := BlockingPollUntilStopped(efd, fd, unix.POLLIN)
|
||||||
|
if stopped {
|
||||||
|
return -1, e
|
||||||
|
}
|
||||||
|
if e != 0 && e != unix.EINTR {
|
||||||
|
return 0, e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BlockingPollUntilStopped(efd int, fd int, events int16) (bool, unix.Errno) {
|
||||||
|
// Create kqueue
|
||||||
|
kq, err := unix.Kqueue()
|
||||||
|
if err != nil {
|
||||||
|
return false, unix.Errno(err.(unix.Errno))
|
||||||
|
}
|
||||||
|
defer unix.Close(kq)
|
||||||
|
|
||||||
|
// Prepare kevents for registration
|
||||||
|
var kevents []unix.Kevent_t
|
||||||
|
|
||||||
|
// Always monitor efd for read events
|
||||||
|
kevents = append(kevents, unix.Kevent_t{
|
||||||
|
Ident: uint64(efd),
|
||||||
|
Filter: unix.EVFILT_READ,
|
||||||
|
Flags: unix.EV_ADD | unix.EV_ENABLE,
|
||||||
|
})
|
||||||
|
|
||||||
|
// Monitor fd based on requested events
|
||||||
|
// Convert poll events to kqueue filters
|
||||||
|
if events&unix.POLLIN != 0 {
|
||||||
|
kevents = append(kevents, unix.Kevent_t{
|
||||||
|
Ident: uint64(fd),
|
||||||
|
Filter: unix.EVFILT_READ,
|
||||||
|
Flags: unix.EV_ADD | unix.EV_ENABLE,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if events&unix.POLLOUT != 0 {
|
||||||
|
kevents = append(kevents, unix.Kevent_t{
|
||||||
|
Ident: uint64(fd),
|
||||||
|
Filter: unix.EVFILT_WRITE,
|
||||||
|
Flags: unix.EV_ADD | unix.EV_ENABLE,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register events
|
||||||
|
_, err = unix.Kevent(kq, kevents, nil, nil)
|
||||||
|
if err != nil {
|
||||||
|
return false, unix.Errno(err.(unix.Errno))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for events (blocking)
|
||||||
|
revents := make([]unix.Kevent_t, len(kevents))
|
||||||
|
n, err := unix.Kevent(kq, nil, revents, nil)
|
||||||
|
if err != nil {
|
||||||
|
return false, unix.Errno(err.(unix.Errno))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check results
|
||||||
|
var efdHasData bool
|
||||||
|
var errno unix.Errno
|
||||||
|
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
ev := &revents[i]
|
||||||
|
|
||||||
|
if int(ev.Ident) == efd && ev.Filter == unix.EVFILT_READ {
|
||||||
|
efdHasData = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if int(ev.Ident) == fd {
|
||||||
|
// Check for errors or EOF
|
||||||
|
if ev.Flags&unix.EV_EOF != 0 {
|
||||||
|
errno = unix.ECONNRESET
|
||||||
|
} else if ev.Flags&unix.EV_ERROR != 0 {
|
||||||
|
// Extract error from Data field
|
||||||
|
if ev.Data != 0 {
|
||||||
|
errno = unix.Errno(ev.Data)
|
||||||
|
} else {
|
||||||
|
errno = unix.ECONNRESET
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return efdHasData, errno
|
||||||
|
}
|
||||||
61
internal/stopfd_darwin/stopfd.go
Normal file
61
internal/stopfd_darwin/stopfd.go
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
package stopfd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// StopFD is a pipe used to signal a stop request: Stop writes to WriteFD and
// waiters watch ReadFD for readability. A closed end is recorded as -1.
type StopFD struct {
	// ReadFD is the read end of the pipe.
	ReadFD int
	// WriteFD is the write end of the pipe.
	WriteFD int
}
|
||||||
|
|
||||||
|
func New() (StopFD, error) {
|
||||||
|
fds := make([]int, 2)
|
||||||
|
err := unix.Pipe(fds)
|
||||||
|
if err != nil {
|
||||||
|
return StopFD{ReadFD: -1, WriteFD: -1}, fmt.Errorf("failed to create pipe: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := unix.SetNonblock(fds[0], true); err != nil {
|
||||||
|
unix.Close(fds[0])
|
||||||
|
unix.Close(fds[1])
|
||||||
|
return StopFD{ReadFD: -1, WriteFD: -1}, fmt.Errorf("failed to set read end non-blocking: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := unix.SetNonblock(fds[1], true); err != nil {
|
||||||
|
unix.Close(fds[0])
|
||||||
|
unix.Close(fds[1])
|
||||||
|
return StopFD{ReadFD: -1, WriteFD: -1}, fmt.Errorf("failed to set write end non-blocking: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return StopFD{ReadFD: fds[0], WriteFD: fds[1]}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sf *StopFD) Stop() {
|
||||||
|
signal := []byte{1}
|
||||||
|
if n, err := unix.Write(sf.WriteFD, signal); n != len(signal) || err != nil {
|
||||||
|
panic(fmt.Sprintf("write(WriteFD) = (%d, %s), want (%d, nil)", n, err, len(signal)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sf *StopFD) Close() error {
|
||||||
|
var err1, err2 error
|
||||||
|
if sf.ReadFD != -1 {
|
||||||
|
err1 = unix.Close(sf.ReadFD)
|
||||||
|
sf.ReadFD = -1
|
||||||
|
}
|
||||||
|
if sf.WriteFD != -1 {
|
||||||
|
err2 = unix.Close(sf.WriteFD)
|
||||||
|
sf.WriteFD = -1
|
||||||
|
}
|
||||||
|
if err1 != nil {
|
||||||
|
return err1
|
||||||
|
}
|
||||||
|
return err2
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sf *StopFD) EFD() int {
|
||||||
|
return sf.ReadFD
|
||||||
|
}
|
||||||
@@ -40,7 +40,7 @@ type GVisor struct {
|
|||||||
|
|
||||||
type GVisorTun interface {
|
type GVisorTun interface {
|
||||||
Tun
|
Tun
|
||||||
NewEndpoint() (stack.LinkEndpoint, error)
|
NewEndpoint() (stack.LinkEndpoint, stack.NICOptions, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewGVisor(
|
func NewGVisor(
|
||||||
@@ -65,12 +65,12 @@ func NewGVisor(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (t *GVisor) Start() error {
|
func (t *GVisor) Start() error {
|
||||||
linkEndpoint, err := t.tun.NewEndpoint()
|
linkEndpoint, nicOptions, err := t.tun.NewEndpoint()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
linkEndpoint = &LinkEndpointFilter{linkEndpoint, t.broadcastAddr, t.tun}
|
linkEndpoint = &LinkEndpointFilter{linkEndpoint, t.broadcastAddr, t.tun}
|
||||||
ipStack, err := NewGVisorStack(linkEndpoint)
|
ipStack, err := NewGVisorStackWithOptions(linkEndpoint, nicOptions)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -110,6 +110,10 @@ func AddrFromAddress(address tcpip.Address) netip.Addr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func NewGVisorStack(ep stack.LinkEndpoint) (*stack.Stack, error) {
|
func NewGVisorStack(ep stack.LinkEndpoint) (*stack.Stack, error) {
|
||||||
|
return NewGVisorStackWithOptions(ep, stack.NICOptions{})
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewGVisorStackWithOptions(ep stack.LinkEndpoint, opts stack.NICOptions) (*stack.Stack, error) {
|
||||||
ipStack := stack.New(stack.Options{
|
ipStack := stack.New(stack.Options{
|
||||||
NetworkProtocols: []stack.NetworkProtocolFactory{
|
NetworkProtocols: []stack.NetworkProtocolFactory{
|
||||||
ipv4.NewProtocol,
|
ipv4.NewProtocol,
|
||||||
@@ -122,7 +126,7 @@ func NewGVisorStack(ep stack.LinkEndpoint) (*stack.Stack, error) {
|
|||||||
icmp.NewProtocol6,
|
icmp.NewProtocol6,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
err := ipStack.CreateNIC(DefaultNIC, ep)
|
err := ipStack.CreateNICWithOptions(DefaultNIC, ep, opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, gonet.TranslateNetstackError(err)
|
return nil, gonet.TranslateNetstackError(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/sagernet/gvisor/pkg/tcpip/stack"
|
"github.com/sagernet/gvisor/pkg/tcpip/stack"
|
||||||
"github.com/sagernet/gvisor/pkg/tcpip/transport/udp"
|
"github.com/sagernet/gvisor/pkg/tcpip/transport/udp"
|
||||||
"github.com/sagernet/sing-tun/internal/gtcpip/header"
|
"github.com/sagernet/sing-tun/internal/gtcpip/header"
|
||||||
|
"github.com/sagernet/sing/common/buf"
|
||||||
"github.com/sagernet/sing/common/bufio"
|
"github.com/sagernet/sing/common/bufio"
|
||||||
E "github.com/sagernet/sing/common/exceptions"
|
E "github.com/sagernet/sing/common/exceptions"
|
||||||
)
|
)
|
||||||
@@ -72,10 +73,14 @@ func (m *Mixed) tunLoop() {
|
|||||||
m.txChecksumOffload = linuxTUN.TXChecksumOffload()
|
m.txChecksumOffload = linuxTUN.TXChecksumOffload()
|
||||||
batchSize := linuxTUN.BatchSize()
|
batchSize := linuxTUN.BatchSize()
|
||||||
if batchSize > 1 {
|
if batchSize > 1 {
|
||||||
m.batchLoop(linuxTUN, batchSize)
|
m.batchLoopLinux(linuxTUN, batchSize)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if darwinTUN, isDarwinTUN := m.tun.(DarwinTUN); isDarwinTUN {
|
||||||
|
m.batchLoopDarwin(darwinTUN)
|
||||||
|
return
|
||||||
|
}
|
||||||
packetBuffer := make([]byte, m.mtu+PacketOffset)
|
packetBuffer := make([]byte, m.mtu+PacketOffset)
|
||||||
for {
|
for {
|
||||||
n, err := m.tun.Read(packetBuffer)
|
n, err := m.tun.Read(packetBuffer)
|
||||||
@@ -119,12 +124,12 @@ func (m *Mixed) wintunLoop(winTun WinTun) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Mixed) batchLoop(linuxTUN LinuxTUN, batchSize int) {
|
func (m *Mixed) batchLoopLinux(linuxTUN LinuxTUN, batchSize int) {
|
||||||
packetBuffers := make([][]byte, batchSize)
|
packetBuffers := make([][]byte, batchSize)
|
||||||
writeBuffers := make([][]byte, batchSize)
|
writeBuffers := make([][]byte, batchSize)
|
||||||
packetSizes := make([]int, batchSize)
|
packetSizes := make([]int, batchSize)
|
||||||
for i := range packetBuffers {
|
for i := range packetBuffers {
|
||||||
packetBuffers[i] = make([]byte, m.mtu+m.frontHeadroom)
|
packetBuffers[i] = make([]byte, m.mtu+PacketOffset+m.frontHeadroom)
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
n, err := linuxTUN.BatchRead(packetBuffers, m.frontHeadroom, packetSizes)
|
n, err := linuxTUN.BatchRead(packetBuffers, m.frontHeadroom, packetSizes)
|
||||||
@@ -158,6 +163,40 @@ func (m *Mixed) batchLoop(linuxTUN LinuxTUN, batchSize int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Mixed) batchLoopDarwin(darwinTUN DarwinTUN) {
|
||||||
|
var writeBuffers []*buf.Buffer
|
||||||
|
for {
|
||||||
|
buffers, err := darwinTUN.BatchRead()
|
||||||
|
if err != nil {
|
||||||
|
if E.IsClosed(err) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.logger.Error(E.Cause(err, "batch read packet"))
|
||||||
|
}
|
||||||
|
if len(buffers) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
writeBuffers = writeBuffers[:0]
|
||||||
|
for _, buffer := range buffers {
|
||||||
|
packetSize := buffer.Len()
|
||||||
|
if packetSize < header.IPv4MinimumSize {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if m.processPacket(buffer.Bytes()) {
|
||||||
|
writeBuffers = append(writeBuffers, buffer)
|
||||||
|
} else {
|
||||||
|
buffer.Release()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(writeBuffers) > 0 {
|
||||||
|
err = darwinTUN.BatchWrite(writeBuffers)
|
||||||
|
if err != nil {
|
||||||
|
m.logger.Trace(E.Cause(err, "batch write packet"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (m *Mixed) processPacket(packet []byte) bool {
|
func (m *Mixed) processPacket(packet []byte) bool {
|
||||||
var (
|
var (
|
||||||
writeBack bool
|
writeBack bool
|
||||||
|
|||||||
@@ -170,10 +170,14 @@ func (s *System) tunLoop() {
|
|||||||
s.txChecksumOffload = linuxTUN.TXChecksumOffload()
|
s.txChecksumOffload = linuxTUN.TXChecksumOffload()
|
||||||
batchSize := linuxTUN.BatchSize()
|
batchSize := linuxTUN.BatchSize()
|
||||||
if batchSize > 1 {
|
if batchSize > 1 {
|
||||||
s.batchLoop(linuxTUN, batchSize)
|
s.batchLoopLinux(linuxTUN, batchSize)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if darwinTUN, isDarwinTUN := s.tun.(DarwinTUN); isDarwinTUN {
|
||||||
|
s.batchLoopDarwin(darwinTUN)
|
||||||
|
return
|
||||||
|
}
|
||||||
packetBuffer := make([]byte, s.mtu+PacketOffset)
|
packetBuffer := make([]byte, s.mtu+PacketOffset)
|
||||||
for {
|
for {
|
||||||
n, err := s.tun.Read(packetBuffer)
|
n, err := s.tun.Read(packetBuffer)
|
||||||
@@ -217,7 +221,7 @@ func (s *System) wintunLoop(winTun WinTun) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) batchLoop(linuxTUN LinuxTUN, batchSize int) {
|
func (s *System) batchLoopLinux(linuxTUN LinuxTUN, batchSize int) {
|
||||||
packetBuffers := make([][]byte, batchSize)
|
packetBuffers := make([][]byte, batchSize)
|
||||||
writeBuffers := make([][]byte, batchSize)
|
writeBuffers := make([][]byte, batchSize)
|
||||||
packetSizes := make([]int, batchSize)
|
packetSizes := make([]int, batchSize)
|
||||||
@@ -256,6 +260,40 @@ func (s *System) batchLoop(linuxTUN LinuxTUN, batchSize int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *System) batchLoopDarwin(darwinTUN DarwinTUN) {
|
||||||
|
var writeBuffers []*buf.Buffer
|
||||||
|
for {
|
||||||
|
buffers, err := darwinTUN.BatchRead()
|
||||||
|
if err != nil {
|
||||||
|
if E.IsClosed(err) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.logger.Error(E.Cause(err, "batch read packet"))
|
||||||
|
}
|
||||||
|
if len(buffers) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
writeBuffers = writeBuffers[:0]
|
||||||
|
for _, buffer := range buffers {
|
||||||
|
packetSize := buffer.Len()
|
||||||
|
if packetSize < header.IPv4MinimumSize {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if s.processPacket(buffer.Bytes()) {
|
||||||
|
writeBuffers = append(writeBuffers, buffer)
|
||||||
|
} else {
|
||||||
|
buffer.Release()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(writeBuffers) > 0 {
|
||||||
|
err = darwinTUN.BatchWrite(writeBuffers)
|
||||||
|
if err != nil {
|
||||||
|
s.logger.Trace(E.Cause(err, "batch write packet"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (s *System) processPacket(packet []byte) bool {
|
func (s *System) processPacket(packet []byte) bool {
|
||||||
var (
|
var (
|
||||||
writeBack bool
|
writeBack bool
|
||||||
|
|||||||
7
tun.go
7
tun.go
@@ -8,6 +8,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/sagernet/sing/common/buf"
|
||||||
"github.com/sagernet/sing/common/control"
|
"github.com/sagernet/sing/common/control"
|
||||||
F "github.com/sagernet/sing/common/format"
|
F "github.com/sagernet/sing/common/format"
|
||||||
"github.com/sagernet/sing/common/logger"
|
"github.com/sagernet/sing/common/logger"
|
||||||
@@ -45,6 +46,12 @@ type LinuxTUN interface {
|
|||||||
TXChecksumOffload() bool
|
TXChecksumOffload() bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DarwinTUN is a Tun that can additionally read and write packets in batches
// of *buf.Buffer.
type DarwinTUN interface {
	Tun
	// BatchRead returns a batch of packet buffers read from the device.
	BatchRead() ([]*buf.Buffer, error)
	// BatchWrite writes the given packet buffers to the device.
	BatchWrite(buffers []*buf.Buffer) error
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
DefaultIPRoute2TableIndex = 2022
|
DefaultIPRoute2TableIndex = 2022
|
||||||
DefaultIPRoute2RuleIndex = 9000
|
DefaultIPRoute2RuleIndex = 9000
|
||||||
|
|||||||
168
tun_darwin.go
168
tun_darwin.go
@@ -10,6 +10,8 @@ import (
|
|||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/sagernet/sing-tun/internal/gtcpip/header"
|
"github.com/sagernet/sing-tun/internal/gtcpip/header"
|
||||||
|
"github.com/sagernet/sing-tun/internal/rawfile_darwin"
|
||||||
|
"github.com/sagernet/sing-tun/internal/stopfd_darwin"
|
||||||
"github.com/sagernet/sing/common"
|
"github.com/sagernet/sing/common"
|
||||||
"github.com/sagernet/sing/common/buf"
|
"github.com/sagernet/sing/common/buf"
|
||||||
"github.com/sagernet/sing/common/bufio"
|
"github.com/sagernet/sing/common/bufio"
|
||||||
@@ -21,15 +23,64 @@ import (
|
|||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var _ DarwinTUN = (*NativeTun)(nil)
|
||||||
|
|
||||||
const PacketOffset = 4
|
const PacketOffset = 4
|
||||||
|
|
||||||
type NativeTun struct {
|
type NativeTun struct {
|
||||||
tunFile *os.File
|
tunFd int
|
||||||
tunWriter N.VectorisedWriter
|
tunFile *os.File
|
||||||
options Options
|
batchSize int
|
||||||
inet4Address [4]byte
|
iovecs []iovecBuffer
|
||||||
inet6Address [16]byte
|
iovecsOutput []iovecBuffer
|
||||||
routeSet bool
|
msgHdrs []rawfile.MsgHdrX
|
||||||
|
msgHdrsOutput []rawfile.MsgHdrX
|
||||||
|
buffers []*buf.Buffer
|
||||||
|
stopFd stopfd.StopFD
|
||||||
|
tunWriter N.VectorisedWriter
|
||||||
|
options Options
|
||||||
|
inet4Address [4]byte
|
||||||
|
inet6Address [16]byte
|
||||||
|
routeSet bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// iovecBuffer is one slot of a batched read or write: a pair of iovecs plus,
// for reads, the buffer that backs the second iovec.
type iovecBuffer struct {
	// mtu is the size used when allocating the read buffer.
	mtu int
	// buffer backs iovecs[1] on the read path; nil means a new buffer must
	// be allocated before the next read.
	buffer *buf.Buffer
	// iovecs always has two elements (see newIovecBuffer).
	iovecs []unix.Iovec
}
|
||||||
|
|
||||||
|
func newIovecBuffer(mtu int) iovecBuffer {
|
||||||
|
return iovecBuffer{
|
||||||
|
mtu: mtu,
|
||||||
|
iovecs: make([]unix.Iovec, 2),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *iovecBuffer) nextIovecs() []unix.Iovec {
|
||||||
|
if b.iovecs[0].Len == 0 {
|
||||||
|
headBuffer := make([]byte, PacketOffset)
|
||||||
|
b.iovecs[0].Base = &headBuffer[0]
|
||||||
|
b.iovecs[0].SetLen(PacketOffset)
|
||||||
|
}
|
||||||
|
if b.buffer == nil {
|
||||||
|
b.buffer = buf.NewSize(b.mtu)
|
||||||
|
b.iovecs[1].Base = &b.buffer.FreeBytes()[0]
|
||||||
|
b.iovecs[1].SetLen(b.mtu)
|
||||||
|
}
|
||||||
|
return b.iovecs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *iovecBuffer) nextIovecsOutput(buffer *buf.Buffer) []unix.Iovec {
|
||||||
|
switch header.IPVersion(buffer.Bytes()) {
|
||||||
|
case header.IPv4Version:
|
||||||
|
b.iovecs[0] = packetHeaderVec4
|
||||||
|
case header.IPv6Version:
|
||||||
|
b.iovecs[0] = packetHeaderVec6
|
||||||
|
}
|
||||||
|
b.iovecs[1].Base = &buffer.Bytes()[0]
|
||||||
|
b.iovecs[1].SetLen(buffer.Len())
|
||||||
|
return b.iovecs
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *NativeTun) Name() (string, error) {
|
func (t *NativeTun) Name() (string, error) {
|
||||||
@@ -42,6 +93,7 @@ func (t *NativeTun) Name() (string, error) {
|
|||||||
|
|
||||||
func New(options Options) (Tun, error) {
|
func New(options Options) (Tun, error) {
|
||||||
var tunFd int
|
var tunFd int
|
||||||
|
batchSize := ((512 * 1024) / int(options.MTU)) + 1
|
||||||
if options.FileDescriptor == 0 {
|
if options.FileDescriptor == 0 {
|
||||||
ifIndex := -1
|
ifIndex := -1
|
||||||
_, err := fmt.Sscanf(options.Name, "utun%d", &ifIndex)
|
_, err := fmt.Sscanf(options.Name, "utun%d", &ifIndex)
|
||||||
@@ -54,18 +106,37 @@ func New(options Options) (Tun, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = configure(tunFd, ifIndex, options.Name, options)
|
err = create(tunFd, ifIndex, options.Name, options)
|
||||||
|
if err != nil {
|
||||||
|
unix.Close(tunFd)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = configure(tunFd, batchSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
unix.Close(tunFd)
|
unix.Close(tunFd)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tunFd = options.FileDescriptor
|
tunFd = options.FileDescriptor
|
||||||
|
err := configure(tunFd, batchSize)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nativeTun := &NativeTun{
|
nativeTun := &NativeTun{
|
||||||
tunFile: os.NewFile(uintptr(tunFd), "utun"),
|
tunFd: tunFd,
|
||||||
options: options,
|
tunFile: os.NewFile(uintptr(tunFd), "utun"),
|
||||||
|
options: options,
|
||||||
|
batchSize: batchSize,
|
||||||
|
iovecs: make([]iovecBuffer, batchSize),
|
||||||
|
iovecsOutput: make([]iovecBuffer, batchSize),
|
||||||
|
msgHdrs: make([]rawfile.MsgHdrX, batchSize),
|
||||||
|
msgHdrsOutput: make([]rawfile.MsgHdrX, batchSize),
|
||||||
|
stopFd: common.Must1(stopfd.New()),
|
||||||
|
}
|
||||||
|
for i := 0; i < batchSize; i++ {
|
||||||
|
nativeTun.iovecs[i] = newIovecBuffer(int(options.MTU))
|
||||||
|
nativeTun.iovecsOutput[i] = newIovecBuffer(int(options.MTU))
|
||||||
}
|
}
|
||||||
if len(options.Inet4Address) > 0 {
|
if len(options.Inet4Address) > 0 {
|
||||||
nativeTun.inet4Address = options.Inet4Address[0].Addr().As4()
|
nativeTun.inet4Address = options.Inet4Address[0].Addr().As4()
|
||||||
@@ -100,10 +171,17 @@ func (t *NativeTun) Write(p []byte) (n int, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
packetHeader4 = [4]byte{0x00, 0x00, 0x00, unix.AF_INET}
|
packetHeader4 = []byte{0x00, 0x00, 0x00, unix.AF_INET}
|
||||||
packetHeader6 = [4]byte{0x00, 0x00, 0x00, unix.AF_INET6}
|
packetHeader6 = []byte{0x00, 0x00, 0x00, unix.AF_INET6}
|
||||||
|
packetHeaderVec4 = unix.Iovec{Base: &packetHeader4[0]}
|
||||||
|
packetHeaderVec6 = unix.Iovec{Base: &packetHeader6[0]}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
packetHeaderVec4.SetLen(4)
|
||||||
|
packetHeaderVec6.SetLen(4)
|
||||||
|
}
|
||||||
|
|
||||||
func (t *NativeTun) WriteVectorised(buffers []*buf.Buffer) error {
|
func (t *NativeTun) WriteVectorised(buffers []*buf.Buffer) error {
|
||||||
var packetHeader []byte
|
var packetHeader []byte
|
||||||
switch header.IPVersion(buffers[0].Bytes()) {
|
switch header.IPVersion(buffers[0].Bytes()) {
|
||||||
@@ -147,7 +225,7 @@ type addrLifetime6 struct {
|
|||||||
Pltime uint32
|
Pltime uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
func configure(tunFd int, ifIndex int, name string, options Options) error {
|
func create(tunFd int, ifIndex int, name string, options Options) error {
|
||||||
ctlInfo := &unix.CtlInfo{}
|
ctlInfo := &unix.CtlInfo{}
|
||||||
copy(ctlInfo.Name[:], utunControlName)
|
copy(ctlInfo.Name[:], utunControlName)
|
||||||
err := unix.IoctlCtlInfo(tunFd, ctlInfo)
|
err := unix.IoctlCtlInfo(tunFd, ctlInfo)
|
||||||
@@ -163,11 +241,6 @@ func configure(tunFd int, ifIndex int, name string, options Options) error {
|
|||||||
return os.NewSyscallError("Connect", err)
|
return os.NewSyscallError("Connect", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = unix.SetNonblock(tunFd, true)
|
|
||||||
if err != nil {
|
|
||||||
return os.NewSyscallError("SetNonblock", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = useSocket(unix.AF_INET, unix.SOCK_DGRAM, 0, func(socketFd int) error {
|
err = useSocket(unix.AF_INET, unix.SOCK_DGRAM, 0, func(socketFd int) error {
|
||||||
var ifr unix.IfreqMTU
|
var ifr unix.IfreqMTU
|
||||||
copy(ifr.Name[:], name)
|
copy(ifr.Name[:], name)
|
||||||
@@ -259,6 +332,65 @@ func configure(tunFd int, ifIndex int, name string, options Options) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func configure(tunFd int, batchSize int) error {
|
||||||
|
err := unix.SetNonblock(tunFd, true)
|
||||||
|
if err != nil {
|
||||||
|
return os.NewSyscallError("SetNonblock", err)
|
||||||
|
}
|
||||||
|
const UTUN_OPT_MAX_PENDING_PACKETS = 16
|
||||||
|
err = unix.SetsockoptInt(tunFd, 2, UTUN_OPT_MAX_PENDING_PACKETS, batchSize)
|
||||||
|
if err != nil {
|
||||||
|
return os.NewSyscallError("SetsockoptInt UTUN_OPT_MAX_PENDING_PACKETS", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *NativeTun) BatchSize() int {
|
||||||
|
return t.batchSize
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *NativeTun) BatchRead() ([]*buf.Buffer, error) {
|
||||||
|
for i := 0; i < t.batchSize; i++ {
|
||||||
|
iovecs := t.iovecs[i].nextIovecs()
|
||||||
|
t.msgHdrs[i].DataLen = 0
|
||||||
|
t.msgHdrs[i].Msg.Iov = &iovecs[0]
|
||||||
|
t.msgHdrs[i].Msg.Iovlen = 2
|
||||||
|
}
|
||||||
|
n, errno := rawfile.BlockingRecvMMsgUntilStopped(t.stopFd.ReadFD, t.tunFd, t.msgHdrs)
|
||||||
|
if errno != 0 {
|
||||||
|
return nil, errno
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
buffers := t.buffers
|
||||||
|
for k := 0; k < n; k++ {
|
||||||
|
buffer := t.iovecs[k].buffer
|
||||||
|
t.iovecs[k].buffer = nil
|
||||||
|
buffer.Truncate(int(t.msgHdrs[k].DataLen) - PacketOffset)
|
||||||
|
buffers = append(buffers, buffer)
|
||||||
|
}
|
||||||
|
t.buffers = buffers[:0]
|
||||||
|
return buffers, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *NativeTun) BatchWrite(buffers []*buf.Buffer) error {
|
||||||
|
for i, buffer := range buffers {
|
||||||
|
iovecs := t.iovecsOutput[i].nextIovecsOutput(buffer)
|
||||||
|
t.msgHdrsOutput[i].Msg.Iov = &iovecs[0]
|
||||||
|
t.msgHdrsOutput[i].Msg.Iovlen = 2
|
||||||
|
}
|
||||||
|
_, errno := rawfile.NonBlockingSendMMsg(t.tunFd, t.msgHdrsOutput[:len(buffers)])
|
||||||
|
if errno != 0 {
|
||||||
|
return errno
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *NativeTun) TXChecksumOffload() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (t *NativeTun) UpdateRouteOptions(tunOptions Options) error {
|
func (t *NativeTun) UpdateRouteOptions(tunOptions Options) error {
|
||||||
err := t.unsetRoutes()
|
err := t.unsetRoutes()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -3,132 +3,23 @@
|
|||||||
package tun
|
package tun
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/sagernet/gvisor/pkg/buffer"
|
"github.com/sagernet/gvisor/pkg/tcpip/link/qdisc/fifo"
|
||||||
"github.com/sagernet/gvisor/pkg/tcpip"
|
|
||||||
"github.com/sagernet/gvisor/pkg/tcpip/header"
|
|
||||||
"github.com/sagernet/gvisor/pkg/tcpip/stack"
|
"github.com/sagernet/gvisor/pkg/tcpip/stack"
|
||||||
"github.com/sagernet/sing/common/bufio"
|
"github.com/sagernet/sing-tun/internal/fdbased_darwin"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ GVisorTun = (*NativeTun)(nil)
|
var _ GVisorTun = (*NativeTun)(nil)
|
||||||
|
|
||||||
func (t *NativeTun) NewEndpoint() (stack.LinkEndpoint, error) {
|
func (t *NativeTun) NewEndpoint() (stack.LinkEndpoint, stack.NICOptions, error) {
|
||||||
return &DarwinEndpoint{tun: t}, nil
|
ep, err := fdbased.New(&fdbased.Options{
|
||||||
}
|
FDs: []int{t.tunFd},
|
||||||
|
MTU: t.options.MTU,
|
||||||
var _ stack.LinkEndpoint = (*DarwinEndpoint)(nil)
|
RXChecksumOffload: true,
|
||||||
|
})
|
||||||
type DarwinEndpoint struct {
|
if err != nil {
|
||||||
tun *NativeTun
|
return nil, stack.NICOptions{}, err
|
||||||
dispatcher stack.NetworkDispatcher
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) MTU() uint32 {
|
|
||||||
return e.tun.options.MTU
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) SetMTU(mtu uint32) {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) MaxHeaderLength() uint16 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) LinkAddress() tcpip.LinkAddress {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) SetLinkAddress(addr tcpip.LinkAddress) {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) Capabilities() stack.LinkEndpointCapabilities {
|
|
||||||
return stack.CapabilityRXChecksumOffload
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
|
|
||||||
if dispatcher == nil && e.dispatcher != nil {
|
|
||||||
e.dispatcher = nil
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if dispatcher != nil && e.dispatcher == nil {
|
|
||||||
e.dispatcher = dispatcher
|
|
||||||
go e.dispatchLoop()
|
|
||||||
}
|
}
|
||||||
}
|
return ep, stack.NICOptions{
|
||||||
|
QDisc: fifo.New(ep, 1, 1000),
|
||||||
func (e *DarwinEndpoint) dispatchLoop() {
|
}, nil
|
||||||
packetBuffer := make([]byte, e.tun.options.MTU+PacketOffset)
|
|
||||||
for {
|
|
||||||
n, err := e.tun.tunFile.Read(packetBuffer)
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
packet := packetBuffer[PacketOffset:n]
|
|
||||||
var networkProtocol tcpip.NetworkProtocolNumber
|
|
||||||
switch header.IPVersion(packet) {
|
|
||||||
case header.IPv4Version:
|
|
||||||
networkProtocol = header.IPv4ProtocolNumber
|
|
||||||
if header.IPv4(packet).DestinationAddress().As4() == e.tun.inet4Address {
|
|
||||||
e.tun.tunFile.Write(packetBuffer[:n])
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
case header.IPv6Version:
|
|
||||||
networkProtocol = header.IPv6ProtocolNumber
|
|
||||||
if header.IPv6(packet).DestinationAddress().As16() == e.tun.inet6Address {
|
|
||||||
e.tun.tunFile.Write(packetBuffer[:n])
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
e.tun.tunFile.Write(packetBuffer[:n])
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
|
|
||||||
Payload: buffer.MakeWithData(packetBuffer[4:n]),
|
|
||||||
IsForwardedPacket: true,
|
|
||||||
})
|
|
||||||
pkt.NetworkProtocolNumber = networkProtocol
|
|
||||||
dispatcher := e.dispatcher
|
|
||||||
if dispatcher == nil {
|
|
||||||
pkt.DecRef()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
dispatcher.DeliverNetworkPacket(networkProtocol, pkt)
|
|
||||||
pkt.DecRef()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) IsAttached() bool {
|
|
||||||
return e.dispatcher != nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) Wait() {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) ARPHardwareType() header.ARPHardwareType {
|
|
||||||
return header.ARPHardwareNone
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) AddHeader(buffer *stack.PacketBuffer) {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) ParseHeader(ptr *stack.PacketBuffer) bool {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) WritePackets(packetBufferList stack.PacketBufferList) (int, tcpip.Error) {
|
|
||||||
var n int
|
|
||||||
for _, packet := range packetBufferList.AsSlice() {
|
|
||||||
_, err := bufio.WriteVectorised(e.tun, packet.AsSlices())
|
|
||||||
if err != nil {
|
|
||||||
return n, &tcpip.ErrAborted{}
|
|
||||||
}
|
|
||||||
n++
|
|
||||||
}
|
|
||||||
return n, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) Close() {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *DarwinEndpoint) SetOnCloseAction(f func()) {
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,9 +9,9 @@ import (
|
|||||||
|
|
||||||
var _ GVisorTun = (*NativeTun)(nil)
|
var _ GVisorTun = (*NativeTun)(nil)
|
||||||
|
|
||||||
func (t *NativeTun) NewEndpoint() (stack.LinkEndpoint, error) {
|
func (t *NativeTun) NewEndpoint() (stack.LinkEndpoint, stack.NICOptions, error) {
|
||||||
if t.vnetHdr {
|
if t.vnetHdr {
|
||||||
return fdbased.New(&fdbased.Options{
|
ep, err := fdbased.New(&fdbased.Options{
|
||||||
FDs: []int{t.tunFd},
|
FDs: []int{t.tunFd},
|
||||||
MTU: t.options.MTU,
|
MTU: t.options.MTU,
|
||||||
GSOMaxSize: gsoMaxSize,
|
GSOMaxSize: gsoMaxSize,
|
||||||
@@ -19,11 +19,20 @@ func (t *NativeTun) NewEndpoint() (stack.LinkEndpoint, error) {
|
|||||||
RXChecksumOffload: true,
|
RXChecksumOffload: true,
|
||||||
TXChecksumOffload: t.txChecksumOffload,
|
TXChecksumOffload: t.txChecksumOffload,
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, stack.NICOptions{}, err
|
||||||
|
}
|
||||||
|
return ep, stack.NICOptions{}, nil
|
||||||
|
} else {
|
||||||
|
ep, err := fdbased.New(&fdbased.Options{
|
||||||
|
FDs: []int{t.tunFd},
|
||||||
|
MTU: t.options.MTU,
|
||||||
|
RXChecksumOffload: true,
|
||||||
|
TXChecksumOffload: t.txChecksumOffload,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, stack.NICOptions{}, err
|
||||||
|
}
|
||||||
|
return ep, stack.NICOptions{}, nil
|
||||||
}
|
}
|
||||||
return fdbased.New(&fdbased.Options{
|
|
||||||
FDs: []int{t.tunFd},
|
|
||||||
MTU: t.options.MTU,
|
|
||||||
RXChecksumOffload: true,
|
|
||||||
TXChecksumOffload: t.txChecksumOffload,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,8 +11,8 @@ import (
|
|||||||
|
|
||||||
var _ GVisorTun = (*NativeTun)(nil)
|
var _ GVisorTun = (*NativeTun)(nil)
|
||||||
|
|
||||||
func (t *NativeTun) NewEndpoint() (stack.LinkEndpoint, error) {
|
func (t *NativeTun) NewEndpoint() (stack.LinkEndpoint, stack.NICOptions, error) {
|
||||||
return &WintunEndpoint{tun: t}, nil
|
return &WintunEndpoint{tun: t}, stack.NICOptions{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ stack.LinkEndpoint = (*WintunEndpoint)(nil)
|
var _ stack.LinkEndpoint = (*WintunEndpoint)(nil)
|
||||||
|
|||||||
Reference in New Issue
Block a user