mirror of
https://github.com/impact-eintr/netstack.git
synced 2025-10-05 12:56:55 +08:00
1398 lines
40 KiB
Go
1398 lines
40 KiB
Go
package tcp
|
||
|
||
import (
|
||
"crypto/rand"
|
||
"fmt"
|
||
"log"
|
||
"math"
|
||
"netstack/logger"
|
||
"netstack/sleep"
|
||
"netstack/tcpip"
|
||
"netstack/tcpip/buffer"
|
||
"netstack/tcpip/header"
|
||
"netstack/tcpip/seqnum"
|
||
"netstack/tcpip/stack"
|
||
"netstack/tmutex"
|
||
"netstack/waiter"
|
||
"sync"
|
||
"sync/atomic"
|
||
"time"
|
||
)
|
||
|
||
// tcp状态机的状态
|
||
type endpointState int
|
||
|
||
// tcp 状态机的各种状态
|
||
const (
|
||
stateInitial endpointState = iota
|
||
stateBound
|
||
stateListen
|
||
stateConnecting
|
||
stateConnected
|
||
stateClosed
|
||
stateError
|
||
)
|
||
|
||
// Reasons for notifying the protocol goroutine.
|
||
const (
|
||
notifyNonZeroReceiveWindow = 1 << iota
|
||
notifyReceiveWindowChanged
|
||
notifyClose
|
||
notifyMTUChanged
|
||
notifyDrain
|
||
notifyReset
|
||
notifyKeepaliveChanged
|
||
)
|
||
|
||
// SACKInfo holds TCP SACK related information for a given endpoint.
|
||
//
|
||
// +stateify savable
|
||
type SACKInfo struct {
|
||
// Blocks is the maximum number of SACK blocks we track
|
||
// per endpoint.
|
||
Blocks [MaxSACKBlocks]header.SACKBlock
|
||
|
||
// NumBlocks is the number of valid SACK blocks stored in the
|
||
// blocks array above.
|
||
NumBlocks int
|
||
}
|
||
|
||
// keepalive is a synchronization wrapper used to appease stateify. See the
|
||
// comment in endpoint, where it is used.
|
||
// KeepAlive默认情况下是关闭的,可以被上层应用开启和关闭
|
||
// tcp_keepalive_probes: 在tcp_keepalive_time之后,没有接收到对方确认,继续发送保活探测包次数,默认值为9(次)
|
||
// +stateify savable
|
||
type keepalive struct {
|
||
sync.Mutex
|
||
enabled bool
|
||
// KeepAlive的空闲时长,或者说每次正常发送心跳的周期,默认值为7200s(2小时)
|
||
idle time.Duration
|
||
// KeepAlive探测包的发送间隔,默认值为75s
|
||
interval time.Duration
|
||
count int
|
||
unacked int
|
||
timer timer
|
||
waker sleep.Waker
|
||
}
|
||
|
||
// endpoint 表示TCP端点。该结构用作端点用户和协议实现之间的接口;让并发goroutine调用端点是合法的,
|
||
// 它们是正确同步的。然而,协议实现在单个goroutine中运行。
|
||
type endpoint struct {
|
||
workMu tmutex.Mutex
|
||
|
||
stack *stack.Stack // 网络协议栈
|
||
netProto tcpip.NetworkProtocolNumber // 网络协议号 ipv4 ipv6
|
||
waiterQueue *waiter.Queue // 事件驱动机制
|
||
|
||
// lastError represents the last error that the endpoint reported;
|
||
// access to it is protected by the following mutex.
|
||
lastErrorMu sync.Mutex
|
||
lastError *tcpip.Error
|
||
|
||
// rcvListMu can be taken after the endpoint mu below.
|
||
rcvListMu sync.Mutex
|
||
rcvList segmentList
|
||
rcvClosed bool
|
||
rcvBufSize int
|
||
rcvBufUsed int
|
||
|
||
// The following fields are protected by the mutex.
|
||
mu sync.RWMutex
|
||
id stack.TransportEndpointID // tcp端在网络协议栈的唯一ID
|
||
state endpointState // 目前tcp状态机的状态
|
||
isPortReserved bool // 是否已经分配端口
|
||
isRegistered bool // 是否已经注册在网络协议栈
|
||
boundNICID tcpip.NICID
|
||
route stack.Route // tcp端在网络协议栈中的路由地址
|
||
v6only bool // 是否仅仅支持ipv6
|
||
isConnectNotified bool
|
||
|
||
// effectiveNetProtos contains the network protocols actually in use. In
|
||
// most cases it will only contain "netProto", but in cases like IPv6
|
||
// endpoints with v6only set to false, this could include multiple
|
||
// protocols (e.g., IPv6 and IPv4) or a single different protocol (e.g.,
|
||
// IPv4 when IPv6 endpoint is bound or connected to an IPv4 mapped
|
||
// address).
|
||
effectiveNetProtos []tcpip.NetworkProtocolNumber
|
||
|
||
hardError *tcpip.Error
|
||
|
||
// workerRunning specifies if a worker goroutine is running.
|
||
workerRunning bool
|
||
|
||
// workerCleanup specifies if the worker goroutine must perform cleanup
|
||
// before exitting. This can only be set to true when workerRunning is
|
||
// also true, and they're both protected by the mutex.
|
||
workerCleanup bool
|
||
|
||
// sendTSOk is used to indicate when the TS Option has been negotiated.
|
||
// When sendTSOk is true every non-RST segment should carry a TS as per
|
||
// RFC7323#section-1.1
|
||
sendTSOk bool
|
||
|
||
// recentTS is the timestamp that should be sent in the TSEcr field of
|
||
// the timestamp for future segments sent by the endpoint. This field is
|
||
// updated if required when a new segment is received by this endpoint.
|
||
// recentTS 是应该在端点发送的未来段的时间戳的 TSEcr 字段中发送的时间戳。
|
||
// 当此端点接收到新段时,如果需要,此字段会更新。
|
||
recentTS uint32
|
||
|
||
// tsOffset is a randomized offset added to the value of the
|
||
// TSVal field in the timestamp option.
|
||
tsOffset uint32
|
||
|
||
// shutdownFlags represent the current shutdown state of the endpoint.
|
||
shutdownFlags tcpip.ShutdownFlags
|
||
|
||
// sackPermitted is set to true if the peer sends the TCPSACKPermitted
|
||
// option in the SYN/SYN-ACK.
|
||
sackPermitted bool
|
||
|
||
sack SACKInfo
|
||
|
||
segmentQueue segmentQueue
|
||
|
||
// When the send side is closed, the protocol goroutine is notified via
|
||
// sndCloseWaker, and sndClosed is set to true.
|
||
sndBufMu sync.Mutex
|
||
sndBufSize int
|
||
sndBufUsed int
|
||
sndClosed bool
|
||
sndBufInQueue seqnum.Size
|
||
sndQueue segmentList
|
||
sndWaker sleep.Waker
|
||
sndCloseWaker sleep.Waker
|
||
|
||
// cc stores the name of the Congestion Control algorithm to use for
|
||
// this endpoint.
|
||
cc CongestionControlOption
|
||
|
||
// The following are used when a "packet too big" control packet is
|
||
// received. They are protected by sndBufMu. They are used to
|
||
// communicate to the main protocol goroutine how many such control
|
||
// messages have been received since the last notification was processed
|
||
// and what was the smallest MTU seen
|
||
packetTooBigCount int
|
||
sndMTU int
|
||
|
||
// newSegmentWaker is used to indicate to the protocol goroutine that
|
||
// it needs to wake up and handle new segments queued to it.
|
||
// HandlePacket收到segment后通知处理的事件驱动器
|
||
newSegmentWaker sleep.Waker
|
||
|
||
// notificationWaker is used to indicate to the protocol goroutine that
|
||
// it needs to wake up and check for notifications.
|
||
notificationWaker sleep.Waker
|
||
|
||
// notifyFlags is a bitmask of flags used to indicate to the protocol
|
||
// goroutine what it was notified; this is only accessed atomically.
|
||
notifyFlags uint32
|
||
|
||
// acceptedChan is used by a listening endpoint protocol goroutine to
|
||
// send newly accepted connections to the endpoint so that they can be
|
||
// read by Accept() calls.
|
||
acceptedChan chan *endpoint
|
||
|
||
keepalive keepalive
|
||
|
||
// The following are only used from the protocol goroutine, and
|
||
// therefore don't need locks to protect them.
|
||
rcv *receiver
|
||
snd *sender
|
||
|
||
// probe if not nil is invoked on every received segment. It is passed
|
||
// a copy of the current state of the endpoint.
|
||
probe stack.TCPProbeFunc
|
||
|
||
// The following are only used to assist the restore run to re-connect.
|
||
bindAddress tcpip.Address
|
||
connectingAddress tcpip.Address
|
||
}
|
||
|
||
func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint {
|
||
e := &endpoint{
|
||
stack: stack,
|
||
netProto: netProto,
|
||
waiterQueue: waiterQueue,
|
||
rcvBufSize: DefaultBufferSize,
|
||
sndBufSize: DefaultBufferSize,
|
||
sndMTU: int(math.MaxInt32),
|
||
keepalive: keepalive{
|
||
// Linux defaults.
|
||
idle: 2 * time.Hour,
|
||
interval: 75 * time.Second,
|
||
count: 9,
|
||
},
|
||
}
|
||
|
||
var ss SendBufferSizeOption
|
||
if err := stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
|
||
e.sndBufSize = ss.Default
|
||
}
|
||
|
||
var rs ReceiveBufferSizeOption
|
||
if err := stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
|
||
e.rcvBufSize = rs.Default
|
||
}
|
||
|
||
var cs CongestionControlOption
|
||
if err := stack.TransportProtocolOption(ProtocolNumber, &cs); err == nil {
|
||
e.cc = cs
|
||
}
|
||
|
||
e.segmentQueue.setLimit(2 * e.rcvBufSize)
|
||
e.workMu.Init()
|
||
e.workMu.Lock()
|
||
e.tsOffset = timeStampOffset() // 随机偏移
|
||
return e
|
||
}
|
||
|
||
func (e *endpoint) fetchNotifications() uint32 {
|
||
return atomic.SwapUint32(&e.notifyFlags, 0)
|
||
}
|
||
|
||
// 通知订阅消息的任务开始工作
|
||
func (e *endpoint) notifyProtocolGoroutine(n uint32) {
|
||
for {
|
||
v := atomic.LoadUint32(&e.notifyFlags)
|
||
if v&n == n {
|
||
// The flags are already set.
|
||
return
|
||
}
|
||
|
||
if atomic.CompareAndSwapUint32(&e.notifyFlags, v, v|n) {
|
||
if v == 0 {
|
||
// We are causing a transition from no flags to
|
||
// at least one flag set, so we must cause the
|
||
// protocol goroutine to wake up.
|
||
e.notificationWaker.Assert()
|
||
}
|
||
return
|
||
}
|
||
}
|
||
}
|
||
|
||
func (e *endpoint) Close() {
|
||
e.Shutdown(tcpip.ShutdownWrite | tcpip.ShutdownRead)
|
||
e.mu.Lock()
|
||
|
||
// We always release ports inline so that they are immediately available
|
||
// for reuse after Close() is called. If also registered, it means this
|
||
// is a listening socket, so we must unregister as well otherwise the
|
||
// next user would fail in Listen() when trying to register.
|
||
// 释放绑定端口 客户端释放随机绑定的port
|
||
// 注销协议栈中的端点
|
||
if e.isPortReserved {
|
||
e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.id.LocalAddress, e.id.LocalPort)
|
||
e.isPortReserved = false
|
||
|
||
if e.isRegistered {
|
||
e.stack.UnregisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.id)
|
||
e.isRegistered = false
|
||
}
|
||
}
|
||
|
||
tcpip.AddDanglingEndpoint(e)
|
||
if !e.workerRunning { // workerRunning 监听者 客户端 tcp连接 都会设置
|
||
e.cleanupLocked()
|
||
} else {
|
||
e.workerCleanup = true // 在端点调用了 Close 后将会走这个分支
|
||
e.notifyProtocolGoroutine(notifyClose)
|
||
}
|
||
e.mu.Unlock()
|
||
}
|
||
|
||
// cleanupLocked frees all resources associated with the endpoint. It is called
|
||
// after Close() is called and the worker goroutine (if any) is done with its
|
||
// work.
|
||
func (e *endpoint) cleanupLocked() {
|
||
// Close all endpoints that might have been accepted by TCP but not by
|
||
// the client.
|
||
if e.acceptedChan != nil { // 监听者
|
||
close(e.acceptedChan)
|
||
for n := range e.acceptedChan {
|
||
n.mu.Lock()
|
||
n.resetConnectionLocked(tcpip.ErrConnectionAborted)
|
||
n.mu.Unlock()
|
||
n.Close()
|
||
}
|
||
e.acceptedChan = nil
|
||
}
|
||
e.workerCleanup = false
|
||
|
||
// 注销掉这个端点
|
||
if e.isRegistered {
|
||
e.stack.UnregisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.id)
|
||
}
|
||
|
||
// 释放掉这个路由
|
||
e.route.Release()
|
||
tcpip.DeleteDanglingEndpoint(e)
|
||
}
|
||
|
||
// Read 从tcp的接收队列中读取数据
|
||
func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
|
||
e.mu.RLock()
|
||
|
||
e.rcvListMu.Lock()
|
||
bufUsed := e.rcvBufUsed
|
||
if s := e.state; s != stateConnected && s != stateClosed && bufUsed == 0 {
|
||
e.rcvListMu.Unlock()
|
||
he := e.hardError
|
||
e.mu.RUnlock()
|
||
if s == stateError {
|
||
return buffer.View{}, tcpip.ControlMessages{}, he
|
||
}
|
||
return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidEndpointState
|
||
}
|
||
|
||
v, err := e.readLocked()
|
||
e.rcvListMu.Unlock()
|
||
e.mu.RUnlock()
|
||
return v, tcpip.ControlMessages{}, err
|
||
}
|
||
|
||
// 从tcp的接收队列中读取数据,并从接收队列中删除已读数据
|
||
func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
|
||
if e.rcvBufUsed == 0 {
|
||
if e.rcvClosed || e.state != stateConnected {
|
||
return buffer.View{}, tcpip.ErrClosedForReceive
|
||
}
|
||
return buffer.View{}, tcpip.ErrWouldBlock
|
||
}
|
||
s := e.rcvList.Front()
|
||
views := s.data.Views()
|
||
v := views[s.viewToDeliver]
|
||
s.viewToDeliver++
|
||
|
||
if s.viewToDeliver >= len(views) {
|
||
e.rcvList.Remove(s)
|
||
s.decRef()
|
||
}
|
||
|
||
scale := e.rcv.rcvWndScale
|
||
// 检测接收窗口是否为0
|
||
wasZero := e.zeroReceiveWindow(scale) // 取用数据前是否有空闲
|
||
e.rcvBufUsed -= len(v)
|
||
if wasZero && !e.zeroReceiveWindow(scale) { // 之前没空闲 现在有了 告知一下对端
|
||
e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
|
||
logger.NOTICE("通知上层有了空间")
|
||
}
|
||
|
||
return v, nil
|
||
}
|
||
|
||
// Write 接收上层的数据,通过tcp连接发送到对端
|
||
func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-chan struct{}, *tcpip.Error) {
|
||
e.mu.RLock()
|
||
defer e.mu.RUnlock()
|
||
// 判断tcp状态,必须已经建立了连接才能发送数据
|
||
if e.state != stateConnected {
|
||
switch e.state {
|
||
case stateError:
|
||
return 0, nil, e.hardError
|
||
default:
|
||
return 0, nil, tcpip.ErrClosedForSend
|
||
}
|
||
}
|
||
// 检查负载的长度,如果为0,直接返回
|
||
if p.Size() == 0 {
|
||
return 0, nil, nil
|
||
}
|
||
e.sndBufMu.Lock()
|
||
// Check if the connection has already been closed for sends.
|
||
if e.sndClosed {
|
||
e.sndBufMu.Unlock()
|
||
return 0, nil, tcpip.ErrClosedForSend
|
||
}
|
||
|
||
// tcp流量控制:未被占用发送缓存还剩多少,如果发送缓存已经被用光了,返回 ErrWouldBlock
|
||
avail := e.sndBufSize - e.sndBufUsed
|
||
if avail <= 0 {
|
||
e.sndBufMu.Unlock()
|
||
return 0, nil, tcpip.ErrWouldBlock
|
||
}
|
||
|
||
v, perr := p.Get(avail)
|
||
if perr != nil {
|
||
e.sndBufMu.Unlock()
|
||
return 0, nil, perr
|
||
}
|
||
var err *tcpip.Error
|
||
if p.Size() > avail { // 给的数据 缓存不足以容纳
|
||
err = tcpip.ErrWouldBlock
|
||
}
|
||
l := len(v)
|
||
s := newSegmentFromView(&e.route, e.id, v) // 分段
|
||
// 插入发送队列
|
||
e.sndBufUsed += l
|
||
e.sndBufInQueue += seqnum.Size(l)
|
||
e.sndQueue.PushBack(s)
|
||
|
||
e.sndBufMu.Unlock()
|
||
|
||
// 发送数据,最终会调用 sender sendData 来发送数据
|
||
if e.workMu.TryLock() {
|
||
// Do the work inline.
|
||
e.handleWrite()
|
||
e.workMu.Unlock()
|
||
} else {
|
||
// Let the protocol goroutine do the work.
|
||
e.sndWaker.Assert()
|
||
}
|
||
|
||
return uintptr(l), nil, err
|
||
}
|
||
|
||
func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) {
|
||
return 0, tcpip.ControlMessages{}, nil
|
||
}
|
||
|
||
func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocolNumber, *tcpip.Error) {
|
||
netProto := e.netProto
|
||
if header.IsV4MappedAddress(addr.Addr) {
|
||
// Fail if using a v4 mapped address on a v6only endpoint.
|
||
if e.v6only {
|
||
return 0, tcpip.ErrNoRoute
|
||
}
|
||
|
||
netProto = header.IPv4ProtocolNumber
|
||
addr.Addr = addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:]
|
||
if addr.Addr == "\x00\x00\x00\x00" {
|
||
addr.Addr = ""
|
||
}
|
||
}
|
||
|
||
// Fail if we're bound to an address length different from the one we're
|
||
// checking.
|
||
if l := len(e.id.LocalAddress); l != 0 && len(addr.Addr) != 0 && l != len(addr.Addr) {
|
||
return 0, tcpip.ErrInvalidEndpointState
|
||
}
|
||
|
||
return netProto, nil
|
||
}
|
||
|
||
// Connect 这是客户端用的吧
|
||
func (e *endpoint) Connect(address tcpip.FullAddress) *tcpip.Error {
|
||
return e.connect(address, true, true)
|
||
}
|
||
|
||
// connect将端点连接到其对等端。在正常的非S/R情况下,新连接应该运行主goroutine并执行握手。
|
||
// 在恢复先前连接的端点时,将被动地创建两端(因此不会进行新的握手);对于应用程序尚未接受的堆栈接受连接,
|
||
// 它们将在不运行主goroutine的情况下进行恢复。
|
||
func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) (err *tcpip.Error) {
|
||
e.mu.Lock()
|
||
defer e.mu.Unlock()
|
||
defer func() {
|
||
if err != nil && !err.IgnoreStats() {
|
||
e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
|
||
}
|
||
}()
|
||
|
||
connectingAddr := addr.Addr
|
||
|
||
// 检查ipv4是否映射到ipv6
|
||
netProto, err := e.checkV4Mapped(&addr)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
nicid := addr.NIC
|
||
// 判断连接的状态
|
||
switch e.state {
|
||
case stateBound:
|
||
// If we're already bound to a NIC but the caller is requesting
|
||
// that we use a different one now, we cannot proceed.
|
||
if e.boundNICID == 0 {
|
||
break
|
||
}
|
||
|
||
if nicid != 0 && nicid != e.boundNICID {
|
||
return tcpip.ErrNoRoute
|
||
}
|
||
|
||
nicid = e.boundNICID
|
||
|
||
case stateInitial:
|
||
// Nothing to do. We'll eventually fill-in the gaps in the ID
|
||
// (if any) when we find a route.
|
||
|
||
case stateConnecting:
|
||
// A connection request has already been issued but hasn't
|
||
// completed yet.
|
||
return tcpip.ErrAlreadyConnecting
|
||
|
||
case stateConnected:
|
||
// The endpoint is already connected. If caller hasn't been notified yet, return success.
|
||
if !e.isConnectNotified {
|
||
e.isConnectNotified = true
|
||
return nil
|
||
}
|
||
// Otherwise return that it's already connected.
|
||
return tcpip.ErrAlreadyConnected
|
||
|
||
case stateError:
|
||
return e.hardError
|
||
|
||
default:
|
||
return tcpip.ErrInvalidEndpointState
|
||
}
|
||
|
||
// Find a route to the desired destination.
|
||
// 根据目标ip查找路由信息
|
||
r, err := e.stack.FindRoute(nicid, e.id.LocalAddress, addr.Addr, netProto)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
defer r.Release()
|
||
|
||
origID := e.id
|
||
|
||
netProtos := []tcpip.NetworkProtocolNumber{netProto}
|
||
e.id.LocalAddress = r.LocalAddress
|
||
e.id.RemoteAddress = r.RemoteAddress
|
||
e.id.RemotePort = addr.Port
|
||
|
||
if e.id.LocalPort != 0 {
|
||
// 记录和检查原端口是否已被使用
|
||
// The endpoint is bound to a port, attempt to register it.
|
||
err := e.stack.RegisterTransportEndpoint(nicid, netProtos, ProtocolNumber, e.id, e)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
} else {
|
||
// 端点还没有本地端口,所以尝试获取一个端口。确保它不会导致本地和远程的相同地址/端口(否则此端点将尝试连接到自身)
|
||
// 远端地址和本地地址是否相同
|
||
// NOTE 这段代码值得借鉴
|
||
sameAddr := e.id.LocalAddress == e.id.RemoteAddress
|
||
if _, err := e.stack.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) {
|
||
if sameAddr && p == e.id.RemotePort { // 同样的ip同样的port 打咩捏
|
||
return false, nil
|
||
}
|
||
if !e.stack.IsPortAvailable(netProtos, ProtocolNumber, e.id.LocalAddress, p) { // 端口被占用打咩
|
||
return false, nil
|
||
}
|
||
id := e.id
|
||
id.LocalPort = p
|
||
switch e.stack.RegisterTransportEndpoint(nicid, netProtos, ProtocolNumber, id, e) {
|
||
case nil:
|
||
e.id = id
|
||
return true, nil
|
||
case tcpip.ErrPortInUse:
|
||
return false, nil
|
||
default:
|
||
return false, err
|
||
}
|
||
}); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
// Remove the port reservation. This can happen when Bind is called
|
||
// before Connect: in such a case we don't want to hold on to
|
||
// reservations anymore.
|
||
if e.isPortReserved {
|
||
e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, origID.LocalAddress, origID.LocalPort)
|
||
e.isPortReserved = false
|
||
}
|
||
|
||
// 记录该端点的参数
|
||
e.isRegistered = true
|
||
e.state = stateConnecting
|
||
e.route = r.Clone()
|
||
e.boundNICID = nicid
|
||
e.effectiveNetProtos = netProtos
|
||
e.connectingAddress = connectingAddr
|
||
|
||
// Connect in the restore phase does not perform handshake. Restore its
|
||
// connection setting here.
|
||
if !handshake {
|
||
e.segmentQueue.mu.Lock()
|
||
for _, l := range []segmentList{e.segmentQueue.list, e.sndQueue, e.snd.writeList} {
|
||
for s := l.Front(); s != nil; s = s.Next() {
|
||
s.id = e.id
|
||
s.route = r.Clone()
|
||
e.sndWaker.Assert()
|
||
}
|
||
}
|
||
e.segmentQueue.mu.Unlock()
|
||
e.snd.updateMaxPayloadSize(int(e.route.MTU()), 0)
|
||
e.state = stateConnected
|
||
}
|
||
|
||
if run {
|
||
e.workerRunning = true
|
||
e.stack.Stats().TCP.ActiveConnectionOpenings.Increment()
|
||
// tcp的主函数
|
||
go e.protocolMainLoop(handshake)
|
||
}
|
||
|
||
return tcpip.ErrConnectStarted
|
||
}
|
||
|
||
func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
|
||
e.mu.Lock()
|
||
defer e.mu.Unlock()
|
||
e.shutdownFlags |= flags
|
||
|
||
|
||
switch e.state {
|
||
case stateConnected: // tcp连接关闭
|
||
// 不能直接关闭读数据包,因为关闭连接的时候四次挥手还需要读取报文。
|
||
if (e.shutdownFlags & tcpip.ShutdownWrite) != 0 {
|
||
e.rcvListMu.Lock()
|
||
rcvBufUsed := e.rcvBufUsed
|
||
e.rcvListMu.Unlock()
|
||
if rcvBufUsed > 0 {
|
||
// 如果接收队列中还有数据 通知对端RESET
|
||
e.notifyProtocolGoroutine(notifyReset)
|
||
return nil
|
||
}
|
||
}
|
||
|
||
e.sndBufMu.Lock()
|
||
if e.sndClosed {
|
||
// Already closed.
|
||
e.sndBufMu.Unlock()
|
||
break
|
||
}
|
||
|
||
// 发送一个 FIN 报文 告知对面关闭上层用户程序
|
||
// Queue fin segment.
|
||
s := newSegmentFromView(&e.route, e.id, nil)
|
||
e.sndQueue.PushBack(s)
|
||
e.sndBufInQueue++ // 仅仅占用一个字节位置
|
||
// Mark endpoint as closed.
|
||
e.sndClosed = true
|
||
e.sndBufMu.Unlock()
|
||
|
||
// 触发调用 handleClose
|
||
e.sndCloseWaker.Assert()
|
||
|
||
case stateListen: // 监听器关闭
|
||
// Tell protocolListenLoop to stop.
|
||
if flags&tcpip.ShutdownRead != 0 {
|
||
e.notifyProtocolGoroutine(notifyClose)
|
||
}
|
||
|
||
default:
|
||
return tcpip.ErrNotConnected
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (e *endpoint) Listen(backlog int) (err *tcpip.Error) {
|
||
e.mu.Lock()
|
||
defer e.mu.Unlock()
|
||
defer func() {
|
||
if err != nil && err.IgnoreStats() {
|
||
e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
|
||
}
|
||
}()
|
||
|
||
// 如果端点未关闭,则允许调整 backlog。
|
||
// 当端点关闭时,它将workerCleanup设置为true,从那时起,
|
||
// acceptedChan 负责 cleanup 方法(并且不应该在其他任何地方触及,包括此处)
|
||
if e.state == stateListen && !e.workerCleanup {
|
||
// Adjust the size of the channel iff we can fix existing
|
||
// pending connections into the new one.
|
||
if len(e.acceptedChan) > backlog { // 非法的调整 接收队列的端点比目标值小
|
||
return tcpip.ErrInvalidEndpointState
|
||
}
|
||
if cap(e.acceptedChan) == backlog {
|
||
return nil
|
||
}
|
||
origChan := e.acceptedChan
|
||
e.acceptedChan = make(chan *endpoint, backlog)
|
||
close(origChan)
|
||
for ep := range origChan {
|
||
e.acceptedChan <- ep
|
||
}
|
||
return nil
|
||
}
|
||
// 在调用 Listen 之前,必须先 Bind
|
||
if e.state != stateBound {
|
||
return tcpip.ErrInvalidEndpointState
|
||
}
|
||
// 注册该端点,这样网络层在分发数据包的时候就可以根据 id 来找到这个端点,接着把报文发送给这个端点。
|
||
if err := e.stack.RegisterTransportEndpoint(e.boundNICID,
|
||
e.effectiveNetProtos, ProtocolNumber, e.id, e); err != nil {
|
||
return err
|
||
}
|
||
|
||
e.isRegistered = true
|
||
e.state = stateListen
|
||
if e.acceptedChan == nil {
|
||
e.acceptedChan = make(chan *endpoint, backlog) // 全连接队列长度
|
||
}
|
||
e.workerRunning = true
|
||
|
||
e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
|
||
// tcp服务端实现的主循环,这个函数很重要,用一个goroutine来服务
|
||
go e.protocolListenLoop(seqnum.Size(e.receiveBufferAvailable()))
|
||
|
||
return nil
|
||
}
|
||
|
||
// startAcceptedLoop sets up required state and starts a goroutine with the
|
||
// main loop for accepted connections.
|
||
func (e *endpoint) startAcceptedLoop(waiterQueue *waiter.Queue) {
|
||
e.waiterQueue = waiterQueue
|
||
e.workerRunning = true
|
||
go e.protocolMainLoop(false)
|
||
}
|
||
|
||
func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
|
||
e.mu.RLock()
|
||
defer e.mu.RUnlock()
|
||
|
||
// Endpoint must be in listen state before it can accept connections.
|
||
if e.state != stateListen {
|
||
return nil, nil, tcpip.ErrInvalidEndpointState
|
||
}
|
||
|
||
var n *endpoint
|
||
select {
|
||
case n = <-e.acceptedChan: // 外部再次调用后尝试取出ep
|
||
logger.GetInstance().Info(logger.TCP, func() {
|
||
log.Println("监听者进行一个新连接的分发", n.id)
|
||
})
|
||
default:
|
||
return nil, nil, tcpip.ErrWouldBlock
|
||
}
|
||
wq := &waiter.Queue{}
|
||
n.startAcceptedLoop(wq)
|
||
return n, wq, nil
|
||
}
|
||
|
||
// Bind binds the endpoint to a specific local port and optionally address.
|
||
// 将端点绑定到特定的本地端口和可选的地址。
|
||
func (e *endpoint) Bind(addr tcpip.FullAddress, commit func() *tcpip.Error) *tcpip.Error {
|
||
e.mu.Lock()
|
||
defer e.mu.Unlock()
|
||
|
||
// 如果端点不是处于初始状态,则不允许绑定。这是因为一旦端点进入连接或监听状态,它就已经绑定了。
|
||
if e.state != stateInitial {
|
||
return tcpip.ErrAlreadyBound
|
||
}
|
||
// 确定tcp端的绑定ip
|
||
e.bindAddress = addr.Addr
|
||
netProto, err := e.checkV4Mapped(&addr)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
// 确定tcp支持的网络层协议
|
||
netProtos := []tcpip.NetworkProtocolNumber{netProto}
|
||
if netProto == header.IPv6ProtocolNumber && !e.v6only && addr.Addr == "" {
|
||
netProtos = []tcpip.NetworkProtocolNumber{
|
||
header.IPv6ProtocolNumber,
|
||
header.IPv4ProtocolNumber,
|
||
}
|
||
}
|
||
// 绑定端口
|
||
port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
e.isPortReserved = true
|
||
e.effectiveNetProtos = netProtos
|
||
e.id.LocalPort = port
|
||
|
||
defer func() {
|
||
// 如果有错,在退出的时候应该解除端口绑定
|
||
if err != nil {
|
||
e.stack.ReleasePort(netProtos, ProtocolNumber, addr.Addr, port)
|
||
e.isPortReserved = false
|
||
e.effectiveNetProtos = nil
|
||
e.id.LocalPort = 0
|
||
e.id.LocalAddress = ""
|
||
e.boundNICID = 0
|
||
}
|
||
}()
|
||
// 如果指定了ip地址 需要检查一下这个ip地址本地是否绑定过
|
||
if len(addr.Addr) != 0 {
|
||
nic := e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr)
|
||
if nic == 0 {
|
||
return tcpip.ErrBadLocalAddress
|
||
}
|
||
|
||
e.boundNICID = nic
|
||
e.id.LocalAddress = addr.Addr
|
||
}
|
||
|
||
// Check the commit function.
|
||
if commit != nil {
|
||
if err := commit(); err != nil {
|
||
// The defer takes care of unwind.
|
||
return err
|
||
}
|
||
}
|
||
// 标记状态为 stateBound
|
||
e.state = stateBound
|
||
|
||
return nil
|
||
}
|
||
|
||
func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
|
||
e.mu.RLock()
|
||
defer e.mu.RUnlock()
|
||
|
||
return tcpip.FullAddress{
|
||
Addr: e.id.LocalAddress,
|
||
Port: e.id.LocalPort,
|
||
NIC: e.boundNICID,
|
||
}, nil
|
||
}
|
||
|
||
func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
|
||
e.mu.RLock()
|
||
defer e.mu.RUnlock()
|
||
|
||
if e.state != stateConnected {
|
||
return tcpip.FullAddress{}, tcpip.ErrNotConnected
|
||
}
|
||
|
||
return tcpip.FullAddress{
|
||
Addr: e.id.RemoteAddress,
|
||
Port: e.id.RemotePort,
|
||
NIC: e.boundNICID,
|
||
}, nil
|
||
}
|
||
|
||
func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
|
||
result := waiter.EventMask(0)
|
||
|
||
e.mu.RLock()
|
||
defer e.mu.RUnlock()
|
||
|
||
switch e.state {
|
||
case stateInitial, stateBound, stateConnecting:
|
||
// Ready for nothing.
|
||
|
||
case stateClosed, stateError:
|
||
// Ready for anything.
|
||
result = mask
|
||
|
||
case stateListen:
|
||
// Check if there's anything in the accepted channel.
|
||
if (mask & waiter.EventIn) != 0 {
|
||
if len(e.acceptedChan) > 0 {
|
||
result |= waiter.EventIn
|
||
}
|
||
}
|
||
|
||
case stateConnected:
|
||
// Determine if the endpoint is writable if requested.
|
||
if (mask & waiter.EventOut) != 0 {
|
||
e.sndBufMu.Lock()
|
||
if e.sndClosed || e.sndBufUsed < e.sndBufSize {
|
||
result |= waiter.EventOut
|
||
}
|
||
e.sndBufMu.Unlock()
|
||
}
|
||
|
||
// Determine if the endpoint is readable if requested.
|
||
if (mask & waiter.EventIn) != 0 {
|
||
e.rcvListMu.Lock()
|
||
if e.rcvBufUsed > 0 || e.rcvClosed {
|
||
result |= waiter.EventIn
|
||
}
|
||
e.rcvListMu.Unlock()
|
||
}
|
||
}
|
||
|
||
return result
|
||
}
|
||
|
||
// zeroReceiveWindow 根据可用缓冲区的数量和接收窗口缩放,检查现在要宣布的接收窗口是否为零。
|
||
func (e *endpoint) zeroReceiveWindow(scale uint8) bool {
|
||
if e.rcvBufUsed >= e.rcvBufSize { // 接收方没接收空间了
|
||
return true
|
||
}
|
||
return ((e.rcvBufSize - e.rcvBufUsed) >> scale) == 0 // 接收方接收空间告急
|
||
}
|
||
|
||
// SetSockOpt sets a socket option
|
||
func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
|
||
switch v := opt.(type) {
|
||
case tcpip.KeepaliveEnabledOption:
|
||
e.keepalive.Lock()
|
||
e.keepalive.enabled = v != 0
|
||
e.keepalive.Unlock()
|
||
e.notifyProtocolGoroutine(notifyKeepaliveChanged)
|
||
|
||
case tcpip.KeepaliveIdleOption:
|
||
e.keepalive.Lock()
|
||
e.keepalive.idle = time.Duration(v)
|
||
e.keepalive.Unlock()
|
||
e.notifyProtocolGoroutine(notifyKeepaliveChanged)
|
||
|
||
case tcpip.KeepaliveIntervalOption:
|
||
e.keepalive.Lock()
|
||
e.keepalive.interval = time.Duration(v)
|
||
e.keepalive.Unlock()
|
||
e.notifyProtocolGoroutine(notifyKeepaliveChanged)
|
||
|
||
case tcpip.KeepaliveCountOption:
|
||
e.keepalive.Lock()
|
||
e.keepalive.count = int(v)
|
||
e.keepalive.Unlock()
|
||
e.notifyProtocolGoroutine(notifyKeepaliveChanged)
|
||
|
||
//case tcpip.NoDelayOption:
|
||
// e.mu.Lock()
|
||
// e.noDelay = v != 0
|
||
// e.mu.Unlock()
|
||
// return nil
|
||
|
||
//case tcpip.ReuseAddressOption:
|
||
// e.mu.Lock()
|
||
// e.reuseAddr = v != 0
|
||
// e.mu.Unlock()
|
||
// return nil
|
||
|
||
//case tcpip.ReceiveBufferSizeOption:
|
||
// // Make sure the receive buffer size is within the min and max
|
||
// // allowed.
|
||
// var rs ReceiveBufferSizeOption
|
||
// size := int(v)
|
||
// if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
|
||
// if size < rs.Min {
|
||
// size = rs.Min
|
||
// }
|
||
// if size > rs.Max {
|
||
// size = rs.Max
|
||
// }
|
||
// }
|
||
|
||
// mask := uint32(notifyReceiveWindowChanged)
|
||
|
||
// e.rcvListMu.Lock()
|
||
|
||
// // Make sure the receive buffer size allows us to send a
|
||
// // non-zero window size.
|
||
// scale := uint8(0)
|
||
// if e.rcv != nil {
|
||
// scale = e.rcv.rcvWndScale
|
||
// }
|
||
// if size>>scale == 0 {
|
||
// size = 1 << scale
|
||
// }
|
||
|
||
// // Make sure 2*size doesn't overflow.
|
||
// if size > math.MaxInt32/2 {
|
||
// size = math.MaxInt32 / 2
|
||
// }
|
||
|
||
// wasZero := e.zeroReceiveWindow(scale)
|
||
// e.rcvBufSize = size
|
||
// if wasZero && !e.zeroReceiveWindow(scale) {
|
||
// mask |= notifyNonZeroReceiveWindow
|
||
// }
|
||
// e.rcvListMu.Unlock()
|
||
|
||
// e.segmentQueue.setLimit(2 * size)
|
||
|
||
// e.notifyProtocolGoroutine(mask)
|
||
// return nil
|
||
|
||
//case tcpip.SendBufferSizeOption:
|
||
// // Make sure the send buffer size is within the min and max
|
||
// // allowed.
|
||
// size := int(v)
|
||
// var ss SendBufferSizeOption
|
||
// if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
|
||
// if size < ss.Min {
|
||
// size = ss.Min
|
||
// }
|
||
// if size > ss.Max {
|
||
// size = ss.Max
|
||
// }
|
||
// }
|
||
|
||
// e.sndBufMu.Lock()
|
||
// e.sndBufSize = size
|
||
// e.sndBufMu.Unlock()
|
||
|
||
// return nil
|
||
|
||
//case tcpip.V6OnlyOption:
|
||
// // We only recognize this option on v6 endpoints.
|
||
// if e.netProto != header.IPv6ProtocolNumber {
|
||
// return tcpip.ErrInvalidEndpointState
|
||
// }
|
||
|
||
// e.mu.Lock()
|
||
// defer e.mu.Unlock()
|
||
|
||
// // We only allow this to be set when we're in the initial state.
|
||
// if e.state != stateInitial {
|
||
// return tcpip.ErrInvalidEndpointState
|
||
// }
|
||
|
||
// e.v6only = v != 0
|
||
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
|
||
switch o := opt.(type) {
|
||
case tcpip.ErrorOption:
|
||
e.lastErrorMu.Lock()
|
||
err := e.lastError
|
||
e.lastError = nil
|
||
e.lastErrorMu.Unlock()
|
||
return err
|
||
|
||
case *tcpip.SendBufferSizeOption:
|
||
e.sndBufMu.Lock()
|
||
*o = tcpip.SendBufferSizeOption(e.sndBufSize)
|
||
e.sndBufMu.Unlock()
|
||
return nil
|
||
|
||
case *tcpip.ReceiveBufferSizeOption:
|
||
e.rcvListMu.Lock()
|
||
*o = tcpip.ReceiveBufferSizeOption(e.rcvBufSize)
|
||
e.rcvListMu.Unlock()
|
||
return nil
|
||
|
||
//case *tcpip.ReceiveQueueSizeOption:
|
||
// v, err := e.readyReceiveSize()
|
||
// if err != nil {
|
||
// return err
|
||
// }
|
||
|
||
// *o = tcpip.ReceiveQueueSizeOption(v)
|
||
// return nil
|
||
|
||
//case *tcpip.NoDelayOption:
|
||
// e.mu.RLock()
|
||
// v := e.noDelay
|
||
// e.mu.RUnlock()
|
||
|
||
// *o = 0
|
||
// if v {
|
||
// *o = 1
|
||
// }
|
||
// return nil
|
||
|
||
//case *tcpip.ReuseAddressOption:
|
||
// e.mu.RLock()
|
||
// v := e.reuseAddr
|
||
// e.mu.RUnlock()
|
||
|
||
// *o = 0
|
||
// if v {
|
||
// *o = 1
|
||
// }
|
||
// return nil
|
||
|
||
case *tcpip.V6OnlyOption:
|
||
// We only recognize this option on v6 endpoints.
|
||
if e.netProto != header.IPv6ProtocolNumber {
|
||
return tcpip.ErrUnknownProtocolOption
|
||
}
|
||
|
||
e.mu.Lock()
|
||
v := e.v6only
|
||
e.mu.Unlock()
|
||
|
||
*o = 0
|
||
if v {
|
||
*o = 1
|
||
}
|
||
return nil
|
||
|
||
case *tcpip.TCPInfoOption:
|
||
*o = tcpip.TCPInfoOption{}
|
||
e.mu.RLock()
|
||
snd := e.snd
|
||
e.mu.RUnlock()
|
||
if snd != nil {
|
||
snd.rtt.Lock()
|
||
o.RTT = snd.rtt.srtt
|
||
o.RTTVar = snd.rtt.rttvar
|
||
snd.rtt.Unlock()
|
||
}
|
||
|
||
return nil
|
||
|
||
case *tcpip.KeepaliveEnabledOption:
|
||
e.keepalive.Lock()
|
||
v := e.keepalive.enabled
|
||
e.keepalive.Unlock()
|
||
|
||
*o = 0
|
||
if v {
|
||
*o = 1
|
||
}
|
||
|
||
case *tcpip.KeepaliveIdleOption:
|
||
e.keepalive.Lock()
|
||
*o = tcpip.KeepaliveIdleOption(e.keepalive.idle)
|
||
e.keepalive.Unlock()
|
||
|
||
case *tcpip.KeepaliveIntervalOption:
|
||
e.keepalive.Lock()
|
||
*o = tcpip.KeepaliveIntervalOption(e.keepalive.interval)
|
||
e.keepalive.Unlock()
|
||
|
||
case *tcpip.KeepaliveCountOption:
|
||
e.keepalive.Lock()
|
||
*o = tcpip.KeepaliveCountOption(e.keepalive.count)
|
||
e.keepalive.Unlock()
|
||
|
||
}
|
||
|
||
return tcpip.ErrUnknownProtocolOption
|
||
}
|
||
|
||
func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv buffer.VectorisedView) {
|
||
s := newSegment(r, id, vv)
|
||
// 解析tcp段,如果解析失败,丢弃该报文
|
||
if !s.parse() {
|
||
e.stack.Stats().MalformedRcvdPackets.Increment()
|
||
e.stack.Stats().TCP.InvalidSegmentsReceived.Increment()
|
||
s.decRef()
|
||
return
|
||
}
|
||
|
||
e.stack.Stats().TCP.ValidSegmentsReceived.Increment() // 有效报文喜加一
|
||
if (s.flags & flagRst) != 0 { // RST报文需要拒绝
|
||
e.stack.Stats().TCP.ResetsReceived.Increment()
|
||
}
|
||
// Send packet to worker goroutine.
|
||
if e.segmentQueue.enqueue(s) {
|
||
var prefix string = "tcp连接"
|
||
if _, err := e.GetRemoteAddress(); err != nil {
|
||
prefix = "监听者"
|
||
}
|
||
logger.GetInstance().Info(logger.TCP, func() {
|
||
log.Printf(prefix+"收到 tcp [%s] 报文片段 from %s, seq: %d, ack: |%d|",
|
||
flagString(s.flags), fmt.Sprintf("%s:%d", s.id.RemoteAddress, s.id.RemotePort),
|
||
s.sequenceNumber, s.ackNumber)
|
||
})
|
||
|
||
// 对于 端口监听者 listener 而言这里唤醒的是 protocolListenLoop
|
||
// 对于普通tcp连接 conn 而言这里唤醒的是 protocolMainLoop
|
||
e.newSegmentWaker.Assert()
|
||
} else {
|
||
// The queue is full, so we drop the segment.
|
||
e.stack.Stats().DroppedPackets.Increment()
|
||
s.decRef()
|
||
}
|
||
}
|
||
|
||
func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, vv buffer.VectorisedView) {
|
||
|
||
}
|
||
|
||
// 当收到ack确认时 需要更新发送确认缓冲占用
|
||
func (e *endpoint) updateSndBufferUsage(v int) {
|
||
e.sndBufMu.Lock()
|
||
notify := e.sndBufUsed >= e.sndBufSize>>1
|
||
e.sndBufUsed -= v
|
||
notify = notify && e.sndBufUsed < e.sndBufSize>>1
|
||
e.sndBufMu.Unlock()
|
||
if notify { // 如果缓存中剩余的数据过多是不需要补充的
|
||
e.waiterQueue.Notify(waiter.EventOut)
|
||
log.Println("提醒 用户层的 Write() 继续写入")
|
||
}
|
||
}
|
||
|
||
func (e *endpoint) readyToRead(s *segment) {
|
||
e.rcvListMu.Lock()
|
||
if s != nil {
|
||
s.incRef()
|
||
e.rcvBufUsed += s.data.Size()
|
||
e.rcvList.PushBack(s)
|
||
} else {
|
||
e.rcvClosed = true
|
||
}
|
||
e.rcvListMu.Unlock()
|
||
|
||
e.waiterQueue.Notify(waiter.EventIn)
|
||
}
|
||
|
||
// receiveBufferAvailable calculates how many bytes are still available in the
|
||
// receive buffer.
|
||
// tcp流量控制:计算未被占用的接收缓存大小
|
||
func (e *endpoint) receiveBufferAvailable() int {
|
||
e.rcvListMu.Lock()
|
||
size := e.rcvBufSize
|
||
used := e.rcvBufUsed
|
||
e.rcvListMu.Unlock()
|
||
// We may use more bytes than the buffer size when the receive buffer
|
||
// shrinks.
|
||
if used >= size {
|
||
return 0
|
||
}
|
||
return size - used
|
||
}
|
||
|
||
func (e *endpoint) receiveBufferSize() int {
|
||
e.rcvListMu.Lock()
|
||
size := e.rcvBufSize
|
||
e.rcvListMu.Unlock()
|
||
return size
|
||
}
|
||
|
||
// updateRecentTimestamp updates the recent timestamp using the algorithm
|
||
// described in https://tools.ietf.org/html/rfc7323#section-4.3
|
||
func (e *endpoint) updateRecentTimestamp(tsVal uint32, maxSentAck seqnum.Value, segSeq seqnum.Value) {
|
||
if e.sendTSOk && seqnum.Value(e.recentTS).LessThan(seqnum.Value(tsVal)) && segSeq. LessThanEq(maxSentAck) {
|
||
e.recentTS = tsVal
|
||
}
|
||
}
|
||
|
||
// maybeEnableTimestamp marks the timestamp option enabled for this endpoint if
|
||
// the SYN options indicate that timestamp option was negotiated. It also
|
||
// initializes the recentTS with the value provided in synOpts.TSval.
|
||
func (e *endpoint) maybeEnableTimestamp(synOpts *header.TCPSynOptions) {
|
||
if synOpts.TS {
|
||
e.sendTSOk = true
|
||
e.recentTS = synOpts.TSVal
|
||
}
|
||
}
|
||
|
||
// timestamp returns the timestamp value to be used in the TSVal field of the
|
||
// timestamp option for outgoing TCP segments for a given endpoint.
|
||
func (e *endpoint) timestamp() uint32 {
|
||
return tcpTimeStamp(e.tsOffset)
|
||
}
|
||
|
||
// tcpTimeStamp returns a timestamp offset by the provided offset. This is
|
||
// not inlined above as it's used when SYN cookies are in use and endpoint
|
||
// is not created at the time when the SYN cookie is sent.
|
||
func tcpTimeStamp(offset uint32) uint32 {
|
||
now := time.Now()
|
||
return uint32(now.Unix()*1000+int64(now.Nanosecond()/1e6)) + offset
|
||
}
|
||
|
||
// timeStampOffset returns a randomized timestamp offset to be used when sending
|
||
// timestamp values in a timestamp option for a TCP segment.
|
||
func timeStampOffset() uint32 {
|
||
b := make([]byte, 4)
|
||
if _, err := rand.Read(b); err != nil {
|
||
panic(err)
|
||
}
|
||
// Initialize a random tsOffset that will be added to the recentTS
|
||
// everytime the timestamp is sent when the Timestamp option is enabled.
|
||
//
|
||
// See https://tools.ietf.org/html/rfc7323#section-5.4 for details on
|
||
// why this is required.
|
||
//
|
||
// NOTE: This is not completely to spec as normally this should be
|
||
// initialized in a manner analogous to how sequence numbers are
|
||
// randomized per connection basis. But for now this is sufficient.
|
||
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
|
||
}
|
||
|
||
// maybeEnableSACKPermitted marks the SACKPermitted option enabled for this endpoint
|
||
// if the SYN options indicate that the SACK option was negotiated and the TCP
|
||
// stack is configured to enable TCP SACK option.
|
||
func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) {
|
||
var v SACKEnabled
|
||
if err := e.stack.TransportProtocolOption(ProtocolNumber, &v); err != nil {
|
||
// Stack doesn't support SACK. So just return.
|
||
return
|
||
}
|
||
if bool(v) && synOpts.SACKPermitted {
|
||
e.sackPermitted = true
|
||
}
|
||
}
|
||
|
||
// completeState makes a full copy of the endpoint and returns it. This is used
|
||
// before invoking the probe. The state returned may not be fully consistent if
|
||
// there are intervening syscalls when the state is being copied.
|
||
func (e *endpoint) completeState() stack.TCPEndpointState {
|
||
var s stack.TCPEndpointState
|
||
s.SegTime = time.Now()
|
||
|
||
// Copy EndpointID.
|
||
e.mu.Lock()
|
||
s.ID = stack.TCPEndpointID(e.id)
|
||
e.mu.Unlock()
|
||
|
||
// Copy endpoint rcv state.
|
||
e.rcvListMu.Lock()
|
||
s.RcvBufSize = e.rcvBufSize
|
||
s.RcvBufUsed = e.rcvBufUsed
|
||
s.RcvClosed = e.rcvClosed
|
||
e.rcvListMu.Unlock()
|
||
|
||
// Endpoint TCP Option state.
|
||
s.SendTSOk = e.sendTSOk
|
||
s.RecentTS = e.recentTS
|
||
s.TSOffset = e.tsOffset
|
||
s.SACKPermitted = e.sackPermitted
|
||
s.SACK.Blocks = make([]header.SACKBlock, e.sack.NumBlocks)
|
||
copy(s.SACK.Blocks, e.sack.Blocks[:e.sack.NumBlocks])
|
||
|
||
// Copy endpoint send state.
|
||
e.sndBufMu.Lock()
|
||
s.SndBufSize = e.sndBufSize
|
||
s.SndBufUsed = e.sndBufUsed
|
||
s.SndClosed = e.sndClosed
|
||
s.SndBufInQueue = e.sndBufInQueue
|
||
s.PacketTooBigCount = e.packetTooBigCount
|
||
s.SndMTU = e.sndMTU
|
||
e.sndBufMu.Unlock()
|
||
|
||
// Copy receiver state.
|
||
s.Receiver = stack.TCPReceiverState{
|
||
RcvNxt: e.rcv.rcvNxt,
|
||
RcvAcc: e.rcv.rcvAcc,
|
||
RcvWndScale: e.rcv.rcvWndScale,
|
||
PendingBufUsed: e.rcv.pendingBufUsed,
|
||
PendingBufSize: e.rcv.pendingBufSize,
|
||
}
|
||
|
||
// Copy sender state.
|
||
s.Sender = stack.TCPSenderState{
|
||
LastSendTime: e.snd.lastSendTime,
|
||
DupAckCount: e.snd.dupAckCount,
|
||
//FastRecovery: stack.TCPFastRecoveryState{
|
||
// Active: e.snd.fr.active,
|
||
// First: e.snd.fr.first,
|
||
// Last: e.snd.fr.last,
|
||
// MaxCwnd: e.snd.fr.maxCwnd,
|
||
//},
|
||
SndCwnd: e.snd.sndCwnd,
|
||
Ssthresh: e.snd.sndSsthresh,
|
||
SndCAAckCount: e.snd.sndCAAckCount,
|
||
Outstanding: e.snd.outstanding,
|
||
SndWnd: e.snd.sndWnd,
|
||
SndUna: e.snd.sndUna,
|
||
SndNxt: e.snd.sndNxt,
|
||
RTTMeasureSeqNum: e.snd.rttMeasureSeqNum,
|
||
RTTMeasureTime: e.snd.rttMeasureTime,
|
||
Closed: e.snd.closed,
|
||
RTO: e.snd.rto,
|
||
SRTTInited: e.snd.srttInited,
|
||
MaxPayloadSize: e.snd.maxPayloadSize,
|
||
SndWndScale: e.snd.sndWndScale,
|
||
MaxSentAck: e.snd.maxSentAck,
|
||
}
|
||
e.snd.rtt.Lock()
|
||
s.Sender.SRTT = e.snd.rtt.srtt
|
||
e.snd.rtt.Unlock()
|
||
|
||
//if cubic, ok := e.snd.cc.(*cubicState); ok {
|
||
// s.Sender.Cubic = stack.TCPCubicState{
|
||
// WMax: cubic.wMax,
|
||
// WLastMax: cubic.wLastMax,
|
||
// T: cubic.t,
|
||
// TimeSinceLastCongestion: time.Since(cubic.t),
|
||
// C: cubic.c,
|
||
// K: cubic.k,
|
||
// Beta: cubic.beta,
|
||
// WC: cubic.wC,
|
||
// WEst: cubic.wEst,
|
||
// }
|
||
//}
|
||
return s
|
||
}
|