diff --git a/example/tcp_server.go b/example/tcp_server.go index c871726..759a416 100644 --- a/example/tcp_server.go +++ b/example/tcp_server.go @@ -12,7 +12,7 @@ type TCPHandler interface { Handle(net.Conn) } -func TCPServer(listener net.Listener, handler TCPHandler, logf lg.AppLogFunc) error { +func TCPServer(listener net.Listener, handler TCPHandler) error { log.Printf("TCP: listening on %s", listener.Addr()) for { diff --git a/tcpip/header/eth.go b/tcpip/header/eth.go index ec87e04..698a131 100644 --- a/tcpip/header/eth.go +++ b/tcpip/header/eth.go @@ -6,8 +6,8 @@ import ( ) const ( - dstMAC = 0 - srcMAC = 6 + dstMAC = 0 + srcMAC = 6 ethType = 12 ) @@ -19,6 +19,7 @@ type EthernetFields struct { DstAddr tcpip.LinkAddress // 协议类型 + // Type = 0x0800 IPv4 Type = 0x0806 = ARP Type tcpip.NetworkProtocolNumber } diff --git a/tcpip/link/channel/channel.go b/tcpip/link/channel/channel.go new file mode 100644 index 0000000..a5634da --- /dev/null +++ b/tcpip/link/channel/channel.go @@ -0,0 +1,100 @@ +package channel + +import ( + "netstack/tcpip" + "netstack/tcpip/buffer" + "netstack/tcpip/stack" +) + +type PacketInfo struct { + Header buffer.View + Payload buffer.View + Proto tcpip.NetworkProtocolNumber +} + +type Endpoint struct { + dispatcher stack.NetworkDispatcher + mtu uint32 + linkAddr tcpip.LinkAddress // MAC地址 + C chan PacketInfo +} + +//创建一个新的抽象channel Endpoint 可以接收数据 也可以外发数据 +func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) (tcpip.LinkEndpointID, *Endpoint) { + e := &Endpoint{ + C: make(chan PacketInfo, size), + mtu: mtu, + linkAddr: linkAddr, + } + return stack.RegisterLinkEndpoint(e), e +} + +// Drain 流走 释放channel中的数据 +func (e *Endpoint) Drain() int { + c := 0 + for { + select { + case <-e.C: + c++ + default: + return c + } + } +} + +// Inject 注入 +func (e *Endpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { + e.InjectLinkAddr(protocol, "", vv) +} + +// InjectLinkAddr injects an inbound packet with a remote link 
address. +func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, vv buffer.VectorisedView) { + e.dispatcher.DeliverNetworkPacket(e, remoteLinkAddr, "" /* localLinkAddr */, protocol, vv.Clone(nil)) +} + +func (e *Endpoint) MTU() uint32 { + return e.mtu +} + +// Capabilities返回链路层端点支持的功能集。 +func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities { + return 0 +} + +// MaxHeaderLength 返回数据链接(和较低级别的图层组合)标头可以具有的最大大小。 +// 较高级别使用此信息来保留它们正在构建的数据包前面预留空间。 +func (e *Endpoint) MaxHeaderLength() uint16 { + return 0 +} + +// 本地链路层地址 +func (e *Endpoint) LinkAddress() tcpip.LinkAddress { + return e.linkAddr +} + +// channel 向外写数据 +func (e *Endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, + protocol tcpip.NetworkProtocolNumber) *tcpip.Error { + p := PacketInfo{ + Header: hdr.View(), + Proto: protocol, + Payload: payload.ToView(), + } + + select { + case e.C <- p: + default: + } + + return nil +} + +// Attach 将数据链路层端点附加到协议栈的网络层调度程序。 +func (e *Endpoint) Attach(dispatcher stack.NetworkDispatcher) { + e.dispatcher = dispatcher +} + +// 是否已经添加了网络层调度器 +func (e *Endpoint) IsAttached() bool { + return e.dispatcher != nil +} diff --git a/tcpip/link/channel/stack.RegisterLinkEndpoint b/tcpip/link/channel/stack.RegisterLinkEndpoint new file mode 100644 index 0000000..e69de29 diff --git a/tcpip/link/fdbased/endpoint.go b/tcpip/link/fdbased/endpoint.go index d45b434..af457f9 100644 --- a/tcpip/link/fdbased/endpoint.go +++ b/tcpip/link/fdbased/endpoint.go @@ -10,7 +10,7 @@ import ( ) // 从NIC读取数据的多级缓存配置 -var BufConfig = []int{1<<7, 1<<8, 1<<8, 1<<9, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14, 1<<15} +var BufConfig = []int{1 << 7, 1 << 8, 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15} // 负责底层网卡的io读写以及数据分发 type endpoint struct { @@ -27,8 +27,8 @@ type endpoint struct { closed func(*tcpip.Error) - iovecs []syscall.Iovec - views []buffer.View + iovecs []syscall.Iovec + views 
[]buffer.View dispatcher stack.NetworkDispatcher // handleLocal指示发往自身的数据包是由内部netstack处理(true)还是转发到FD端点(false) @@ -36,10 +36,10 @@ type endpoint struct { } type Options struct { - FD int - MTU uint32 - ClosedFunc func(*tcpip.Error) - Address tcpip.LinkAddress + FD int + MTU uint32 + ClosedFunc func(*tcpip.Error) + Address tcpip.LinkAddress ResolutionRequired bool SaveRestore bool ChecksumOffload bool @@ -66,14 +66,14 @@ func New(opts *Options) tcpip.LinkEndpointID { } e := &endpoint{ - fd: opts.FD, - mtu: opts.MTU, - caps: caps, - closed: opts.ClosedFunc, - addr: opts.Address, - hdrSize: header.EthernetMinimumSize, - views: make([]buffer.View, len(BufConfig)), - iovecs: make([]syscall.Iovec, len(BufConfig)), + fd: opts.FD, + mtu: opts.MTU, + caps: caps, + closed: opts.ClosedFunc, + addr: opts.Address, + hdrSize: header.EthernetMinimumSize, + views: make([]buffer.View, len(BufConfig)), + iovecs: make([]syscall.Iovec, len(BufConfig)), handleLocal: opts.HandleLocal, } @@ -81,7 +81,7 @@ func New(opts *Options) tcpip.LinkEndpointID { return stack.RegisterLinkEndpoint(e) } -func (e *endpoint) MTU() uint32 { +func (e *endpoint) MTU() uint32 { return e.mtu } @@ -90,18 +90,18 @@ func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities { } // 返回当前以太网头部信息长度 -func (e *endpoint) MaxHeaderLength() uint16 { +func (e *endpoint) MaxHeaderLength() uint16 { return uint16(e.hdrSize) } // 返回当前MAC地址 -func (e *endpoint) LinkAddress() tcpip.LinkAddress { +func (e *endpoint) LinkAddress() tcpip.LinkAddress { return e.addr } // 将上层的报文经过链路层封装,写入网卡中,如果写入失败则丢弃该报文 -func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, - payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { +func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, + payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { // 如果目标地址是设备自己 那么将报文重新返回给协议栈 if e.handleLocal && r.LocalAddress != "" && r.LocalAddress == r.RemoteAddress { views 
:= make([]buffer.View, 1, 1+len(payload.Views())) @@ -114,9 +114,9 @@ func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, } // 封装增加以太网头部 eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize)) // 分配14B的内存 - ethHdr := &header.EthernetFields{ // 配置以太帧信息 + ethHdr := &header.EthernetFields{ // 配置以太帧信息 DstAddr: r.RemoteLinkAddress, - Type: protocol, + Type: protocol, } // 如果路由信息中有配置源MAC地址,那么使用该地址 // 如果没有,则使用本网卡的地址 @@ -140,7 +140,7 @@ func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { go e.dispatchLoop() } -func (e *endpoint) IsAttached() bool { +func (e *endpoint) IsAttached() bool { return e.dispatcher != nil } @@ -168,7 +168,7 @@ func (e *endpoint) allocateViews(bufConfig []int) { e.views[i] = b e.iovecs[i] = syscall.Iovec{ Base: &b[0], - Len: uint64(len(b)), + Len: uint64(len(b)), } } } @@ -187,7 +187,7 @@ func (e *endpoint) dispatch() (bool, *tcpip.Error) { } var ( - p tcpip.NetworkProtocolNumber + p tcpip.NetworkProtocolNumber remoteLinkAddr, localLinkAddr tcpip.LinkAddress // 目标MAC 源MAC ) // 获取以太网头部信息 @@ -196,14 +196,14 @@ func (e *endpoint) dispatch() (bool, *tcpip.Error) { remoteLinkAddr = eth.SourceAddress() localLinkAddr = eth.DestinationAddress() - used := e.capViews(n, BufConfig) // 从缓存中截有效的内容 + used := e.capViews(n, BufConfig) // 从缓存中截有效的内容 vv := buffer.NewVectorisedView(n, e.views[:used]) // 用这些有效的内容构建vv - vv.TrimFront(e.hdrSize) // 将数据内容删除以太网头部信息 将网络层作为数据头 + vv.TrimFront(e.hdrSize) // 将数据内容删除以太网头部信息 将网络层作为数据头 e.dispatcher.DeliverNetworkPacket(e, remoteLinkAddr, localLinkAddr, p, vv) // 将分发后的数据无效化(设置nil可以让gc回收这些内存) - for i := 0;i < used;i++ { + for i := 0; i < used; i++ { e.views[i] = nil } diff --git a/tcpip/ports/README.md b/tcpip/ports/README.md new file mode 100644 index 0000000..de91e01 --- /dev/null +++ b/tcpip/ports/README.md @@ -0,0 +1,10 @@ +# 端口 + +## 概念 +在互联网上,各主机间通过 TCP/IP 协议发送和接收数据包,各个数据包根据其目的主机的 ip 地址来进行互联网络中的路由选择,把数据包顺利的传送到目的主机。大多数操作系统都支持多程序(进程)同时运行,那么目的主机应该把接收到的数据包传送给众多同时运行的进程中的哪一个呢?显然这个问题有待解决。 + 
+运行在计算机中的进程是用进程标识符来标志的。一开始我们可能会想到根据进程标识符来区分数据包给哪个进程,但是因为在因特网上使用的计算机的操作系统种类很多,而不同的操作系统又使用不同格式的进程标识符,因此发送方非常可能无法识别其他机器上的进程。为了使运行不同操作系统的计算机的应用进程能够互相通信,就必须用统一的方法对 TCP/IP 体系的应用进程进行标志,因此 TCP/IP 体系的传输层端口被提了出来。 + +TCP/IP 协议在传输层使用协议端口号(protocol port number),或通常简称为端口(port),端口统一用一个 16 位端口号进行标志。端口号只具有本地意义,即端口号只是为了标志本计算机应用层中的各进程。在因特网中不同计算机的相同端口号是没有联系的。虽然通信的终点是应用进程,但我们可以把端口想象是通信的终点,因为我们只要把要传送的报文交到目的主机的某一个合适的目的端口,剩下的工作(即最后交付目的进程)就由 TCP 来完成。 + +如果把 IP 地址比作一栋楼房,端口号就是这栋楼房里各个房子的房间号。数据包来到主机这栋大楼,会查看是哪个房间号,再把数据发给相应的房间。端口号只有整数,范围是从 0 到 65535(2^16-1),其中 0 一般作为保留端口,表示让系统自动分配端口。 \ No newline at end of file diff --git a/tcpip/ports/ports.go b/tcpip/ports/ports.go new file mode 100644 index 0000000..032a191 --- /dev/null +++ b/tcpip/ports/ports.go @@ -0,0 +1,23 @@ +package ports + +import ( + "netstack/tcpip" + "sync" +) + +// 端口的唯一标识 : 网络层协议-传输层协议-端口号 +type portDescriptor struct { + network tcpip.NetworkProtocolNumber + transport tcpip.TransportProtocolNumber + port uint16 +} + +// 一个IP地址的集合 +type bindAddresses map[tcpip.Address]struct{} + +// 管理端口的对象 由他来保留和释放端口 +type PortManager struct { + mu sync.RWMutex + // 用一个map接口来保存被占用的端口 + allocatedPorts map[portDescriptor]bindAddresses +} diff --git a/tcpip/stack/linkaddrcache.go b/tcpip/stack/linkaddrcache.go new file mode 100644 index 0000000..a4df7e5 --- /dev/null +++ b/tcpip/stack/linkaddrcache.go @@ -0,0 +1,308 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package stack + +import ( + "fmt" + "log" + "sync" + "time" + + "netstack/sleep" + "netstack/tcpip" +) + +const linkAddrCacheSize = 512 // max cache entries + +// linkAddrCache is a fixed-sized cache mapping IP addresses to link addresses. +// +// The entries are stored in a ring buffer, oldest entry replaced first. +// +// This struct is safe for concurrent use. +type linkAddrCache struct { + // ageLimit is how long a cache entry is valid for. + ageLimit time.Duration + + // resolutionTimeout is the amount of time to wait for a link request to + // resolve an address. + resolutionTimeout time.Duration + + // resolutionAttempts is the number of times an address is attempted to be + // resolved before failing. + resolutionAttempts int + + mu sync.Mutex + cache map[tcpip.FullAddress]*linkAddrEntry + next int // array index of next available entry + entries [linkAddrCacheSize]linkAddrEntry +} + +// entryState controls the state of a single entry in the cache. +type entryState int + +const ( + // incomplete means that there is an outstanding request to resolve the + // address. This is the initial state. + incomplete entryState = iota + // ready means that the address has been resolved and can be used. + ready + // failed means that address resolution timed out and the address + // could not be resolved. + failed + // expired means that the cache entry has expired and the address must be + // resolved again. + expired +) + +// String implements Stringer. +func (s entryState) String() string { + switch s { + case incomplete: + return "incomplete" + case ready: + return "ready" + case failed: + return "failed" + case expired: + return "expired" + default: + return fmt.Sprintf("unknown(%d)", s) + } +} + +// A linkAddrEntry is an entry in the linkAddrCache. +// This struct is thread-compatible. 
+type linkAddrEntry struct { + addr tcpip.FullAddress + linkAddr tcpip.LinkAddress + expiration time.Time + s entryState + + // wakers is a set of waiters for address resolution result. Anytime + // state transitions out of 'incomplete' these waiters are notified. + wakers map[*sleep.Waker]struct{} + + done chan struct{} +} + +func (e *linkAddrEntry) state() entryState { + if e.s != expired && time.Now().After(e.expiration) { + // Force the transition to ensure waiters are notified. + e.changeState(expired) + } + return e.s +} + +func (e *linkAddrEntry) changeState(ns entryState) { + if e.s == ns { + return + } + + // Validate state transition. + switch e.s { + case incomplete: + // All transitions are valid. + case ready, failed: + if ns != expired { + panic(fmt.Sprintf("invalid state transition from %s to %s", e.s, ns)) + } + case expired: + // Terminal state. + panic(fmt.Sprintf("invalid state transition from %s to %s", e.s, ns)) + default: + panic(fmt.Sprintf("invalid state: %s", e.s)) + } + + // Notify whoever is waiting on address resolution when transitioning + // out of 'incomplete'. + if e.s == incomplete { + for w := range e.wakers { + w.Assert() + } + e.wakers = nil + if e.done != nil { + close(e.done) + } + } + e.s = ns +} + +func (e *linkAddrEntry) addWaker(w *sleep.Waker) { + e.wakers[w] = struct{}{} +} + +func (e *linkAddrEntry) removeWaker(w *sleep.Waker) { + delete(e.wakers, w) +} + +// add adds a k -> v mapping to the cache. +func (c *linkAddrCache) add(k tcpip.FullAddress, v tcpip.LinkAddress) { + log.Printf("add link cache: %v-%v", k, v) + c.mu.Lock() + defer c.mu.Unlock() + + entry, ok := c.cache[k] + if ok { + s := entry.state() + if s != expired && entry.linkAddr == v { + // Disregard repeated calls. + return + } + // Check if entry is waiting for address resolution. + if s == incomplete { + entry.linkAddr = v + } else { + // Otherwise create a new entry to replace it. 
+ entry = c.makeAndAddEntry(k, v) + } + } else { + entry = c.makeAndAddEntry(k, v) + } + + entry.changeState(ready) +} + +// makeAndAddEntry is a helper function to create and add a new +// entry to the cache map and evict older entry as needed. +func (c *linkAddrCache) makeAndAddEntry(k tcpip.FullAddress, v tcpip.LinkAddress) *linkAddrEntry { + // Take over the next entry. + entry := &c.entries[c.next] + if c.cache[entry.addr] == entry { + delete(c.cache, entry.addr) + } + + // Mark the soon-to-be-replaced entry as expired, just in case there is + // someone waiting for address resolution on it. + entry.changeState(expired) + + *entry = linkAddrEntry{ + addr: k, + linkAddr: v, + expiration: time.Now().Add(c.ageLimit), + wakers: make(map[*sleep.Waker]struct{}), + done: make(chan struct{}), + } + + c.cache[k] = entry + c.next = (c.next + 1) % len(c.entries) + return entry +} + +// get reports any known link address for k. +func (c *linkAddrCache) get(k tcpip.FullAddress, linkRes LinkAddressResolver, localAddr tcpip.Address, linkEP LinkEndpoint, waker *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) { + log.Printf("link addr get linkRes: %#v, addr: %+v", linkRes, k) + if linkRes != nil { + if addr, ok := linkRes.ResolveStaticAddress(k.Addr); ok { + return addr, nil, nil + } + } + + c.mu.Lock() + defer c.mu.Unlock() + // 尝试从缓存中得到MAC地址 + if entry, ok := c.cache[k]; ok { + switch s := entry.state(); s { + case expired: + case ready: + return entry.linkAddr, nil, nil + case failed: + return "", nil, tcpip.ErrNoLinkAddress + case incomplete: + // Address resolution is still in progress. + entry.addWaker(waker) + return "", entry.done, tcpip.ErrWouldBlock + default: + panic(fmt.Sprintf("invalid cache entry state: %s", s)) + } + } + + if linkRes == nil { + return "", nil, tcpip.ErrNoLinkAddress + } + + // Add 'incomplete' entry in the cache to mark that resolution is in progress. 
+ e := c.makeAndAddEntry(k, "") + e.addWaker(waker) + + go c.startAddressResolution(k, linkRes, localAddr, linkEP, e.done) + + return "", e.done, tcpip.ErrWouldBlock +} + +// removeWaker removes a waker previously added through get(). +func (c *linkAddrCache) removeWaker(k tcpip.FullAddress, waker *sleep.Waker) { + c.mu.Lock() + defer c.mu.Unlock() + + if entry, ok := c.cache[k]; ok { + entry.removeWaker(waker) + } +} + +func (c *linkAddrCache) startAddressResolution(k tcpip.FullAddress, linkRes LinkAddressResolver, localAddr tcpip.Address, linkEP LinkEndpoint, done <-chan struct{}) { + for i := 0; ; i++ { + // Send link request, then wait for the timeout limit and check + // whether the request succeeded. + linkRes.LinkAddressRequest(k.Addr, localAddr, linkEP) + + select { + case <-time.After(c.resolutionTimeout): + if stop := c.checkLinkRequest(k, i); stop { + return + } + case <-done: + return + } + } +} + +// checkLinkRequest checks whether previous attempt to resolve address has succeeded +// and mark the entry accordingly, e.g. ready, failed, etc. Return true if request +// can stop, false if another request should be sent. +func (c *linkAddrCache) checkLinkRequest(k tcpip.FullAddress, attempt int) bool { + c.mu.Lock() + defer c.mu.Unlock() + + entry, ok := c.cache[k] + if !ok { + // Entry was evicted from the cache. + return true + } + + switch s := entry.state(); s { + case ready, failed, expired: + // Entry was made ready by resolver or failed. Either way we're done. + return true + case incomplete: + if attempt+1 >= c.resolutionAttempts { + // Max number of retries reached, mark entry as failed. + entry.changeState(failed) + return true + } + // No response yet, need to send another ARP request. 
+ return false + default: + panic(fmt.Sprintf("invalid cache entry state: %s", s)) + } +} + +func newLinkAddrCache(ageLimit, resolutionTimeout time.Duration, resolutionAttempts int) *linkAddrCache { + return &linkAddrCache{ + ageLimit: ageLimit, + resolutionTimeout: resolutionTimeout, + resolutionAttempts: resolutionAttempts, + cache: make(map[tcpip.FullAddress]*linkAddrEntry, linkAddrCacheSize), + } +} diff --git a/tcpip/stack/nic.go b/tcpip/stack/nic.go new file mode 100644 index 0000000..16f22e6 --- /dev/null +++ b/tcpip/stack/nic.go @@ -0,0 +1,30 @@ +package stack + +import ( + "netstack/ilist" + "netstack/tcpip" + "sync" +) + +// 代表一个网卡对象 当我们创建好tap网卡对象后 我们使用NIC来代表它在我们自己的协议栈中的网卡对象 +type NIC struct { + stack *Stack + // 每个网卡的惟一标识号 + id tcpip.NICID + // 网卡名,可有可无 + name string + // 链路层端 + linkEP LinkEndpoint // 在链路层 这就是 fdbased.endpoint + + // 传输层的解复用 + demux *transportDemuxer + + mu sync.RWMutex + spoofing bool + promiscuous bool // 混杂模式 + primary map[tcpip.NetworkProtocolNumber]*ilist.List + // 网络层端的记录 + endpoints map[NetworkEndpoingID]*referencedNetworkEndpoint + // 子网的记录 + subnets []tcpip.Subnet +} diff --git a/tcpip/stack/registration.go b/tcpip/stack/registration.go index 8546184..ee6b802 100644 --- a/tcpip/stack/registration.go +++ b/tcpip/stack/registration.go @@ -1,6 +1,8 @@ package stack import ( + "netstack/ilist" + "netstack/sleep" "netstack/tcpip" "netstack/tcpip/buffer" "sync" @@ -14,6 +16,8 @@ const ( CapabilityLoopback ) +// ====================链路层相关============================== + // 所谓 io 就是数据的输入输出,对于网卡来说就是接收或发送数据, // 接收意味着对以太网帧解封装和提交给网络层,发送意味着对上层数据的封装和写入网卡 @@ -45,6 +49,38 @@ type LinkEndpoint interface { IsAttached() bool } +// LinkAddressResolver 是对可以解析链接地址的 NetworkProtocol 的扩展 TODO 需要解读 +type LinkAddressResolver interface { + LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error + + ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) + + LinkAddressProtocol() tcpip.NetworkProtocolNumber +} + +// A 
LinkAddressCache caches link addresses. +type LinkAddressCache interface { + // CheckLocalAddress determines if the given local address exists, and + // reports the result as a NIC ID. + CheckLocalAddress(nicid tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID + + // AddLinkAddress adds a link address to the cache. + AddLinkAddress(nicid tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) + + // GetLinkAddress looks up the cache to translate address to link address (e.g. IP -> MAC). + // If the LinkEndpoint requests address resolution and there is a LinkAddressResolver + // registered with the network protocol, the cache attempts to resolve the address + // and returns ErrWouldBlock. Waker is notified when address resolution is + // complete (success or not). + // + // If address resolution is required, ErrWouldBlock and a notification channel are + // returned for the top level caller to block. Channel is closed once address resolution + // is complete (success or not). + GetLinkAddress(nicid tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, w *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) + + // RemoveWaker removes a waker that has been added in GetLinkAddress(). 
+ RemoveWaker(nicid tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) +} type NetworkDispatcher interface { DeliverNetworkPacket(linkEP LinkEndpoint, dstLinkAddr, srcLinkAddr tcpip.LinkAddress, @@ -53,16 +89,64 @@ type NetworkDispatcher interface { type LinkEndpointCapabilities uint - var ( // 传输层协议的注册存储结构 TODO // 网络层协议的注册存储结构 TODO - linkEPMu sync.RWMutex + linkEPMu sync.RWMutex nextLinkEndpointID tcpip.LinkEndpointID = 1 - linkEndpoints = make(map[tcpip.LinkEndpointID]LinkEndpoint) // 设备注册表 设备号:设备实现 + linkEndpoints = make(map[tcpip.LinkEndpointID]LinkEndpoint) // 设备注册表 设备号:设备实现 ) +// ==============================网络层相关============================== +type NetworkProtocol interface { + // TODO 需要添加 +} + +// NetworkEndpoint是需要由网络层协议(例如,ipv4,ipv6)的端点实现的接口 +type NetworkEndpoint interface { + // TODO 需要添加 +} + +type NetworkEndpoingID struct { + LocalAddress tcpip.Address +} + +// ==============================传输层相关============================== + +type TransportEndpointID struct { + // TODO +} + +// ControlType 是网络层控制消息的类型 +type ControlType int + +// TODO 需要解读 +type TransportEndpoint interface { + HandlePacket(r *Route, id TransportEndpointID, vv buffer.VectorisedView) + HandleControlPacker(id TransportEndpointID, typ ControlType, extra uint32, vv buffer.VectorisedView) +} + +// TODO 需要解读 +type referencedNetworkEndpoint struct { + ilist.Entry + refs int32 + ep NetworkEndpoint + nic *NIC + protocol tcpip.NetworkProtocolNumber + + // linkCache is set if link address resolution is enabled for this + // protocol. Set to nil otherwise. + linkCache LinkAddressCache + linkAddrCache + + // holdsInsertRef is protected by the NIC's mutex. It indicates whether + // the reference count is biased by 1 due to the insertion of the + // endpoint. It is reset to false when RemoveAddress is called on the + // NIC. 
+ holdsInsertRef bool +} + // 注册一个链路层设备 func RegisterLinkEndpoint(linkEP LinkEndpoint) tcpip.LinkEndpointID { linkEPMu.Lock() diff --git a/tcpip/stack/route.go b/tcpip/stack/route.go index 3fb6af2..de96a7d 100644 --- a/tcpip/stack/route.go +++ b/tcpip/stack/route.go @@ -11,7 +11,6 @@ type Route struct { // 远端网卡MAC地址 RemoteLinkAddress tcpip.LinkAddress - // 本地网络层地址 ipv4 or ipv6 地址 LocalAddress tcpip.Address // 本地网卡MAC地址 @@ -24,5 +23,5 @@ type Route struct { NetProto tcpip.NetworkProtocolNumber // 相关的网络终端 - //ref *referenceNetworkEndpoint + ref *referencedNetworkEndpoint } diff --git a/tcpip/stack/stack.go b/tcpip/stack/stack.go new file mode 100644 index 0000000..3e4d948 --- /dev/null +++ b/tcpip/stack/stack.go @@ -0,0 +1,75 @@ +package stack + +import ( + "netstack/tcpip" + "netstack/tcpip/ports" + "sync" +) + +// TODO 需要解读 +type TCPProbeFunc func(s TcpEndpointState) + +// TODO 需要解读 +type TcpEndpointState struct { + // TODO 需要添加 +} + +type transportProtocolState struct { +} + +// Stack 是一个网络堆栈,具有所有支持的协议、NIC 和路由表。 +type Stack struct { + transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState // 各种传输层协议 + networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol // 各种网络层协议 + linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver // 各种链路层地址解析器 + + demux *transportDemuxer // 传输层的解复用器 + + stats tcpip.Stats // 网络栈的状态监测器 + + linkAddrCache *linkAddrCache // 链路层地址的缓存 + + mu sync.RWMutex + nics map[tcpip.NICID]*NIC // 所有的网卡设备 + forwarding bool // 是否正在转发 + + // routeTable is the route table passed in by the user via SetRouteTable(), + // it is used by FindRoute() to build a route for a specific + // destination. + routeTable []tcpip.Route // 路由表 + + *ports.PortManager // 端口管理器 + + // If not nil, then any new endpoints will have this probe function + // invoked every time they receive a TCP segment. + tcpProbeFunc TCPProbeFunc + + // clock is used to generate user-visible times. 
+ clock tcpip.Clock +} + +func (s *Stack) CreateNIC(id tcpip.NICID, linkEP tcpip.LinkEndpointID) *tcpip.Error { + return s.createNIC(id, "", linkEP, true) +} + +// 新建一个网卡对象,并且激活它 激活就是准备好从网卡中读取和写入数据 +func (s *Stack) createNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID, enable bool) *tcpip.Error { + ep := FindLinkEndpoint(linkEP) + if ep == nil { + return tcpip.ErrBadLinkEndpoint + } + + s.mu.Lock() + defer s.mu.Unlock() + + // Make sure id is unique + if _, ok := s.nics[id]; ok { + return tcpip.ErrDuplicateNICID + } + n := newIC(s, id, name, ep) + + s.nics[id] = n + if enable { + n.attachLinkEndpoint() + } +} diff --git a/tcpip/stack/stack_test.go b/tcpip/stack/stack_test.go new file mode 100644 index 0000000..eb230c2 --- /dev/null +++ b/tcpip/stack/stack_test.go @@ -0,0 +1,21 @@ +package stack_test + +import ( + "netstack/tcpip/link/channel" + "netstack/tcpip/stack" + "testing" +) + +const ( + defaultMTU = 65536 +) + +func TestStackBase(t *testing.T) { + + myStack := &stack.Stack{} + id, _ := channel.New(10, defaultMTU, "") + + if err := myStack.CreateNIC(1, id); err != nil { + panic(err) + } +} diff --git a/tcpip/stack/transport_demuxer.go b/tcpip/stack/transport_demuxer.go new file mode 100644 index 0000000..23c3964 --- /dev/null +++ b/tcpip/stack/transport_demuxer.go @@ -0,0 +1,23 @@ +package stack + +import ( + "netstack/tcpip" + "sync" +) + +// 网络层协议号和传输层协议号的组合 当作分流器的key值 +type protocolIDs struct { + network tcpip.NetworkProtocolNumber + transport tcpip.TransportProtocolNumber +} + +type transportEndpoints struct { + mu sync.RWMutex + endpoints map[TransportEndpointID]TransportEndpoint +} + +// transportDemuxer 解复用针对传输端点的数据包 +// 它执行两级解复用:首先基于网络层和传输协议 然后基于端点ID +type transportDemuxer struct { + protocol map[protocolIDs]*transportEndpoints +} diff --git a/tcpip/tcpip.go b/tcpip/tcpip.go index d0b2c5b..cc477a4 100644 --- a/tcpip/tcpip.go +++ b/tcpip/tcpip.go @@ -5,9 +5,8 @@ import ( "strings" ) - type Error struct { - msg string + msg string 
ignoreStats bool } @@ -56,14 +55,34 @@ var ( ErrNoBufferSpace = &Error{msg: "no buffer space available"} ) +// Clock 提供当前的时间戳 +type Clock interface { + NowNanoseconds() int64 + + NowMonotonic() int64 +} + +// 地址是一个字节切片,转换为表示网络节点地址的字符串。或者,在 unix 端点的情况下,它可能代表一条路径 type Address string type AddressMask string +// 传输层的完整地址 +type FullAddress struct { + NIC NICID // NICID + Addr Address // IP Address + Port uint16 // transport Port +} + func (a AddressMask) String() string { return Address(a).String() } +type Subnet struct { + address Address + mask AddressMask +} + // LinkAddress 是一个字节切片,转换为表示链接地址的字符串。 // 它通常是一个 6 字节的 MAC 地址。 type LinkAddress string // MAC地址 @@ -74,6 +93,19 @@ type TransportProtocolNumber uint32 type NetworkProtocolNumber uint32 +type NICID int32 + +type Route struct { + Destination Address // 目标地址 + Mask AddressMask // 掩码 + Gateway Address // 网关 + MIC NICID // 使用的网卡设备 +} + +// Stats 包含了网络栈的统计信息 +type Stats struct { + // TODO 需要添加 +} func (a Address) String() string { switch len(a) {