diff --git a/tcpip/buffer/prependable.go b/tcpip/buffer/prependable.go index d6c06a8..83b4daa 100644 --- a/tcpip/buffer/prependable.go +++ b/tcpip/buffer/prependable.go @@ -8,3 +8,24 @@ type Prependable struct { func NewPrependable(size int) Prependable { return Prependable{} } + +func NewPrependableFromView(v View) Prependable { + return Prependable{buf: v, usedIdx: 0} +} + +func (p Prependable) View() View { + return p.buf[p.usedIdx:] +} + +func (p Prependable) UsedLength() inty { + return len(p.buf) - p.usedIdx +} + +func (p *Prependable) Prepend(size int) []byte { + if size > p.usedIdx { + return nil + } + + p.usedIdx -= size + return p.View()[:size:size] +} diff --git a/tcpip/link/fdbased/endpoint.go b/tcpip/link/fdbased/endpoint.go index 86fd48d..ef0764d 100644 --- a/tcpip/link/fdbased/endpoint.go +++ b/tcpip/link/fdbased/endpoint.go @@ -1,10 +1,13 @@ package fdbased import ( + "log" "syscall" "github.com/impact-eintr/netstack/tcpip" "github.com/impact-eintr/netstack/tcpip/buffer" + "github.com/impact-eintr/netstack/tcpip/header" + "github.com/impact-eintr/netstack/tcpip/link/rawfile" "github.com/impact-eintr/netstack/tcpip/stack" ) @@ -29,6 +32,8 @@ type endpoint struct { // its end of the communication pipe. closed func(*tcpip.Error) + // 为了提高从磁盘读取数据到内存的效率,引入了IO向量机制,IO向量即struct iovec, + // 在API接口在readv和writev中使用,当然其他地方也较多的使用它。 iovecs []syscall.Iovec views []buffer.View dispatcher stack.NetworkDispatcher @@ -45,3 +50,199 @@ type endpoint struct { // echo foo | nc 192.168.0.2 8080 handleLocal bool } + +// 创建fdbase端的一些选项参数 +type Options struct { + FD int + MTU uint32 + ClosedFunc func(*tcpip.Error) + Address tcpip.LinkAddress + ResolutionRequired bool + SaveRestore bool + ChecksumOffload bool + DisconnectOk bool + HandleLocal bool + TestLossPacket func(data []byte) bool +} + +// 从NIC读取数据的多级缓存配置 +var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768} + +// 根据选项参数创建一个链路层的endpoint,并返回该endpoint的id +func New(opts *Options) tcpip.LinkEndpointID { + syscall.SetNonblock(opts.FD, true) + + caps := stack.LinkEndpointCapabilities(0) + if opts.ResolutionRequired { + caps |= stack.CapabilityResolutionRequired + } + if opts.ChecksumOffload { + caps |= stack.CapabilityChecksumOffload + } + if opts.SaveRestore { + caps |= stack.CapabilitySaveRestore + } + if opts.DisconnectOk { + caps |= stack.CapabilityDisconnectOk + } + + e := &endpoint{ + fd: opts.FD, + mtu: opts.MTU, + caps: caps, + closed: opts.ClosedFunc, + addr: opts.Address, + hdrSize: header.EthernetMinimumSize, + views: make([]buffer.View, len(BufConfig)), + iovecs: make([]syscall.Iovec, len(BufConfig)), + handleLocal: opts.HandleLocal, + } + // 全局注册链路层设备 + return stack.RegisterLinkEndpoint(e) + +} + +func (e *endpoint) MTU() uint32 { + return e.mtu +} + +// Attach 启动从文件描述符中读取数据包的goroutine 并通过提供的分发函数来分发数据报 +func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { + e.dispatcher = dispatcher + // 链接端点不可靠 保存传输端点后 它们将停止发送传出数据包 并拒绝所有传入数据包 + go e.dispatchLoop() +} + +func (e *endpoint) IsAttached() bool { + return e.dispatcher != nil +} + +func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities { + return e.caps +} + +func (e *endpoint) MaxHeaderLength() uint16 { + return uint16(e.hdrSize) +} + +func (e *endpoint) LinkAddress() tcpip.LinkAddress { + return e.addr +} + +// 循环地从fd中读取数据 然后将数据包分发给协议栈 +func (e *endpoint) dispatchLoop() *tcpip.Error { + for { + cont, err := e.dispatch() + if err != nil || !cont { + e.closed(err) + } + return err + } +} + +// 从网卡中读取一个数据报 +func (e *endpoint) dispatch() (bool, *tcpip.Error) { + // 读取数据缓存的分配 + e.allocateViews(BufConfig) + + // 从网卡中读取数据 + n, err := rawfile.BlockingReadv(e.fd, e.iovecs) + if err != nil { + return false, err + } + + // 如果比头部长度还小 直接丢弃 + if n <= e.hdrSize { + return false, err + } + + var ( + p tcpip.NetworkProtocolNumber + remoteLinkAddr, localLinkAddr tcpip.LinkAddress + ) + + // 获取以太网头部信息 + eth := header.Ethernet(e.views[0]) + p = eth.Type() + remoteLinkAddr = eth.SourceAddress() + localLinkAddr = eth.DestinationAddress() + + used := e.capViews(n, BufConfig) + vv := buffer.NewVectorisedView(n, e.views[:used]) + // 将数据内容删除以太网头部信息 也就是将数据指针指向网络层的第一个字节 + vv.TrimFront(e.hdrSize) + + // 调用nic.DeliverNetworkPacket 来分发网络层数据 + log.Printf("read from nic %d byte", e.hdrSize+vv.Size()) + e.dispatcher.DeliverNetworkPacket(e, remoteLinkAddr, localLinkAddr, p, vv) + + for i := 0; i < used; i++ { + e.views[i] = nil + } + return true, nil + +} + +// 按照bugConfig的长度分配内存大小 +// 注意e.views和e.iovecs共用相同的内存块 +func (e *endpoint) allocateViews(bufConfig []int) { + for i, v := range e.views { + if v != nil { + break + } + b := buffer.NewView(bufConfig[i]) + e.views[i] = b + e.iovecs[i] = syscall.Iovec{ + Base: &b[0], + Len: uint64(len(b)), + } + } +} + +func (e *endpoint) capViews(n int, buffers []int) int { + c := 0 + for i, s := range buffers { + c += s + if c >= n { + e.views[i].CapLength((s - (c - n))) + return i + 1 + } + } + return len(buffers) +} + +func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, + protocol tcpip.NetworkProtocolNumber) *tcpip.Error { + // 如果目标地址就是设备本身自己 那么将报文重新返回给协议栈 + if e.handleLocal && r.LocalAddress != "" && r.LocalAddress == r.RemoteAddress { + views := make([]buffer.View, 1, 1+len(payload.Views())) + views[0] = hdr.View() + views = append(views, payload.Views()...) + vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) + e.dispatcher.DeliverNetworkPacket(e, r.RemoteLinkAddress, r.LocalLinkAddress, protocol, vv) + return nil + } + + // 封装增加以太网头部 + eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize)) + ethHdr := &header.EthernetFields{ + DstAddr: r.RemoteLinkAddress, + Type: protocol, + } + + // 如果路由信息中有配置源MAC地址 那么使用该地址 如果没有则使用网卡的地址 + if r.LocalLinkAddress != "" { + ethHdr.SrcAddr = r.LocalLinkAddress + } else { + ethHdr.SrcAddr = e.addr + } + eth.Encode(ethHdr) + + // 写入网卡中 + log.Printf("write to nic %d bytes", hdr.UsedLength()+payload.Size()) + if payload.Size() == 0 { + return rawfile.NonBlockingWrite(e.fd, hdr.View()) + } + + return rawfile.NonBlockingWrite2(e.fd, hdr.View(), payload.ToView()) +} diff --git a/tcpip/link/rawfile/rawfile_unsafe.go b/tcpip/link/rawfile/rawfile_unsafe.go index 5385351..2ca55d8 100644 --- a/tcpip/link/rawfile/rawfile_unsafe.go +++ b/tcpip/link/rawfile/rawfile_unsafe.go @@ -31,7 +31,7 @@ func GetMTU(name string) (uint32, error) { } -func NonbolockingWrite(fd int, buf []byte) *tcpip.Error { +func NonBlockingWrite(fd int, buf []byte) *tcpip.Error { var ptr unsafe.Pointer if len(buf) > 0 { ptr = unsafe.Pointer(&buf[0]) @@ -45,9 +45,9 @@ func NonbolockingWrite(fd int, buf []byte) *tcpip.Error { return nil } -func NonBolckingWrite2(fd int, b1, b2 []byte) *tcpip.Error { +func NonBlockingWrite2(fd int, b1, b2 []byte) *tcpip.Error { if len(b2) == 0 { - return NonbolockingWrite(fd, b1) + return NonBlockingWrite(fd, b1) } iovec := [...]syscall.Iovec{ diff --git a/tcpip/stack/registration.go b/tcpip/stack/registration.go index c7e4b5f..396dea5 100644 --- a/tcpip/stack/registration.go +++ b/tcpip/stack/registration.go @@ -1,6 +1,8 @@ package stack import ( + "sync" + "github.com/impact-eintr/netstack/tcpip" "github.com/impact-eintr/netstack/tcpip/buffer" ) @@ -28,7 +30,7 @@ type LinkEndpoint interface { protocol tcpip.NetworkProtocolNumber) *tcpip.Error // 将数据链路层端点附加到协议栈的为那个网络层调度程序 - Atach(dispatcher NetworkDispatcher) + Attach(dispatcher NetworkDispatcher) // 是否已经添加了网络调度器 IsAttached() bool @@ -44,5 +46,34 @@ const ( CapabilityLoopback ) +// 包含网络协议栈用于在 数据链路层 处理数据包后将数据包传送到适当网络端点的方法。 type NetworkDispatcher interface { + // deliver 递送 + DeliverNetworkPacket(linkEP LinkEndpoint, dstLinkAddr, srcLinkAddr tcpip.LinkAddress, + protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) +} + +var ( + // 传输层协议的注册存储结构 + //transportProtocols = make(map[string]TransportProtocolFactory) + // 网络层协议的出册存储结构 + //networkProtocols = make(map[string]TransportProtocolFactory) + linkEPMu sync.RWMutex + nextLinkEndpointID tcpip.LinkEndpointID = 1 + // 保存设备号与设备信息 + linkEndpoints = make(map[tcpip.LinkEndpointID]LinkEndpoint) +) + +// 注册一个链路层设备 +func RegisterLinkEndpoint(linkEP LinkEndpoint) tcpip.LinkEndpointID { + linkEPMu.Lock() + defer linkEPMu.Unlock() + + v := nextLinkEndpointID + nextLinkEndpointID++ + + // 进行注册 + linkEndpoints[v] = linkEP + return v + } diff --git a/tcpip/stack/route.go b/tcpip/stack/route.go index 71e1e0f..ac5a3a9 100644 --- a/tcpip/stack/route.go +++ b/tcpip/stack/route.go @@ -15,6 +15,9 @@ type Route struct { // 本地网络层地址 LocalAddress tcpip.Address + // 本地网卡MAC地址 + LocalLinkAddress tcpip.LinkAddress + // 下一跳网络层地址 NextHop tcpip.Address diff --git a/tcpip/tcpip.go b/tcpip/tcpip.go index 90c74a6..cb90618 100644 --- a/tcpip/tcpip.go +++ b/tcpip/tcpip.go @@ -98,3 +98,5 @@ type ProtocolAddr struct { Protocol NetworkProtocolNumber Address Address } + +type LinkEndpointID uint64