package plugin_debug import ( "context" "encoding/json" "fmt" "io" "net/http" "net/http/pprof" "os" "os/exec" // 新增导入 "runtime" runtimePPROF "runtime/pprof" "sort" "strconv" "strings" "sync" "syscall" "time" myproc "github.com/cloudwego/goref/pkg/proc" "github.com/go-delve/delve/pkg/config" "github.com/go-delve/delve/service/debugger" task "github.com/langhuihui/gotask" "google.golang.org/protobuf/types/known/emptypb" "google.golang.org/protobuf/types/known/timestamppb" "m7s.live/v5" "m7s.live/v5/plugin/debug/pb" debug "m7s.live/v5/plugin/debug/pkg" "m7s.live/v5/plugin/debug/pkg/profile" ) var _ = m7s.InstallPlugin[DebugPlugin](m7s.PluginMeta{ ServiceDesc: &pb.Api_ServiceDesc, RegisterGRPCHandler: pb.RegisterApiHandler, }) var conf, _ = config.LoadConfig() type DebugPlugin struct { pb.UnimplementedApiServer m7s.Plugin ProfileDuration time.Duration `default:"10s" desc:"profile持续时间"` Profile string `desc:"采集profile存储文件"` Grfout string `default:"grf.out" desc:"grf输出文件"` EnableChart bool `default:"true" desc:"是否启用图表功能"` EnableTaskHistory bool `default:"false" desc:"是否启用任务历史功能"` // 添加缓存字段 cpuProfileData *profile.Profile // 缓存 CPU Profile 数据 cpuProfileOnce sync.Once // 确保只采集一次 cpuProfileLock sync.Mutex // 保护缓存数据 chartServer server // Monitor plugin fields session *debug.Session } type WriteToFile struct { header http.Header io.Writer } func (w *WriteToFile) Header() http.Header { return w.header } func (w *WriteToFile) WriteHeader(statusCode int) {} func (p *DebugPlugin) Start() error { // 启用阻塞分析 runtime.SetBlockProfileRate(1) // 设置采样率为1纳秒 if p.Profile != "" { go func() { file, err := os.Create(p.Profile) if err != nil { return } defer file.Close() p.Info("cpu profile start") err = runtimePPROF.StartCPUProfile(file) time.Sleep(p.ProfileDuration) runtimePPROF.StopCPUProfile() p.Info("cpu profile done") }() } if p.EnableChart { p.AddTask(&p.chartServer) } // 初始化 monitor session if p.DB != nil && p.EnableTaskHistory { p.session = &debug.Session{ PID: os.Getpid(), Args: strings.Join(os.Args, " "), StartTime: time.Now(), } err := p.DB.AutoMigrate(p.session) if err != nil { return err } err = p.DB.Create(p.session).Error if err != nil { return err } err = p.DB.AutoMigrate(&debug.Task{}) if err != nil { return err } p.Plugin.Server.Using(func() { p.saveTask(p.Plugin.Server) }) // 监听任务完成事件 p.Plugin.Server.OnDescendantsDispose(p.saveTask) } return nil } func (p *DebugPlugin) Pprof_Trace(w http.ResponseWriter, r *http.Request) { r.URL.Path = "/debug" + r.URL.Path pprof.Trace(w, r) } func (p *DebugPlugin) Dispose() { // 保存 session 结束时间 if p.DB != nil && p.session != nil { p.DB.Model(p.session).Update("end_time", time.Now()) } } // saveTask 保存任务信息到数据库 func (p *DebugPlugin) saveTask(task task.ITask) { if p.DB == nil || p.session == nil { return } var th debug.Task th.SessionID = p.session.ID th.TaskID = task.GetTaskID() th.ParentID = task.GetParent().GetTaskID() th.StartTime = task.GetTask().StartTime th.EndTime = time.Now() th.OwnerType = task.GetOwnerType() th.TaskType = byte(task.GetTaskType()) th.Reason = task.StopReason().Error() th.Level = task.GetLevel() b, _ := json.Marshal(task.GetDescriptions()) th.Description = string(b) p.DB.Create(&th) } func (p *DebugPlugin) Pprof_profile(w http.ResponseWriter, r *http.Request) { r.URL.Path = "/debug" + r.URL.Path pprof.Profile(w, r) } // Monitor plugin API implementations func (p *DebugPlugin) SearchTask(ctx context.Context, req *pb.SearchTaskRequest) (res *pb.SearchTaskResponse, err error) { if p.DB == nil { return nil, fmt.Errorf("database is not initialized") } if !p.EnableTaskHistory { return nil, fmt.Errorf("task history is not enabled") } res = &pb.SearchTaskResponse{} var tasks []*debug.Task tx := p.DB.Find(&tasks, "session_id = ?", req.SessionId) if err = tx.Error; err == nil { for _, t := range tasks { res.Data = append(res.Data, &pb.Task{ Id: t.TaskID, StartTime: timestamppb.New(t.StartTime), EndTime: timestamppb.New(t.EndTime), Owner: t.OwnerType, Type: uint32(t.TaskType), Description: t.Description, Reason: t.Reason, SessionId: t.SessionID, ParentId: t.ParentID, }) } } return } func (p *DebugPlugin) SessionList(context.Context, *emptypb.Empty) (res *pb.SessionListResponse, err error) { if p.DB == nil { return nil, fmt.Errorf("database is not initialized") } if !p.EnableTaskHistory { return nil, fmt.Errorf("task history is not enabled") } res = &pb.SessionListResponse{} var sessions []*debug.Session tx := p.DB.Find(&sessions) err = tx.Error if err == nil { for _, s := range sessions { res.Data = append(res.Data, &pb.Session{ Id: s.ID, Pid: uint32(s.PID), Args: s.Args, StartTime: timestamppb.New(s.StartTime), EndTime: timestamppb.New(s.EndTime.Time), }) } } return } func (p *DebugPlugin) ServeHTTP(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/pprof" { http.Redirect(w, r, "/debug/pprof/", http.StatusFound) return } r.URL.Path = "/debug" + r.URL.Path pprof.Index(w, r) } func (p *DebugPlugin) Charts_(w http.ResponseWriter, r *http.Request) { r.URL.Path = "/static" + strings.TrimPrefix(r.URL.Path, "/charts") staticFSHandler.ServeHTTP(w, r) } func (p *DebugPlugin) Charts_data(w http.ResponseWriter, r *http.Request) { p.chartServer.dataHandler(w, r) } func (p *DebugPlugin) Charts_datafeed(w http.ResponseWriter, r *http.Request) { p.chartServer.dataFeedHandler(w, r) } func (p *DebugPlugin) Grf(w http.ResponseWriter, r *http.Request) { dConf := debugger.Config{ AttachPid: os.Getpid(), Backend: "default", CoreFile: "", DebugInfoDirectories: conf.DebugInfoDirectories, AttachWaitFor: "", AttachWaitForInterval: 1, AttachWaitForDuration: 0, } dbg, err := debugger.New(&dConf, nil) defer dbg.Detach(false) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err = myproc.ObjectReference(dbg.Target(), p.Grfout); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) } w.Write([]byte("ok")) } func (p *DebugPlugin) GetHeap(ctx context.Context, empty *emptypb.Empty) (*pb.HeapResponse, error) { // 创建临时文件用于存储堆信息 f, err := os.CreateTemp("", "heap") if err != nil { return nil, err } defer os.Remove(f.Name()) defer f.Close() // 获取堆信息 runtime.GC() if err := runtimePPROF.WriteHeapProfile(f); err != nil { return nil, err } // 读取堆信息 f.Seek(0, 0) prof, err := profile.Parse(f) if err != nil { return nil, err } // 准备响应数据 resp := &pb.HeapResponse{ Data: &pb.HeapData{ Stats: &pb.HeapStats{}, Objects: make([]*pb.HeapObject, 0), Edges: make([]*pb.HeapEdge, 0), }, } // 创建类型映射用于聚合统计 typeMap := make(map[string]*pb.HeapObject) var totalSize int64 // 处理每个样本 for _, sample := range prof.Sample { size := sample.Value[1] // 内存大小 if size == 0 { continue } // 获取分配类型信息 var typeName string if len(sample.Location) > 0 && len(sample.Location[0].Line) > 0 { if fn := sample.Location[0].Line[0].Function; fn != nil { typeName = fn.Name } } // 创建或更新堆对象 obj, exists := typeMap[typeName] if !exists { obj = &pb.HeapObject{ Type: typeName, Address: fmt.Sprintf("%p", sample), Refs: make([]string, 0), } typeMap[typeName] = obj resp.Data.Objects = append(resp.Data.Objects, obj) } obj.Count++ obj.Size += size totalSize += size // 构建引用关系 for i := 1; i < len(sample.Location); i++ { loc := sample.Location[i] if len(loc.Line) == 0 || loc.Line[0].Function == nil { continue } callerName := loc.Line[0].Function.Name // 跳过系统函数 if callerName == "" || strings.HasPrefix(callerName, "runtime.") { continue } // 添加边 edge := &pb.HeapEdge{ From: callerName, To: typeName, FieldName: callerName, } resp.Data.Edges = append(resp.Data.Edges, edge) // 将调用者添加到引用列表 if !contains(obj.Refs, callerName) { obj.Refs = append(obj.Refs, callerName) } } } // 计算百分比 for _, obj := range resp.Data.Objects { if totalSize > 0 { obj.SizePerc = float64(obj.Size) / float64(totalSize) * 100 } } // 按大小排序 sort.Slice(resp.Data.Objects, func(i, j int) bool { return resp.Data.Objects[i].Size > resp.Data.Objects[j].Size }) // 获取运行时内存统计 var ms runtime.MemStats runtime.ReadMemStats(&ms) // 填充内存统计信息 resp.Data.Stats.Alloc = ms.Alloc resp.Data.Stats.TotalAlloc = ms.TotalAlloc resp.Data.Stats.Sys = ms.Sys resp.Data.Stats.NumGC = ms.NumGC resp.Data.Stats.HeapAlloc = ms.HeapAlloc resp.Data.Stats.HeapSys = ms.HeapSys resp.Data.Stats.HeapIdle = ms.HeapIdle resp.Data.Stats.HeapInuse = ms.HeapInuse resp.Data.Stats.HeapReleased = ms.HeapReleased resp.Data.Stats.HeapObjects = ms.HeapObjects resp.Data.Stats.GcCPUFraction = ms.GCCPUFraction return resp, nil } // 采集 CPU Profile 并缓存 func (p *DebugPlugin) collectCPUProfile() error { p.cpuProfileLock.Lock() defer p.cpuProfileLock.Unlock() // 如果已经采集过,直接返回 if p.cpuProfileData != nil { return nil } // 创建临时文件用于存储 CPU Profile 数据 f, err := os.CreateTemp("", "cpu_profile") if err != nil { return fmt.Errorf("could not create CPU profile: %v", err) } defer os.Remove(f.Name()) defer f.Close() // 开始 CPU profiling if err := runtimePPROF.StartCPUProfile(f); err != nil { return fmt.Errorf("could not start CPU profile: %v", err) } // 采样指定时间 time.Sleep(p.ProfileDuration) runtimePPROF.StopCPUProfile() // 读取并解析 CPU Profile 数据 f.Seek(0, 0) profileData, err := profile.Parse(f) if err != nil { return fmt.Errorf("could not parse CPU profile: %v", err) } // 缓存 CPU Profile 数据 p.cpuProfileData = profileData return nil } // GetCpu 接口 func (p *DebugPlugin) GetCpu(ctx context.Context, req *pb.CpuRequest) (*pb.CpuResponse, error) { // 如果需要刷新或者缓存中没有数据 if req.Refresh || p.cpuProfileData == nil { p.cpuProfileLock.Lock() p.cpuProfileData = nil // 清除现有缓存 p.cpuProfileOnce = sync.Once{} // 重置 Once p.cpuProfileLock.Unlock() } // 如果请求指定了duration,临时更新ProfileDuration originalDuration := p.ProfileDuration if req.Duration > 0 { p.ProfileDuration = time.Duration(req.Duration) * time.Second } // 确保采集 CPU Profile p.cpuProfileOnce.Do(func() { if err := p.collectCPUProfile(); err != nil { fmt.Printf("Failed to collect CPU profile: %v\n", err) } }) // 恢复原始的ProfileDuration if req.Duration > 0 { p.ProfileDuration = originalDuration } // 如果缓存中没有数据,返回错误 if p.cpuProfileData == nil { return nil, fmt.Errorf("CPU profile data is not available") } // 使用缓存的 CPU Profile 数据构建响应 resp := &pb.CpuResponse{ Data: &pb.CpuData{ TotalCpuTimeNs: uint64(p.cpuProfileData.DurationNanos), SamplingIntervalNs: uint64(p.cpuProfileData.Period), Functions: make([]*pb.FunctionProfile, 0), Goroutines: make([]*pb.GoroutineProfile, 0), SystemCalls: make([]*pb.SystemCall, 0), RuntimeStats: &pb.RuntimeStats{}, }, } // 填充函数调用信息 for _, sample := range p.cpuProfileData.Sample { functionProfile := &pb.FunctionProfile{ FunctionName: sample.Location[0].Line[0].Function.Name, CpuTimeNs: uint64(sample.Value[0]), InvocationCount: uint64(sample.Value[1]), CallStack: make([]string, 0), } // 填充调用栈信息 for _, loc := range sample.Location { for _, line := range loc.Line { functionProfile.CallStack = append(functionProfile.CallStack, line.Function.Name) } } resp.Data.Functions = append(resp.Data.Functions, functionProfile) } return resp, nil } // GetCpuGraph 接口 func (p *DebugPlugin) GetCpuGraph(ctx context.Context, req *pb.CpuRequest) (*pb.CpuGraphResponse, error) { // 如果需要刷新或者缓存中没有数据 if req.Refresh || p.cpuProfileData == nil { p.cpuProfileLock.Lock() p.cpuProfileData = nil // 清除现有缓存 p.cpuProfileOnce = sync.Once{} // 重置 Once p.cpuProfileLock.Unlock() } // 如果请求指定了duration,临时更新ProfileDuration originalDuration := p.ProfileDuration if req.Duration > 0 { p.ProfileDuration = time.Duration(req.Duration) * time.Second } // 确保采集 CPU Profile p.cpuProfileOnce.Do(func() { if err := p.collectCPUProfile(); err != nil { fmt.Printf("Failed to collect CPU profile: %v\n", err) } }) // 恢复原始的ProfileDuration if req.Duration > 0 { p.ProfileDuration = originalDuration } // 如果缓存中没有数据,返回错误 if p.cpuProfileData == nil { return nil, fmt.Errorf("CPU profile data is not available") } // 使用缓存的 CPU Profile 数据生成 dot 图 dot, err := debug.GetDotGraph(p.cpuProfileData) if err != nil { return nil, fmt.Errorf("could not generate dot graph: %v", err) } return &pb.CpuGraphResponse{ Data: dot, }, nil } // 辅助函数:检查字符串切片是否包含特定字符串 func contains(slice []string, str string) bool { for _, s := range slice { if s == str { return true } } return false } func (p *DebugPlugin) GetHeapGraph(ctx context.Context, empty *emptypb.Empty) (*pb.HeapGraphResponse, error) { // 创建临时文件用于存储堆信息 f, err := os.CreateTemp("", "heap") if err != nil { return nil, err } defer os.Remove(f.Name()) defer f.Close() // 获取堆信息 runtime.GC() if err := runtimePPROF.WriteHeapProfile(f); err != nil { return nil, err } // 读取堆信息 f.Seek(0, 0) profile, err := profile.Parse(f) if err != nil { return nil, err } // 清理不重要的函数,使图形更干净明了 if err := profile.RemoveUninteresting(); err != nil { return nil, fmt.Errorf("could not remove uninteresting functions: %v", err) } // Generate dot graph. dot, err := debug.GetDotGraph(profile) if err != nil { return nil, err } return &pb.HeapGraphResponse{ Data: dot, }, nil } func (p *DebugPlugin) API_TcpDump(rw http.ResponseWriter, r *http.Request) { query := r.URL.Query() cmdName := "sudo" args := []string{"-S", "tcpdump", "-w", "dump.cap"} // 检查当前程序是否具有 root 权限 isRoot := syscall.Geteuid() == 0 if isRoot { cmdName = "tcpdump" args = args[2:] } if query.Get("interface") != "" { args = append(args, "-i", query.Get("interface")) } if query.Get("filter") != "" { args = append(args, query.Get("filter")) } if query.Get("extra_args") != "" { args = append(args, strings.Fields(query.Get("extra_args"))...) } if query.Get("duration") == "" { http.Error(rw, "duration is required", http.StatusBadRequest) return } // rw.Header().Set("Content-Type", "text/plain") // rw.Header().Set("Cache-Control", "no-cache") // rw.Header().Set("Content-Disposition", "attachment; filename=tcpdump.txt") duration, err := strconv.Atoi(query.Get("duration")) if err != nil { http.Error(rw, "invalid duration", http.StatusBadRequest) return } ctx, _ := context.WithTimeout(p, time.Duration(duration)*time.Second) cmd := exec.CommandContext(ctx, cmdName, args...) p.Info("starting tcpdump", "args", strings.Join(cmd.Args, " ")) cmd.Stdin = strings.NewReader(query.Get("password")) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr // 将错误输出重定向到标准错误 err = cmd.Start() if err != nil { http.Error(rw, fmt.Sprintf("failed to start tcpdump: %v", err), http.StatusInternalServerError) return } <-ctx.Done() // 杀死 tcpdump 进程 var killcmd *exec.Cmd if isRoot { killcmd = exec.Command("pkill", "-9", "tcpdump") } else { killcmd = exec.Command("sudo", "-S", "pkill", "-9", "tcpdump") killcmd.Stdin = strings.NewReader(query.Get("password")) } p.Info("killing tcpdump", "args", strings.Join(killcmd.Args, " ")) killcmd.Stderr = os.Stderr killcmd.Stdout = os.Stdout killcmd.Run() p.Info("kill done") cmd.Wait() p.Info("dump done") http.ServeFile(rw, r, "dump.cap") }