Files
Archive/echo/internal/metrics/metrics.go
2024-03-05 02:32:38 -08:00

249 lines
6.5 KiB
Go

package metrics
import (
"fmt"
"math"
"os"
"runtime"
"strings"
"time"
"github.com/Ehco1996/ehco/internal/config"
"github.com/alecthomas/kingpin/v2"
"github.com/go-ping/ping"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/promlog"
"github.com/prometheus/common/version"
"github.com/prometheus/node_exporter/collector"
"go.uber.org/zap"
)
const (
METRIC_NS = "ehco"
METRIC_SUBSYSTEM_TRAFFIC = "traffic"
METRIC_SUBSYSTEM_PING = "ping"
METRIC_LABEL_REMOTE = "remote"
METRIC_LABEL_CONN_FLOW = "flow"
METRIC_CONN_FLOW_WRITE = "write"
METRIC_CONN_FLOW_READ = "read"
METRIC_LABEL_CONN_TYPE = "type"
METRIC_CONN_TYPE_TCP = "tcp"
METRIC_CONN_TYPE_UDP = "udp"
EhcoAliveStateInit = 0
EhcoAliveStateRunning = 1
)
// ping metrics
var (
pingLabelNames = []string{"ip", "host", "label"}
pingBuckets = prometheus.ExponentialBuckets(0.001, 2, 12) // 1ms ~ 4s
pingInterval = time.Second * 30
PingResponseDurationSeconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: METRIC_NS,
Subsystem: METRIC_SUBSYSTEM_PING,
Name: "response_duration_seconds",
Help: "A histogram of latencies for ping responses.",
Buckets: pingBuckets,
ConstLabels: ConstLabels,
},
pingLabelNames,
)
PingRequestTotal = prometheus.NewDesc(
prometheus.BuildFQName(METRIC_NS, METRIC_SUBSYSTEM_PING, "requests_total"),
"Number of ping requests sent",
pingLabelNames,
ConstLabels,
)
)
// traffic metrics
var (
Hostname, _ = os.Hostname()
ConstLabels = map[string]string{
"ehco_runner_hostname": Hostname,
}
EhcoAlive = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: METRIC_NS,
Subsystem: "",
Name: "alive_state",
Help: "ehco 存活状态",
ConstLabels: ConstLabels,
})
CurConnectionCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: METRIC_NS,
Subsystem: METRIC_SUBSYSTEM_TRAFFIC,
Name: "current_connection_count",
Help: "当前链接数",
ConstLabels: ConstLabels,
}, []string{METRIC_LABEL_REMOTE, METRIC_LABEL_CONN_TYPE})
NetWorkTransmitBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: METRIC_NS,
Subsystem: METRIC_SUBSYSTEM_TRAFFIC,
Name: "network_transmit_bytes",
Help: "传输流量总量bytes",
ConstLabels: ConstLabels,
}, []string{METRIC_LABEL_REMOTE, METRIC_LABEL_CONN_TYPE, METRIC_LABEL_CONN_FLOW})
HandShakeDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Subsystem: METRIC_SUBSYSTEM_TRAFFIC,
Namespace: METRIC_NS,
Name: "handshake_duration",
Help: "握手时间ms",
ConstLabels: ConstLabels,
}, []string{METRIC_LABEL_REMOTE})
)
type PingGroup struct {
Pingers []*ping.Pinger
LabelMap map[string]string
}
func initPinger(host string) *ping.Pinger {
pinger := ping.New(host)
if err := pinger.Resolve(); err != nil {
zap.S().Named("web").Errorf("failed to resolve pinger host:%s err:%s\n", host, err.Error())
return nil
}
zap.S().Named("web").Infof("Resolved %s as %s", host, pinger.IPAddr())
pinger.Interval = pingInterval
pinger.Timeout = time.Duration(math.MaxInt64)
pinger.RecordRtts = false
if runtime.GOOS != "darwin" {
pinger.SetPrivileged(true)
}
return pinger
}
func NewPingGroup(cfg *config.Config) *PingGroup {
seen := make(map[string]*ping.Pinger)
labelMap := make(map[string]string)
for _, relayCfg := range cfg.RelayConfigs {
// NOTE (https/ws/wss)://xxx.com -> xxx.com
for _, host := range relayCfg.TCPRemotes {
if strings.Contains(host, "//") {
host = strings.Split(host, "//")[1]
}
// NOTE xxx:1234 -> xxx
if strings.Contains(host, ":") {
host = strings.Split(host, ":")[0]
}
if _, ok := seen[host]; ok {
continue
}
seen[host] = initPinger(host)
labelMap[host] = relayCfg.Label
}
}
pingers := make([]*ping.Pinger, len(seen))
i := 0
for _, pinger := range seen {
pinger.OnRecv = func(pkt *ping.Packet) {
PingResponseDurationSeconds.WithLabelValues(
pkt.IPAddr.String(), pkt.Addr, labelMap[pkt.Addr]).Observe(pkt.Rtt.Seconds())
zap.S().Named("web").Infof("%d bytes from %s: icmp_seq=%d time=%v ttl=%v",
pkt.Nbytes, pkt.Addr, pkt.Seq, pkt.Rtt, pkt.Ttl)
}
pinger.OnDuplicateRecv = func(pkt *ping.Packet) {
zap.S().Named("web").Infof("%d bytes from %s: icmp_seq=%d time=%v ttl=%v (DUP!)",
pkt.Nbytes, pkt.IPAddr, pkt.Seq, pkt.Rtt, pkt.Ttl)
}
pingers[i] = pinger
i += 1
}
return &PingGroup{
Pingers: pingers,
LabelMap: labelMap,
}
}
func (pg *PingGroup) Describe(ch chan<- *prometheus.Desc) {
ch <- PingRequestTotal
}
func (pg *PingGroup) Collect(ch chan<- prometheus.Metric) {
for _, pinger := range pg.Pingers {
stats := pinger.Statistics()
ch <- prometheus.MustNewConstMetric(
PingRequestTotal,
prometheus.CounterValue,
float64(stats.PacketsSent),
stats.IPAddr.String(),
stats.Addr,
pg.LabelMap[stats.Addr],
)
}
}
func (pg *PingGroup) Run() {
if len(pg.Pingers) <= 0 {
return
}
splay := time.Duration(pingInterval.Nanoseconds() / int64(len(pg.Pingers)))
zap.S().Named("web").Infof("Waiting %s between starting pingers", splay)
for idx := range pg.Pingers {
go func() {
pinger := pg.Pingers[idx]
if err := pinger.Run(); err != nil {
zap.S().Named("web").Infof("Starting prober err: %s", err)
}
zap.S().Named("web").Infof("Starting prober for %s", pinger.Addr())
}()
time.Sleep(splay)
}
}
func RegisterEhcoMetrics(cfg *config.Config) error {
// traffic
prometheus.MustRegister(EhcoAlive)
prometheus.MustRegister(CurConnectionCount)
prometheus.MustRegister(NetWorkTransmitBytes)
prometheus.MustRegister(HandShakeDuration)
EhcoAlive.Set(EhcoAliveStateInit)
// ping
if cfg.EnablePing {
pg := NewPingGroup(cfg)
prometheus.MustRegister(PingResponseDurationSeconds)
prometheus.MustRegister(pg)
go pg.Run()
}
return nil
}
func RegisterNodeExporterMetrics(cfg *config.Config) error {
level := &promlog.AllowedLevel{}
// mute node_exporter logger
if err := level.Set("error"); err != nil {
return err
}
promlogConfig := &promlog.Config{Level: level}
logger := promlog.New(promlogConfig)
// see this https://github.com/prometheus/node_exporter/pull/2463
if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
return err
}
nc, err := collector.NewNodeCollector(logger)
if err != nil {
return fmt.Errorf("couldn't create collector: %s", err)
}
// nc.Collectors = collectors
prometheus.MustRegister(
nc,
version.NewCollector("node_exporter"),
)
return nil
}