mirror of
https://github.com/bolucat/Archive.git
synced 2025-10-06 16:48:17 +08:00
249 lines
6.5 KiB
Go
249 lines
6.5 KiB
Go
package metrics
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/Ehco1996/ehco/internal/config"
|
|
"github.com/alecthomas/kingpin/v2"
|
|
"github.com/go-ping/ping"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/common/promlog"
|
|
"github.com/prometheus/common/version"
|
|
"github.com/prometheus/node_exporter/collector"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// Metric naming: namespace, subsystems, and the label keys/values shared by
// the collectors declared in this package.
const (
	// METRIC_NS is the namespace used for every ehco metric.
	METRIC_NS = "ehco"

	// Subsystems the metrics are grouped under.
	METRIC_SUBSYSTEM_TRAFFIC = "traffic"
	METRIC_SUBSYSTEM_PING    = "ping"

	// METRIC_LABEL_REMOTE is the label key identifying the remote.
	METRIC_LABEL_REMOTE = "remote"

	// METRIC_LABEL_CONN_FLOW is the label key for the traffic direction;
	// METRIC_CONN_FLOW_WRITE and METRIC_CONN_FLOW_READ are its values.
	METRIC_LABEL_CONN_FLOW = "flow"
	METRIC_CONN_FLOW_WRITE = "write"
	METRIC_CONN_FLOW_READ  = "read"

	// METRIC_LABEL_CONN_TYPE is the label key for the connection type;
	// METRIC_CONN_TYPE_TCP and METRIC_CONN_TYPE_UDP are its values.
	METRIC_LABEL_CONN_TYPE = "type"
	METRIC_CONN_TYPE_TCP   = "tcp"
	METRIC_CONN_TYPE_UDP   = "udp"
)

// Values for the alive-state gauge.
const (
	EhcoAliveStateInit    = iota // 0
	EhcoAliveStateRunning        // 1
)
|
|
|
|
// ping metrics
|
|
var (
|
|
pingLabelNames = []string{"ip", "host", "label"}
|
|
pingBuckets = prometheus.ExponentialBuckets(0.001, 2, 12) // 1ms ~ 4s
|
|
pingInterval = time.Second * 30
|
|
|
|
PingResponseDurationSeconds = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: METRIC_NS,
|
|
Subsystem: METRIC_SUBSYSTEM_PING,
|
|
Name: "response_duration_seconds",
|
|
Help: "A histogram of latencies for ping responses.",
|
|
Buckets: pingBuckets,
|
|
ConstLabels: ConstLabels,
|
|
},
|
|
pingLabelNames,
|
|
)
|
|
PingRequestTotal = prometheus.NewDesc(
|
|
prometheus.BuildFQName(METRIC_NS, METRIC_SUBSYSTEM_PING, "requests_total"),
|
|
"Number of ping requests sent",
|
|
pingLabelNames,
|
|
ConstLabels,
|
|
)
|
|
)
|
|
|
|
// traffic metrics
|
|
var (
|
|
Hostname, _ = os.Hostname()
|
|
|
|
ConstLabels = map[string]string{
|
|
"ehco_runner_hostname": Hostname,
|
|
}
|
|
|
|
EhcoAlive = prometheus.NewGauge(prometheus.GaugeOpts{
|
|
Namespace: METRIC_NS,
|
|
Subsystem: "",
|
|
Name: "alive_state",
|
|
Help: "ehco 存活状态",
|
|
ConstLabels: ConstLabels,
|
|
})
|
|
|
|
CurConnectionCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: METRIC_NS,
|
|
Subsystem: METRIC_SUBSYSTEM_TRAFFIC,
|
|
Name: "current_connection_count",
|
|
Help: "当前链接数",
|
|
ConstLabels: ConstLabels,
|
|
}, []string{METRIC_LABEL_REMOTE, METRIC_LABEL_CONN_TYPE})
|
|
|
|
NetWorkTransmitBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: METRIC_NS,
|
|
Subsystem: METRIC_SUBSYSTEM_TRAFFIC,
|
|
Name: "network_transmit_bytes",
|
|
Help: "传输流量总量bytes",
|
|
ConstLabels: ConstLabels,
|
|
}, []string{METRIC_LABEL_REMOTE, METRIC_LABEL_CONN_TYPE, METRIC_LABEL_CONN_FLOW})
|
|
|
|
HandShakeDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Subsystem: METRIC_SUBSYSTEM_TRAFFIC,
|
|
Namespace: METRIC_NS,
|
|
Name: "handshake_duration",
|
|
Help: "握手时间ms",
|
|
ConstLabels: ConstLabels,
|
|
}, []string{METRIC_LABEL_REMOTE})
|
|
)
|
|
|
|
type PingGroup struct {
|
|
Pingers []*ping.Pinger
|
|
LabelMap map[string]string
|
|
}
|
|
|
|
func initPinger(host string) *ping.Pinger {
|
|
pinger := ping.New(host)
|
|
if err := pinger.Resolve(); err != nil {
|
|
zap.S().Named("web").Errorf("failed to resolve pinger host:%s err:%s\n", host, err.Error())
|
|
return nil
|
|
}
|
|
zap.S().Named("web").Infof("Resolved %s as %s", host, pinger.IPAddr())
|
|
pinger.Interval = pingInterval
|
|
pinger.Timeout = time.Duration(math.MaxInt64)
|
|
pinger.RecordRtts = false
|
|
if runtime.GOOS != "darwin" {
|
|
pinger.SetPrivileged(true)
|
|
}
|
|
return pinger
|
|
}
|
|
|
|
func NewPingGroup(cfg *config.Config) *PingGroup {
|
|
seen := make(map[string]*ping.Pinger)
|
|
labelMap := make(map[string]string)
|
|
|
|
for _, relayCfg := range cfg.RelayConfigs {
|
|
// NOTE (https/ws/wss)://xxx.com -> xxx.com
|
|
for _, host := range relayCfg.TCPRemotes {
|
|
if strings.Contains(host, "//") {
|
|
host = strings.Split(host, "//")[1]
|
|
}
|
|
// NOTE xxx:1234 -> xxx
|
|
if strings.Contains(host, ":") {
|
|
host = strings.Split(host, ":")[0]
|
|
}
|
|
if _, ok := seen[host]; ok {
|
|
continue
|
|
}
|
|
seen[host] = initPinger(host)
|
|
labelMap[host] = relayCfg.Label
|
|
}
|
|
}
|
|
|
|
pingers := make([]*ping.Pinger, len(seen))
|
|
i := 0
|
|
for _, pinger := range seen {
|
|
pinger.OnRecv = func(pkt *ping.Packet) {
|
|
PingResponseDurationSeconds.WithLabelValues(
|
|
pkt.IPAddr.String(), pkt.Addr, labelMap[pkt.Addr]).Observe(pkt.Rtt.Seconds())
|
|
zap.S().Named("web").Infof("%d bytes from %s: icmp_seq=%d time=%v ttl=%v",
|
|
pkt.Nbytes, pkt.Addr, pkt.Seq, pkt.Rtt, pkt.Ttl)
|
|
}
|
|
pinger.OnDuplicateRecv = func(pkt *ping.Packet) {
|
|
zap.S().Named("web").Infof("%d bytes from %s: icmp_seq=%d time=%v ttl=%v (DUP!)",
|
|
pkt.Nbytes, pkt.IPAddr, pkt.Seq, pkt.Rtt, pkt.Ttl)
|
|
}
|
|
pingers[i] = pinger
|
|
i += 1
|
|
}
|
|
return &PingGroup{
|
|
Pingers: pingers,
|
|
LabelMap: labelMap,
|
|
}
|
|
}
|
|
|
|
func (pg *PingGroup) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- PingRequestTotal
|
|
}
|
|
|
|
func (pg *PingGroup) Collect(ch chan<- prometheus.Metric) {
|
|
for _, pinger := range pg.Pingers {
|
|
stats := pinger.Statistics()
|
|
ch <- prometheus.MustNewConstMetric(
|
|
PingRequestTotal,
|
|
prometheus.CounterValue,
|
|
float64(stats.PacketsSent),
|
|
stats.IPAddr.String(),
|
|
stats.Addr,
|
|
pg.LabelMap[stats.Addr],
|
|
)
|
|
}
|
|
}
|
|
|
|
func (pg *PingGroup) Run() {
|
|
if len(pg.Pingers) <= 0 {
|
|
return
|
|
}
|
|
splay := time.Duration(pingInterval.Nanoseconds() / int64(len(pg.Pingers)))
|
|
zap.S().Named("web").Infof("Waiting %s between starting pingers", splay)
|
|
for idx := range pg.Pingers {
|
|
go func() {
|
|
pinger := pg.Pingers[idx]
|
|
if err := pinger.Run(); err != nil {
|
|
zap.S().Named("web").Infof("Starting prober err: %s", err)
|
|
}
|
|
zap.S().Named("web").Infof("Starting prober for %s", pinger.Addr())
|
|
}()
|
|
time.Sleep(splay)
|
|
}
|
|
}
|
|
|
|
func RegisterEhcoMetrics(cfg *config.Config) error {
|
|
// traffic
|
|
prometheus.MustRegister(EhcoAlive)
|
|
prometheus.MustRegister(CurConnectionCount)
|
|
prometheus.MustRegister(NetWorkTransmitBytes)
|
|
prometheus.MustRegister(HandShakeDuration)
|
|
|
|
EhcoAlive.Set(EhcoAliveStateInit)
|
|
|
|
// ping
|
|
if cfg.EnablePing {
|
|
pg := NewPingGroup(cfg)
|
|
prometheus.MustRegister(PingResponseDurationSeconds)
|
|
prometheus.MustRegister(pg)
|
|
go pg.Run()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func RegisterNodeExporterMetrics(cfg *config.Config) error {
|
|
level := &promlog.AllowedLevel{}
|
|
// mute node_exporter logger
|
|
if err := level.Set("error"); err != nil {
|
|
return err
|
|
}
|
|
promlogConfig := &promlog.Config{Level: level}
|
|
logger := promlog.New(promlogConfig)
|
|
// see this https://github.com/prometheus/node_exporter/pull/2463
|
|
if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
|
|
return err
|
|
}
|
|
nc, err := collector.NewNodeCollector(logger)
|
|
if err != nil {
|
|
return fmt.Errorf("couldn't create collector: %s", err)
|
|
}
|
|
// nc.Collectors = collectors
|
|
prometheus.MustRegister(
|
|
nc,
|
|
version.NewCollector("node_exporter"),
|
|
)
|
|
return nil
|
|
}
|