Use more precise terminology in metrics

"Successful" is a nebulous term. "Answered" is more precise (at least one record returned in the answer section of the DNS response).
2025-10-31 02:46:35 +08:00 · 2022-02-08 11:17:28 -08:00
parent 51ed47317e
commit 01d68dcd8b
3 changed files with 63 additions and 65 deletions
--- a/bosh-release/src/sslip.io-dns-server/integration_metrics_test.go
+++ b/bosh-release/src/sslip.io-dns-server/integration_metrics_test.go
@@ -162,19 +162,18 @@ func getMetrics() (m xip.Metrics) {
 	var uptime int
 	var junk string
 	_, err = fmt.Sscanf(string(stdout),
-		"\"uptime (seconds): %d\"\n"+
+		"\"Uptime (seconds): %d\"\n"+
-			"\"key-value store: %s\n"+ // %s "swallows" the double-quote at the end
+			"\"Key-value store: %s\n"+ // %s "swallows" the double-quote at the end
-			"\"queries: %d\"\n"+
+			"\"Queries: %d\"\n"+
-			"\"queries/second: %s\n"+
+			"\"Queries/second: %s\n"+
-			"\"successful:\"\n"+
+			"\"AnsQueries: %d\"\n"+
-			"\"- queries: %d\"\n"+
+			"\"AnsQueries/second: %s\n"+
-			"\"- queries/second: %s\n"+
+			"\"AnsA: %d\"\n"+
-			"\"- A: %d\"\n"+
+			"\"AnsAAAA: %d\"\n"+
-			"\"- AAAA: %d\"\n"+
+			"\"Source IP TXT: %d\"\n"+
-			"\"- source IP TXT: %d\"\n"+
+			"\"Version TXT: %d\"\n"+
-			"\"- version TXT: %d\"\n"+
+			"\"DNS-01 challenge: %d\"\n"+
-			"\"- DNS-01 challenge: %d\"\n"+
+			"\"Blocked: %d\"\n",
-			"\"- blocked: %d\"\n",
 		&uptime,
 		&junk,
 		&m.Queries,
--- a/bosh-release/src/sslip.io-dns-server/xip/xip.go
+++ b/bosh-release/src/sslip.io-dns-server/xip/xip.go
@@ -735,23 +735,22 @@ func metricsSslipIo(x Xip) (txtResources []dnsmessage.TXTResource, err error) {
 	<-x.DnsAmplificationAttackDelay
 	var metrics []string
 	uptime := time.Since(x.Metrics.Start)
-	metrics = append(metrics, fmt.Sprintf("uptime (seconds): %.0f", uptime.Seconds()))
+	metrics = append(metrics, fmt.Sprintf("Uptime (seconds): %.0f", uptime.Seconds()))
 	keyValueStore := "etcd"
 	if x.isEtcdNil() {
 		keyValueStore = "builtin"
 	}
-	metrics = append(metrics, "key-value store: "+keyValueStore)
+	metrics = append(metrics, "Key-value store: "+keyValueStore)
-	metrics = append(metrics, fmt.Sprintf("queries: %d", x.Metrics.Queries))
+	metrics = append(metrics, fmt.Sprintf("Queries: %d", x.Metrics.Queries))
-	metrics = append(metrics, fmt.Sprintf("queries/second: %.1f", float64(x.Metrics.Queries)/uptime.Seconds()))
+	metrics = append(metrics, fmt.Sprintf("Queries/second: %.1f", float64(x.Metrics.Queries)/uptime.Seconds()))
-	metrics = append(metrics, "successful:")
+	metrics = append(metrics, fmt.Sprintf("AnsQueries: %d", x.Metrics.AnsweredQueries))
-	metrics = append(metrics, fmt.Sprintf("- queries: %d", x.Metrics.AnsweredQueries))
+	metrics = append(metrics, fmt.Sprintf("AnsQueries/second: %.1f", float64(x.Metrics.AnsweredQueries)/uptime.Seconds()))
-	metrics = append(metrics, fmt.Sprintf("- queries/second: %.1f", float64(x.Metrics.AnsweredQueries)/uptime.Seconds()))
+	metrics = append(metrics, fmt.Sprintf("AnsA: %d", x.Metrics.AnsweredAQueries))
-	metrics = append(metrics, fmt.Sprintf("- A: %d", x.Metrics.AnsweredAQueries))
+	metrics = append(metrics, fmt.Sprintf("AnsAAAA: %d", x.Metrics.AnsweredAAAAQueries))
-	metrics = append(metrics, fmt.Sprintf("- AAAA: %d", x.Metrics.AnsweredAAAAQueries))
+	metrics = append(metrics, fmt.Sprintf("Source IP TXT: %d", x.Metrics.AnsweredTXTSrcIPQueries))
-	metrics = append(metrics, fmt.Sprintf("- source IP TXT: %d", x.Metrics.AnsweredTXTSrcIPQueries))
+	metrics = append(metrics, fmt.Sprintf("Version TXT: %d", x.Metrics.AnsweredXTVersionQueries))
-	metrics = append(metrics, fmt.Sprintf("- version TXT: %d", x.Metrics.AnsweredXTVersionQueries))
+	metrics = append(metrics, fmt.Sprintf("DNS-01 challenge: %d", x.Metrics.AnsweredNSDNS01ChallengeQueries))
-	metrics = append(metrics, fmt.Sprintf("- DNS-01 challenge: %d", x.Metrics.AnsweredNSDNS01ChallengeQueries))
+	metrics = append(metrics, fmt.Sprintf("Blocked: %d", x.Metrics.AnsweredBlockedQueries))
-	metrics = append(metrics, fmt.Sprintf("- blocked: %d", x.Metrics.AnsweredBlockedQueries))
 	for _, metric := range metrics {
 		txtResources = append(txtResources, dnsmessage.TXTResource{TXT: []string{metric}})
 	}
--- a/k8s/document_root/index.html
+++ b/k8s/document_root/index.html
@@ -151,9 +151,9 @@ src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> <![endif]-->
      <pre><code>169.254.169.254</code></pre>
      <h3 id="server">But I Want My Own DNS Server!</h3>
      <p>If you want to run your own DNS server, it's simple: you can compile from <a href=
-      "https://github.com/cunnie/sslip.io/tree/main/bosh-release/src/sslip.io-dns-server">source</a> or you can use
+      "https://github.com/cunnie/sslip.io/tree/main/bosh-release/src/sslip.io-dns-server">source</a> or you can use one
-      one of our <a href="https://github.com/cunnie/sslip.io/releases">pre-built binaries</a>. In the following
+      of our <a href="https://github.com/cunnie/sslip.io/releases">pre-built binaries</a>. In the following example, we
-      example, we install & run our server within a docker container:</p>
+      install & run our server within a docker container:</p>
      <pre>
 docker run -it --rm fedora
 curl -L https://github.com/cunnie/sslip.io/releases/download/2.5.0/sslip.io-dns-server-linux-amd64 -o dns-server
@@ -276,54 +276,54 @@ dig @ns-gce.nono.io version.status.sslip.io txt +short
      <code>metrics.status.sslip.io</code>
      <pre>
 dig @ns-aws.sslip.io metrics.status.sslip.io txt +short
-  "uptime (seconds): 13486"
+  "Uptime (seconds): 13486"
-  "key-value store: etcd"
+  "Key-value store: etcd"
-  "queries: 550488"
+  "Queries: 550488"
-  "queries/second: 40.8"
+  "Queries/second: 40.8"
-  "successful:"
+  "AnsQueries: 149974"
-  "- queries: 149974"
+  "AnsQueries/second: 11.1"
-  "- queries/second: 11.1"
+  "AnsA: 113520"
-  "- A: 113520"
+  "AnsAAAA: 7993"
-  "- AAAA: 7993"
+  "Source IP TXT: 12"
-  "- source IP TXT: 12"
+  "Version TXT: 5"
-  "- version TXT: 5"
+  "DNS-01 challenge: 39"
-  "- DNS-01 challenge: 39"
      </pre>
      <h5>Explanation of Metrics</h5>
      <dl>
-        <dt>uptime</dt>
+        <dt>Uptime</dt>
        <dd>The time since the DNS server has been started, in seconds</dd>
-        <dt>key-value store</dt>
+        <dt>Key-value store</dt>
        <dd>This can be one of two values: "builtin" and "etcd". "builtin" means any changes to key/value are limited
        to that specific DNS server. "etcd" means changes are propagated to all servers whose key-value store is
-        "etcd".</dd>
+        "etcd"</dd>
-        <dt>uptime</dt>
+        <dt>Queries</dt>
        <dd>The time since the DNS server has been started, in seconds</dd>
        <dt>queries</dt>
        <dd>The raw number of DNS queries that the server has responded to since starting operation</dd>
-        <dt>queries/second</dt>
+        <dt>Queries/second</dt>
        <dd>The raw number of DNS queries that the server has responded to since starting operation divided by the
        number of seconds</dd>
-        <dt>successful</dt>
+        <dt>AnsQueries</dt>
-        <dd>"successful" means the number of queries for which at least one record was returned as an answer (at least
+        <dd>The number of queries we responded to with at least one record in the answer section. Note that the number
-        one record in the ANSWER section). Note that the number of successful queries is typically a third or fourth
+        of answered queries is typically a third or fourth the size of the overall queries. This is normal. One reason
-        the size of the overall queries. This is normal. One reason for this disparity is that often both the IPv4 (A)
+        for this disparity is that often both the IPv4 (A) and IPv6 (AAAA) records will be checked, but only one reply
-        and IPv6 (AAAA) records will be checked, but only one record is returned (successful). For example, browsing to
+        will have a record in the answer section. For example, browsing to "127.0.0.1.sslip.io" generates two lookups,
-        "127.0.0.1.sslip.io" generates two lookups, one successful (IPv4), and one not (IPv6). Another reason is that
+        one with an answer (IPv4), and one without (IPv6). Another reason is that lookups follow a chain, e.g. looking
-        lookups follow a chain, e.g. looking up "127.0.0.1.sslip.io" may generate up to four queries for A records
+        up "127.0.0.1.sslip.io" may generate up to four queries for A records ("1.sslip.io", "0.1.sslip.io",
-        ("1.sslip.io", "0.1.sslip.io", "0.0.1.sslip.io" and "127.0.0.1.sslip.io"), only the last of which is
+        "0.0.1.sslip.io" and "127.0.0.1.sslip.io"), only the last of which returns a record in the answer section.
-        successful. Pro-tip: if you want to shave milliseconds off name resolution, use dashes not dots in your
+        Pro-tip: if you want to shave milliseconds off name resolution, use dashes not dots in your hostname (e.g.
-        hostname (e.g. "10-9-9-30.sslip.io" instead of "10.9.9.30.sslip.io")</dd>
+        "10-9-9-30.sslip.io" instead of "10.9.9.30.sslip.io")</dd>
-        <dt>A</dt>
+        <dt>AnsQueries/second</dt>
-        <dd>The number of responses which included an IPv4 record since starting operation (e.g. "dig
+        <dd>The raw number of DNS queries that the server has responded to with at least one record in the answer
        section since starting operation divided by the number of seconds</dd>
        <dt>AnsA</dt>
        <dd>The number of responses which included an A (IPv4) record since starting operation (e.g. "dig
        127.0.0.1.sslip.io")</dd>
-        <dt>AAAA</dt>
+        <dt>AnsAAAA</dt>
-        <dd>The number of responses which included an IPv6 record since starting operation (e.g. "dig --1.sslip.io
+        <dd>The number of responses which included an AAAA (IPv6) record since starting operation (e.g. "dig
-        aaaa")</dd>
+        --1.sslip.io aaaa")</dd>
-        <dt>source IP TXT</dt>
+        <dt>Source IP TXT</dt>
        <dd>The number of responses which included a TXT record of the querier's IP address since starting operation
        (e.g. "dig @ns.sslip.io ip.sslip.io txt")</dd>
-        <dt>version TXT</dt>
+        <dt>Version TXT</dt>
         <dd>The number of responses which included a TXT record of the DNS server's version since starting operation
        (e.g. "dig @ns-azure.sslip.io version.status.sslip.io txt")</dd>
        <dt>DNS-01 Challenge</dt>