From 369ac1140d469d15313d8f62b007484c8fb08508 Mon Sep 17 00:00:00 2001 From: Brian Cunnie Date: Sun, 7 Aug 2022 07:31:22 -0700 Subject: [PATCH] Improve test parallelization w/ nodes > 8 Parallelizable tests (`ginkgo -r -p .`) were failing on my 20-core (`-nodes=20`) Mac Studio. We narrowed this down to two causes: 1. The servers sometimes took longer than the hard-coded 3-second delay to become ready to answer queries. 2. The blocklist was downloaded asynchronously, and sometimes weren't ready by the time the queries were run. To address these, we did the following: 1. Rather than hard-code a 3-second delay, we modified the server to signal that it's ready to answer queries (by printing "Ready to answer queries" to the log). We now wait for that string to appear before we begin testing the server. IMHO, this is a much better solution than a hard-coded delay. 2. The initial download of the blocklist occurs synchronously, and subsequent downloads, asynchronously. Drive-bys: - If the server can't bind to even one address, it exits. - Refactored the blocklist code; the nested if-then-else were too deep Fixes: ``` Expected : 43.134.66.67 to match regular expression : \A52.0.56.137\n\z In [It] at: /Users/cunnie/workspace/sslip.io/src/sslip.io-dns-server/integration_test.go:421 ``` --- src/sslip.io-dns-server/integration_test.go | 4 +- src/sslip.io-dns-server/main.go | 12 ++-- src/sslip.io-dns-server/xip/xip.go | 64 ++++++++++----------- 3 files changed, 39 insertions(+), 41 deletions(-) diff --git a/src/sslip.io-dns-server/integration_test.go b/src/sslip.io-dns-server/integration_test.go index 44da1b5..f455738 100644 --- a/src/sslip.io-dns-server/integration_test.go +++ b/src/sslip.io-dns-server/integration_test.go @@ -29,8 +29,8 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) // takes 0.455s to start up on macOS Big Sur 3.7 GHz Quad Core 22-nm Xeon E5-1620v2 processor (2013 Mac Pro) // takes 1.312s to start up on macOS Big Sur 2.0GHz quad-core 10th-generation Intel Core i5 processor (2020 13" MacBook Pro) - // round up to 3 seconds to account for slow container-on-a-VM-with-shared-core - time.Sleep(3 * time.Second) // takes 0.455s to start up on macOS Big Sur 4-core Xeon + // 10 seconds should be long enough for slow container-on-a-VM-with-shared-core + Eventually(serverSession.Err, 10).Should(Say("Ready to answer queries")) }) var _ = AfterSuite(func() { diff --git a/src/sslip.io-dns-server/main.go b/src/sslip.io-dns-server/main.go index 87816c7..833f118 100644 --- a/src/sslip.io-dns-server/main.go +++ b/src/sslip.io-dns-server/main.go @@ -29,9 +29,7 @@ func main() { // common err hierarchy: net.OpError → os.SyscallError → syscall.Errno switch { case err == nil: - log.Printf("Successfully bound to all interfaces, port %d.\n", *bindPort) - wg.Add(1) - readFrom(conn, &wg, x) + log.Printf("Successfully bound to all IPs, port %d.\n", *bindPort) case isErrorPermissionsError(err): log.Printf("Try invoking me with `sudo` because I don't have permission to bind to port %d.\n", *bindPort) log.Fatal(err.Error()) @@ -58,15 +56,19 @@ func main() { go readFrom(conn, &wg, x) } } - if len(boundIPsPorts) > 0 { - log.Printf(`I bound to the following: "%s"`, strings.Join(boundIPsPorts, `", "`)) + if len(boundIPsPorts) == 0 { + log.Fatalf("I couldn't bind to any IPs on port %d, so I'm exiting", *bindPort) } + log.Printf(`I bound to the following IPs: "%s"`, strings.Join(boundIPsPorts, `", "`)) if len(unboundIPs) > 0 { log.Printf(`I couldn't bind to the following IPs: "%s"`, strings.Join(unboundIPs, `", "`)) } default: log.Fatal(err.Error()) } + log.Printf("Ready to answer queries") + wg.Add(1) + readFrom(conn, &wg, x) wg.Wait() } diff --git a/src/sslip.io-dns-server/xip/xip.go b/src/sslip.io-dns-server/xip/xip.go index be8a2ea..4e29adc 100644 --- a/src/sslip.io-dns-server/xip/xip.go +++ b/src/sslip.io-dns-server/xip/xip.go @@ -240,26 +240,20 @@ func NewXip(etcdEndpoint, blocklistURL string) (x *Xip, logmessages []string) { // determine whether to use a local key-value store instead x.Etcd, err = clientv3New(etcdEndpoint) if err != nil { - logmessages = append(logmessages, fmt.Sprintf("failed to connect to etcd at %s; using local key-value store instead: %s", etcdEndpoint, err.Error())) + logmessages = append(logmessages, fmt.Sprintf("failed to connect to etcd at %s, using local key-value store instead: %s", etcdEndpoint, err.Error())) } else { logmessages = append(logmessages, fmt.Sprintf("Successfully connected to etcd at %s", etcdEndpoint)) } // don't `defer etcdCli.Close()`: "The Client has internal state (watchers and leases), so // Clients should be reused instead of created as needed" + // Download the blocklist + logmessages = append(logmessages, x.downloadBlockList(blocklistURL)) // re-download the blocklist every hour so I don't need to restart servers after updating blocklist go func() { for { - blocklistStrings, blocklistCDIRs, err := readBlocklist(blocklistURL) - if err != nil { - logmessages = append(logmessages, fmt.Sprintf("couldn't get blocklist at %s: %s", blocklistURL, err.Error())) - } else { - logmessages = append(logmessages, fmt.Sprintf("Successfully downloaded blocklist from %s: %v, %v", blocklistURL, blocklistStrings, blocklistCDIRs)) - x.BlocklistStrings = blocklistStrings - x.BlocklistCDIRs = blocklistCDIRs - x.BlocklistUpdated = time.Now() - } time.Sleep(1 * time.Hour) + _ = x.downloadBlockList(blocklistURL) // uh-oh, I lose the log message. } }() @@ -295,11 +289,12 @@ func NewXip(etcdEndpoint, blocklistURL string) (x *Xip, logmessages []string) { // QueryResponse are not as hard. // // Examples of log strings returned: -// 78.46.204.247.33654: TypeA 127-0-0-1.sslip.io ? 127.0.0.1 -// 78.46.204.247.33654: TypeA non-existent.sslip.io ? nil, SOA -// 78.46.204.247.33654: TypeNS www.example.com ? NS -// 78.46.204.247.33654: TypeSOA www.example.com ? SOA -// 2600::.33654: TypeAAAA --1.sslip.io ? ::1 +// +// 78.46.204.247.33654: TypeA 127-0-0-1.sslip.io ? 127.0.0.1 +// 78.46.204.247.33654: TypeA non-existent.sslip.io ? nil, SOA +// 78.46.204.247.33654: TypeNS www.example.com ? NS +// 78.46.204.247.33654: TypeSOA www.example.com ? SOA +// 2600::.33654: TypeAAAA --1.sslip.io ? ::1 func (x *Xip) QueryResponse(queryBytes []byte, srcAddr net.IP) (responseBytes []byte, logMessage string, err error) { var queryHeader dnsmessage.Header var p dnsmessage.Parser @@ -374,7 +369,8 @@ func (x *Xip) processQuestion(q dnsmessage.Question, srcAddr net.IP) (response R RCode: dnsmessage.RCodeSuccess, // assume success, may be replaced later }, } - if IsAcmeChallenge(q.Name.String()) && !x.blocklist(q.Name.String()) { // thanks @NormanR + if IsAcmeChallenge(q.Name.String()) && !x.blocklist(q.Name.String()) { + // thanks, @NormanR // delegate everything to its stripped (remove "_acme-challenge.") address, e.g. // dig _acme-challenge.127-0-0-1.sslip.io mx → NS 127-0-0-1.sslip.io response.Header.Authoritative = false // we're delegating, so we're not authoritative @@ -610,7 +606,7 @@ func (x *Xip) processQuestion(q dnsmessage.Question, srcAddr net.IP) (response R } } -// NSResponse sets the Answers/Authorities depending whether we're delegating or authoritative +// NSResponse sets the Answers/Authorities depending upon whether we're delegating or authoritative // (whether it's an "_acme-challenge." domain or not). Either way, it supplies the Additionals // (IP addresses of the nameservers). func (x *Xip) NSResponse(name dnsmessage.Name, response Response, logMessage string) (Response, string, error) { @@ -1071,29 +1067,29 @@ func (a Metrics) MostlyEquals(b Metrics) bool { return false } -// readBlocklist downloads the blocklist of domains & CIDRs that are forbidden -// because they're used for phishing (e.g. "raiffeisen") -func readBlocklist(blocklistURL string) (blocklistStrings []string, blocklistCIDRs []net.IPNet, err error) { +func (x *Xip) downloadBlockList(blocklistURL string) string { resp, err := http.Get(blocklistURL) if err != nil { - log.Println(fmt.Errorf(`failed to download blocklist "%s": %w`, blocklistURL, err)) - } else { - //noinspection GoUnhandledErrorResult - defer resp.Body.Close() - if resp.StatusCode > 299 { - log.Printf(`failed to download blocklist "%s", HTTP status: "%d"`, blocklistURL, resp.StatusCode) - } else { - blocklistStrings, blocklistCIDRs, err = ReadBlocklist(resp.Body) - if err != nil { - log.Println(fmt.Errorf(`failed to parse blocklist "%s": %w`, blocklistURL, err)) - } - } + return fmt.Sprintf(`failed to download blocklist "%s": %s`, blocklistURL, err.Error()) } - return blocklistStrings, blocklistCIDRs, err + //noinspection GoUnhandledErrorResult + defer resp.Body.Close() + if resp.StatusCode > 299 { + return fmt.Sprintf(`failed to download blocklist "%s", HTTP status: "%d"`, blocklistURL, resp.StatusCode) + } + blocklistStrings, blocklistCIDRs, err := ReadBlocklist(resp.Body) + if err != nil { + return fmt.Sprintf(`failed to parse blocklist "%s": %s`, blocklistURL, err.Error()) + } + x.BlocklistStrings = blocklistStrings + x.BlocklistCDIRs = blocklistCIDRs + x.BlocklistUpdated = time.Now() + return fmt.Sprintf("Successfully downloaded blocklist from %s: %v, %v", blocklistURL, x.BlocklistStrings, x.BlocklistCDIRs) } // ReadBlocklist "sanitizes" the block list, removing comments, invalid characters -// and lowercasing the names to be blocked +// and lowercasing the names to be blocked. +// public to make testing easier func ReadBlocklist(blocklist io.Reader) (stringBlocklists []string, cidrBlocklists []net.IPNet, err error) { scanner := bufio.NewScanner(blocklist) comments := regexp.MustCompile(`#.*`)