mirror of
https://github.com/cunnie/sslip.io.git
synced 2025-09-27 03:55:56 +08:00
Performance-tune the blocklist
Previously we blocked by CIDRs, not IPs, but that was flawed: of the 746 CIDRs, 744 of them were /32 — in other words, IP addresses. And matching CIDRs is computationally expensive: consuming 4.8% of the CPU for each query. We switched to a string-indexed map instead to accelerate matching. - Fivefold increase in blocklist lookup speed, dropping from consuming 4.8% of the CPU to 0.96% - Added a new member, `xip.BlocklistIPs` - All blocked sites are IPv4. I have never gotten a takedown for an IPv6 site - I wanted to maintain backwards-compatibility with my blocklist file; I didn't want to be forced to coordinate updating that simultaneously with a deploy of this code, hence the automated "/32" conversion from a CIDR to an IP address - I cleaned up the test blocklist file (`blocklist-test.txt`); it's easier to read & understand - I added CPU profiles from before (`profile/cpu-cidr.prof`) and after (`profile/cpu-ip.prof`) the change.
This commit is contained in:
@@ -3,8 +3,7 @@
|
||||
# This is a shortened variant meant to be used for testing (`ginkgo`) because
|
||||
# the legitimate one has grown so long it clutters the test output
|
||||
|
||||
raiffeisen # https://www.rbinternational.com/en/homepage.html
|
||||
43-134-66-67 # Netflix, https://nf-43-134-66-67.sslip.io/sg
|
||||
43.134.66.67/24 # Netflix
|
||||
2601:646:100:69f7:cafe:bebe:cafe:bebe/112 # personal (Comcast) IPv6 range for testing blocklist
|
||||
|
||||
12.34.56.78/24 # IPv4 CIDR
|
||||
1234::/64 # IPv6 CIDR
|
||||
23.45.67.89 # IPv4
|
||||
raiffeisen # string
|
||||
|
@@ -452,13 +452,21 @@ var _ = Describe("sslip.io-dns-server", func() {
|
||||
`\Ans-[a-z-]+.sslip.io.\nns-[a-z-]+.sslip.io.\nns-[a-z-]+.sslip.io.\nns-[a-z-]+.sslip.io.\n\z`,
|
||||
`TypeNS _acme-challenge.raiffeisen.fe80--.sslip.io. \? ns-do-sg.sslip.io., ns-gce.sslip.io., ns-hetzner.sslip.io., ns-ovh.sslip.io.\n$`),
|
||||
Entry("an A record with a forbidden CIDR is redirected",
|
||||
"@localhost nf.43.134.66.67.sslip.io +short",
|
||||
"@localhost nf.12.34.56.0.sslip.io +short",
|
||||
`\A52.0.56.137\n\z`,
|
||||
`TypeA nf.43.134.66.67.sslip.io. \? 52.0.56.137\n$`),
|
||||
`TypeA nf.12.34.56.0.sslip.io. \? 52.0.56.137\n$`),
|
||||
Entry("an A record with a forbidden IP is redirected",
|
||||
"@localhost nf.23.45.67.89.sslip.io +short",
|
||||
`\A52.0.56.137\n\z`,
|
||||
`TypeA nf.23.45.67.89.sslip.io. \? 52.0.56.137\n$`),
|
||||
Entry("an A record with a forbidden IP with dashes is redirected",
|
||||
"@localhost nf.23-45-67-89.sslip.io +short",
|
||||
`\A52.0.56.137\n\z`,
|
||||
`TypeA nf.23-45-67-89.sslip.io. \? 52.0.56.137\n$`),
|
||||
Entry("an AAAA record with a forbidden CIDR is redirected",
|
||||
"@localhost 2601-646-100-69f7-cafe-bebe-cafe-baba.sslip.io aaaa +short",
|
||||
"@localhost 1234--1.sslip.io aaaa +short",
|
||||
`\A2600:1f18:aaf:6900::a\n\z`,
|
||||
`TypeAAAA 2601-646-100-69f7-cafe-bebe-cafe-baba.sslip.io. \? 2600:1f18:aaf:6900::a\n$`),
|
||||
`TypeAAAA 1234--1.sslip.io. \? 2600:1f18:aaf:6900::a\n$`),
|
||||
)
|
||||
})
|
||||
When("it can't bind to any UDP port", func() {
|
||||
|
@@ -232,7 +232,7 @@ dig @ns-ovh.nip.io version.status.nip.io txt +short
|
||||
<pre>
|
||||
dig @ns-ovh.nip.io metrics.status.nip.io txt +short
|
||||
"Uptime: 1168705"
|
||||
"Blocklist: 2025-07-22 04:30:18-07 3,722"
|
||||
"Blocklist: 2025-07-22 04:30:18-07 3,722,2"
|
||||
"Queries: 2619971786 (2241.8/s)"
|
||||
"TCP/UDP: 2949450/2617181176"
|
||||
"Answer > 0: 934226491 (799.4/s)"
|
||||
@@ -250,11 +250,11 @@ dig @ns-ovh.nip.io version.status.nip.io txt +short
|
||||
<dd>The time since the DNS server has been started, in seconds</dd>
|
||||
<dt>Blocklist</dt>
|
||||
<dd>
|
||||
The first value ("2023-10-04 07:37:50-07") is the date the blocklist was last downloaded. The following two
|
||||
numbers are the number of string matches that are blocked (e.g. "raiffeisen" is a string that is blocked if
|
||||
it appears in the queried hostname) and the number of CIDR matches that are blocked (e.g. "43.134.66.67/24"
|
||||
is blocked). The blocklist can be found <a
|
||||
href="https://github.com/cunnie/sslip.io/blob/main/etc/blocklist.txt">here</a>
|
||||
The first value ("2025-07-22 04:30:18-07") is the date the blocklist was last downloaded. The following three
|
||||
numbers are the number of CIDR matches that are blocked (e.g. 86.106.104.0/24), the number of IP addresses
|
||||
that are blocked (e.g. 212.64.214.54), and the number of strings that are blocked (e.g. "raiffeisen" is a
|
||||
string that is blocked if it appears in the queried hostname). The blocklist can be found <a
|
||||
href="https://github.com/cunnie/sslip.io-blocklist/blob/main/blocklist.txt">here</a>
|
||||
</dd>
|
||||
<dt>Queries</dt>
|
||||
<dd>This consists of two numbers: The first is the raw number of DNS queries that the server has responded to
|
||||
|
BIN
profile/cpu-cidr.prof
Normal file
BIN
profile/cpu-cidr.prof
Normal file
Binary file not shown.
BIN
profile/cpu-ip.prof
Normal file
BIN
profile/cpu-ip.prof
Normal file
Binary file not shown.
72
xip/xip.go
72
xip/xip.go
@@ -28,8 +28,9 @@ import (
|
||||
type Xip struct {
|
||||
DnsAmplificationAttackDelay chan struct{} // for throttling metrics.status.sslip.io
|
||||
Metrics Metrics // DNS server metrics
|
||||
BlocklistStrings []string // list of blacklisted strings that shouldn't appear in public hostnames
|
||||
BlocklistCIDRs []net.IPNet // list of blacklisted CIDRs; no A/AAAA records should resolve to IPs in these CIDRs
|
||||
BlocklistCIDRs []net.IPNet // list of blocked CIDRs; no A/AAAA records should resolve to IPs in these CIDRs
|
||||
BlocklistIPs map[string]struct{} // list of blocked IPs; no A/AAAA records should resolve to these IPs
|
||||
BlocklistStrings []string // list of blocked strings that shouldn't appear in public hostnames
|
||||
BlocklistUpdated time.Time // The most recent time the Blocklist was updated
|
||||
NameServers []dnsmessage.NSResource // The list of authoritative name servers (NS)
|
||||
Public bool // Whether to resolve public IPs; set to false if security-conscious
|
||||
@@ -1076,10 +1077,12 @@ func TXTMetrics(x *Xip, _ net.IP) (txtResources []dnsmessage.TXTResource, err er
|
||||
var metrics []string
|
||||
uptime := time.Since(x.Metrics.Start)
|
||||
metrics = append(metrics, fmt.Sprintf("Uptime: %.0f", uptime.Seconds()))
|
||||
metrics = append(metrics, fmt.Sprintf("Blocklist: %s %d,%d",
|
||||
metrics = append(metrics, fmt.Sprintf("Blocklist: %s %d,%d,%d",
|
||||
x.BlocklistUpdated.Format("2006-01-02 15:04:05-07"),
|
||||
len(x.BlocklistCIDRs),
|
||||
len(x.BlocklistIPs),
|
||||
len(x.BlocklistStrings),
|
||||
len(x.BlocklistCIDRs)))
|
||||
))
|
||||
metrics = append(metrics, fmt.Sprintf("Queries: %d (%.1f/s)", x.Metrics.Queries, float64(x.Metrics.Queries)/uptime.Seconds()))
|
||||
metrics = append(metrics, fmt.Sprintf("TCP/UDP: %d/%d", x.Metrics.TCPQueries, x.Metrics.UDPQueries))
|
||||
metrics = append(metrics, fmt.Sprintf("Answer > 0: %d (%.1f/s)", x.Metrics.AnsweredQueries, float64(x.Metrics.AnsweredQueries)/uptime.Seconds()))
|
||||
@@ -1152,21 +1155,25 @@ func (x *Xip) downloadBlockList(blocklistURL string) string {
|
||||
return fmt.Sprintf(`failed to download blocklist "%s", HTTP status: "%d"`, blocklistURL, resp.StatusCode)
|
||||
}
|
||||
}
|
||||
blocklistStrings, blocklistCIDRs, err := ReadBlocklist(blocklistReader)
|
||||
blocklistCIDRs, blocklistIPs, blocklistStrings, err := ReadBlocklist(blocklistReader)
|
||||
if err != nil {
|
||||
return fmt.Sprintf(`failed to parse blocklist "%s": %s`, blocklistURL, err.Error())
|
||||
}
|
||||
x.BlocklistStrings = blocklistStrings
|
||||
x.BlocklistCIDRs = blocklistCIDRs
|
||||
x.BlocklistIPs = blocklistIPs
|
||||
x.BlocklistStrings = blocklistStrings
|
||||
x.BlocklistUpdated = time.Now()
|
||||
return fmt.Sprintf("Successfully downloaded blocklist from %s: %v, %v", blocklistURL, x.BlocklistStrings, x.BlocklistCIDRs)
|
||||
return fmt.Sprintf("Successfully downloaded blocklist from %s: %v, %v, %v", blocklistURL, x.BlocklistCIDRs, x.BlocklistIPs, x.BlocklistStrings)
|
||||
}
|
||||
|
||||
// ReadBlocklist "sanitizes" the block list, removing comments, invalid characters
|
||||
// and lowercasing the names to be blocked.
|
||||
// public to make testing easier
|
||||
func ReadBlocklist(blocklist io.Reader) (stringBlocklists []string, cidrBlocklists []net.IPNet, err error) {
|
||||
func ReadBlocklist(blocklist io.Reader) (blocklistCIDRs []net.IPNet, blocklistIPs map[string]struct{}, blocklistStrings []string, err error) {
|
||||
scanner := bufio.NewScanner(blocklist)
|
||||
blocklistCIDRs = []net.IPNet{}
|
||||
blocklistIPs = make(map[string]struct{})
|
||||
blocklistStrings = []string{}
|
||||
comments := regexp.MustCompile(`#.*`)
|
||||
invalidDNSchars := regexp.MustCompile(`[^-\da-z]`)
|
||||
invalidDNScharsWithSlashesDotsAndColons := regexp.MustCompile(`[^-_\da-z/.:]`)
|
||||
@@ -1177,20 +1184,42 @@ func ReadBlocklist(blocklist io.Reader) (stringBlocklists []string, cidrBlocklis
|
||||
line = comments.ReplaceAllString(line, "") // strip comments
|
||||
line = invalidDNScharsWithSlashesDotsAndColons.ReplaceAllString(line, "") // strip invalid characters
|
||||
_, ipcidr, err := net.ParseCIDR(line)
|
||||
if err != nil {
|
||||
line = invalidDNSchars.ReplaceAllString(line, "") // strip invalid DNS characters
|
||||
if line == "" {
|
||||
if err == nil {
|
||||
// Previously we blocked by CIDRs, not IPs, but that was flawed:
|
||||
// of the 746 CIDRs, 744 of them were /32 — in other words, IP
|
||||
// addresses. And matching CIDRs is computationally expensive:
|
||||
// consuming 0.25s of 2.21s of xip.QueryResponse() -> 11%,
|
||||
// so we use a string-indexed map instead
|
||||
//
|
||||
// All blocked sites are IPv4. I have never gotten a takedown for an IPv6 site.
|
||||
if ipcidr.IP.To4() != nil && ipcidr.Mask.String() == "ffffffff" {
|
||||
blocklistIPs[ipcidr.IP.String()] = struct{}{}
|
||||
continue
|
||||
}
|
||||
stringBlocklists = append(stringBlocklists, line)
|
||||
} else {
|
||||
cidrBlocklists = append(cidrBlocklists, *ipcidr)
|
||||
// We still need CIDRs though, especially for poorly-secured WordPress
|
||||
// hosting sites like Valkyrie Hosting, where we block entire subnets
|
||||
blocklistCIDRs = append(blocklistCIDRs, *ipcidr)
|
||||
continue
|
||||
}
|
||||
// it's not a CIDR; is it an IP?
|
||||
// we convert the IP to a string because we can't use net.IP as a map index
|
||||
ip := net.ParseIP(line)
|
||||
if ip != nil {
|
||||
blocklistIPs[ip.String()] = struct{}{}
|
||||
continue
|
||||
}
|
||||
// it's not a CIDR or IP; is it a string?
|
||||
line = invalidDNSchars.ReplaceAllString(line, "") // strip [/.:]
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
// it's a string
|
||||
blocklistStrings = append(blocklistStrings, line)
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return []string{}, []net.IPNet{}, err
|
||||
return []net.IPNet{}, map[string]struct{}{}, []string{}, err
|
||||
}
|
||||
return stringBlocklists, cidrBlocklists, nil
|
||||
return blocklistCIDRs, blocklistIPs, blocklistStrings, nil
|
||||
}
|
||||
|
||||
func (x *Xip) blocklist(hostname string) bool {
|
||||
@@ -1212,13 +1241,16 @@ func (x *Xip) blocklist(hostname string) bool {
|
||||
if ip.IsPrivate() {
|
||||
return false
|
||||
}
|
||||
for _, blockstring := range x.BlocklistStrings {
|
||||
if strings.Contains(hostname, blockstring) {
|
||||
for _, blockCIDR := range x.BlocklistCIDRs {
|
||||
if blockCIDR.Contains(ip) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, blockCIDR := range x.BlocklistCIDRs {
|
||||
if blockCIDR.Contains(ip) {
|
||||
if _, exists := x.BlocklistIPs[ip.String()]; exists {
|
||||
return true
|
||||
}
|
||||
for _, blockstring := range x.BlocklistStrings {
|
||||
if strings.Contains(hostname, blockstring) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@@ -514,47 +514,69 @@ var _ = Describe("Xip", func() {
|
||||
Describe("ReadBlocklist()", func() {
|
||||
It("strips comments", func() {
|
||||
input := strings.NewReader("# a comment\n#another comment\nno-comments\n")
|
||||
bls, blIPs, err := xip.ReadBlocklist(input)
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(bls).To(Equal([]string{"no-comments"}))
|
||||
Expect(blIPs).To(BeNil())
|
||||
Expect(len(blCIDRs)).To(BeZero())
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{}))
|
||||
Expect(blStrings).To(Equal([]string{"no-comments"}))
|
||||
})
|
||||
It("strips blank lines", func() {
|
||||
input := strings.NewReader("\n\n\nno-blank-lines")
|
||||
bls, blIPs, err := xip.ReadBlocklist(input)
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(bls).To(Equal([]string{"no-blank-lines"}))
|
||||
Expect(blIPs).To(BeNil())
|
||||
Expect(len(blCIDRs)).To(BeZero())
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{}))
|
||||
Expect(blStrings).To(Equal([]string{"no-blank-lines"}))
|
||||
})
|
||||
It("lowercases names for comparison", func() {
|
||||
input := strings.NewReader("NO-YELLING")
|
||||
bls, blIPs, err := xip.ReadBlocklist(input)
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(bls).To(Equal([]string{"no-yelling"}))
|
||||
Expect(blIPs).To(BeNil())
|
||||
Expect(len(blCIDRs)).To(BeZero())
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{}))
|
||||
Expect(blStrings).To(Equal([]string{"no-yelling"}))
|
||||
})
|
||||
It("removes all non-allowable characters", func() {
|
||||
input := strings.NewReader("\nalpha #comment # comment\nåß∂ # comment # comment\ndelta∆\n ... GAMMA∑µ®† ...#asdfasdf#asdfasdf")
|
||||
bls, blIPs, err := xip.ReadBlocklist(input)
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(bls).To(Equal([]string{"alpha", "delta", "gamma"}))
|
||||
Expect(blIPs).To(BeNil())
|
||||
Expect(len(blCIDRs)).To(BeZero())
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{}))
|
||||
Expect(blStrings).To(Equal([]string{"alpha", "delta", "gamma"}))
|
||||
})
|
||||
It("reads in IPv4 CIDRs", func() {
|
||||
input := strings.NewReader("\n43.134.66.67/24 #asdfasdf")
|
||||
bls, blIPs, err := xip.ReadBlocklist(input)
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(bls).To(BeNil())
|
||||
Expect(blIPs).To(Equal([]net.IPNet{{IP: net.IP{43, 134, 66, 0}, Mask: net.IPMask{255, 255, 255, 0}}}))
|
||||
Expect(blCIDRs).To(Equal([]net.IPNet{{IP: net.IP{43, 134, 66, 0}, Mask: net.IPMask{255, 255, 255, 0}}}))
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{}))
|
||||
Expect(len(blStrings)).To(BeZero())
|
||||
})
|
||||
It("reads in IPv4 CIDRs, but with a /32 converts it to an IP address", func() {
|
||||
input := strings.NewReader("\n43.134.66.67/32 #asdfasdf")
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(blCIDRs)).To(BeZero())
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{"43.134.66.67": {}}))
|
||||
Expect(len(blStrings)).To(BeZero())
|
||||
})
|
||||
It("reads in IPv6 CIDRs", func() {
|
||||
input := strings.NewReader("\n 2600::/64 #asdfasdf")
|
||||
bls, blIPs, err := xip.ReadBlocklist(input)
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(bls).To(BeNil())
|
||||
Expect(blIPs).To(Equal([]net.IPNet{
|
||||
Expect(blCIDRs).To(Equal([]net.IPNet{
|
||||
{IP: net.IP{38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
Mask: net.IPMask{255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0}}}))
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{}))
|
||||
Expect(len(blStrings)).To(BeZero())
|
||||
})
|
||||
It("reads in IPv4 IP addresses (but not IPv6)", func() {
|
||||
input := strings.NewReader("\n 104.155.144.4 #asdfasdf")
|
||||
blCIDRs, blIPs, blStrings, err := xip.ReadBlocklist(input)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(blCIDRs)).To(BeZero())
|
||||
Expect(blIPs).To(Equal(map[string]struct{}{"104.155.144.4": {}}))
|
||||
Expect(len(blStrings)).To(BeZero())
|
||||
})
|
||||
})
|
||||
|
||||
|
Reference in New Issue
Block a user