mirror of
https://github.com/burrowers/garble.git
synced 2025-09-26 20:01:16 +08:00

Go code can retrieve and use field and method names via the `reflect` package. For that reason, historically we did not obfuscate names of fields and methods underneath types that we detected as used for reflection, via e.g. `reflect.TypeOf`. However, that caused a number of issues. Since we obfuscate and build one package at a time, we could only detect when types were used for reflection in their own package or in upstream packages. Use of reflection in downstream packages would be detected too late, causing one package to obfuscate the names and the other not to, leading to a build failure. A different approach is implemented here. All names are obfuscated now, but we collect those types used for reflection, and at the end of a build in `package main`, we inject a function into the runtime's `internal/abi` package to reverse the obfuscation for those names which can be used for reflection. This does mean that the obfuscation for these names is very weak, as the binary contains a one-to-one mapping to their original names, but they cannot be obfuscated without breaking too many Go packages out in the wild. There is also some amount of overhead in `internal/abi` due to this, but we aim to make the overhead insignificant. Fixes #884, #799, #817, #881, #858, #843, #842 Closes #406
414 lines
14 KiB
Go
414 lines
14 KiB
Go
// Copyright (c) 2019, The Garble Authors.
|
|
// See LICENSE for licensing information.
|
|
|
|
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/sha256"
|
|
"encoding/base64"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"go/token"
|
|
"go/types"
|
|
"io"
|
|
"os/exec"
|
|
"strings"
|
|
|
|
"mvdan.cc/garble/internal/literals"
|
|
)
|
|
|
|
// buildIDSeparator is the string that joins the individual hashes of a build ID,
// such as "actionID/contentID".
const buildIDSeparator = "/"

// splitActionID returns the action ID half of a build ID, the first hash.
//
// A build ID without any separator is returned unchanged,
// rather than panicking on an out-of-range slice index.
func splitActionID(buildID string) string {
	// strings.Cut yields the whole input as "before" when the separator is absent.
	actionID, _, _ := strings.Cut(buildID, buildIDSeparator)
	return actionID
}
|
|
|
|
// splitContentID returns the content ID half of a build ID, the last hash.
|
|
func splitContentID(buildID string) string {
|
|
return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:]
|
|
}
|
|
|
|
// buildIDHashLength is the number of bytes each build ID hash takes,
// such as an action ID or a content ID.
const buildIDHashLength = 15

// decodeBuildIDHash decodes a build ID hash in base64, just like cmd/go does.
//
// It panics if str is not valid unpadded URL-safe base64,
// or if the decoded hash is not exactly buildIDHashLength bytes long.
func decodeBuildIDHash(str string) []byte {
	decoded, decodeErr := base64.RawURLEncoding.DecodeString(str)
	switch {
	case decodeErr != nil:
		panic(fmt.Sprintf("invalid hash %q: %v", str, decodeErr))
	case len(decoded) != buildIDHashLength:
		panic(fmt.Sprintf("decodeBuildIDHash expects to result in a hash of length %d, got %d", buildIDHashLength, len(decoded)))
	}
	return decoded
}
|
|
|
|
// encodeBuildIDHash encodes a build ID hash in base64, just like cmd/go does.
|
|
func encodeBuildIDHash(h [sha256.Size]byte) string {
|
|
return base64.RawURLEncoding.EncodeToString(h[:buildIDHashLength])
|
|
}
|
|
|
|
func alterToolVersion(tool string, args []string) error {
|
|
cmd := exec.Command(args[0], args[1:]...)
|
|
out, err := cmd.Output()
|
|
if err != nil {
|
|
if err, _ := err.(*exec.ExitError); err != nil {
|
|
return fmt.Errorf("%v: %s", err, err.Stderr)
|
|
}
|
|
return err
|
|
}
|
|
line := string(bytes.TrimSpace(out)) // no trailing newline
|
|
f := strings.Fields(line)
|
|
if len(f) < 3 || f[0] != tool || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") {
|
|
return fmt.Errorf("%s -V=full: unexpected output:\n\t%s", args[0], line)
|
|
}
|
|
var toolID []byte
|
|
if f[2] == "devel" {
|
|
// On the development branch, use the content ID part of the build ID.
|
|
toolID = decodeBuildIDHash(splitContentID(f[len(f)-1]))
|
|
} else {
|
|
// For a release, the output is like: "compile version go1.9.1 X:framepointer".
|
|
// Use the whole line, as we can assume it's unique.
|
|
toolID = []byte(line)
|
|
}
|
|
|
|
contentID := addGarbleToHash(toolID)
|
|
// The part of the build ID that matters is the last, since it's the
|
|
// "content ID" which is used to work out whether there is a need to redo
|
|
// the action (build) or not. Since cmd/go parses the last word in the
|
|
// output as "buildID=...", we simply add "+garble buildID=_/_/_/${hash}".
|
|
// The slashes let us imitate a full binary build ID, but we assume that
|
|
// the other hashes such as the action ID are not necessary, since the
|
|
// only reader here is cmd/go and it only consumes the content ID.
|
|
fmt.Printf("%s +garble buildID=_/_/_/%s\n", line, encodeBuildIDHash(contentID))
|
|
return nil
|
|
}
|
|
|
|
var (
|
|
hasher = sha256.New()
|
|
sumBuffer [sha256.Size]byte
|
|
)
|
|
|
|
// addGarbleToHash takes some arbitrary input bytes,
|
|
// typically a hash such as an action ID or a content ID,
|
|
// and returns a new hash which also contains garble's own deterministic inputs.
|
|
//
|
|
// This includes garble's own version, obtained via its own binary's content ID,
|
|
// as well as any other options which affect a build, such as GOGARBLE and -tiny.
|
|
func addGarbleToHash(inputHash []byte) [sha256.Size]byte {
|
|
// Join the two content IDs together into a single base64-encoded sha256
|
|
// sum. This includes the original tool's content ID, and garble's own
|
|
// content ID.
|
|
hasher.Reset()
|
|
hasher.Write(inputHash)
|
|
if len(sharedCache.BinaryContentID) == 0 {
|
|
panic("missing binary content ID")
|
|
}
|
|
hasher.Write(sharedCache.BinaryContentID)
|
|
|
|
// We also need to add the selected options to the full version string,
|
|
// because all of them result in different output. We use spaces to
|
|
// separate the env vars and flags, to reduce the chances of collisions.
|
|
fmt.Fprintf(hasher, " GOGARBLE=%s", sharedCache.GOGARBLE)
|
|
appendFlags(hasher, true)
|
|
// addGarbleToHash returns the sum buffer, so we need a new copy.
|
|
// Otherwise the next use of the global sumBuffer would conflict.
|
|
var sumBuffer [sha256.Size]byte
|
|
hasher.Sum(sumBuffer[:0])
|
|
return sumBuffer
|
|
}
|
|
|
|
// appendFlags writes garble's own flags to w in string form.
|
|
// Errors are ignored, as w is always a buffer or hasher.
|
|
// If forBuildHash is set, only the flags affecting a build are written.
|
|
func appendFlags(w io.Writer, forBuildHash bool) {
|
|
if flagLiterals {
|
|
io.WriteString(w, " -literals")
|
|
}
|
|
if flagTiny {
|
|
io.WriteString(w, " -tiny")
|
|
}
|
|
if flagDebug && !forBuildHash {
|
|
// -debug doesn't affect the build result at all,
|
|
// so don't give it separate entries in the build cache.
|
|
// If the user really wants to see debug info for already built deps,
|
|
// they can use "go clean cache" or the "-a" build flag to rebuild.
|
|
io.WriteString(w, " -debug")
|
|
}
|
|
if flagDebugDir != "" && !forBuildHash {
|
|
// -debugdir is a bit special.
|
|
//
|
|
// When passing down flags via -toolexec,
|
|
// we do want the actual flag value to be kept.
|
|
//
|
|
// For build hashes, we can skip the flag entirely,
|
|
// as it doesn't affect obfuscation at all.
|
|
//
|
|
// TODO: in the future, we could avoid using the -a build flag
|
|
// by using "-debugdir=yes" here, and caching the obfuscated source.
|
|
// Incremental builds would recover the cached source
|
|
// to repopulate the output directory if it was removed.
|
|
io.WriteString(w, " -debugdir=")
|
|
io.WriteString(w, flagDebugDir)
|
|
}
|
|
if flagSeed.present() {
|
|
io.WriteString(w, " -seed=")
|
|
io.WriteString(w, flagSeed.String())
|
|
}
|
|
if flagControlFlow && forBuildHash {
|
|
io.WriteString(w, " -ctrlflow")
|
|
}
|
|
if literals.TestObfuscator != "" && forBuildHash {
|
|
io.WriteString(w, literals.TestObfuscator)
|
|
}
|
|
}
|
|
|
|
// buildidOf runs "go tool buildid" on path and returns its standard output as is.
// If the tool exits with a failure, its stderr is included in the returned error.
func buildidOf(path string) (string, error) {
	out, err := exec.Command("go", "tool", "buildid", path).Output()
	if err == nil {
		return string(out), nil
	}
	if exitErr, ok := err.(*exec.ExitError); ok && exitErr != nil {
		return "", fmt.Errorf("%v: %s", exitErr, exitErr.Stderr)
	}
	return "", err
}
|
|
|
|
var (
	// nameBase64 is the encoding used for hashed names.
	//
	// Go names can only be letters, numbers, and underscores,
	// so base64's URL alphabet fits, except for '-',
	// which is later replaced with a duplicate 'a'.
	// Such a lossy encoding is fine, since we never decode hashes.
	// No padding is needed either, as we take a short prefix anyway.
	nameBase64 = base64.RawURLEncoding

	// b64NameBuffer holds the encoded form of a hashed name.
	b64NameBuffer [12]byte // nameBase64.EncodedLen(neededSumBytes) = 12
)
|
|
|
|
// The helpers below mirror part of the unicode package API,
// but work on single bytes, since base64 output is all ASCII.

// isDigit reports whether b is an ASCII decimal digit.
func isDigit(b byte) bool {
	return b >= '0' && b <= '9'
}

// isLower reports whether b is an ASCII lowercase letter.
func isLower(b byte) bool {
	return b >= 'a' && b <= 'z'
}

// isUpper reports whether b is an ASCII uppercase letter.
func isUpper(b byte) bool {
	return b >= 'A' && b <= 'Z'
}

// toLower shifts b up by the distance between 'A' and 'a'.
// It does not check that b is an uppercase letter.
func toLower(b byte) byte {
	return b + 'a' - 'A'
}

// toUpper shifts b down by the distance between 'A' and 'a'.
// It does not check that b is a lowercase letter.
func toUpper(b byte) byte {
	return b - 'a' + 'A'
}
|
|
|
|
func runtimeHashWithCustomSalt(salt []byte) uint32 {
|
|
hasher.Reset()
|
|
if !flagSeed.present() {
|
|
hasher.Write(sharedCache.ListedPackages["runtime"].GarbleActionID[:])
|
|
} else {
|
|
hasher.Write(flagSeed.bytes)
|
|
}
|
|
hasher.Write(salt)
|
|
sum := hasher.Sum(sumBuffer[:0])
|
|
return binary.LittleEndian.Uint32(sum)
|
|
}
|
|
|
|
// magicValue returns random magic value based
|
|
// on user specified seed or the runtime package's GarbleActionID.
|
|
func magicValue() uint32 {
|
|
return runtimeHashWithCustomSalt([]byte("magic"))
|
|
}
|
|
|
|
// entryOffKey returns random entry offset key
|
|
// on user specified seed or the runtime package's GarbleActionID.
|
|
func entryOffKey() uint32 {
|
|
return runtimeHashWithCustomSalt([]byte("entryOffKey"))
|
|
}
|
|
|
|
func hashWithPackage(pkg *listedPackage, name string) string {
|
|
// If the user provided us with an obfuscation seed,
|
|
// we use that with the package import path directly..
|
|
// Otherwise, we use GarbleActionID as a fallback salt.
|
|
if !flagSeed.present() {
|
|
return hashWithCustomSalt(pkg.GarbleActionID[:], name)
|
|
}
|
|
// Use a separator at the end of ImportPath as a salt,
|
|
// to ensure that "pkgfoo.bar" and "pkg.foobar" don't both hash
|
|
// as the same string "pkgfoobar".
|
|
return hashWithCustomSalt([]byte(pkg.ImportPath+"|"), name)
|
|
}
|
|
|
|
// stripStructTags takes the bytes produced by [types.WriteType]
// and removes any struct tags in-place, such as rewriting
//
//	struct{Foo int; Bar string "json:\"bar\""}
//
// into
//
//	struct{Foo int; Bar string}
//
// Note that, unlike most Go source, WriteType uses double quotes for tags.
//
// The returned slice shares its backing array with p, whose contents are
// overwritten. Malformed input does not cause a panic: a tag which is never
// closed is stripped up to the end of p, and a tag at the very start of p
// simply has no preceding byte to drop.
//
// Reusing WriteType does require a second pass over its output here,
// which we could save by implementing our own modified version of WriteType.
// However, that would be a significant amount of code to maintain.
func stripStructTags(p []byte) []byte {
	// Compact the kept bytes towards the front of p in a single pass.
	// The write position never overtakes the read position,
	// so no byte is overwritten before it has been read.
	kept := p[:0]
	for i := 0; i < len(p); {
		b := p[i]
		i++
		if b != '"' {
			kept = append(kept, b)
			continue
		}
		// A struct tag is preceded by a space, which has to go as well.
		if n := len(kept); n > 0 {
			kept = kept[:n-1]
		}
		// Find the closing double quote, skipping over escaped characters.
		// Note that we should probably iterate over runes and not bytes,
		// but this byte implementation is probably good enough in practice.
		for i < len(p) {
			b = p[i]
			i++
			if b == '\\' {
				i++ // skip whichever byte is being escaped
			} else if b == '"' {
				break
			}
		}
	}
	return kept
}
|
|
|
|
var typeIdentityBuf bytes.Buffer
|
|
|
|
// hashWithStruct is separate from hashWithPackage since Go
|
|
// allows converting between struct types across packages.
|
|
// Hashing struct field names differently between packages would break that.
|
|
//
|
|
// We hash field names with the identity struct type as a salt
|
|
// so that the same field name used in different struct types is obfuscated differently.
|
|
// Note that "identity" means omitting struct tags since conversions ignore them.
|
|
func hashWithStruct(strct *types.Struct, field *types.Var) string {
|
|
typeIdentityBuf.Reset()
|
|
types.WriteType(&typeIdentityBuf, strct, nil)
|
|
salt := stripStructTags(typeIdentityBuf.Bytes())
|
|
|
|
// If the user provided us with an obfuscation seed,
|
|
// we only use the identity struct type as a salt.
|
|
// Otherwise, we add garble's own inputs to the salt as a fallback.
|
|
if !flagSeed.present() {
|
|
withGarbleHash := addGarbleToHash(salt)
|
|
salt = withGarbleHash[:]
|
|
}
|
|
return hashWithCustomSalt(salt, field.Name())
|
|
}
|
|
|
|
// minHashLength and maxHashLength bound the number of base64 characters
// used for the final hashed name.
//
// minHashLength must be long enough to realistically avoid hash collisions,
// while maxHashLength must be short enough to not bloat binary sizes.
// The namespace for collisions is generally a single package, since
// that's where most hashed names are namespaced to.
//
// Using a "hash collision" formula, and taking a generous estimate of a
// package having 10k names, we get the probabilities below.
// Most packages will have far fewer names, but some packages are huge,
// especially generated ones.
//
// In practice we have slightly fewer bits, since '-' is rewritten to 'a',
// so the charset has 'a' twice, and the first base64 char is coerced
// into a valid Go identifier. So we must be conservative.
// Remember that base64 stores 6 bits per encoded byte.
// The probability numbers are approximated.
//
//	length (base64) | length (bits) | collision probability
//	-------------------------------------------------------
//	       4               24                   ~95%
//	       5               30                    ~4%
//	       6               36                 ~0.07%
//	       7               42                ~0.001%
//	       8               48              ~0.00001%
//
// We want collisions to be practically impossible, so the hashed names end up
// with lengths spread roughly evenly between 6 and 12. Naively, this results in an
// average length of 9, which has a chance well below 1 in a million even when a
// package has thousands of obfuscated names.
//
// These numbers are also chosen to keep obfuscated binary sizes reasonable.
// For example, increasing the average length of 9 by 1 results in roughly a 1%
// increase in binary sizes.
const (
	minHashLength = 6
	maxHashLength = 12

	// neededSumBytes is how many checksum bytes are needed to produce
	// maxHashLength base64 characters at 6 bits each, rounded up to whole bytes.
	// For a maxHashLength of 12, that is 9 bytes.
	neededSumBytes = (maxHashLength*6 + 7) / 8
)
|
|
|
|
// hashWithCustomSalt returns a hashed version of name,
|
|
// including the provided salt as well as opts.Seed into the hash input.
|
|
//
|
|
// The result is always four bytes long. If the input was a valid identifier,
|
|
// the output remains equally exported or unexported. Note that this process is
|
|
// reproducible, but not reversible.
|
|
func hashWithCustomSalt(salt []byte, name string) string {
|
|
if len(salt) == 0 {
|
|
panic("hashWithCustomSalt: empty salt")
|
|
}
|
|
if name == "" {
|
|
panic("hashWithCustomSalt: empty name")
|
|
}
|
|
|
|
hasher.Reset()
|
|
hasher.Write(salt)
|
|
hasher.Write(flagSeed.bytes)
|
|
io.WriteString(hasher, name)
|
|
sum := hasher.Sum(sumBuffer[:0])
|
|
|
|
// The byte after neededSumBytes is never used as part of the name,
|
|
// but it is still deterministic and hard to predict,
|
|
// so it provides us with useful randomness between 0 and 255.
|
|
// We want the number to be between 0 and hashLenthRange-1 as well,
|
|
// so we use a remainder operation.
|
|
hashLengthRandomness := sum[neededSumBytes] % ((maxHashLength - minHashLength) + 1)
|
|
hashLength := minHashLength + hashLengthRandomness
|
|
|
|
nameBase64.Encode(b64NameBuffer[:], sum[:neededSumBytes])
|
|
b64Name := b64NameBuffer[:hashLength]
|
|
|
|
// Even if we are hashing a package path, which is not an identifier,
|
|
// we still want the result to be a valid identifier,
|
|
// since we'll use it as the package name too.
|
|
if isDigit(b64Name[0]) {
|
|
// Turn "3foo" into "Dfoo".
|
|
// Similar to toLower, since uppercase letters go after digits
|
|
// in the ASCII table.
|
|
b64Name[0] += 'A' - '0'
|
|
}
|
|
for i, b := range b64Name {
|
|
if b == '-' { // URL encoding uses dashes, which aren't valid
|
|
b64Name[i] = 'a'
|
|
}
|
|
}
|
|
// Valid identifiers should stay exported or unexported.
|
|
if token.IsIdentifier(name) {
|
|
if token.IsExported(name) {
|
|
if b64Name[0] == '_' {
|
|
// Turn "_foo" into "Zfoo".
|
|
b64Name[0] = 'Z'
|
|
} else if isLower(b64Name[0]) {
|
|
// Turn "afoo" into "Afoo".
|
|
b64Name[0] = toUpper(b64Name[0])
|
|
}
|
|
} else if isUpper(b64Name[0]) {
|
|
// Turn "Afoo" into "afoo".
|
|
b64Name[0] = toLower(b64Name[0])
|
|
}
|
|
}
|
|
return string(b64Name)
|
|
}
|