Files
garble/position.go
Paul Scheduikat 926f3de60d obfuscate all names used in reflection
Go code can retrieve and use field and method names via the `reflect` package.
For that reason, historically we did not obfuscate names of fields and methods
underneath types that we detected as used for reflection, via e.g. `reflect.TypeOf`.

However, that caused a number of issues. Since we obfuscate and build one package
at a time, we could only detect when types were used for reflection in their own package
or in upstream packages. Use of reflection in downstream packages would be detected
too late, causing one package to obfuscate the names and the other not to, leading to a build failure.

A different approach is implemented here. All names are obfuscated now, but we collect
those types used for reflection, and at the end of a build in `package main`,
we inject a function into the runtime's `internal/abi` package to reverse the obfuscation
for those names which can be used for reflection.

This does mean that the obfuscation for these names is very weak, as the binary
contains a one-to-one mapping to their original names, but they cannot be obfuscated
without breaking too many Go packages out in the wild. There is also some amount
of overhead in `internal/abi` due to this, but we aim to make the overhead insignificant.

Fixes #884, #799, #817, #881, #858, #843, #842

Closes #406
2024-11-27 22:38:43 +01:00

148 lines
4.8 KiB
Go

// Copyright (c) 2020, The Garble Authors.
// See LICENSE for licensing information.
package main
import (
"bytes"
"fmt"
"go/ast"
"go/printer"
"go/scanner"
"go/token"
"path/filepath"
"strings"
)
// printBuf1 and printBuf2 are scratch buffers used by printFile.
// printFile resets each one before writing to it and returns a slice of its
// contents, so a previous result is only valid until the next printFile call.
var printBuf1, printBuf2 bytes.Buffer
// printFile prints a Go file to a buffer, while also removing non-directive
// comments and adding extra compiler directives to obfuscate position information.
//
// If lpkg.ToObfuscate is false, the file is printed and returned as-is,
// without removing comments or inserting any line directives.
//
// The returned slice is backed by the package-level printBuf1 or printBuf2,
// so it is only valid until the next call to printFile.
func printFile(lpkg *listedPackage, file *ast.File) ([]byte, error) {
	if lpkg.ToObfuscate {
		// Omit comments from the final Go code.
		// Keep directives, as they affect the build.
		// We do this before printing to print fewer bytes below.
		var newComments []*ast.CommentGroup
		for _, group := range file.Comments {
			var newGroup ast.CommentGroup
			for _, comment := range group.List {
				if strings.HasPrefix(comment.Text, "//go:") {
					newGroup.List = append(newGroup.List, comment)
				}
			}
			if len(newGroup.List) > 0 {
				newComments = append(newComments, &newGroup)
			}
		}
		file.Comments = newComments
	}

	printBuf1.Reset()
	printConfig := printer.Config{Mode: printer.RawFormat}
	if err := printConfig.Fprint(&printBuf1, fset, file); err != nil {
		return nil, err
	}
	src := printBuf1.Bytes()

	if !lpkg.ToObfuscate {
		// We lightly transform packages which shouldn't be obfuscated,
		// such as when rewriting go:linkname directives to obfuscated packages.
		// We still need to print the files, but without obfuscating positions.
		return src, nil
	}

	fsetFile := fset.File(file.Pos())
	filename := filepath.Base(fsetFile.Name())
	// A "_cgo_" filename prefix is carried over into the line directives below.
	newPrefix := ""
	if strings.HasPrefix(filename, "_cgo_") {
		newPrefix = "_cgo_"
	}

	// Many parts of garble, notably the literal obfuscator, modify the AST.
	// Unfortunately, comments are free-floating in File.Comments,
	// and those are the only source of truth that go/printer uses.
	// So the positions of the comments in the given file are wrong.
	// The only way we can get the final ones is to tokenize again.
	// Using go/scanner is slightly awkward, but cheaper than parsing again.

	// We want to use the original positions for the hashed positions.
	// Since later we'll iterate on tokens rather than walking an AST,
	// we use a list of offsets indexed by identifiers in source order.
	// An entry of -1 means the identifier does not start a call expression.
	var origCallOffsets []int
	nextOffset := -1
	ast.Inspect(file, func(node ast.Node) bool {
		switch node := node.(type) {
		case *ast.CallExpr:
			nextOffset = fsetFile.Position(node.Pos()).Offset
		case *ast.Ident:
			if node.Name == "." {
				// Dot imports are represented as an identifier named ".",
				// but go/scanner tokenizes that as token.PERIOD rather than
				// token.IDENT. Recording an entry for it would shift every
				// later identifier token onto its predecessor's offset.
				return true
			}
			origCallOffsets = append(origCallOffsets, nextOffset)
			nextOffset = -1
		}
		return true
	})

	copied := 0
	printBuf2.Reset()

	// Make sure the entire file gets a zero filename by default,
	// in case we miss any positions below.
	// We use a //-style comment, because there might be build tags.
	fmt.Fprintf(&printBuf2, "//line %s:1\n", newPrefix)

	// We use an empty filename when tokenizing below.
	// We use a nil go/scanner.ErrorHandler because src comes from go/printer.
	// Syntax errors should be rare, and when they do happen,
	// we don't want to point to the original source file on disk.
	// That would be confusing, as we've changed the source in memory.
	var s scanner.Scanner
	fsetFile = fset.AddFile("", fset.Base(), len(src))
	s.Init(fsetFile, src, nil, scanner.ScanComments)

	// identIndex counts the identifier tokens seen so far,
	// and must advance in lockstep with the entries in origCallOffsets.
	identIndex := 0
	for {
		pos, tok, lit := s.Scan()
		switch tok {
		case token.EOF:
			// Copy the rest and return.
			printBuf2.Write(src[copied:])
			return printBuf2.Bytes(), nil
		case token.COMMENT:
			// Omit comments from the final Go code, again.
			// Before we removed the comments from file.Comments,
			// but go/printer also grabs comments from some Doc ast.Node fields.
			// TODO: is there an easy way to filter all comments at once?
			if strings.HasPrefix(lit, "//go:") {
				continue // directives are kept
			}
			// Copy everything up to the comment, then skip over its text.
			offset := fsetFile.Position(pos).Offset
			printBuf2.Write(src[copied:offset])
			copied = offset + len(lit)
		case token.IDENT:
			origOffset := origCallOffsets[identIndex]
			identIndex++
			if origOffset == -1 {
				continue // identifiers which don't start func calls are left untouched
			}
			newName := ""
			if !flagTiny {
				origPos := fmt.Sprintf("%s:%d", filename, origOffset)
				newName = hashWithPackage(lpkg, origPos) + ".go"
				// log.Printf("%q hashed with %x to %q", origPos, curPkg.GarbleActionID, newName)
			}

			// Copy everything up to the identifier; the identifier itself
			// is copied later, after the line directive inserted below.
			offset := fsetFile.Position(pos).Offset
			printBuf2.Write(src[copied:offset])
			copied = offset

			// We use the "/*text*/" form, since we can use multiple of them
			// on a single line, and they don't require extra newlines.
			// Make sure there is whitespace at either side of a comment.
			// Otherwise, we could change the syntax of the program.
			// Inserting "/*text*/" in "a/b" must be "a/ /*text*/ b",
			// as "a//*text*/b" is tokenized as a "//" comment.
			fmt.Fprintf(&printBuf2, " /*line %s%s:1*/ ", newPrefix, newName)
		}
	}
}