feat: add pprof

This commit is contained in:
langhuihui
2024-12-16 20:06:39 +08:00
parent c1616740ec
commit b3a3e37429
220 changed files with 36494 additions and 56 deletions

View File

@@ -0,0 +1,238 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package binutils
import (
"bufio"
"fmt"
"io"
"os/exec"
"strconv"
"strings"
"sync"
"m7s.live/v5/plugin/debug/pkg/internal/plugin"
)
const (
// defaultAddr2line is the command newAddr2Liner runs when the caller
// passes an empty command name.
defaultAddr2line = "addr2line"
// addr2line may produce multiple lines of output. We
// use this sentinel to identify the end of the output: it is written as
// an extra query after every real address, and the hex address echoed
// back for it marks the end of the previous answer.
sentinel = ^uint64(0)
)
// addr2Liner is a connection to an addr2line command for obtaining
// address and line number information from a binary.
type addr2Liner struct {
// mu serializes each write/read exchange with the underlying process.
mu sync.Mutex
// rw is the line-oriented channel to the addr2line process.
rw lineReaderWriter
// base is subtracted from every address before it is sent to addr2line.
base uint64
// nm holds an addr2Liner using nm tool. Certain versions of addr2line
// produce incomplete names due to
// https://sourceware.org/bugzilla/show_bug.cgi?id=17541. As a workaround,
// the names from nm are used when they look more complete. See addrInfo()
// code below for the exact heuristic.
nm *addr2LinerNM
}
// lineReaderWriter is an interface to abstract the I/O to an addr2line
// process. It writes a line of input to the job, and reads its output
// one line at a time.
type lineReaderWriter interface {
// write sends one line of input to the job.
write(string) error
// readLine returns the next line of output from the job.
readLine() (string, error)
// close releases the resources held by the job.
close()
}
// addr2LinerJob is the lineReaderWriter backed by a running addr2line
// process.
type addr2LinerJob struct {
// cmd is the started addr2line process.
cmd *exec.Cmd
// in is the write end of the process's standard input.
in io.WriteCloser
// out buffers the process's standard output for line-based reads.
out *bufio.Reader
}
// write sends s to the addr2line process as one newline-terminated line.
func (a *addr2LinerJob) write(s string) error {
	if _, err := fmt.Fprintln(a.in, s); err != nil {
		return err
	}
	return nil
}
// readLine returns the next line of addr2line output with surrounding
// whitespace removed. On a read error it returns an empty string and the
// error, discarding any partial line.
func (a *addr2LinerJob) readLine() (string, error) {
	line, err := a.out.ReadString('\n')
	if err == nil {
		return strings.TrimSpace(line), nil
	}
	return "", err
}
// close releases any resources used by the addr2liner object: it closes the
// process's standard input and then waits for the process to exit. Errors
// from either step are deliberately discarded.
func (a *addr2LinerJob) close() {
	_ = a.in.Close()
	_ = a.cmd.Wait()
}
// newAddr2Liner starts the given addr2line command (defaultAddr2line when
// cmd is empty) reporting information about the given executable file. If
// file is a shared library, base should be the address at which it was
// mapped in the program under consideration.
//
// It returns an error if the pipes cannot be created or the process cannot
// be started.
func newAddr2Liner(cmd, file string, base uint64) (*addr2Liner, error) {
	if cmd == "" {
		cmd = defaultAddr2line
	}
	proc := exec.Command(cmd, "-aif", "-e", file)

	stdin, err := proc.StdinPipe()
	if err != nil {
		return nil, err
	}
	stdout, err := proc.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err := proc.Start(); err != nil {
		return nil, err
	}

	job := &addr2LinerJob{
		cmd: proc,
		in:  stdin,
		out: bufio.NewReader(stdout),
	}
	return &addr2Liner{rw: job, base: base}, nil
}
// readFrame parses the addr2line output for a single frame: one line with
// the function name followed by one line with "file:line". It returns the
// populated plugin.Frame and whether the end of the data for the current
// address has been reached (a read error or the sentinel's echoed address).
func (d *addr2Liner) readFrame() (plugin.Frame, bool) {
	name, err := d.rw.readLine()
	if err != nil {
		return plugin.Frame{}, true
	}
	if strings.HasPrefix(name, "0x") {
		// A hex address here is the echo of the sentinel query. Its two
		// answer lines carry no information, so read and drop them.
		for n := 0; n < 2; n++ {
			d.rw.readLine()
		}
		return plugin.Frame{}, true
	}

	loc, err := d.rw.readLine()
	if err != nil {
		return plugin.Frame{}, true
	}

	// "??" and "??:0" are addr2line's placeholders for unknown values.
	if name == "??" {
		name = ""
	}
	var lineNo int
	if loc == "??:0" {
		loc = ""
	} else if colon := strings.LastIndex(loc, ":"); colon >= 0 {
		// Strip a trailing discriminator annotation, if any.
		if cut := strings.Index(loc, " (discriminator"); cut > 0 {
			loc = loc[:cut]
		}
		// Only split off the suffix when it is a number; otherwise the
		// text after the last ":" stays part of the file name.
		if n, convErr := strconv.Atoi(loc[colon+1:]); convErr == nil {
			lineNo, loc = n, loc[:colon]
		}
	}

	return plugin.Frame{Func: name, File: loc, Line: lineNo}, false
}
// rawAddrInfo queries addr2line for addr (relative to d.base) and returns the
// non-empty frames it reports. The exchange is performed under d.mu. A
// sentinel address is sent right after the real query so that the end of
// the answer can be recognized.
func (d *addr2Liner) rawAddrInfo(addr uint64) ([]plugin.Frame, error) {
	d.mu.Lock()
	defer d.mu.Unlock()

	for _, query := range []uint64{addr - d.base, sentinel} {
		if err := d.rw.write(fmt.Sprintf("%x", query)); err != nil {
			return nil, err
		}
	}

	// The first line of the answer must be the echoed address.
	header, err := d.rw.readLine()
	if err != nil {
		return nil, err
	}
	if !strings.HasPrefix(header, "0x") {
		return nil, fmt.Errorf("unexpected addr2line output: %s", header)
	}

	var frames []plugin.Frame
	for frame, done := d.readFrame(); !done; frame, done = d.readFrame() {
		if frame == (plugin.Frame{}) {
			continue
		}
		frames = append(frames, frame)
	}
	return frames, nil
}
// addrInfo returns the stack frame information for a specific program
// address. It returns nil if the address could not be identified.
//
// Certain versions of addr2line produce incomplete names due to
// https://sourceware.org/bugzilla/show_bug.cgi?id=17541, so when an nm-based
// symbolizer is attached its name may replace the one from addr2line.
func (d *addr2Liner) addrInfo(addr uint64) ([]plugin.Frame, error) {
	frames, err := d.rawAddrInfo(addr)
	if err != nil {
		return nil, err
	}
	if len(frames) == 0 || d.nm == nil {
		return frames, nil
	}

	syms, err := d.nm.addrInfo(addr)
	if err != nil || len(syms) == 0 {
		return frames, nil
	}

	// The last frame is the non-inlined one, so it is the one nm describes.
	// Heuristic: take the nm name only when it is at least two characters
	// longer. A one-character difference is ignored so that foo is not
	// replaced with _foo on MacOS, where addr2line produces the former and
	// nm the latter even though both are asked for mangled names.
	last := &frames[len(frames)-1]
	if candidate := syms[len(syms)-1].Func; len(candidate) > len(last.Func)+1 {
		last.Func = candidate
	}
	return frames, nil
}

View File

@@ -0,0 +1,184 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package binutils
import (
"bufio"
"encoding/json"
"fmt"
"io"
"os/exec"
"strconv"
"strings"
"sync"
"m7s.live/v5/plugin/debug/pkg/internal/plugin"
)
const (
// defaultLLVMSymbolizer is the command newLLVMSymbolizer runs when the
// caller passes an empty command name.
defaultLLVMSymbolizer = "llvm-symbolizer"
)
// llvmSymbolizer is a connection to an llvm-symbolizer command for
// obtaining address and line number information from a binary.
type llvmSymbolizer struct {
// The embedded mutex serializes each write/read exchange in addrInfo.
sync.Mutex
// filename is the binary named in every query sent to the process.
filename string
// rw is the line-oriented channel to the llvm-symbolizer process.
rw lineReaderWriter
// base is subtracted from every address before it is queried.
base uint64
// isData selects DATA (true) or CODE (false) answer parsing.
isData bool
}
// llvmSymbolizerJob is the lineReaderWriter backed by a running
// llvm-symbolizer process.
type llvmSymbolizerJob struct {
// cmd is the started llvm-symbolizer process.
cmd *exec.Cmd
// in is the write end of the process's standard input.
in io.WriteCloser
// out buffers the process's standard output for line-based reads.
out *bufio.Reader
// llvm-symbolizer requires the symbol type, CODE or DATA, for symbolization.
symType string
}
// write sends one query line to llvm-symbolizer: the symbol type, a space,
// then s, terminated by a newline.
func (a *llvmSymbolizerJob) write(s string) error {
	if _, err := fmt.Fprintf(a.in, "%s %s\n", a.symType, s); err != nil {
		return err
	}
	return nil
}
// readLine returns the next line of llvm-symbolizer output with surrounding
// whitespace removed. On a read error it returns an empty string and the
// error, discarding any partial line.
func (a *llvmSymbolizerJob) readLine() (string, error) {
	line, err := a.out.ReadString('\n')
	if err == nil {
		return strings.TrimSpace(line), nil
	}
	return "", err
}
// close releases any resources used by the llvmSymbolizer object: it closes
// the process's standard input and then waits for the process to exit.
// Errors from either step are deliberately discarded.
func (a *llvmSymbolizerJob) close() {
	_ = a.in.Close()
	_ = a.cmd.Wait()
}
// newLLVMSymbolizer starts the given llvm-symbolizer command
// (defaultLLVMSymbolizer when cmd is empty) for answering queries about the
// given executable file. If file is a shared library, base should be the
// address at which it was mapped in the program under consideration. isData
// selects DATA rather than CODE symbolization.
//
// It returns an error if the pipes cannot be created or the process cannot
// be started.
func newLLVMSymbolizer(cmd, file string, base uint64, isData bool) (*llvmSymbolizer, error) {
	if cmd == "" {
		cmd = defaultLLVMSymbolizer
	}
	symType := "CODE"
	if isData {
		symType = "DATA"
	}

	proc := exec.Command(cmd, "--inlining", "-demangle=false", "--output-style=JSON")
	stdin, err := proc.StdinPipe()
	if err != nil {
		return nil, err
	}
	stdout, err := proc.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err := proc.Start(); err != nil {
		return nil, err
	}

	job := &llvmSymbolizerJob{
		cmd:     proc,
		in:      stdin,
		out:     bufio.NewReader(stdout),
		symType: symType,
	}
	return &llvmSymbolizer{
		filename: file,
		rw:       job,
		base:     base,
		isData:   isData,
	}, nil
}
// readDataFrames parses the llvm-symbolizer DATA output for a single address.
// It reads one line, decodes it as JSON and returns a plugin.Frame slice with
// a single entry, or an error if reading, decoding or size parsing fails.
func (d *llvmSymbolizer) readDataFrames() ([]plugin.Frame, error) {
	raw, err := d.rw.readLine()
	if err != nil {
		return nil, err
	}

	var reply struct {
		Address    string `json:"Address"`
		ModuleName string `json:"ModuleName"`
		Data       struct {
			Start string `json:"Start"`
			Size  string `json:"Size"`
			Name  string `json:"Name"`
		} `json:"Data"`
	}
	if err := json.Unmarshal([]byte(raw), &reply); err != nil {
		return nil, err
	}

	// Mirror the non-JSON output format: the start and the size (as a
	// decimal value) are stuffed into the file name of the single frame.
	size, err := strconv.ParseInt(reply.Data.Size, 0, 0)
	if err != nil {
		return nil, err
	}
	location := fmt.Sprintf("%s %d", reply.Data.Start, size)
	return []plugin.Frame{{Func: reply.Data.Name, File: location}}, nil
}
// readCodeFrames parses the llvm-symbolizer CODE output for a single address.
// It reads one line, decodes it as JSON and returns one plugin.Frame per
// entry of the "Symbol" array (nil when the array is empty).
func (d *llvmSymbolizer) readCodeFrames() ([]plugin.Frame, error) {
	raw, err := d.rw.readLine()
	if err != nil {
		return nil, err
	}

	var reply struct {
		Address    string `json:"Address"`
		ModuleName string `json:"ModuleName"`
		Symbol     []struct {
			Line         int    `json:"Line"`
			Column       int    `json:"Column"`
			FunctionName string `json:"FunctionName"`
			FileName     string `json:"FileName"`
			StartLine    int    `json:"StartLine"`
		} `json:"Symbol"`
	}
	if err := json.Unmarshal([]byte(raw), &reply); err != nil {
		return nil, err
	}

	var frames []plugin.Frame
	for i := range reply.Symbol {
		sym := &reply.Symbol[i]
		frames = append(frames, plugin.Frame{
			Func:      sym.FunctionName,
			File:      sym.FileName,
			Line:      sym.Line,
			Column:    sym.Column,
			StartLine: sym.StartLine,
		})
	}
	return frames, nil
}
// addrInfo returns the stack frame information for a specific program
// address. The query (file name plus base-relative hex address) and the
// reading of its answer happen while holding the symbolizer's lock.
func (d *llvmSymbolizer) addrInfo(addr uint64) ([]plugin.Frame, error) {
	d.Lock()
	defer d.Unlock()

	query := fmt.Sprintf("%s 0x%x", d.filename, addr-d.base)
	if err := d.rw.write(query); err != nil {
		return nil, err
	}
	if !d.isData {
		return d.readCodeFrames()
	}
	return d.readDataFrames()
}

View File

@@ -0,0 +1,144 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package binutils
import (
"bufio"
"bytes"
"io"
"os/exec"
"strconv"
"strings"
"m7s.live/v5/plugin/debug/pkg/internal/plugin"
)
const (
// defaultNM is the command newAddr2LinerNM runs when the caller passes an
// empty command name.
defaultNM = "nm"
)
// addr2LinerNM holds the symbol table obtained from one run of an nm
// command, used for obtaining symbol information from a binary.
type addr2LinerNM struct {
m []symbolInfo // Sorted list of symbol addresses from binary.
}
// symbolInfo is one symbol parsed from nm output.
type symbolInfo struct {
	address uint64 // symbol address, already offset by the mapping base
	size    uint64 // symbol size as reported by nm
	name    string // symbol name
	symType string // nm symbol type column
}

// isData returns if the symbol has a known data object symbol type.
func (s *symbolInfo) isData() bool {
	// Symbol types taken from https://linux.die.net/man/1/nm. A lowercase
	// letter means a local symbol, an uppercase one a global symbol:
	//   b, B: uninitialized data section, e.g. .bss;
	//   d, D: initialized data section;
	//   r, R: read only data section;
	//   v, V: weak object;
	//   W:    weak symbol not specifically tagged as a weak object symbol.
	//         Experiments with some binaries showed these to be mostly
	//         data objects.
	const dataSymbolTypes = "bBdDrRvVW"
	return strings.IndexAny(s.symType, dataSymbolTypes) >= 0
}
// newAddr2LinerNM runs the given nm command (defaultNM when cmd is empty) on
// the given executable file and parses its output into a symbol table. If
// file is a shared library, base should be the address at which it was
// mapped in the program under consideration.
func newAddr2LinerNM(cmd, file string, base uint64) (*addr2LinerNM, error) {
	if cmd == "" {
		cmd = defaultNM
	}
	listing := new(bytes.Buffer)
	proc := exec.Command(cmd, "--numeric-sort", "--print-size", "--format=posix", file)
	proc.Stdout = listing
	if err := proc.Run(); err != nil {
		return nil, err
	}
	return parseAddr2LinerNM(base, listing)
}
// parseAddr2LinerNM builds a symbol table from nm output read from nm. Each
// accepted line has exactly four space-separated fields: name, type, hex
// address and hex size. Lines that do not parse are skipped. base is added
// to every symbol address. A read error other than io.EOF is returned.
func parseAddr2LinerNM(base uint64, nm io.Reader) (*addr2LinerNM, error) {
	table := &addr2LinerNM{m: []symbolInfo{}}
	rd := bufio.NewReader(nm)
	for {
		raw, readErr := rd.ReadString('\n')
		if raw == "" && readErr != nil {
			if readErr != io.EOF {
				return nil, readErr
			}
			return table, nil
		}

		cols := strings.Split(strings.TrimSpace(raw), " ")
		if len(cols) != 4 {
			continue
		}
		addr, err := strconv.ParseUint(cols[2], 16, 64)
		if err != nil {
			continue
		}
		sz, err := strconv.ParseUint(cols[3], 16, 64)
		if err != nil {
			continue
		}
		table.m = append(table.m, symbolInfo{
			address: addr + base,
			size:    sz,
			name:    cols[0],
			symType: cols[1],
		})
	}
}
// addrInfo returns the stack frame information for a specific program
// address: a single frame carrying the name of the symbol that covers addr.
// It returns nil if the address could not be identified.
func (a *addr2LinerNM) addrInfo(addr uint64) ([]plugin.Frame, error) {
	n := len(a.m)
	if n == 0 {
		return nil, nil
	}
	// Reject addresses outside the span covered by the symbol table.
	first, last := &a.m[0], &a.m[n-1]
	if addr < first.address || addr >= last.address+last.size {
		return nil, nil
	}

	// Binary search until lo and hi are adjacent; lo ends up at the last
	// symbol whose address is <= addr (or at an exact match).
	lo, hi := 0, n
	for lo+1 < hi {
		mid := (lo + hi) / 2
		pivot := a.m[mid].address
		if addr < pivot {
			hi = mid
			continue
		}
		lo = mid
		if addr == pivot {
			break
		}
	}

	// a.m[lo] stands for the range up to the next symbol. Data symbols get
	// a strict check that addr lies within [address, address+size).
	sym := &a.m[lo]
	if sym.isData() && addr >= sym.address+sym.size {
		return nil, nil
	}
	return []plugin.Frame{{Func: sym.name}}, nil
}

View File

@@ -0,0 +1,736 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package binutils provides access to the GNU binutils.
package binutils
import (
"debug/elf"
"debug/macho"
"debug/pe"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"m7s.live/v5/plugin/debug/pkg/internal/elfexec"
"m7s.live/v5/plugin/debug/pkg/internal/plugin"
)
// A Binutils implements plugin.ObjTool by invoking the GNU binutils.
type Binutils struct {
// mu guards rep.
mu sync.Mutex
// rep is the current tool configuration; it is created on first use and
// replaced as a whole on every update.
rep *binrep
}
var (
// objdumpLLVMVerRE matches the LLVM version in `objdump --version` output.
// Groups 1-3 capture the major, minor and patch numbers; group 4 captures
// "trunk" for unversioned builds.
objdumpLLVMVerRE = regexp.MustCompile(`LLVM version (?:(\d*)\.(\d*)\.(\d*)|.*(trunk).*)`)
// Defined for testing
elfOpen = elf.Open
)
// binrep is an immutable representation for Binutils. It is atomically
// replaced on every mutation to provide thread-safe access.
type binrep struct {
// Commands to invoke. Each xxxFound flag records whether the matching
// tool was located by initTools.
llvmSymbolizer string
llvmSymbolizerFound bool
addr2line string
addr2lineFound bool
nm string
nmFound bool
objdump string
objdumpFound bool
// isLLVMObjdump reports whether objdump is an LLVM objdump rather than a
// GNU one; the two take different flags for Intel syntax.
isLLVMObjdump bool
// if fast, perform symbolization using nm (symbol names only),
// instead of file-line detail from the slower addr2line.
fast bool
}
// get returns the current representation for bu, initializing it with the
// default tool lookup if necessary.
func (bu *Binutils) get() *binrep {
	bu.mu.Lock()
	defer bu.mu.Unlock()
	if bu.rep == nil {
		fresh := &binrep{}
		initTools(fresh, "")
		bu.rep = fresh
	}
	return bu.rep
}
// update modifies the rep for bu via the supplied function. fn is applied to
// a copy of the current representation (or to a freshly initialized one),
// which then replaces the current one.
func (bu *Binutils) update(fn func(r *binrep)) {
	next := new(binrep)

	bu.mu.Lock()
	defer bu.mu.Unlock()

	if cur := bu.rep; cur != nil {
		*next = *cur
	} else {
		initTools(next, "")
	}
	fn(next)
	bu.rep = next
}
// String returns string representation of the binutils state for debug
// logging. A tool that was not found is shown as an empty path.
func (bu *Binutils) String() string {
	r := bu.get()
	shown := func(path string, found bool) string {
		if !found {
			return ""
		}
		return path
	}
	return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
		shown(r.llvmSymbolizer, r.llvmSymbolizerFound),
		shown(r.addr2line, r.addr2lineFound),
		shown(r.nm, r.nmFound),
		shown(r.objdump, r.objdumpFound),
		r.fast)
}
// SetFastSymbolization sets a toggle that makes binutils use fast
// symbolization (using nm), which is much faster than addr2line but
// provides only symbol name information (no file/line).
func (bu *Binutils) SetFastSymbolization(fast bool) {
	setFast := func(r *binrep) {
		r.fast = fast
	}
	bu.update(setFast)
}
// SetTools processes the contents of the tools option. It expects a set of
// entries separated by commas; each entry is a pair of the form t:path,
// where path will be used to look only for the tool named t. If t is not
// specified, the path is searched for all tools.
func (bu *Binutils) SetTools(config string) {
	relocate := func(r *binrep) {
		initTools(r, config)
	}
	bu.update(relocate)
}
// initTools locates the external tools and records their paths and "found"
// flags in b. config is a comma-separated list of entries, each either
// "tool:path" (a search path for that tool only) or a bare path (a search
// path for every tool).
func initTools(b *binrep, config string) {
	// paths collects search paths per tool; key "" holds the defaults.
	paths := make(map[string][]string)
	for _, entry := range strings.Split(config, ",") {
		tool, dir := "", entry
		if sep := strings.Index(entry, ":"); sep >= 0 {
			tool, dir = entry[:sep], entry[sep+1:]
		}
		paths[tool] = append(paths[tool], dir)
	}
	fallback := paths[""]
	// Tool-specific paths are searched before the default ones.
	searchPaths := func(tool string) []string {
		return append(paths[tool], fallback...)
	}

	b.llvmSymbolizer, b.llvmSymbolizerFound = chooseExe([]string{"llvm-symbolizer"}, []string{}, searchPaths("llvm-symbolizer"))
	b.addr2line, b.addr2lineFound = chooseExe([]string{"addr2line"}, []string{"gaddr2line"}, searchPaths("addr2line"))
	// The "-n" option is supported by LLVM since 2011. The output of llvm-nm
	// and GNU nm with the "-n" option is interchangeable for our purposes,
	// so we do not need to differentiate them.
	b.nm, b.nmFound = chooseExe([]string{"llvm-nm", "nm"}, []string{"gnm"}, searchPaths("nm"))
	b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(searchPaths("objdump"))
}
// findObjdump finds and returns path to preferred objdump binary.
// Order of preference is: llvm-objdump, objdump.
// On MacOS only, also looks for gobjdump with least preference.
// A candidate is accepted only if running it with --version succeeds and the
// output identifies an acceptable LLVM objdump or a GNU objdump.
//
// Accepts a list of paths and returns:
//
//	a string with path to the preferred objdump binary if found,
//	or an empty string if not found;
//	a boolean if any acceptable objdump was found;
//	a boolean indicating if it is an LLVM objdump.
func findObjdump(paths []string) (string, bool, bool) {
	candidates := []string{"llvm-objdump", "objdump"}
	if runtime.GOOS == "darwin" {
		candidates = append(candidates, "gobjdump")
	}
	for _, name := range candidates {
		bin, ok := findExe(name, paths)
		if !ok {
			continue
		}
		verOut, err := exec.Command(bin, "--version").Output()
		if err != nil {
			continue
		}
		switch version := string(verOut); {
		case isLLVMObjdump(version):
			return bin, true, true
		case isBuObjdump(version):
			return bin, true, false
		}
	}
	return "", false, false
}
// chooseExe finds and returns path to preferred binary. names is a list of
// names to search on both Linux and OSX. osxNames is a list of names specific
// to OSX. names always has a higher priority than osxNames. The order of
// the name within each list decides its priority (e.g. the first name has a
// higher priority than the second name in the list).
//
// It returns a string with path to the binary and a boolean indicating if any
// acceptable binary was found.
func chooseExe(names, osxNames []string, paths []string) (string, bool) {
	candidates := names
	if runtime.GOOS == "darwin" {
		candidates = append(candidates, osxNames...)
	}
	for i := range candidates {
		if path, ok := findExe(candidates[i], paths); ok {
			return path, true
		}
	}
	return "", false
}
// isLLVMObjdump accepts the output of `objdump --version` and returns a
// boolean indicating if it describes an LLVM objdump binary of an acceptable
// version. "trunk" builds are always accepted.
func isLLVMObjdump(output string) bool {
	m := objdumpLLVMVerRE.FindStringSubmatch(output)
	if len(m) != 5 {
		return false
	}
	if m[4] == "trunk" {
		return true
	}
	major, err := strconv.Atoi(m[1])
	if err != nil {
		return false
	}
	patch, err := strconv.Atoi(m[3])
	if err != nil {
		return false
	}
	switch runtime.GOOS {
	case "linux":
		// Require at least version 8.0 on Linux. Some flags, like
		// --demangle, and double dashes for options are not supported by
		// previous versions.
		return major >= 8
	case "darwin":
		// Require at least version 10.0.1 on MacOS.
		return major > 10 || (major == 10 && patch >= 1)
	default:
		return false
	}
}
// isBuObjdump accepts the output of `objdump --version` and returns a
// boolean indicating if it describes a GNU binutils objdump binary. No
// version check is performed.
func isBuObjdump(output string) bool {
	const marker = "GNU objdump"
	return strings.Index(output, marker) >= 0
}
// findExe looks for an executable command on a set of paths, trying them in
// order. It returns the resolved path and true for the first hit. If it
// cannot find it, returns cmd and false.
func findExe(cmd string, paths []string) (string, bool) {
	for i := range paths {
		resolved, err := exec.LookPath(filepath.Join(paths[i], cmd))
		if err != nil {
			continue
		}
		return resolved, true
	}
	return cmd, false
}
// Disasm returns the assembly instructions for the specified address range
// of a binary, obtained by running the configured objdump and parsing its
// output. intelSyntax requests Intel rather than the default assembly syntax.
// It fails if no objdump tool is available or if objdump exits with an error.
func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
	tools := bu.get()
	if !tools.objdumpFound {
		return nil, errors.New("cannot disasm: no objdump tool available")
	}

	args := []string{
		"--disassemble",
		"--demangle",
		"--no-show-raw-insn",
		"--line-numbers",
		fmt.Sprintf("--start-address=%#x", start),
		fmt.Sprintf("--stop-address=%#x", end),
	}
	// LLVM and GNU objdump spell the Intel-syntax option differently.
	switch {
	case !intelSyntax:
	case tools.isLLVMObjdump:
		args = append(args, "--x86-asm-syntax=intel")
	default:
		args = append(args, "-M", "intel")
	}
	args = append(args, file)

	cmd := exec.Command(tools.objdump, args...)
	listing, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("%v: %v", cmd.Args, err)
	}
	return disassemble(listing)
}
// Open satisfies the plugin.ObjTool interface. It sniffs the first four
// bytes of the file called name and dispatches to the ELF, Mach-O, fat
// Mach-O or PE opener; any other content is reported as an unrecognized
// binary format.
func (bu *Binutils) Open(name string, start, limit, offset uint64, relocationSymbol string) (plugin.ObjFile, error) {
	b := bu.get()

	// Make sure file is a supported executable.
	// This uses magic numbers, mainly to provide better error messages but
	// it should also help speed.
	if _, statErr := os.Stat(name); statErr != nil {
		// For testing, do not require file name to exist.
		if strings.Contains(b.addr2line, "testdata/") {
			return &fileAddr2Line{file: file{b: b, name: name}}, nil
		}
		return nil, statErr
	}

	// Read the first 4 bytes of the file.
	fh, err := os.Open(name)
	if err != nil {
		return nil, fmt.Errorf("error opening %s: %v", name, err)
	}
	defer fh.Close()

	var magic [4]byte
	if _, err = io.ReadFull(fh, magic[:]); err != nil {
		return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
	}

	// Mach-O magic numbers can be big or little endian.
	le := binary.LittleEndian.Uint32(magic[:])
	be := binary.BigEndian.Uint32(magic[:])

	// Match against supported file types.
	switch {
	case string(magic[:]) == elf.ELFMAG:
		obj, err := b.openELF(name, start, limit, offset, relocationSymbol)
		if err != nil {
			return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
		}
		return obj, nil

	case le == macho.Magic32 || le == macho.Magic64 || be == macho.Magic32 || be == macho.Magic64:
		obj, err := b.openMachO(name, start, limit, offset)
		if err != nil {
			return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
		}
		return obj, nil

	case le == macho.MagicFat || be == macho.MagicFat:
		obj, err := b.openFatMachO(name, start, limit, offset)
		if err != nil {
			return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
		}
		return obj, nil

	case string(magic[:2]) == "MZ":
		obj, err := b.openPE(name, start, limit, offset)
		if err != nil {
			return nil, fmt.Errorf("error reading PE file %s: %v", name, err)
		}
		return obj, nil
	}

	return nil, fmt.Errorf("unrecognized binary format: %s", name)
}
// openMachOCommon builds the object file for an already parsed Mach-O image.
// The base is the mapping start minus the address of the __TEXT segment; it
// is an error for that segment to be missing or to lie above start. An
// nm-backed file is returned in fast mode or when neither addr2line nor
// llvm-symbolizer was found; otherwise an addr2line-backed file is returned.
func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
	// Subtract the load address of the __TEXT section. Usually 0 for shared
	// libraries or 0x100000000 for executables. You can check this value by
	// running `objdump -private-headers <file>`.
	text := of.Segment("__TEXT")
	switch {
	case text == nil:
		return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
	case text.Addr > start:
		return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
			name, text.Addr, start)
	}

	base := start - text.Addr
	useNM := b.fast || !(b.addr2lineFound || b.llvmSymbolizerFound)
	if useNM {
		return &fileNM{file: file{b: b, name: name, base: base}}, nil
	}
	return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
}
// openFatMachO opens the fat (multi-architecture) Mach-O file called name
// and returns the object file for the slice that matches the host
// architecture. It fails if the file cannot be parsed, contains no
// architectures, the host architecture is unsupported, or no matching slice
// is present.
func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
	of, err := macho.OpenFat(name)
	if err != nil {
		return nil, fmt.Errorf("error parsing %s: %v", name, err)
	}
	defer of.Close()

	if len(of.Arches) == 0 {
		return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
	}

	// Use the host architecture.
	// TODO: This is not ideal because the host architecture may not be the one
	// that was profiled. E.g. an amd64 host can profile a 386 program.
	//
	// wanted lists the acceptable CPU types in order of preference.
	var wanted []macho.Cpu
	switch runtime.GOARCH {
	case "386":
		wanted = []macho.Cpu{macho.Cpu386}
	case "amd64", "amd64p32":
		wanted = []macho.Cpu{macho.CpuAmd64}
	case "arm", "armbe":
		wanted = []macho.Cpu{macho.CpuArm}
	case "arm64", "arm64be":
		// 64-bit ARM slices are tagged CpuArm64, not CpuArm. The 32-bit
		// slice is kept as a fallback so that files accepted before this
		// distinction was made are still accepted.
		wanted = []macho.Cpu{macho.CpuArm64, macho.CpuArm}
	case "ppc":
		wanted = []macho.Cpu{macho.CpuPpc}
	case "ppc64", "ppc64le":
		wanted = []macho.Cpu{macho.CpuPpc64}
	default:
		return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
	}

	for _, cpu := range wanted {
		for i := range of.Arches {
			if of.Arches[i].Cpu == cpu {
				return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
			}
		}
	}
	return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
}
// openMachO parses the single-architecture Mach-O file called name and
// builds its object file via openMachOCommon. The parsed file is closed
// before returning.
func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
	mf, openErr := macho.Open(name)
	if openErr != nil {
		return nil, fmt.Errorf("error parsing %s: %v", name, openErr)
	}
	defer mf.Close()

	return b.openMachOCommon(name, mf, start, limit, offset)
}
// openELF parses the ELF file called name, records its build ID when one can
// be read, and checks that a base can be computed for the mapping described
// by start, limit and offset. It returns an nm-backed file in fast mode or
// when neither addr2line nor llvm-symbolizer was found, and an
// addr2line-backed file otherwise. relocationSymbol names the kernel
// relocation symbol to look up; when empty, "_stext" is used.
func (b *binrep) openELF(name string, start, limit, offset uint64, relocationSymbol string) (plugin.ObjFile, error) {
ef, err := elfOpen(name)
if err != nil {
return nil, fmt.Errorf("error parsing %s: %v", name, err)
}
defer ef.Close()
// A missing or unreadable build ID is not an error; it is left empty.
buildID := ""
if id, err := elfexec.GetBuildID(ef); err == nil {
buildID = fmt.Sprintf("%x", id)
}
var (
kernelOffset *uint64
pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
)
if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
// Reading all Symbols is expensive, and we only rarely need it so
// we don't want to do it every time. But if _stext happens to be
// page-aligned but isn't the same as Vaddr, we would symbolize
// wrong. So if the addresses aren't page aligned, or if the name
// contains "vmlinux", we read _stext. We can be wrong if: (1)
// someone passes a kernel path that doesn't contain "vmlinux" AND
// (2) _stext is page-aligned AND (3) _stext is not at Vaddr
symbols, err := ef.Symbols()
if err != nil && err != elf.ErrNoSymbols {
return nil, err
}
// The kernel relocation symbol (the mapping start address) can be either
// _text or _stext. When profiles are generated by `perf`, which one was used is
// distinguished by the mapping name for the kernel image:
// '[kernel.kallsyms]_text' or '[kernel.kallsyms]_stext', respectively. If we haven't
// been able to parse it from the mapping, we default to _stext.
if relocationSymbol == "" {
relocationSymbol = "_stext"
}
for _, s := range symbols {
if s.Name == relocationSymbol {
// The loop stops right after taking this address, so the value it
// points to is not overwritten by a later iteration.
kernelOffset = &s.Value
break
}
}
}
// Check that we can compute a base for the binary. This may not be the
// correct base value, so we don't save it. We delay computing the actual base
// value until we have a sample address for this mapping, so that we can
// correctly identify the associated program segment that is needed to compute
// the base.
if _, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), kernelOffset, start, limit, offset); err != nil {
return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
}
if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
return &fileNM{file: file{
b: b,
name: name,
buildID: buildID,
m: &elfMapping{start: start, limit: limit, offset: offset, kernelOffset: kernelOffset},
}}, nil
}
return &fileAddr2Line{file: file{
b: b,
name: name,
buildID: buildID,
m: &elfMapping{start: start, limit: limit, offset: offset, kernelOffset: kernelOffset},
}}, nil
}
// openPE parses the PE file called name and builds its object file. The
// base is start minus the image base from the optional header, or zero when
// start is zero. An nm-backed file is returned in fast mode or when neither
// addr2line nor llvm-symbolizer was found; otherwise an addr2line-backed
// file is returned.
func (b *binrep) openPE(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
	pf, openErr := pe.Open(name)
	if openErr != nil {
		return nil, fmt.Errorf("error parsing %s: %v", name, openErr)
	}
	defer pf.Close()

	var imageBase uint64
	switch hdr := pf.OptionalHeader.(type) {
	case *pe.OptionalHeader32:
		imageBase = uint64(hdr.ImageBase)
	case *pe.OptionalHeader64:
		imageBase = uint64(hdr.ImageBase)
	default:
		return nil, fmt.Errorf("unknown OptionalHeader %T", pf.OptionalHeader)
	}

	base := uint64(0)
	if start > 0 {
		base = start - imageBase
	}

	useNM := b.fast || !(b.addr2lineFound || b.llvmSymbolizerFound)
	if useNM {
		return &fileNM{file: file{b: b, name: name, base: base}}, nil
	}
	return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
}
// elfMapping stores the parameters of a runtime mapping that are needed to
// identify the ELF segment associated with a mapping.
type elfMapping struct {
// Runtime mapping parameters: the mapping covers addresses in
// [start, limit) and begins at file offset offset.
start, limit, offset uint64
// Offset of kernel relocation symbol. Only defined for kernel images, nil otherwise.
kernelOffset *uint64
}
// findProgramHeader returns the program segment that matches the current
// mapping and the given address, or an error if it cannot find a unique
// program header. It returns a nil header and a nil error for files that
// have no loadable segments.
func (m *elfMapping) findProgramHeader(ef *elf.File, addr uint64) (*elf.ProgHeader, error) {
	// For user space executables, we try to find the actual program segment
	// that is associated with the given mapping. Skip this search if
	// limit <= start. We cannot use just a check on the start address of the
	// mapping to tell if it's a kernel / .ko module mapping, because with
	// quipper address remapping enabled, the address would be in the lower
	// half of the address space.
	kernelLike := m.kernelOffset != nil || m.start >= m.limit || m.limit >= (uint64(1)<<63)
	if kernelLike {
		// For the kernel, find the program segment that includes the .text
		// section.
		return elfexec.FindTextProgHeader(ef), nil
	}

	// Fetch all the loadable segments.
	var loadable []elf.ProgHeader
	for _, prog := range ef.Progs {
		if prog.Type == elf.PT_LOAD {
			loadable = append(loadable, prog.ProgHeader)
		}
	}
	// Some ELF files don't contain any loadable program segments, e.g. .ko
	// kernel modules. It's not an error to have no header in such cases.
	if len(loadable) == 0 {
		return nil, nil
	}

	// Get all program headers associated with the mapping.
	matches := elfexec.ProgramHeadersForMapping(loadable, m.offset, m.limit-m.start)
	switch len(matches) {
	case 0:
		return nil, errors.New("no program header matches mapping info")
	case 1:
		return matches[0], nil
	}

	// Several candidates: use the file offset corresponding to the address
	// to symbolize, to narrow down the header.
	return elfexec.HeaderForFileOffset(matches, addr-m.start+m.offset)
}
// file implements the binutils.ObjFile interface.
// It holds the state shared by the fileNM and fileAddr2Line types that embed
// it: the owning binrep, the identity of the binary, and the relocation base
// that is computed lazily on the first address lookup.
type file struct {
// b is the binrep whose tool paths (for example nm) are used by the methods.
b *binrep
// name is the path of the binary; it is returned by Name and passed to the tools.
name string
// buildID is the value returned by BuildID.
buildID string
baseOnce sync.Once // Ensures the base, baseErr and isData are computed once.
// base is subtracted from runtime addresses to obtain object addresses.
base uint64
baseErr error // Any eventual error while computing the base.
// isData is set by computeBase when the selected program header is not executable.
isData bool
// Mapping information. Relevant only for ELF files, nil otherwise.
m *elfMapping
}
// computeBase determines the relocation base of the binary, but only when
// mapping information (the m field) is present; otherwise it is a no-op. On
// success it stores the result in the base and isData fields. It returns an
// error when addr lies outside the mapping, when the ELF file cannot be
// opened, when no program header can be selected, or when the base cannot be
// derived.
func (f *file) computeBase(addr uint64) error {
	if f == nil || f.m == nil {
		return nil
	}
	mapping := f.m
	if inRange := addr >= mapping.start && addr < mapping.limit; !inRange {
		return fmt.Errorf("specified address %x is outside the mapping range [%x, %x] for file %q", addr, mapping.start, mapping.limit, f.name)
	}

	elfFile, openErr := elfOpen(f.name)
	if openErr != nil {
		return fmt.Errorf("error parsing %s: %v", f.name, openErr)
	}
	defer elfFile.Close()

	header, findErr := mapping.findProgramHeader(elfFile, addr)
	if findErr != nil {
		return fmt.Errorf("failed to find program header for file %q, ELF mapping %#v, address %x: %v", f.name, *mapping, addr, findErr)
	}

	relocBase, baseErr := elfexec.GetBase(&elfFile.FileHeader, header, mapping.kernelOffset, mapping.start, mapping.limit, mapping.offset)
	if baseErr != nil {
		return baseErr
	}
	f.base = relocBase
	// A selected segment without the executable flag is treated as data.
	f.isData = header != nil && header.Flags&elf.PF_X == 0
	return nil
}
// Name returns the name of the binary stored in f.
func (f *file) Name() string {
return f.name
}
// ObjAddr translates the runtime address addr into an object address by
// subtracting the relocation base. The base is computed at most once, using
// the address of the first lookup; if that computation failed, the same error
// is returned on every call.
func (f *file) ObjAddr(addr uint64) (uint64, error) {
	f.baseOnce.Do(func() {
		f.baseErr = f.computeBase(addr)
	})
	if err := f.baseErr; err != nil {
		return 0, err
	}
	return addr - f.base, nil
}
// BuildID returns the build ID stored in f.
func (f *file) BuildID() string {
return f.buildID
}
// SourceLine makes sure the relocation base has been computed and reports any
// error from that computation. The plain file type has no symbolizer, so on
// success it returns no frames and no error.
func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
	f.baseOnce.Do(func() {
		f.baseErr = f.computeBase(addr)
	})
	if err := f.baseErr; err != nil {
		return nil, err
	}
	return nil, nil
}
// Close is a no-op for the plain file type and always returns nil.
func (f *file) Close() error {
return nil
}
// Symbols runs the configured nm tool on the binary and hands its output,
// together with r and addr, to findSymbols. It returns an error that includes
// the command line when nm fails.
func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
	// "nm -n" produces the list of symbols sorted by address.
	nm := exec.Command(f.b.nm, "-n", f.name)
	listing, err := nm.Output()
	if err != nil {
		return nil, fmt.Errorf("%v: %v", nm.Args, err)
	}
	return findSymbols(listing, f.name, r, addr)
}
// fileNM implements the binutils.ObjFile interface, using 'nm' to map
// addresses to symbols (without file/line number information). It is
// faster than fileAddr2Line.
type fileNM struct {
file
// addr2linernm is created on the first SourceLine call and reused afterwards.
addr2linernm *addr2LinerNM
}
// SourceLine resolves addr through an nm-based symbolizer. The relocation
// base is computed once on first use, and the symbolizer itself is created on
// the first successful call and cached for later ones.
func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
	f.baseOnce.Do(func() {
		f.baseErr = f.computeBase(addr)
	})
	if err := f.baseErr; err != nil {
		return nil, err
	}
	if f.addr2linernm != nil {
		return f.addr2linernm.addrInfo(addr)
	}

	liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
	if err != nil {
		return nil, err
	}
	f.addr2linernm = liner
	return liner.addrInfo(addr)
}
// fileAddr2Line implements the binutils.ObjFile interface, using
// llvm-symbolizer, if that's available, or addr2line to map addresses to
// symbols (with file/line number information). It can be slow for large
// binaries with debug information.
type fileAddr2Line struct {
// once guards the one-time creation of the symbolizer in init.
once sync.Once
file
// addr2liner and llvmSymbolizer are populated by init; at most one of them
// is set, with llvmSymbolizer taking precedence.
addr2liner *addr2Liner
llvmSymbolizer *llvmSymbolizer
// NOTE(review): this field has the same name as the isData field of the
// embedded file. An unqualified f.isData on a fileAddr2Line selects this
// field, whereas computeBase (a method of file) writes file.isData.
isData bool
}
// SourceLine resolves addr to source frames. It computes the relocation base
// once, lazily creates the symbolizer, and then prefers llvm-symbolizer over
// addr2line. An error is returned when neither symbolizer could be created.
func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
	f.baseOnce.Do(func() {
		f.baseErr = f.computeBase(addr)
	})
	if err := f.baseErr; err != nil {
		return nil, err
	}

	f.once.Do(f.init)
	switch {
	case f.llvmSymbolizer != nil:
		return f.llvmSymbolizer.addrInfo(addr)
	case f.addr2liner != nil:
		return f.addr2liner.addrInfo(addr)
	default:
		return nil, fmt.Errorf("could not find local addr2liner")
	}
}
// init creates the symbolizer used by SourceLine. It tries llvm-symbolizer
// first and falls back to addr2line; when addr2line is used, an nm-based
// helper is attached as well if it can be created. If neither tool can be
// started, both symbolizer fields stay nil.
func (f *fileAddr2Line) init() {
	// computeBase records whether the address belongs to a data segment in the
	// embedded file's isData field. fileAddr2Line declares its own isData
	// field, which shadows the embedded one, so a bare f.isData would never
	// see the computed value. Consult both so that data addresses are
	// symbolized as data.
	isData := f.isData || f.file.isData
	if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base, isData); err == nil {
		f.llvmSymbolizer = llvmSymbolizer
		return
	}

	if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
		f.addr2liner = addr2liner

		// When addr2line encounters some gcc compiled binaries, it
		// drops interesting parts of names in anonymous namespaces.
		// Fallback to NM for better function names.
		if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
			f.addr2liner.nm = nm
		}
	}
}
// Close shuts down whichever symbolizer connections were opened and clears
// the corresponding fields. It always returns nil.
func (f *fileAddr2Line) Close() error {
	if sym := f.llvmSymbolizer; sym != nil {
		sym.rw.close()
		f.llvmSymbolizer = nil
	}
	if liner := f.addr2liner; liner != nil {
		liner.rw.close()
		f.addr2liner = nil
	}
	return nil
}

View File

@@ -0,0 +1,978 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package binutils
import (
"bytes"
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"math"
"path/filepath"
"reflect"
"regexp"
"runtime"
"strings"
"testing"
"m7s.live/v5/plugin/debug/pkg/internal/plugin"
)
// testAddrMap maps a frame level to the mangled function name that the mock
// addr2line output uses for that level.
var testAddrMap = map[int]string{
	1000: "_Z3fooid.clone2",
	2000: "_ZNSaIiEC1Ev.clone18",
	3000: "_ZNSt6vectorIS_IS_IiSaIiEESaIS1_EESaIS3_EEixEm",
}

// functionName returns the function name expected for the given level: the
// entry from testAddrMap when there is one, and "fun<level>" otherwise.
func functionName(level int) (name string) {
	known := testAddrMap[level]
	if known == "" {
		return fmt.Sprintf("fun%d", level)
	}
	return known
}
// TestAddr2Liner drives an addr2Liner backed by mockAddr2liner and checks
// that each address yields the expected number of frames with the expected
// function, file and line, and that an unknown address yields no frames.
func TestAddr2Liner(t *testing.T) {
	const offset = 0x500

	a := addr2Liner{rw: &mockAddr2liner{}, base: offset}
	for depth := 1; depth < 8; depth++ {
		addr := depth*0x1000 + offset
		frames, err := a.addrInfo(uint64(addr))
		if err != nil {
			t.Fatalf("addrInfo(%#x): %v", addr, err)
		}
		if len(frames) != depth {
			t.Fatalf("addrInfo(%#x): got len==%d, want %d", addr, len(frames), depth)
		}
		for idx, got := range frames {
			// Frames are listed from the deepest level down to level 1000.
			level := (len(frames) - idx) * 1000
			want := plugin.Frame{
				Func: functionName(level),
				File: fmt.Sprintf("file%d", level),
				Line: level,
			}
			if got != want {
				t.Errorf("AddrInfo(%#x)[%d]: = %+v, want %+v", addr, idx, got, want)
			}
		}
	}

	unknown, err := a.addrInfo(0xFFFF)
	if err != nil {
		t.Fatalf("addrInfo(0xFFFF): %v", err)
	}
	if n := len(unknown); n != 0 {
		t.Fatalf("AddrInfo(0xFFFF): got len==%d, want 0", n)
	}
	a.rw.close()
}
// mockAddr2liner is an in-memory stand-in for an addr2line process: write
// queues the canned reply for a request, and readLine hands the queued lines
// back one at a time.
type mockAddr2liner struct {
	output []string
}

// write queues the reply for the request s: an echo line "0x"+s followed by
// function/location line pairs. The requests "1000" through "9000" produce
// one to nine frames respectively; any other request produces the
// unknown-symbol reply "??" / "??:0". It always returns nil.
func (a *mockAddr2liner) write(s string) error {
	// stack is the reply for the deepest known request, "9000". Every
	// shallower known request yields a suffix of it, two lines per frame.
	stack := []string{
		"fun9000", "file9000:9000",
		"fun8000", "file8000:8000",
		"fun7000", "file7000:7000",
		"fun6000", "file6000:6000",
		"fun5000", "file5000:5000",
		"fun4000", "file4000:4000",
		"_ZNSt6vectorIS_IS_IiSaIiEESaIS1_EESaIS3_EEixEm", "file3000:3000",
		"_ZNSaIiEC1Ev.clone18", "file2000:2000",
		"_Z3fooid.clone2", "file1000:1000",
	}

	reply := []string{"??", "??:0"}
	if len(s) == 4 && s[0] >= '1' && s[0] <= '9' && s[1:] == "000" {
		frames := int(s[0] - '0')
		reply = stack[len(stack)-2*frames:]
	}

	a.output = append(a.output, "0x"+s)
	a.output = append(a.output, reply...)
	return nil
}

// readLine pops and returns the oldest queued line, or an "end of file"
// error when nothing is queued.
func (a *mockAddr2liner) readLine() (string, error) {
	if len(a.output) == 0 {
		return "", fmt.Errorf("end of file")
	}
	head, rest := a.output[0], a.output[1:]
	a.output = rest
	return head, nil
}

// close does nothing; the mock holds no external resources.
func (a *mockAddr2liner) close() {}
// TestAddr2LinerLookup feeds canned nm output to parseAddr2LinerNM (with a
// base of 0) and checks address lookups against the resulting table: every
// address in wantSymbolized must resolve to exactly one frame whose Func is
// the given name, and every address in wantUnsymbolized must resolve to nil.
func TestAddr2LinerLookup(t *testing.T) {
for _, tc := range []struct {
desc string
nmOutput string
wantSymbolized map[uint64]string
wantUnsymbolized []uint64
}{
{
desc: "odd symbol count",
nmOutput: `
0x1000 T 1000 100
0x2000 T 2000 120
0x3000 T 3000 130
`,
wantSymbolized: map[uint64]string{
0x1000: "0x1000",
0x1001: "0x1000",
0x1FFF: "0x1000",
0x2000: "0x2000",
0x2001: "0x2000",
0x3000: "0x3000",
0x312f: "0x3000",
},
wantUnsymbolized: []uint64{0x0fff, 0x3130},
},
{
desc: "even symbol count",
nmOutput: `
0x1000 T 1000 100
0x2000 T 2000 120
0x3000 T 3000 130
0x4000 T 4000 140
`,
wantSymbolized: map[uint64]string{
0x1000: "0x1000",
0x1001: "0x1000",
0x1FFF: "0x1000",
0x2000: "0x2000",
0x2fff: "0x2000",
0x3000: "0x3000",
0x3fff: "0x3000",
0x4000: "0x4000",
0x413f: "0x4000",
},
wantUnsymbolized: []uint64{0x0fff, 0x4140},
},
{
desc: "different symbol types",
nmOutput: `
absolute_0x100 a 100
absolute_0x200 A 200
text_0x1000 t 1000 100
bss_0x2000 b 2000 120
data_0x3000 d 3000 130
rodata_0x4000 r 4000 140
weak_0x5000 v 5000 150
text_0x6000 T 6000 160
bss_0x7000 B 7000 170
data_0x8000 D 8000 180
rodata_0x9000 R 9000 190
weak_0xa000 V a000 1a0
weak_0xb000 W b000 1b0
`,
wantSymbolized: map[uint64]string{
0x1000: "text_0x1000",
0x1FFF: "text_0x1000",
0x2000: "bss_0x2000",
0x211f: "bss_0x2000",
0x3000: "data_0x3000",
0x312f: "data_0x3000",
0x4000: "rodata_0x4000",
0x413f: "rodata_0x4000",
0x5000: "weak_0x5000",
0x514f: "weak_0x5000",
0x6000: "text_0x6000",
0x6fff: "text_0x6000",
0x7000: "bss_0x7000",
0x716f: "bss_0x7000",
0x8000: "data_0x8000",
0x817f: "data_0x8000",
0x9000: "rodata_0x9000",
0x918f: "rodata_0x9000",
0xa000: "weak_0xa000",
0xa19f: "weak_0xa000",
0xb000: "weak_0xb000",
0xb1af: "weak_0xb000",
},
wantUnsymbolized: []uint64{0x100, 0x200, 0x0fff, 0x2120, 0x3130, 0x4140, 0x5150, 0x7170, 0x8180, 0x9190, 0xa1a0, 0xb1b0},
},
} {
t.Run(tc.desc, func(t *testing.T) {
a, err := parseAddr2LinerNM(0, bytes.NewBufferString(tc.nmOutput))
if err != nil {
t.Fatalf("nm parse error: %v", err)
}
// Errors from addrInfo are deliberately ignored here; only the returned
// frames are compared.
for address, want := range tc.wantSymbolized {
if got, _ := a.addrInfo(address); !checkAddress(got, address, want) {
t.Errorf("%x: got %v, want %s", address, got, want)
}
}
for _, unknown := range tc.wantUnsymbolized {
if got, _ := a.addrInfo(unknown); got != nil {
t.Errorf("%x: got %v, want nil", unknown, got)
}
}
})
}
}
// checkAddress reports whether got consists of exactly one frame whose Func
// equals want. The address parameter is not consulted.
func checkAddress(got []plugin.Frame, address uint64, want string) bool {
	return len(got) == 1 && got[0].Func == want
}
// TestSetTools verifies that SetTools can be called repeatedly on the same
// Binutils value.
func TestSetTools(t *testing.T) {
	bu := &Binutils{}
	for call := 0; call < 2; call++ {
		bu.SetTools("")
	}
}
// TestSetFastSymbolization verifies that SetFastSymbolization can be called
// repeatedly on the same Binutils value, first enabling and then disabling it.
func TestSetFastSymbolization(t *testing.T) {
	bu := &Binutils{}
	for _, fast := range []bool{true, false} {
		bu.SetFastSymbolization(fast)
	}
}
// skipUnlessLinuxAmd64 skips the calling test unless it runs on linux/amd64.
func skipUnlessLinuxAmd64(t *testing.T) {
	if runtime.GOOS == "linux" && runtime.GOARCH == "amd64" {
		return
	}
	t.Skip("This test only works on x86-64 Linux")
}
// skipUnlessDarwinAmd64 skips the calling test unless it runs on darwin/amd64.
func skipUnlessDarwinAmd64(t *testing.T) {
	if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
		return
	}
	t.Skip("This test only works on x86-64 macOS")
}
// skipUnlessWindowsAmd64 skips the calling test unless it runs on
// windows/amd64.
func skipUnlessWindowsAmd64(t *testing.T) {
	if runtime.GOOS == "windows" && runtime.GOARCH == "amd64" {
		return
	}
	t.Skip("This test only works on x86-64 Windows")
}
// testDisasm disassembles the test executable for the current OS, using the
// requested assembly syntax, and checks that at least one instruction is
// attributed to the main function. The test is skipped when no objdump tool
// is available or when the OS has no matching test executable.
func testDisasm(t *testing.T, intelSyntax bool) {
	_, haveLLVM, haveBinutils := findObjdump([]string{""})
	if !haveLLVM && !haveBinutils {
		t.Skip("cannot disasm: no objdump tool available")
	}

	bu := &Binutils{}
	var testexe string
	switch goos := runtime.GOOS; goos {
	case "linux":
		testexe = "exe_linux_64"
	case "darwin":
		testexe = "exe_mac_64"
	case "windows":
		testexe = "exe_windows_64.exe"
	default:
		t.Skipf("unsupported OS %q", goos)
	}

	insts, err := bu.Disasm(filepath.Join("testdata", testexe), 0, math.MaxUint64, intelSyntax)
	if err != nil {
		t.Fatalf("Disasm: unexpected error %v", err)
	}

	sawMain := false
	for _, inst := range insts {
		// macOS symbols have a leading underscore.
		if fn := inst.Function; fn == "main" || fn == "_main" {
			sawMain = true
			break
		}
	}
	if !sawMain {
		t.Error("Disasm: found no main instructions")
	}
}
// TestDisasm runs the disassembly check with the default (non-Intel) syntax
// on amd64 Linux, macOS or Windows, and is skipped elsewhere.
func TestDisasm(t *testing.T) {
	supportedOS := runtime.GOOS == "linux" || runtime.GOOS == "darwin" || runtime.GOOS == "windows"
	if !supportedOS || runtime.GOARCH != "amd64" {
		t.Skip("This test only works on x86-64 Linux, macOS or Windows")
	}
	testDisasm(t, false)
}
// TestDisasmIntelSyntax runs the disassembly check with Intel syntax on amd64
// Linux, macOS or Windows, and is skipped elsewhere.
func TestDisasmIntelSyntax(t *testing.T) {
	supportedOS := runtime.GOOS == "linux" || runtime.GOOS == "darwin" || runtime.GOOS == "windows"
	if !supportedOS || runtime.GOARCH != "amd64" {
		t.Skip("This test only works on x86_64 Linux, macOS or Windows as it tests Intel asm syntax")
	}
	testDisasm(t, true)
}
// findSymbol returns the first symbol in syms that lists name among its
// names, or nil when no symbol does.
func findSymbol(syms []*plugin.Sym, name string) *plugin.Sym {
	for i := range syms {
		sym := syms[i]
		for j := range sym.Name {
			if sym.Name[j] == name {
				return sym
			}
		}
	}
	return nil
}
// TestObjFile opens testdata/exe_linux_64 under several mappings and checks
// that the main symbol is found, that ObjAddr maps the runtime address of
// main back to the symbol's start, and that SourceLine reports the expected
// frame.
func TestObjFile(t *testing.T) {
	// If this test fails, check the address of the main function in
	// testdata/exe_linux_64 with 'nm -n' and update the hardcoded addresses
	// below to match its output.
	skipUnlessLinuxAmd64(t)

	type objCase struct {
		desc                 string
		start, limit, offset uint64
		addr                 uint64
	}
	cases := []objCase{
		{"fixed load address", 0x400000, 0x4006fc, 0, 0x40052d},
		// True user-mode ASLR binaries are ET_DYN rather than ET_EXEC so this case
		// is a bit artificial except that it approximates the
		// vmlinux-with-kernel-ASLR case where the binary *is* ET_EXEC.
		{"simulated ASLR address", 0x500000, 0x5006fc, 0, 0x50052d},
	}
	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			bu := &Binutils{}
			exe := filepath.Join("testdata", "exe_linux_64")
			f, err := bu.Open(exe, tc.start, tc.limit, tc.offset, "")
			if err != nil {
				t.Fatalf("Open: unexpected error %v", err)
			}
			defer f.Close()

			syms, err := f.Symbols(regexp.MustCompile("main"), 0)
			if err != nil {
				t.Fatalf("Symbols: unexpected error %v", err)
			}
			mainSym := findSymbol(syms, "main")
			if mainSym == nil {
				t.Fatalf("Symbols: did not find main")
			}

			objAddr, err := f.ObjAddr(tc.addr)
			if err != nil {
				t.Fatalf("ObjAddr(%x) failed: %v", tc.addr, err)
			}
			if objAddr != mainSym.Start {
				t.Errorf("ObjAddr(%x) got %x, want %x", tc.addr, objAddr, mainSym.Start)
			}

			gotFrames, err := f.SourceLine(tc.addr)
			if err != nil {
				t.Fatalf("SourceLine: unexpected error %v", err)
			}
			wantFrames := []plugin.Frame{{
				Func:      "main",
				File:      "/tmp/hello.c",
				Line:      3,
				StartLine: 3,
			}}
			if !reflect.DeepEqual(gotFrames, wantFrames) {
				t.Fatalf("SourceLine for main: got %v; want %v\n", gotFrames, wantFrames)
			}
		})
	}
}
// TestMachoFiles opens Mach-O test binaries under several mappings and checks
// that the expected symbol is present in the symbol table and that
// SourceLine returns the expected stack trace for the given address.
func TestMachoFiles(t *testing.T) {
	// If this test fails, check the address of the main function in
	// testdata/exe_mac_64 and testdata/lib_mac_64 using addr2line or
	// gaddr2line, and update the hardcoded addresses below to match.
	skipUnlessDarwinAmd64(t)

	// Each case loads `file`, pretending it was mapped at `start`, fetches
	// the symbol table, checks that it contains `sym` and that `addr` gives
	// the `expected` stack trace.
	type machoCase struct {
		desc                 string
		file                 string
		start, limit, offset uint64
		addr                 uint64
		sym                  string
		expected             []plugin.Frame
	}
	cases := []machoCase{
		{
			desc:  "normal mapping",
			file:  "exe_mac_64",
			start: 0x100000000, limit: math.MaxUint64, offset: 0,
			addr: 0x100000f50,
			sym:  "_main",
			expected: []plugin.Frame{
				{Func: "main", File: "/tmp/hello.c", Line: 3, StartLine: 3},
			},
		},
		{
			desc:  "other mapping",
			file:  "exe_mac_64",
			start: 0x200000000, limit: math.MaxUint64, offset: 0,
			addr: 0x200000f50,
			sym:  "_main",
			expected: []plugin.Frame{
				{Func: "main", File: "/tmp/hello.c", Line: 3, StartLine: 3},
			},
		},
		{
			desc:  "lib normal mapping",
			file:  "lib_mac_64",
			start: 0, limit: math.MaxUint64, offset: 0,
			addr: 0xfa0,
			sym:  "_bar",
			expected: []plugin.Frame{
				{Func: "bar", File: "/tmp/lib.c", Line: 5, StartLine: 5},
			},
		},
	}
	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			bu := &Binutils{}
			f, err := bu.Open(filepath.Join("testdata", tc.file), tc.start, tc.limit, tc.offset, "")
			if err != nil {
				t.Fatalf("Open: unexpected error %v", err)
			}
			t.Logf("binutils: %v", bu)
			if runtime.GOOS == "darwin" && !bu.rep.addr2lineFound && !bu.rep.llvmSymbolizerFound {
				// On macOS, user needs to install gaddr2line or llvm-symbolizer with
				// Homebrew, skip the test when the environment doesn't have it
				// installed.
				t.Skip("couldn't find addr2line or gaddr2line")
			}
			defer f.Close()

			syms, err := f.Symbols(nil, 0)
			if err != nil {
				t.Fatalf("Symbols: unexpected error %v", err)
			}
			if found := findSymbol(syms, tc.sym); found == nil {
				t.Fatalf("Symbols: could not find symbol %v", tc.sym)
			}

			gotFrames, err := f.SourceLine(tc.addr)
			if err != nil {
				t.Fatalf("SourceLine: unexpected error %v", err)
			}
			if !reflect.DeepEqual(gotFrames, tc.expected) {
				t.Fatalf("SourceLine for main: got %v; want %v\n", gotFrames, tc.expected)
			}
		})
	}
}
// TestLLVMSymbolizer runs the fake llvm-symbolizer script from testdata
// through newLLVMSymbolizer, once for a code address and once for a data
// address, and checks the frames returned by addrInfo. It is skipped on
// operating systems other than Linux.
func TestLLVMSymbolizer(t *testing.T) {
	if runtime.GOOS != "linux" {
		// The message names the fixture that is actually used below.
		t.Skip("testdata/fake-llvm-symbolizer has only been tested on linux")
	}

	cmd := filepath.Join("testdata", "fake-llvm-symbolizer")
	for _, c := range []struct {
		addr   uint64
		isData bool
		frames []plugin.Frame
	}{
		{0x10, false, []plugin.Frame{
			{Func: "Inlined_0x10", File: "foo.h", Line: 0, Column: 0, StartLine: 0},
			{Func: "Func_0x10", File: "foo.c", Line: 2, Column: 1, StartLine: 2},
		}},
		{0x20, true, []plugin.Frame{
			{Func: "foo_0x20", File: "0x20 8"},
		}},
	} {
		desc := fmt.Sprintf("Code %x", c.addr)
		if c.isData {
			desc = fmt.Sprintf("Data %x", c.addr)
		}
		t.Run(desc, func(t *testing.T) {
			symbolizer, err := newLLVMSymbolizer(cmd, "foo", 0, c.isData)
			if err != nil {
				t.Fatalf("newLLVMSymbolizer: unexpected error %v", err)
			}
			defer symbolizer.rw.close()

			frames, err := symbolizer.addrInfo(c.addr)
			if err != nil {
				t.Fatalf("LLVM: unexpected error %v", err)
			}
			if !reflect.DeepEqual(frames, c.frames) {
				t.Errorf("LLVM: expect %v; got %v\n", c.frames, frames)
			}
		})
	}
}
// TestPEFile opens testdata/exe_windows_64.exe under several mappings and
// checks that the main symbol is found, that ObjAddr maps the runtime address
// of main back to the symbol's start, and that SourceLine reports the
// expected frame.
func TestPEFile(t *testing.T) {
	// If this test fails, check the address of the main function in
	// testdata/exe_windows_64.exe with 'nm -n' and update the hardcoded
	// addresses below to match its output.
	skipUnlessWindowsAmd64(t)

	type peCase struct {
		desc                 string
		start, limit, offset uint64
		addr                 uint64
	}
	cases := []peCase{
		{"fake mapping", 0, math.MaxUint64, 0, 0x140001594},
		{"fixed load address", 0x140000000, 0x140002000, 0, 0x140001594},
		{"simulated ASLR address", 0x150000000, 0x150002000, 0, 0x150001594},
	}
	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			bu := &Binutils{}
			exe := filepath.Join("testdata", "exe_windows_64.exe")
			f, err := bu.Open(exe, tc.start, tc.limit, tc.offset, "")
			if err != nil {
				t.Fatalf("Open: unexpected error %v", err)
			}
			defer f.Close()

			syms, err := f.Symbols(regexp.MustCompile("main"), 0)
			if err != nil {
				t.Fatalf("Symbols: unexpected error %v", err)
			}
			mainSym := findSymbol(syms, "main")
			if mainSym == nil {
				t.Fatalf("Symbols: did not find main")
			}

			objAddr, err := f.ObjAddr(tc.addr)
			if err != nil {
				t.Fatalf("ObjAddr(%x) failed: %v", tc.addr, err)
			}
			if objAddr != mainSym.Start {
				t.Errorf("ObjAddr(%x) got %x, want %x", tc.addr, objAddr, mainSym.Start)
			}

			gotFrames, err := f.SourceLine(tc.addr)
			if err != nil {
				t.Fatalf("SourceLine: unexpected error %v", err)
			}
			wantFrames := []plugin.Frame{{
				Func:      "main",
				File:      "hello.c",
				Line:      3,
				Column:    12,
				StartLine: 3,
			}}
			if !reflect.DeepEqual(gotFrames, wantFrames) {
				t.Fatalf("SourceLine for main: got %v; want %v\n", gotFrames, wantFrames)
			}
		})
	}
}
// TestOpenMalformedELF checks that opening a malformed ELF file fails with an
// error that mentions "ELF".
func TestOpenMalformedELF(t *testing.T) {
	bu := &Binutils{}
	path := filepath.Join("testdata", "malformed_elf")
	_, err := bu.Open(path, 0, 0, 0, "")
	switch {
	case err == nil:
		t.Fatalf("Open: unexpected success")
	case !strings.Contains(err.Error(), "ELF"):
		t.Errorf("Open: got %v, want error containing 'ELF'", err)
	}
}
// TestOpenMalformedMachO checks that opening a malformed Mach-O file fails
// with an error that mentions "Mach-O".
func TestOpenMalformedMachO(t *testing.T) {
	bu := &Binutils{}
	path := filepath.Join("testdata", "malformed_macho")
	_, err := bu.Open(path, 0, 0, 0, "")
	switch {
	case err == nil:
		t.Fatalf("Open: unexpected success")
	case !strings.Contains(err.Error(), "Mach-O"):
		t.Errorf("Open: got %v, want error containing 'Mach-O'", err)
	}
}
// TestObjdumpVersionChecks verifies how objdump version banners are
// classified. The first table is checked with isLLVMObjdump, and only the
// cases whose os field equals runtime.GOOS are run. The second table is
// checked with isBuObjdump on every platform.
func TestObjdumpVersionChecks(t *testing.T) {
// Test that the objdump version strings are parsed properly.
type testcase struct {
desc string
os string
ver string
want bool
}
for _, tc := range []testcase{
{
desc: "Valid Apple LLVM version string with usable version",
os: "darwin",
ver: "Apple LLVM version 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
want: true,
},
{
desc: "Valid Apple LLVM version string with unusable version",
os: "darwin",
ver: "Apple LLVM version 10.0.0 (clang-1000.11.45.5)\nOptimized build.",
want: false,
},
{
desc: "Invalid Apple LLVM version string with usable version",
os: "darwin",
ver: "Apple LLVM versions 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
want: false,
},
{
desc: "Valid LLVM version string with usable version",
os: "linux",
ver: "LLVM (http://llvm.org/):\nLLVM version 9.0.1\n\nOptimized build.",
want: true,
},
{
desc: "Valid LLVM version string with unusable version",
os: "linux",
ver: "LLVM (http://llvm.org/):\nLLVM version 6.0.1\n\nOptimized build.",
want: false,
},
{
desc: "Invalid LLVM version string with usable version",
os: "linux",
ver: "LLVM (http://llvm.org/):\nLLVM versions 9.0.1\n\nOptimized build.",
want: false,
},
{
desc: "Valid LLVM objdump version string with trunk",
os: runtime.GOOS,
ver: "LLVM (http://llvm.org/):\nLLVM version custom-trunk 124ffeb592a00bfe\nOptimized build.",
want: true,
},
{
desc: "Invalid LLVM objdump version string with trunk",
os: runtime.GOOS,
ver: "LLVM (http://llvm.org/):\nLLVM version custom-trank 124ffeb592a00bfe\nOptimized build.",
want: false,
},
{
desc: "Invalid LLVM objdump version string with trunk",
os: runtime.GOOS,
ver: "LLVM (http://llvm.org/):\nllvm version custom-trunk 124ffeb592a00bfe\nOptimized build.",
want: false,
},
} {
// Cases written for another operating system are not evaluated.
if runtime.GOOS == tc.os {
if got := isLLVMObjdump(tc.ver); got != tc.want {
t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
}
}
}
// The GNU objdump cases leave the os field unset and run unconditionally.
for _, tc := range []testcase{
{
desc: "Valid GNU objdump version string",
ver: "GNU objdump (GNU Binutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
want: true,
},
{
desc: "Invalid GNU objdump version string",
ver: "GNU nm (GNU Binutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
want: false,
},
} {
if got := isBuObjdump(tc.ver); got != tc.want {
t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
}
}
}
// TestComputeBase exercises file.computeBase against in-memory ELF
// descriptions. It replaces the package-level elfOpen hook with a stub that
// returns the file and error of the current test case, and restores the real
// hook when the test ends. For each case it checks whether an error occurred
// and, on success, the resulting base and isData fields.
func TestComputeBase(t *testing.T) {
// Save the real opener so it can be restored after the stubbing below.
realELFOpen := elfOpen
defer func() {
elfOpen = realELFOpen
}()
// tinyExecFile describes an executable with one executable and one
// writable PT_LOAD segment.
tinyExecFile := &elf.File{
FileHeader: elf.FileHeader{Type: elf.ET_EXEC},
Progs: []*elf.Prog{
{ProgHeader: elf.ProgHeader{Type: elf.PT_PHDR, Flags: elf.PF_R | elf.PF_X, Off: 0x40, Vaddr: 0x400040, Paddr: 0x400040, Filesz: 0x1f8, Memsz: 0x1f8, Align: 8}},
{ProgHeader: elf.ProgHeader{Type: elf.PT_INTERP, Flags: elf.PF_R, Off: 0x238, Vaddr: 0x400238, Paddr: 0x400238, Filesz: 0x1c, Memsz: 0x1c, Align: 1}},
{ProgHeader: elf.ProgHeader{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_X, Off: 0, Vaddr: 0, Paddr: 0, Filesz: 0xc80, Memsz: 0xc80, Align: 0x200000}},
{ProgHeader: elf.ProgHeader{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_W, Off: 0xc80, Vaddr: 0x200c80, Paddr: 0x200c80, Filesz: 0x1f0, Memsz: 0x1f0, Align: 0x200000}},
},
}
// tinyBadBSSExecFile is like tinyExecFile, but its first writable segment
// has Filesz smaller than Memsz and is followed by a further writable
// PT_LOAD segment.
tinyBadBSSExecFile := &elf.File{
FileHeader: elf.FileHeader{Type: elf.ET_EXEC},
Progs: []*elf.Prog{
{ProgHeader: elf.ProgHeader{Type: elf.PT_PHDR, Flags: elf.PF_R | elf.PF_X, Off: 0x40, Vaddr: 0x400040, Paddr: 0x400040, Filesz: 0x1f8, Memsz: 0x1f8, Align: 8}},
{ProgHeader: elf.ProgHeader{Type: elf.PT_INTERP, Flags: elf.PF_R, Off: 0x238, Vaddr: 0x400238, Paddr: 0x400238, Filesz: 0x1c, Memsz: 0x1c, Align: 1}},
{ProgHeader: elf.ProgHeader{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_X, Off: 0, Vaddr: 0, Paddr: 0, Filesz: 0xc80, Memsz: 0xc80, Align: 0x200000}},
{ProgHeader: elf.ProgHeader{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_W, Off: 0xc80, Vaddr: 0x200c80, Paddr: 0x200c80, Filesz: 0x100, Memsz: 0x1f0, Align: 0x200000}},
{ProgHeader: elf.ProgHeader{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_W, Off: 0xd80, Vaddr: 0x400d80, Paddr: 0x400d80, Filesz: 0x90, Memsz: 0x90, Align: 0x200000}},
},
}
for _, tc := range []struct {
desc string
file *elf.File
openErr error
mapping *elfMapping
addr uint64
wantError bool
wantBase uint64
wantIsData bool
}{
{
desc: "no elf mapping, no error",
mapping: nil,
addr: 0x1000,
wantBase: 0,
wantIsData: false,
},
{
desc: "address outside mapping bounds means error",
file: &elf.File{},
mapping: &elfMapping{start: 0x2000, limit: 0x5000, offset: 0x1000},
addr: 0x1000,
wantError: true,
},
{
desc: "elf.Open failing means error",
file: &elf.File{FileHeader: elf.FileHeader{Type: elf.ET_EXEC}},
openErr: errors.New("elf.Open failed"),
mapping: &elfMapping{start: 0x2000, limit: 0x5000, offset: 0x1000},
addr: 0x4000,
wantError: true,
},
{
desc: "no loadable segments, no error",
file: &elf.File{FileHeader: elf.FileHeader{Type: elf.ET_EXEC}},
mapping: &elfMapping{start: 0x2000, limit: 0x5000, offset: 0x1000},
addr: 0x4000,
wantBase: 0,
wantIsData: false,
},
{
desc: "unsupported executable type, Get Base returns error",
file: &elf.File{FileHeader: elf.FileHeader{Type: elf.ET_NONE}},
mapping: &elfMapping{start: 0x2000, limit: 0x5000, offset: 0x1000},
addr: 0x4000,
wantError: true,
},
{
desc: "tiny file select executable segment by offset",
file: tinyExecFile,
mapping: &elfMapping{start: 0x5000000, limit: 0x5001000, offset: 0x0},
addr: 0x5000c00,
wantBase: 0x5000000,
wantIsData: false,
},
{
desc: "tiny file select data segment by offset",
file: tinyExecFile,
mapping: &elfMapping{start: 0x5200000, limit: 0x5201000, offset: 0x0},
addr: 0x5200c80,
wantBase: 0x5000000,
wantIsData: true,
},
{
desc: "tiny file offset outside any segment means error",
file: tinyExecFile,
mapping: &elfMapping{start: 0x5200000, limit: 0x5201000, offset: 0x0},
addr: 0x5200e70,
wantError: true,
},
{
desc: "tiny file with bad BSS segment selects data segment by offset in initialized section",
file: tinyBadBSSExecFile,
mapping: &elfMapping{start: 0x5200000, limit: 0x5201000, offset: 0x0},
addr: 0x5200d79,
wantBase: 0x5000000,
wantIsData: true,
},
{
desc: "tiny file with bad BSS segment with offset in uninitialized section means error",
file: tinyBadBSSExecFile,
mapping: &elfMapping{start: 0x5200000, limit: 0x5201000, offset: 0x0},
addr: 0x5200d80,
wantError: true,
},
} {
t.Run(tc.desc, func(t *testing.T) {
// Stub the opener so computeBase sees this case's file and error.
elfOpen = func(_ string) (*elf.File, error) {
return tc.file, tc.openErr
}
f := file{m: tc.mapping}
err := f.computeBase(tc.addr)
if (err != nil) != tc.wantError {
t.Errorf("got error %v, want any error=%v", err, tc.wantError)
}
// base and isData are only meaningful when computeBase succeeded.
if err != nil {
return
}
if f.base != tc.wantBase {
t.Errorf("got base %x, want %x", f.base, tc.wantBase)
}
if f.isData != tc.wantIsData {
t.Errorf("got isData %v, want %v", f.isData, tc.wantIsData)
}
})
}
}
// TestELFObjAddr opens testdata/exe_linux_64 through binrep.openELF under a
// variety of mappings and checks the object address, or the error, returned
// by ObjAddr for a sample address in each mapping.
func TestELFObjAddr(t *testing.T) {
	// The exe_linux_64 has two loadable program headers:
	//  LOAD           0x0000000000000000 0x0000000000400000 0x0000000000400000
	//                 0x00000000000006fc 0x00000000000006fc  R E    0x200000
	//  LOAD           0x0000000000000e10 0x0000000000600e10 0x0000000000600e10
	//                 0x0000000000000230 0x0000000000000238  RW     0x200000
	name := filepath.Join("testdata", "exe_linux_64")

	type objAddrCase struct {
		desc                 string
		start, limit, offset uint64
		wantOpenError        bool
		addr                 uint64
		wantObjAddr          uint64
		wantAddrError        bool
	}
	cases := []objAddrCase{
		{"exec mapping, good address", 0x5400000, 0x5401000, 0, false, 0x5400400, 0x400400, false},
		{"exec mapping, address outside segment", 0x5400000, 0x5401000, 0, false, 0x5400800, 0, true},
		{"short data mapping, good address", 0x5600e00, 0x5602000, 0xe00, false, 0x5600e10, 0x600e10, false},
		{"short data mapping, address outside segment", 0x5600e00, 0x5602000, 0xe00, false, 0x5600e00, 0x600e00, false},
		{"page aligned data mapping, good address", 0x5600000, 0x5602000, 0, false, 0x5601000, 0x601000, false},
		{"page aligned data mapping, address outside segment", 0x5600000, 0x5602000, 0, false, 0x5601048, 0, true},
		{"bad file offset, no matching segment", 0x5600000, 0x5602000, 0x2000, false, 0x5600e10, 0, true},
		{"large mapping size, match by sample offset", 0x5600000, 0x5603000, 0, false, 0x5600e10, 0x600e10, false},
	}
	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			var b binrep
			o, openErr := b.openELF(name, tc.start, tc.limit, tc.offset, "")
			if failed := openErr != nil; failed != tc.wantOpenError {
				t.Errorf("openELF got error %v, want any error=%v", openErr, tc.wantOpenError)
			}
			if openErr != nil {
				return
			}

			got, addrErr := o.ObjAddr(tc.addr)
			if failed := addrErr != nil; failed != tc.wantAddrError {
				t.Errorf("ObjAddr got error %v, want any error=%v", addrErr, tc.wantAddrError)
			}
			if addrErr != nil {
				return
			}
			if got != tc.wantObjAddr {
				t.Errorf("got ObjAddr %x; want %x\n", got, tc.wantObjAddr)
			}
		})
	}
}
// buf accumulates null-terminated strings in a single byte slice.
type buf struct {
	data []byte
}

// write appends s followed by a terminating zero byte to the buffer and
// returns the index at which s starts.
func (b *buf) write(s string) uint32 {
	start := len(b.data)
	b.data = append(append(b.data, s...), 0)
	return uint32(start)
}
// fakeELFFile generates a minimal valid ELF file, with fake .head.text and
// .text sections, and their corresponding _text and _stext start symbols,
// mimicking a kernel vmlinux image.
//
// The image is built entirely in memory as a little-endian ELF64 byte stream
// laid out as: ELF header, program headers, section headers, .head.text
// contents, .text contents, .symtab, .strtab, .shstrtab. It is then parsed
// back with elf.NewFile and the resulting *elf.File is returned. Any
// serialization or parse failure aborts the calling test via t.Fatalf.
func fakeELFFile(t *testing.T) *elf.File {
// Encoded sizes of the fixed-layout ELF64 structures; used to compute the
// file offsets of everything that follows them in the stream.
var (
sizeHeader64 = binary.Size(elf.Header64{})
sizeProg64 = binary.Size(elf.Prog64{})
sizeSection64 = binary.Size(elf.Section64{})
)
// Addresses of the _text and _stext symbols, which are also the load
// addresses of the .head.text and .text sections respectively.
const (
textAddr = 0xffff000010080000
stextAddr = 0xffff000010081000
)
// Generate magic to identify as an ELF file.
var ident [16]uint8
ident[0] = '\x7f'
ident[1] = 'E'
ident[2] = 'L'
ident[3] = 'F'
ident[elf.EI_CLASS] = uint8(elf.ELFCLASS64)
ident[elf.EI_DATA] = uint8(elf.ELFDATA2LSB)
ident[elf.EI_VERSION] = uint8(elf.EV_CURRENT)
ident[elf.EI_OSABI] = uint8(elf.ELFOSABI_NONE)
// A single program header, containing code and starting at the _text address.
progs := []elf.Prog64{{
Type: uint32(elf.PT_LOAD), Flags: uint32(elf.PF_R | elf.PF_X), Off: 0x10000, Vaddr: textAddr, Paddr: textAddr, Filesz: 0x1234567, Memsz: 0x1234567, Align: 0x10000}}
// Symbol names go into symNames (the future .strtab contents); each Name
// field holds the offset returned by write.
symNames := buf{}
syms := []elf.Sym64{
{}, // first symbol empty by convention
{Name: symNames.write("_text"), Info: 0, Other: 0, Shndx: 0, Value: textAddr, Size: 0},
{Name: symNames.write("_stext"), Info: 0, Other: 0, Shndx: 0, Value: stextAddr, Size: 0},
}
const numSections = 5
// We'll write `textSize` zero bytes as contents of the .head.text and .text sections.
const textSize = 16
// Offset of section contents in the byte stream -- after header, program headers, and section headers.
sectionsStart := uint64(sizeHeader64 + len(progs)*sizeProg64 + numSections*sizeSection64)
// Section names go into secNames (the future .shstrtab contents). The Off
// fields below must match the order in which the contents are serialized
// further down.
// NOTE(review): the .shstrtab entry reads len(secNames.data) in the same
// composite literal whose Name fields call secNames.write; its Size relies
// on those calls having run before the len read -- confirm.
secNames := buf{}
sections := [numSections]elf.Section64{
{Name: secNames.write(".head.text"), Type: uint32(elf.SHT_PROGBITS), Flags: uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR), Addr: textAddr, Off: sectionsStart, Size: textSize, Link: 0, Info: 0, Addralign: 2048, Entsize: 0},
{Name: secNames.write(".text"), Type: uint32(elf.SHT_PROGBITS), Flags: uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR), Addr: stextAddr, Off: sectionsStart + textSize, Size: textSize, Link: 0, Info: 0, Addralign: 2048, Entsize: 0},
{Name: secNames.write(".symtab"), Type: uint32(elf.SHT_SYMTAB), Flags: 0, Addr: 0, Off: sectionsStart + 2*textSize, Size: uint64(len(syms) * elf.Sym64Size), Link: 3 /*index of .strtab*/, Info: 0, Addralign: 8, Entsize: elf.Sym64Size},
{Name: secNames.write(".strtab"), Type: uint32(elf.SHT_STRTAB), Flags: 0, Addr: 0, Off: sectionsStart + 2*textSize + uint64(len(syms)*elf.Sym64Size), Size: uint64(len(symNames.data)), Link: 0, Info: 0, Addralign: 1, Entsize: 0},
{Name: secNames.write(".shstrtab"), Type: uint32(elf.SHT_STRTAB), Flags: 0, Addr: 0, Off: sectionsStart + 2*textSize + uint64(len(syms)*elf.Sym64Size+len(symNames.data)), Size: uint64(len(secNames.data)), Link: 0, Info: 0, Addralign: 1, Entsize: 0},
}
// The program headers immediately follow the ELF header, and the section
// headers immediately follow the program headers.
hdr := elf.Header64{
Ident: ident,
Type: uint16(elf.ET_DYN),
Machine: uint16(elf.EM_AARCH64),
Version: uint32(elf.EV_CURRENT),
Entry: textAddr,
Phoff: uint64(sizeHeader64),
Shoff: uint64(sizeHeader64 + len(progs)*sizeProg64),
Flags: 0,
Ehsize: uint16(sizeHeader64),
Phentsize: uint16(sizeProg64),
Phnum: uint16(len(progs)),
Shentsize: uint16(sizeSection64),
Shnum: uint16(len(sections)),
Shstrndx: 4, // index of .shstrtab
}
// Serialize all headers and sections into a single binary stream.
// The two [textSize]byte{} values are the zeroed .head.text and .text contents.
var data bytes.Buffer
for i, b := range []interface{}{hdr, progs, sections, [textSize]byte{}, [textSize]byte{}, syms, symNames.data, secNames.data} {
err := binary.Write(&data, binary.LittleEndian, b)
if err != nil {
t.Fatalf("Write(%v) got err %v, want nil", i, err)
}
}
// ... and parse it as an ELF file.
ef, err := elf.NewFile(bytes.NewReader(data.Bytes()))
if err != nil {
t.Fatalf("elf.NewFile got err %v, want nil", err)
}
return ef
}
// TestELFKernelOffset checks address translation against the synthetic
// vmlinux image built by fakeELFFile. elfOpen is swapped out for the duration
// of the test so that every open yields that image. For each relocation
// symbol, the file is opened with the symbol's runtime address passed as both
// the second and fourth openELF arguments, and a fixed runtime address must
// translate to the same object address in every case.
func TestELFKernelOffset(t *testing.T) {
	const (
		mappingLimit = 0xffffffffffffffff
		runtimeAddr  = 0xffff000020082000
	)
	var wantAddr uint64 = 0xffff000010082000

	// Install the fake opener and make sure the real one is restored.
	saved := elfOpen
	defer func() { elfOpen = saved }()
	elfOpen = func(string) (*elf.File, error) {
		return fakeELFFile(t), nil
	}

	type testCase struct {
		name             string
		relocationSymbol string
		start            uint64
	}
	cases := []testCase{
		{name: "text", relocationSymbol: "_text", start: 0xffff000020080000},
		{name: "stext", relocationSymbol: "_stext", start: 0xffff000020081000},
	}

	for _, tc := range cases {
		var b binrep
		obj, err := b.openELF("vmlinux", tc.start, mappingLimit, tc.start, tc.relocationSymbol)
		if err != nil {
			t.Errorf("%v: openELF got error %v, want nil", tc.name, err)
			continue
		}
		got, err := obj.ObjAddr(runtimeAddr)
		if err != nil {
			t.Errorf("%v: ObjAddr got err %v, want nil", tc.name, err)
			continue
		}
		if got != wantAddr {
			t.Errorf("%v: ObjAddr got %x, want %x", tc.name, got, wantAddr)
		}
	}
}

View File

@@ -0,0 +1,180 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package binutils
import (
"bytes"
"io"
"regexp"
"strconv"
"strings"
"github.com/ianlancetaylor/demangle"
"m7s.live/v5/plugin/debug/pkg/internal/plugin"
)
// Patterns used to parse the textual output of nm and objdump. They are
// applied to lines that have already been whitespace-trimmed by the callers
// in this file.
var (
// nmOutputRE captures a hexadecimal address, a single type character, and
// the rest of the line as the symbol name.
nmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
// objdumpAsmOutputRE captures a hexadecimal address followed by a colon,
// and the text after it (the instruction).
objdumpAsmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
// objdumpOutputFileLine captures "<file>:<decimal line>", optionally
// preceded by ';' and one whitespace character.
objdumpOutputFileLine = regexp.MustCompile(`^;?\s?(.*):([0-9]+)`)
// objdumpOutputFunction captures the name in a "<name>():" line, optionally
// preceded by ';' and one whitespace character.
objdumpOutputFunction = regexp.MustCompile(`^;?\s?(\S.*)\(\):`)
// objdumpOutputFunctionLLVM captures an optional hexadecimal prefix and the
// text up to the last ':' on the line; disassemble uses the second group
// as the function name.
objdumpOutputFunctionLLVM = regexp.MustCompile(`^([[:xdigit:]]+)?\s?(.*):`)
)
// findSymbols parses nm output in syms and returns the symbols selected by
// matchSymbol. Consecutive names listed at the same address are grouped into
// one candidate symbol spanning from that address up to one byte before the
// next distinct address.
//
// file is recorded as the File of every returned symbol. r and address are
// forwarded to matchSymbol: a group is kept when address is non-zero and lies
// within the group's range, or when one of its names (possibly demangled)
// matches r.
//
// An error is returned only if reading the nm output fails with something
// other than io.EOF.
func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
	// The symbols to return.
	var symbols []*plugin.Sym
	// The current group of symbol names, and the address they are all at.
	var (
		names []string
		start uint64
	)

	// flush emits the pending group, if any, as a symbol ending at end.
	// An empty group is never emitted: before the first nm entry there are no
	// names yet, and matchSymbol would otherwise hand back the empty slice
	// for any non-zero address below the first symbol, producing a nameless
	// symbol.
	flush := func(end uint64) {
		if len(names) == 0 {
			return
		}
		if match := matchSymbol(names, start, end, r, address); match != nil {
			symbols = append(symbols, &plugin.Sym{Name: match, File: file, Start: start, End: end})
		}
	}

	buf := bytes.NewBuffer(syms)
	for {
		symAddr, name, err := nextSymbol(buf)
		if err == io.EOF {
			// Done. nextSymbol reports address 0 alongside io.EOF, so
			// symAddr-1 wraps around to the largest uint64 and the final
			// group extends to the end of the address space.
			flush(symAddr - 1)
			return symbols, nil
		}
		if err != nil {
			// There was some kind of serious error reading nm's output.
			return nil, err
		}

		// If this symbol is at the same address as the current group, add it to the group.
		if symAddr == start {
			names = append(names, name)
			continue
		}

		// Otherwise close the current group and start a new one.
		flush(symAddr - 1)
		names, start = []string{name}, symAddr
	}
}
// matchSymbol decides whether the group of names located at [start, end] is
// selected, and returns the name(s) to report for it.
//
// If address is non-zero and falls inside [start, end], the whole group is
// returned unchanged. Otherwise the names are tried in order: the first name
// that matches r (or any name at all when r is nil) is returned on its own;
// failing that, the name is demangled with several option sets and the first
// demangled form matching r is returned. nil means no match.
func matchSymbol(names []string, start, end uint64, r *regexp.Regexp, address uint64) []string {
	if address != 0 && start <= address && address <= end {
		return names
	}

	// Demangling variants to try, in order, for each mangled name.
	variants := [][]demangle.Option{
		{demangle.NoClones},
		{demangle.NoParams, demangle.NoEnclosingParams},
		{demangle.NoParams, demangle.NoEnclosingParams, demangle.NoTemplateParams},
	}

	for _, candidate := range names {
		if r == nil || r.MatchString(candidate) {
			return []string{candidate}
		}
		for _, opts := range variants {
			pretty, err := demangle.ToString(candidate, opts...)
			if err != nil {
				continue
			}
			if r.MatchString(pretty) {
				return []string{pretty}
			}
		}
	}
	return nil
}
// disassemble parses objdump output and returns one plugin.Inst per
// instruction line.
//
// Each line is whitespace-trimmed and classified in this order: an
// instruction ("<hex addr>: <text>"), a "<file>:<line>" marker, a
// "<function>():" marker, or an LLVM-style "<function>:" marker. Markers
// update the function/file/line context attached to subsequent instructions;
// any other line clears that context. A non-EOF read error is returned as is.
func disassemble(asm []byte) ([]plugin.Inst, error) {
	var (
		insts    []plugin.Inst
		function string
		file     string
		line     int
	)

	reader := bytes.NewBuffer(asm)
	for {
		raw, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}
		if err == io.EOF && raw == "" {
			// Nothing left; a final line without a trailing newline is
			// still processed below before this is reached.
			break
		}
		text := strings.TrimSpace(raw)

		// Instruction line. If the address does not parse, fall through and
		// let the remaining patterns have a go at the line.
		if m := objdumpAsmOutputRE.FindStringSubmatch(text); len(m) == 3 {
			if addr, perr := strconv.ParseUint(m[1], 16, 64); perr == nil {
				insts = append(insts, plugin.Inst{
					Addr:     addr,
					Text:     m[2],
					Function: function,
					File:     file,
					Line:     line,
				})
				continue
			}
		}

		// File/line marker. The previous file and line are kept when the
		// number does not fit in 32 bits.
		if m := objdumpOutputFileLine.FindStringSubmatch(text); len(m) == 3 {
			if n, perr := strconv.ParseUint(m[2], 10, 32); perr == nil {
				file, line = m[1], int(n)
			}
			continue
		}

		// Function marker, GNU style first and then LLVM style.
		if m := objdumpOutputFunction.FindStringSubmatch(text); len(m) == 2 {
			function = m[1]
			continue
		}
		if m := objdumpOutputFunctionLLVM.FindStringSubmatch(text); len(m) == 3 {
			function = m[2]
			continue
		}

		// Reset on unrecognized lines.
		function, file, line = "", "", 0
	}
	return insts, nil
}
// nextSymbol reads nm output from buf until it finds a line describing a
// symbol, and returns that symbol's address and name. Lines that do not match
// nmOutputRE, or whose address does not parse as 64-bit hexadecimal, are
// skipped. Once the input is exhausted it returns 0, "" and io.EOF.
func nextSymbol(buf *bytes.Buffer) (uint64, string, error) {
	for {
		raw, err := buf.ReadString('\n')
		// An EOF that still carries text is the unterminated last line and
		// must be parsed; every other error ends the scan.
		if err != nil && (err != io.EOF || raw == "") {
			return 0, "", err
		}

		m := nmOutputRE.FindStringSubmatch(strings.TrimSpace(raw))
		if len(m) != 4 {
			continue
		}
		addr, perr := strconv.ParseUint(m[1], 16, 64)
		if perr != nil {
			continue
		}
		return addr, m[3], nil
	}
}

View File

@@ -0,0 +1,160 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package binutils
import (
"fmt"
"regexp"
"testing"
"m7s.live/v5/plugin/debug/pkg/internal/plugin"
)
// TestFindSymbols exercises findSymbols against hardcoded nm output,
// checking both plain regexp matches and a match against a demangled C++
// name.
func TestFindSymbols(t *testing.T) {
	const nmOutput = `0000000000001000 t lineA001
0000000000001000 t lineA002
0000000000001000 t line1000
0000000000002000 t line200A
0000000000002000 t line2000
0000000000002000 t line200B
0000000000003000 t line3000
0000000000003000 t _ZNK4DumbclEPKc
0000000000003000 t lineB00C
0000000000003000 t line300D
0000000000004000 t _the_end
`
	// sym builds the expected single-name symbol in object.o.
	sym := func(name string, start, end uint64) plugin.Sym {
		return plugin.Sym{Name: []string{name}, File: "object.o", Start: start, End: end}
	}

	tests := []struct {
		query, syms string
		want        []plugin.Sym
	}{
		{
			query: "line.*[AC]",
			syms:  nmOutput,
			want: []plugin.Sym{
				sym("lineA001", 0x1000, 0x1FFF),
				sym("line200A", 0x2000, 0x2FFF),
				sym("lineB00C", 0x3000, 0x3FFF),
			},
		},
		{
			query: "Dumb::operator",
			syms:  nmOutput,
			want: []plugin.Sym{
				sym("Dumb::operator()(char const*) const", 0x3000, 0x3FFF),
			},
		},
	}

	for _, tc := range tests {
		re := regexp.MustCompile(tc.query)
		got, err := findSymbols([]byte(tc.syms), "object.o", re, 0)
		if err != nil {
			t.Fatalf("%q: findSymbols: %v", tc.query, err)
		}
		if cerr := checkSymbol(got, tc.want); cerr != nil {
			t.Errorf("%q: %v", tc.query, cerr)
		}
	}
}
// checkSymbol compares got against want element by element and returns an
// error describing the first difference found (symbol count, number of
// names, an individual name, file, start address, or end address). It
// returns nil when the two lists agree.
func checkSymbol(got []*plugin.Sym, want []plugin.Sym) error {
	if len(got) != len(want) {
		return fmt.Errorf("unexpected number of symbols %d (want %d)", len(got), len(want))
	}

	for i := range got {
		g, w := got[i], &want[i]

		if len(g.Name) != len(w.Name) {
			return fmt.Errorf("names, got %d, want %d", len(g.Name), len(w.Name))
		}
		for n, name := range g.Name {
			if name != w.Name[n] {
				return fmt.Errorf("name %d, got %q, want %q", n, name, w.Name[n])
			}
		}

		switch {
		case g.File != w.File:
			return fmt.Errorf("filename, got %q, want %q", g.File, w.File)
		case g.Start != w.Start:
			return fmt.Errorf("start address, got %#x, want %#x", g.Start, w.Start)
		case g.End != w.End:
			return fmt.Errorf("end address, got %#x, want %#x", g.End, w.End)
		}
	}
	return nil
}
// TestFunctionAssembly feeds hardcoded objdump-style text to disassemble and
// checks the parsed instructions, including the function/file/line context
// picked up from GNU-style ("main():") and LLVM-style ("_main:") markers.
func TestFunctionAssembly(t *testing.T) {
	type testcase struct {
		// s describes the symbol the listing belongs to. It is not consulted
		// by this test; only asm and want are.
		s    plugin.Sym
		asm  string
		want []plugin.Inst
	}
	testcases := []testcase{
		{
			plugin.Sym{Name: []string{"symbol1"}, Start: 0x1000, End: 0x1FFF},
			" 1000: instruction one\n 1001: instruction two\n 1002: instruction three\n 1003: instruction four",
			[]plugin.Inst{
				{Addr: 0x1000, Text: "instruction one"},
				{Addr: 0x1001, Text: "instruction two"},
				{Addr: 0x1002, Text: "instruction three"},
				{Addr: 0x1003, Text: "instruction four"},
			},
		},
		{
			plugin.Sym{Name: []string{"symbol2"}, Start: 0x2000, End: 0x2FFF},
			" 2000: instruction one\n 2001: instruction two",
			[]plugin.Inst{
				{Addr: 0x2000, Text: "instruction one"},
				{Addr: 0x2001, Text: "instruction two"},
			},
		},
		{
			plugin.Sym{Name: []string{"_main"}, Start: 0x30000, End: 0x3FFF},
			"_main:\n; /tmp/hello.c:3\n30001: push %rbp",
			[]plugin.Inst{
				{Addr: 0x30001, Text: "push %rbp", Function: "_main", File: "/tmp/hello.c", Line: 3},
			},
		},
		{
			plugin.Sym{Name: []string{"main"}, Start: 0x4000, End: 0x4FFF},
			"000000000040052d <main>:\nmain():\n/tmp/hello.c:3\n40001: push %rbp",
			[]plugin.Inst{
				{Addr: 0x40001, Text: "push %rbp", Function: "main", File: "/tmp/hello.c", Line: 3},
			},
		},
	}

	for _, tc := range testcases {
		insts, err := disassemble([]byte(tc.asm))
		if err != nil {
			t.Fatalf("FunctionAssembly: %v", err)
		}
		if len(insts) != len(tc.want) {
			t.Errorf("Unexpected number of assembly instructions %d (want %d)\n", len(insts), len(tc.want))
			// Skip the element-wise comparison: indexing tc.want with the
			// indices of a longer insts slice would panic instead of
			// reporting a test failure.
			continue
		}
		for i := range insts {
			if insts[i] != tc.want[i] {
				t.Errorf("Expected symbol %v, got %v\n", tc.want[i], insts[i])
			}
		}
	}
}

View File

@@ -0,0 +1,94 @@
// Copyright 2019 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a script that generates the test executables for macOS, Linux and
// Windows in this directory. It should rarely be necessary to run this script.
// It is mostly provided as a future reference on how the original binary
// set was created.
// When a new executable is generated, hardcoded addresses in the
// functions TestObjFile, TestMachoFiles, TestPEFile in binutils_test.go must be updated.
package main
import (
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// main regenerates the test binaries for the operating system it runs on.
// On Linux and macOS any previously generated outputs are removed first, and
// source paths under the working directory are remapped to /tmp in the debug
// info. On Windows the working-directory prefix is remapped to the empty
// string and nothing is removed beforehand. Every compiler invocation has its
// combined output logged; any failure terminates the program.
func main() {
	wd, err := os.Getwd()
	if err != nil {
		log.Fatal(err)
	}

	// compile runs one compiler command, logs whatever it printed, and
	// aborts if the command failed.
	compile := func(compiler string, args ...string) {
		out, err := exec.Command(compiler, args...).CombinedOutput()
		log.Println(string(out))
		if err != nil {
			log.Fatal(err)
		}
	}
	// clean deletes stale outputs matching the given globs, aborting on a
	// malformed pattern.
	clean := func(globs ...string) {
		if err := removeGlob(globs...); err != nil {
			log.Fatal(err)
		}
	}

	tmpPrefixMap := "-ffile-prefix-map=" + wd + "=" + "/tmp"

	switch goos := runtime.GOOS; goos {
	case "linux":
		clean("exe_linux_64*")
		compile("cc", "-g", tmpPrefixMap, "-o", "exe_linux_64", "hello.c")

	case "darwin":
		clean("exe_mac_64*", "lib_mac_64")
		compile("clang", "-g", tmpPrefixMap, "-o", "exe_mac_64", "hello.c")
		compile("clang", "-g", tmpPrefixMap, "-o", "lib_mac_64", "-dynamiclib", "lib.c")

	case "windows":
		// Many gcc environments may create binaries that trigger false-positives
		// in antiviruses. MSYS2 with gcc 10.2.0 is a working environment for
		// compiling. To setup the environment follow the guide at
		// https://www.msys2.org/ and install gcc with `pacman -S gcc`.
		compile("gcc", "-g", "-ffile-prefix-map="+wd+"=", "-o", "exe_windows_64.exe", "hello.c")
		log.Println("Please verify that exe_windows_64.exe does not trigger any antivirus on `virustotal.com`.")

	default:
		log.Fatalf("Unsupported OS %q", goos)
	}
}
// removeGlob deletes every path matching each of the given glob patterns,
// processing the patterns in order. It returns the error from filepath.Glob
// for the first pattern that cannot be expanded; paths matched by earlier
// patterns have already been removed at that point. Removal itself is
// best-effort: a path that cannot be removed is silently left in place.
func removeGlob(globs ...string) error {
	for _, pattern := range globs {
		paths, err := filepath.Glob(pattern)
		if err != nil {
			return err
		}
		for i := range paths {
			// Deliberately ignore the error so one stubborn entry does not
			// abort the cleanup.
			_ = os.Remove(paths[i])
		}
	}
	return nil
}

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleIdentifier</key>
<string>com.apple.xcode.dsym.exe_mac_64</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundlePackageType</key>
<string>dSYM</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
</dict>
</plist>

Binary file not shown.

View File

@@ -0,0 +1,39 @@
#!/bin/sh
#
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Fake llvm-symbolizer to use in tests
# Disable pathname expansion so that the unquoted ${line} below is only
# split into words and never expanded as a glob.
set -f
# Split input fields on spaces only.
IFS=" "
# Answer one request per input line until stdin is closed.
while read line; do
# line has form:
# kind filename addr
# where kind is CODE or DATA.
# Emit dummy output that matches llvm-symbolizer JSON output format.
set -- ${line}
kind=$1
fname=$2
addr=$3
case ${kind} in
CODE)
# Two frames: an inlined function in ${fname}.h followed by the
# enclosing function in ${fname}.c.
echo "{\"Address\":\"${addr}\",\"ModuleName\":\"${fname}\",\"Symbol\":[{\"Column\":0,\"FileName\":\"${fname}.h\",\"FunctionName\":\"Inlined_${addr}\",\"Line\":0,\"StartLine\":0},{\"Column\":1,\"FileName\":\"${fname}.c\",\"FunctionName\":\"Func_${addr}\",\"Line\":2,\"StartLine\":2}]}"
;;
DATA)
# A data symbol named ${fname}_${addr}, 0x8 bytes, starting at ${addr}.
echo "{\"Address\":\"${addr}\",\"ModuleName\":\"${fname}\",\"Data\":{\"Name\":\"${fname}_${addr}\",\"Size\":\"0x8\",\"Start\":\"${addr}\"}}"
;;
# Any other request kind terminates the fake symbolizer with a failure status.
*) exit 1;;
esac
done

View File

@@ -0,0 +1,6 @@
#include <stdio.h>
int main() { /* Prints a fixed greeting. NOTE(review): presumably the hello.c built with -g by the generator script; tests may depend on its line numbers, so avoid adding lines -- confirm. */
printf("Hello, world!\n");
return 0; /* Always reports success. */
}

View File

@@ -0,0 +1,7 @@
int foo() { /* Returns the constant 1. NOTE(review): presumably the lib.c built with -g into lib_mac_64; avoid adding lines in case tests depend on line numbers -- confirm. */
return 1;
}
int bar() { /* Returns the constant 2; a second function alongside foo in this file. */
return 2;
}

Binary file not shown.

View File

@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleIdentifier</key>
<string>com.apple.xcode.dsym.lib_mac_64</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundlePackageType</key>
<string>dSYM</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
</dict>
</plist>

View File

@@ -0,0 +1 @@
ELF<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>

View File

@@ -0,0 +1 @@
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>