Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

btf: split string table ahead of time #637

Merged
merged 1 commit into from
Apr 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions internal/btf/btf.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type ID uint32
type Spec struct {
// Data from .BTF.
rawTypes []rawType
strings stringTable
strings *stringTable

// Inflated Types.
types []Type
Expand Down Expand Up @@ -447,15 +447,14 @@ func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder {

// parseBTF reads a .BTF section into memory and parses it into a list of
// raw types and a string table.
func parseBTF(btf io.ReaderAt, bo binary.ByteOrder) ([]rawType, stringTable, error) {
func parseBTF(btf io.ReaderAt, bo binary.ByteOrder) ([]rawType, *stringTable, error) {
buf := internal.NewBufferedSectionReader(btf, 0, math.MaxInt64)
header, err := parseBTFHeader(buf, bo)
if err != nil {
return nil, nil, fmt.Errorf("parsing .BTF header: %v", err)
}

buf.Reset(io.NewSectionReader(btf, header.stringStart(), int64(header.StringLen)))
rawStrings, err := readStringTable(buf)
rawStrings, err := readStringTable(io.NewSectionReader(btf, header.stringStart(), int64(header.StringLen)))
if err != nil {
return nil, nil, fmt.Errorf("can't read type names: %w", err)
}
Expand All @@ -474,7 +473,7 @@ type variable struct {
name string
}

func fixupDatasec(rawTypes []rawType, rawStrings stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
func fixupDatasec(rawTypes []rawType, rawStrings *stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
for i, rawType := range rawTypes {
if rawType.Kind() != kindDatasec {
continue
Expand Down Expand Up @@ -580,7 +579,11 @@ func (s *Spec) marshal(opts marshalOpts) ([]byte, error) {
typeLen := uint32(buf.Len() - headerLen)

// Write string section after type section.
_, _ = buf.Write(s.strings)
stringsLen := s.strings.Length()
buf.Grow(stringsLen)
if err := s.strings.Marshal(&buf); err != nil {
return nil, err
}

// Fill out the header, and write it out.
header = &btfHeader{
Expand All @@ -591,7 +594,7 @@ func (s *Spec) marshal(opts marshalOpts) ([]byte, error) {
TypeOff: 0,
TypeLen: typeLen,
StringOff: typeLen,
StringLen: uint32(len(s.strings)),
StringLen: uint32(stringsLen),
}

raw := buf.Bytes()
Expand Down
13 changes: 13 additions & 0 deletions internal/btf/btf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,19 @@ func TestParseCurrentKernelBTF(t *testing.T) {
if len(spec.namedTypes) == 0 {
t.Fatal("Empty kernel BTF")
}

totalBytes := 0
distinct := 0
seen := make(map[string]bool)
for _, str := range spec.strings.strings {
totalBytes += len(str)
if !seen[str] {
distinct++
seen[str] = true
}
}
t.Logf("%d strings total, %d distinct", len(spec.strings.strings), distinct)
t.Logf("Average string size: %.0f", float64(totalBytes)/float64(len(spec.strings.strings)))
}

func TestFindVMLinux(t *testing.T) {
Expand Down
12 changes: 6 additions & 6 deletions internal/btf/ext_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type extInfo struct {
}

// loadExtInfos parses the .BTF.ext section into its constituent parts.
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, strings stringTable) (*extInfo, error) {
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, strings *stringTable) (*extInfo, error) {
// Open unbuffered section reader. binary.Read() calls io.ReadFull on
// the header structs, resulting in one syscall per header.
headerRd := io.NewSectionReader(r, 0, math.MaxInt64)
Expand Down Expand Up @@ -154,7 +154,7 @@ type btfExtInfoSec struct {
// appearing within func_info and line_info sub-sections.
// These headers appear once for each program section in the ELF and are
// followed by one or more func/line_info records for the section.
func parseExtInfoSec(r io.Reader, bo binary.ByteOrder, strings stringTable) (string, *btfExtInfoSec, error) {
func parseExtInfoSec(r io.Reader, bo binary.ByteOrder, strings *stringTable) (string, *btfExtInfoSec, error) {
var infoHeader btfExtInfoSec
if err := binary.Read(r, bo, &infoHeader); err != nil {
return "", nil, fmt.Errorf("read ext info header: %w", err)
Expand Down Expand Up @@ -227,7 +227,7 @@ func (fi *FuncInfo) Marshal(w io.Writer, offset uint64) error {

// parseFuncInfos parses a func_info sub-section within .BTF.ext into a map of
// func infos indexed by section name.
func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string][]bpfFuncInfo, error) {
func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfFuncInfo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
Expand Down Expand Up @@ -374,7 +374,7 @@ func (li LineInfos) Marshal(w io.Writer, offset uint64) error {

// parseLineInfos parses a line_info sub-section within .BTF.ext into a map of
// line infos indexed by section name.
func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string][]bpfLineInfo, error) {
func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfLineInfo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
Expand Down Expand Up @@ -462,7 +462,7 @@ var extInfoReloSize = binary.Size(bpfCORERelo{})

// parseCORERelos parses a core_relos sub-section within .BTF.ext into a map of
// CO-RE relocations indexed by section name.
func parseCORERelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]CORERelos, error) {
func parseCORERelos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string]CORERelos, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
Expand Down Expand Up @@ -494,7 +494,7 @@ func parseCORERelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[
// parseCOREReloRecords parses a stream of CO-RE relocation entries into a
// CORERelos. These records appear after a btf_ext_info_sec header in the
// core_relos sub-section of .BTF.ext.
func parseCOREReloRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, strings stringTable) (CORERelos, error) {
func parseCOREReloRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, strings *stringTable) (CORERelos, error) {
var out CORERelos

var relo bpfCORERelo
Expand Down
6 changes: 5 additions & 1 deletion internal/btf/ext_info_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package btf

import (
"bytes"
"strings"
"testing"

Expand All @@ -9,7 +10,10 @@ import (

func TestParseExtInfoBigRecordSize(t *testing.T) {
rd := strings.NewReader("\xff\xff\xff\xff\x00\x00\x00\x000709171295166016")
table := stringTable("\x00")
table, err := readStringTable(bytes.NewReader([]byte{0}))
if err != nil {
t.Fatal(err)
}

if _, err := parseFuncInfos(rd, internal.NativeEndian, table); err == nil {
t.Error("Parsing func info with large record size doesn't return an error")
Expand Down
6 changes: 5 additions & 1 deletion internal/btf/fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ func FuzzExtInfo(f *testing.F) {
t.Skip("data is too short")
}

table := stringTable(strings)
table, err := readStringTable(bytes.NewReader(strings))
if err != nil {
t.Skip("invalid string table")
}

info, err := loadExtInfos(bytes.NewReader(data), internal.NativeEndian, table)
if err != nil {
if info != nil {
Expand Down
108 changes: 83 additions & 25 deletions internal/btf/strings.go
Original file line number Diff line number Diff line change
@@ -1,54 +1,112 @@
package btf

import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
)

type stringTable []byte
// stringTable is a string table that has been split into its individual
// strings ahead of time. offsets and strings are parallel slices: entry i
// of one describes entry i of the other.
type stringTable struct {
// offsets[i] is the byte offset at which strings[i] starts in the
// serialized table. readStringTable fills it in ascending order, which
// is what allows Lookup to binary-search it.
offsets []uint32
// strings holds the entries in table order, without their NUL
// terminators.
strings []string
}

// sizedReader is implemented by bytes.Reader, io.SectionReader, strings.Reader, etc.
//
// readStringTable reads the table contents through the io.Reader half and
// uses Size only to estimate how many strings to pre-allocate room for.
type sizedReader interface {
io.Reader
// Size is treated as the total size of the input in bytes when
// estimating the number of strings.
Size() int64
}

func readStringTable(r sizedReader) (*stringTable, error) {
// Derived from vmlinux BTF.
const averageStringLength = 16

func readStringTable(r io.Reader) (stringTable, error) {
contents, err := io.ReadAll(r)
if err != nil {
return nil, fmt.Errorf("can't read string table: %v", err)
n := int(r.Size() / averageStringLength)
offsets := make([]uint32, 0, n)
strings := make([]string, 0, n)

offset := uint32(0)
scanner := bufio.NewScanner(r)
scanner.Split(splitNull)
for scanner.Scan() {
str := scanner.Text()
offsets = append(offsets, offset)
strings = append(strings, str)
offset += uint32(len(str)) + 1
}
if err := scanner.Err(); err != nil {
return nil, err
}

if len(contents) < 1 {
if len(strings) == 0 {
return nil, errors.New("string table is empty")
}

if contents[0] != '\x00' {
if strings[0] != "" {
return nil, errors.New("first item in string table is non-empty")
}

if contents[len(contents)-1] != '\x00' {
return nil, errors.New("string table isn't null terminated")
}

return stringTable(contents), nil
return &stringTable{offsets, strings}, nil
}

func (st stringTable) Lookup(offset uint32) (string, error) {
if int64(offset) > int64(^uint(0)>>1) {
return "", fmt.Errorf("offset %d overflows int", offset)
func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) {
i := bytes.IndexByte(data, 0)
if i == -1 {
if atEOF && len(data) > 0 {
return 0, nil, errors.New("string table isn't null terminated")
}
return 0, nil, nil
}

pos := int(offset)
if pos >= len(st) {
return "", fmt.Errorf("offset %d is out of bounds", offset)
}
return i + 1, data[:i], nil
}

if pos > 0 && st[pos-1] != '\x00' {
func (st *stringTable) Lookup(offset uint32) (string, error) {
i := search(st.offsets, offset)
if i == len(st.offsets) || st.offsets[i] != offset {
return "", fmt.Errorf("offset %d isn't start of a string", offset)
}

str := st[pos:]
end := bytes.IndexByte(str, '\x00')
if end == -1 {
return "", fmt.Errorf("offset %d isn't null terminated", offset)
return st.strings[i], nil
}

// Length returns the number of bytes the table occupies when serialized by
// Marshal: the offset of the last string, plus that string's length, plus
// one byte for its NUL terminator.
//
// An empty table has no last entry and reports a length of zero, matching
// Marshal, which writes nothing for it.
func (st *stringTable) Length() int {
	if len(st.offsets) == 0 {
		// Indexing offsets[-1] would panic; an empty table is zero bytes.
		return 0
	}

	last := len(st.offsets) - 1
	return int(st.offsets[last]) + len(st.strings[last]) + 1
}

// Marshal writes the table to w in its serialized form: every string in
// table order, each followed by a single NUL byte.
//
// The first error reported by w is returned unchanged, and nothing further
// is written after it.
func (st *stringTable) Marshal(w io.Writer) error {
	for i := range st.strings {
		if _, err := io.WriteString(w, st.strings[i]); err != nil {
			return err
		}
		// Terminate the entry that was just written.
		if _, err := w.Write([]byte{0}); err != nil {
			return err
		}
	}
	return nil
}

return string(str[:end]), nil
// search returns the smallest index i in ints for which ints[i] >= needle,
// or len(ints) when no element satisfies that. ints must be sorted in
// ascending order for the result to be meaningful.
//
// It is a copy of sort.Search specialised for uint32.
//
// Licensed under https://go.dev/LICENSE
func search(ints []uint32, needle uint32) int {
	// Everything left of lo is known to be < needle; everything at or right
	// of hi is known to be >= needle (with index len(ints) counted as such).
	lo, hi := 0, len(ints)
	for lo < hi {
		// Going through uint keeps the midpoint correct even if lo+hi
		// overflows int. lo <= mid < hi always holds.
		mid := int(uint(lo+hi) >> 1)
		if ints[mid] < needle {
			lo = mid + 1
			continue
		}
		hi = mid
	}
	// lo == hi: the first position whose element is >= needle.
	return lo
}
7 changes: 6 additions & 1 deletion internal/btf/strings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ func TestStringTable(t *testing.T) {
t.Fatal(err)
}

if !bytes.Equal([]byte(in), []byte(st)) {
var buf bytes.Buffer
if err := st.Marshal(&buf); err != nil {
t.Fatal("Can't marshal string table:", err)
}

if !bytes.Equal([]byte(in), buf.Bytes()) {
t.Error("String table doesn't match input")
}

Expand Down
2 changes: 1 addition & 1 deletion internal/btf/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,7 @@ func countFixups(rawTypes []rawType) int {
// Returns a map of named types (so, where NameOff is non-zero) and a slice of types
// indexed by TypeID. Since BTF ignores compilation units, multiple types may share
// the same name. A Type may form a cyclic graph by pointing at itself.
func inflateRawTypes(rawTypes []rawType, rawStrings stringTable) ([]Type, map[essentialName][]Type, error) {
func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable) ([]Type, map[essentialName][]Type, error) {
type fixupDef struct {
id TypeID
expectedKind btfKind
Expand Down