Add utility for getting processes and process group leaders

This commit is contained in:
Patrick Pacher
2023-12-19 10:26:10 +01:00
committed by Daniel
parent ceaf1546d2
commit 30fee07a89
13 changed files with 796 additions and 0 deletions

View File

@@ -0,0 +1,119 @@
// Code generated by bpf2go; DO NOT EDIT.
//go:build arm64be || armbe || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
package ebpf
import (
"bytes"
_ "embed"
"fmt"
"io"
"github.com/cilium/ebpf"
)
// loadBpf returns the embedded CollectionSpec for bpf.
func loadBpf() (*ebpf.CollectionSpec, error) {
	reader := bytes.NewReader(_BpfBytes)
	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
	if err != nil {
		return nil, fmt.Errorf("can't load bpf: %w", err)
	}
	// err is always nil here; returned as-is by the bpf2go template.
	return spec, err
}
// loadBpfObjects loads bpf and converts it into a struct.
//
// The following types are suitable as obj argument:
//
//	*bpfObjects
//	*bpfPrograms
//	*bpfMaps
//
// opts may be nil.
//
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
	spec, err := loadBpf()
	if err != nil {
		return err
	}
	return spec.LoadAndAssign(obj, opts)
}
// bpfSpecs contains maps and programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
// Embedding both spec sets lets a single Assign call populate everything.
type bpfSpecs struct {
	bpfProgramSpecs
	bpfMapSpecs
}
// bpfProgramSpecs contains programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type bpfProgramSpecs struct {
	EnterExecve *ebpf.ProgramSpec `ebpf:"enter_execve"`
}
// bpfMapSpecs contains maps before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type bpfMapSpecs struct {
	PmExecMap *ebpf.MapSpec `ebpf:"pm_exec_map"`
}
// bpfObjects contains all objects after they have been loaded into the kernel.
//
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
type bpfObjects struct {
	bpfPrograms
	bpfMaps
}

// Close releases all programs and maps held by o. It stops at the first
// error encountered (see _BpfClose).
func (o *bpfObjects) Close() error {
	return _BpfClose(
		&o.bpfPrograms,
		&o.bpfMaps,
	)
}
// bpfMaps contains all maps after they have been loaded into the kernel.
//
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
type bpfMaps struct {
	PmExecMap *ebpf.Map `ebpf:"pm_exec_map"`
}

// Close releases all maps held by m.
func (m *bpfMaps) Close() error {
	return _BpfClose(
		m.PmExecMap,
	)
}
// bpfPrograms contains all programs after they have been loaded into the kernel.
//
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
type bpfPrograms struct {
	EnterExecve *ebpf.Program `ebpf:"enter_execve"`
}

// Close releases all programs held by p.
func (p *bpfPrograms) Close() error {
	return _BpfClose(
		p.EnterExecve,
	)
}
// _BpfClose closes all given closers in order, returning on the first error
// encountered; any remaining closers are then left unclosed.
func _BpfClose(closers ...io.Closer) error {
	for _, closer := range closers {
		if err := closer.Close(); err != nil {
			return err
		}
	}
	return nil
}
// _BpfBytes holds the compiled eBPF ELF object embedded at build time
// (bpf_bpfeb.o; "eb" matches the big-endian build tags at the top of this
// file). Do not access this directly.
//
//go:embed bpf_bpfeb.o
var _BpfBytes []byte

View File

@@ -0,0 +1,119 @@
// Code generated by bpf2go; DO NOT EDIT.
//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mips64le || mips64p32le || mipsle || ppc64le || riscv64
package ebpf
import (
"bytes"
_ "embed"
"fmt"
"io"
"github.com/cilium/ebpf"
)
// loadBpf returns the embedded CollectionSpec for bpf.
func loadBpf() (*ebpf.CollectionSpec, error) {
	reader := bytes.NewReader(_BpfBytes)
	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
	if err != nil {
		return nil, fmt.Errorf("can't load bpf: %w", err)
	}
	// err is always nil here; returned as-is by the bpf2go template.
	return spec, err
}
// loadBpfObjects loads bpf and converts it into a struct.
//
// The following types are suitable as obj argument:
//
//	*bpfObjects
//	*bpfPrograms
//	*bpfMaps
//
// opts may be nil.
//
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
	spec, err := loadBpf()
	if err != nil {
		return err
	}
	return spec.LoadAndAssign(obj, opts)
}
// bpfSpecs contains maps and programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
// Embedding both spec sets lets a single Assign call populate everything.
type bpfSpecs struct {
	bpfProgramSpecs
	bpfMapSpecs
}
// bpfProgramSpecs contains programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type bpfProgramSpecs struct {
	EnterExecve *ebpf.ProgramSpec `ebpf:"enter_execve"`
}
// bpfMapSpecs contains maps before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type bpfMapSpecs struct {
	PmExecMap *ebpf.MapSpec `ebpf:"pm_exec_map"`
}
// bpfObjects contains all objects after they have been loaded into the kernel.
//
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
type bpfObjects struct {
	bpfPrograms
	bpfMaps
}

// Close releases all programs and maps held by o. It stops at the first
// error encountered (see _BpfClose).
func (o *bpfObjects) Close() error {
	return _BpfClose(
		&o.bpfPrograms,
		&o.bpfMaps,
	)
}
// bpfMaps contains all maps after they have been loaded into the kernel.
//
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
type bpfMaps struct {
	PmExecMap *ebpf.Map `ebpf:"pm_exec_map"`
}

// Close releases all maps held by m.
func (m *bpfMaps) Close() error {
	return _BpfClose(
		m.PmExecMap,
	)
}
// bpfPrograms contains all programs after they have been loaded into the kernel.
//
// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
type bpfPrograms struct {
	EnterExecve *ebpf.Program `ebpf:"enter_execve"`
}

// Close releases all programs held by p.
func (p *bpfPrograms) Close() error {
	return _BpfClose(
		p.EnterExecve,
	)
}
// _BpfClose closes all given closers in order, returning on the first error
// encountered; any remaining closers are then left unclosed.
func _BpfClose(closers ...io.Closer) error {
	for _, closer := range closers {
		if err := closer.Close(); err != nil {
			return err
		}
	}
	return nil
}
// _BpfBytes holds the compiled eBPF ELF object embedded at build time
// (bpf_bpfel.o; "el" matches the little-endian build tags at the top of this
// file). Do not access this directly.
//
//go:embed bpf_bpfel.o
var _BpfBytes []byte

View File

@@ -0,0 +1,246 @@
package ebpf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"runtime"
"runtime/debug"
"strings"
"sync"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/ringbuf"
"github.com/cilium/ebpf/rlimit"
"github.com/hashicorp/go-multierror"
"github.com/safing/portbase/log"
"golang.org/x/sys/unix"
)
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -cflags "-O2 -g -Wall -Werror" bpf ../programs/exec.c
// These constants mirror ARGLEN and ARGSIZE in the eBPF C program compiled by
// the go:generate directive above (../programs/exec.c) and must be kept in
// sync with it.
const (
	arglen  = 32
	argsize = 1024
)

// errTracerClosed is returned by Close when the tracer has already been
// closed; the finalizer set in New uses it to detect a proper shutdown.
var errTracerClosed = errors.New("tracer is closed")
// event contains details about each exec call, sent from the eBPF program to
// userspace through a BPF ring buffer (pm_exec_map). This type must be kept
// in sync with `event_t` in the eBPF C program — same field order and sizes —
// because Read decodes it directly with binary.Read.
type event struct {
	// Details about the process being launched.
	Filename [argsize]byte
	Argv     [arglen][argsize]byte
	// Argc is set to arglen+1 by the eBPF program to signal that there were
	// more arguments than fit into Argv.
	Argc uint32
	UID  uint32
	GID  uint32
	PID  uint32
	// Name of the calling process.
	Comm [argsize]byte
}
// Event contains data about each exec event with many fields for easy
// filtering and logging. It is the parsed, userspace-friendly form of the
// raw `event` struct received from the kernel.
type Event struct {
	Filename string `json:"filename"`
	// Argv contains the raw argv supplied to the process, including argv[0]
	// (which is equal to `filepath.Base(e.Filename)` in most circumstances).
	Argv []string `json:"argv"`
	// Truncated is true if we were unable to read all process arguments into
	// Argv because there were more than ARGLEN (the `arglen` constant)
	// arguments.
	Truncated bool `json:"truncated"`
	// These values are of the new process. Keep in mind that the exec call may
	// fail and the PID will be released in such a case.
	PID uint32 `json:"pid"`
	UID uint32 `json:"uid"`
	GID uint32 `json:"gid"`
	// Comm is the "name" of the parent process, usually the filename of the
	// executable (but not always).
	Comm string `json:"comm"`
}
// Tracer streams exec events from the kernel. It bundles the loaded eBPF
// objects, the tracepoint attachment and the ring buffer reader, and must be
// closed with Close to release the kernel resources.
type Tracer struct {
	objs bpfObjects      // loaded eBPF programs and maps
	tp   link.Link       // attachment to the syscalls/sys_enter_execve tracepoint
	rb   *ringbuf.Reader // reader for the pm_exec_map ring buffer

	closeLock sync.Mutex    // serializes Close calls
	closed    chan struct{} // closed (the channel) once Close has run
}
// New instantiates all of the BPF objects into the running kernel, starts
// tracing, and returns the created Tracer. After calling this successfully, the
// caller should immediately attach a for loop running `t.Read()`.
//
// The returned Tracer MUST be closed when not needed anymore otherwise kernel
// resources may be leaked.
func New() (*Tracer, error) {
	t := &Tracer{
		tp:        nil,
		rb:        nil,
		closeLock: sync.Mutex{},
		closed:    make(chan struct{}),
	}
	if err := loadBpfObjects(&t.objs, nil); err != nil {
		return nil, fmt.Errorf("ebpf: failed to load ebpf object: %w", err)
	}
	if err := t.start(); err != nil {
		// Best effort. Note that start() already attempted its own cleanup;
		// a second Close here is harmless (it returns errTracerClosed).
		_ = t.Close()
		return nil, fmt.Errorf("start tracer: %w", err)
	}
	// It could be very bad if someone forgot to close this, so we'll try to
	// detect when it doesn't get closed and log a warning. Close clears this
	// finalizer again via runtime.SetFinalizer(t, nil).
	stack := debug.Stack()
	runtime.SetFinalizer(t, func(t *Tracer) {
		err := t.Close()
		if errors.Is(err, errTracerClosed) {
			// Tracer was closed properly before being finalized — all good.
			return
		}
		log.Infof("tracer was finalized but was not closed, created at: %s", stack)
		log.Infof("tracers must be closed when finished with to avoid leaked kernel resources")
		if err != nil {
			log.Errorf("closing tracer failed: %+v", err)
		}
	})
	return t, nil
}
// start loads the eBPF programs and maps into the kernel and starts them.
// You should immediately attach a for loop running `t.Read()` after calling
// this successfully.
func (t *Tracer) start() error {
	// If we don't startup successfully, we need to make sure all of the
	// stuff is cleaned up properly or we'll be leaking kernel resources.
	ok := false
	defer func() {
		if !ok {
			// Best effort.
			_ = t.Close()
		}
	}()
	// Allow the current process to lock memory for eBPF resources. This
	// does nothing on 5.11+ kernels which don't need this.
	err := rlimit.RemoveMemlock()
	if err != nil {
		return fmt.Errorf("remove memlock: %w", err)
	}
	// Attach the eBPF program to the `sys_enter_execve` tracepoint, which
	// is triggered at the beginning of each `execve()` syscall.
	t.tp, err = link.Tracepoint("syscalls", "sys_enter_execve", t.objs.EnterExecve, nil)
	if err != nil {
		return fmt.Errorf("open tracepoint: %w", err)
	}
	// Create the reader for the event ringbuf.
	t.rb, err = ringbuf.NewReader(t.objs.PmExecMap)
	if err != nil {
		return fmt.Errorf("open ringbuf reader: %w", err)
	}
	ok = true
	return nil
}
// Read reads an event from the eBPF program via the ringbuf, parses it and
// returns it. If the tracer is closed during the blocked call, an error that
// wraps io.EOF is returned.
func (t *Tracer) Read() (*Event, error) {
	rb := t.rb
	if rb == nil {
		return nil, errors.New("ringbuf reader is not initialized, tracer may not be open or may have been closed")
	}
	record, err := rb.Read()
	if err != nil {
		if errors.Is(err, ringbuf.ErrClosed) {
			return nil, fmt.Errorf("tracer closed: %w", io.EOF)
		}
		return nil, fmt.Errorf("read from ringbuf: %w", err)
	}
	// Parse the ringbuf event entry into an event structure. Use a Reader
	// (not a Buffer): the raw sample is only ever read here.
	var rawEvent event
	err = binary.Read(bytes.NewReader(record.RawSample), binary.NativeEndian, &rawEvent)
	if err != nil {
		return nil, fmt.Errorf("parse raw ringbuf entry into event struct: %w", err)
	}
	// Copy only the args we're allowed to read from the array. If we read more
	// than rawEvent.Argc, we could be copying non-zeroed memory.
	argc := int(rawEvent.Argc)
	if argc > arglen {
		argc = arglen
	}
	// Preallocate; still non-nil when empty so it JSON-encodes as [].
	argv := make([]string, 0, argc)
	for i := 0; i < argc; i++ {
		str := unix.ByteSliceToString(rawEvent.Argv[i][:])
		// Skip empty/whitespace-only entries.
		if strings.TrimSpace(str) != "" {
			argv = append(argv, str)
		}
	}
	return &Event{
		Filename: unix.ByteSliceToString(rawEvent.Filename[:]),
		Argv:     argv,
		// The eBPF program sets Argc to arglen+1 to signal that Argv was
		// truncated because there were more than arglen arguments.
		Truncated: rawEvent.Argc == arglen+1,
		PID:       rawEvent.PID,
		UID:       rawEvent.UID,
		GID:       rawEvent.GID,
		Comm:      unix.ByteSliceToString(rawEvent.Comm[:]),
	}, nil
}
// Close gracefully closes and frees all resources associated with the eBPF
// tracepoints, maps and other resources. Any blocked `Read()` operations will
// return an error that wraps `io.EOF`. Calling Close again returns
// errTracerClosed. Safe for concurrent use (guarded by closeLock).
func (t *Tracer) Close() error {
	t.closeLock.Lock()
	defer t.closeLock.Unlock()
	// Non-blocking check whether Close already ran.
	select {
	case <-t.closed:
		return errTracerClosed
	default:
	}
	close(t.closed)
	// The leak-detection finalizer set in New is no longer needed.
	runtime.SetFinalizer(t, nil)
	// Close everything started in t.start() in reverse order, collecting all
	// errors instead of stopping at the first one.
	var merr error
	if t.rb != nil {
		err := t.rb.Close()
		if err != nil {
			merr = multierror.Append(merr, fmt.Errorf("close ringbuf reader: %w", err))
		}
	}
	if t.tp != nil {
		err := t.tp.Close()
		if err != nil {
			merr = multierror.Append(merr, fmt.Errorf("close tracepoint: %w", err))
		}
	}
	err := t.objs.Close()
	if err != nil {
		merr = multierror.Append(merr, fmt.Errorf("close eBPF objects: %w", err))
	}
	return merr
}

View File

@@ -0,0 +1,116 @@
#include "vmlinux-x86.h"
#include "bpf/bpf_helpers.h"
#include "bpf/bpf_tracing.h"
#define ARGLEN 32    // maximum amount of args in argv we'll copy
#define ARGSIZE 1024 // maximum byte length of each arg in argv we'll copy
// NOTE: ARGLEN/ARGSIZE must stay in sync with the `arglen`/`argsize`
// constants on the Go side.

char __license[] SEC("license") = "GPL";

// Ring buffer through which exec events are sent to userspace; the Go side
// opens this map ("pm_exec_map") with a ringbuf reader.
struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 1 << 24); // 16 MiB ring buffer
} pm_exec_map SEC(".maps");
// This struct is defined according to
// /sys/kernel/debug/tracing/events/syscalls/sys_enter_execve/format
// (the raw tracepoint context layout; offsets/sizes noted per field).
struct exec_info {
	u16 common_type;         // offset=0, size=2
	u8 common_flags;         // offset=2, size=1
	u8 common_preempt_count; // offset=3, size=1
	s32 common_pid;          // offset=4, size=4

	s32 syscall_nr; // offset=8, size=4
	u32 pad;        // offset=12, size=4 (pad)

	const u8 *filename;    // offset=16, size=8 (ptr)
	const u8 *const *argv; // offset=24, size=8 (ptr)
	const u8 *const *envp; // offset=32, size=8 (ptr)
};
// The event struct. This struct must be kept in sync with the Golang
// counterpart (the `event` struct on the Go side): same field order and
// sizes, since userspace decodes it directly with binary.Read.
struct event_t {
	// Details about the process being launched.
	u8 filename[ARGSIZE];
	u8 argv[ARGLEN][ARGSIZE];
	u32 argc; // set to ARGLEN + 1 if there were more than ARGLEN arguments
	u32 uid;
	u32 gid;
	u32 pid;

	// Name of the calling process.
	u8 comm[ARGSIZE];
};
// Tracepoint at the top of execve() syscall. Fills an event_t with details
// about the process being launched and submits it to the pm_exec_map ring
// buffer for userspace to consume.
SEC("tracepoint/syscalls/sys_enter_execve")
s32 enter_execve(struct exec_info *ctx) {
	// Reserve memory for our event on the `pm_exec_map` ring buffer defined
	// above.
	struct event_t *event;
	event = bpf_ringbuf_reserve(&pm_exec_map, sizeof(struct event_t), 0);
	if (!event) {
		bpf_printk("could not reserve ringbuf memory");
		return 1;
	}

	// Memory returned by bpf_ringbuf_reserve is not guaranteed to be zeroed,
	// so initialize the counter before incrementing it below.
	event->argc = 0;

	// Store process/calling process details.
	u64 uidgid = bpf_get_current_uid_gid();
	u64 pidtgid = bpf_get_current_pid_tgid();
	event->uid = uidgid;       // uid is the first 32 bits
	event->gid = uidgid >> 32; // gid is the last 32 bits NOLINT(readability-magic-numbers)
	event->pid = pidtgid;      // pid is the first 32 bits
	s32 ret = bpf_get_current_comm(&event->comm, sizeof(event->comm));
	if (ret) {
		bpf_printk("could not get current comm: %d", ret);
		bpf_ringbuf_discard(event, 0);
		return 1;
	}

	// Write the filename in addition to argv[0] because the filename contains
	// the full path to the file which could be more useful in some situations.
	ret = bpf_probe_read_user_str(event->filename, sizeof(event->filename), ctx->filename);
	if (ret < 0) {
		bpf_printk("could not read filename into event struct: %d", ret);
		bpf_ringbuf_discard(event, 0);
		return 1;
	}

	// Copy everything from ctx->argv to event->argv, incrementing event->argc
	// as we go. (A former `!(&ctx->argv[i])` guard was removed: the address of
	// an array element is never NULL, so it could never fire — the argv NULL
	// terminator is detected via `argp` below instead.)
	for (s32 i = 0; i < ARGLEN; i++) {
		// Copying the arg into its own variable before copying it into
		// event->argv[i] prevents memory corruption.
		const u8 *argp = NULL;
		ret = bpf_probe_read_user(&argp, sizeof(argp), &ctx->argv[i]);
		if (ret || !argp) {
			// End of argv (or unreadable): argc is accurate, submit as-is.
			goto out;
		}

		// Copy argp to event->argv[i].
		ret = bpf_probe_read_user_str(event->argv[i], sizeof(event->argv[i]), argp);
		if (ret < 0) {
			bpf_printk("read argv %d: %d", i, ret);
			goto out;
		}
		event->argc++;
	}

	// We copied ARGLEN arguments. Signal truncation to userspace by setting
	// argc to ARGLEN + 1 — but only if there actually is at least one more
	// argument. (Previously this was done unconditionally, falsely reporting
	// truncation for processes with exactly ARGLEN arguments.)
	{
		const u8 *extra = NULL;
		ret = bpf_probe_read_user(&extra, sizeof(extra), &ctx->argv[ARGLEN]);
		if (!ret && extra) {
			event->argc++;
		}
	}

out:
	// Write the event to the ring buffer and notify userspace. This will cause
	// the `Read()` call in userspace to return if it was blocked.
	bpf_ringbuf_submit(event, 0);
	return 0;
}