Add support for cmdline matching and add basic interpreter support

This commit is contained in:
Patrick Pacher
2022-10-10 15:28:57 +02:00
committed by Daniel
parent b3007b71db
commit 77c0d954a9
11 changed files with 338 additions and 66 deletions

1
go.mod
View File

@@ -10,6 +10,7 @@ require (
github.com/ghodss/yaml v1.0.0
github.com/godbus/dbus/v5 v5.1.0
github.com/google/gopacket v1.1.19
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/hashicorp/go-multierror v1.1.1
github.com/hashicorp/go-version v1.6.0
github.com/jackc/puddle/v2 v2.0.0-beta.1

2
go.sum
View File

@@ -92,6 +92,8 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8=
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=

View File

@@ -193,6 +193,20 @@ func (db *Database) ApplyMigrations() error {
return fmt.Errorf("failed to create schema: %w", err)
}
// create a few indexes
indexes := []string{
`CREATE INDEX profile_id_index ON %s (profile)`,
`CREATE INDEX started_time_index ON %s (strftime('%%s', started)+0)`,
`CREATE INDEX started_ended_time_index ON %s (strftime('%%s', started)+0, strftime('%%s', ended)+0) WHERE ended IS NOT NULL`,
}
for _, idx := range indexes {
stmt := fmt.Sprintf(idx, db.Schema.Name)
if err := sqlitex.ExecuteTransient(db.writeConn, stmt, nil); err != nil {
return fmt.Errorf("failed to create index: %q: %w", idx, err)
}
}
return nil
}

View File

@@ -101,6 +101,7 @@ func (m *module) start() error {
if err != nil {
return fmt.Errorf("failed to subscribe to network tree: %w", err)
}
defer close(m.feed)
defer func() {
_ = sub.Cancel()
}()
@@ -162,7 +163,6 @@ func (m *module) start() error {
}
func (m *module) stop() error {
close(m.feed)
return nil
}

6
package-lock.json generated Normal file
View File

@@ -0,0 +1,6 @@
{
"name": "workspace",
"lockfileVersion": 2,
"requires": true,
"packages": {}
}

View File

@@ -68,6 +68,15 @@ type Process struct {
ExecHashes map[string]string
}
// GetTag returns the first tag of the process whose key equals tagID.
// The boolean result reports whether such a tag was found; when it is false
// the returned tag is the zero value.
func (p *Process) GetTag(tagID string) (profile.Tag, bool) {
	for i := range p.Tags {
		if p.Tags[i].Key != tagID {
			continue
		}
		return p.Tags[i], true
	}
	return profile.Tag{}, false
}
// Profile returns the assigned layered profile.
func (p *Process) Profile() *profile.LayeredProfile {
if p == nil {
@@ -226,11 +235,13 @@ func loadProcess(ctx context.Context, pid int) (*Process, error) {
_, process.ExecName = filepath.Split(process.Path)
// Current working directory
// net yet implemented for windows
// new.Cwd, err = pInfo.Cwd()
// if err != nil {
// log.Warningf("process: failed to get Cwd: %w", err)
// }
// not yet implemented for windows
if runtime.GOOS != "windows" {
process.Cwd, err = pInfo.Cwd()
if err != nil {
log.Warningf("process: failed to get Cwd: %w", err)
}
}
// Command line arguments
process.CmdLine, err = pInfo.CmdlineWithContext(ctx)
@@ -292,3 +303,6 @@ func (md *MatchingData) Path() string { return md.p.Path }
// MatchingPath returns process.MatchingPath.
func (md *MatchingData) MatchingPath() string { return md.p.MatchingPath }
// Cmdline returns process.CmdLine, the command line of the process.
func (md *MatchingData) Cmdline() string { return md.p.CmdLine }

View File

@@ -69,22 +69,21 @@ func (h *AppImageHandler) AddTags(p *process.Process) {
// CreateProfile creates a profile based on the tags of the process.
// Returns nil to skip.
func (h *AppImageHandler) CreateProfile(p *process.Process) *profile.Profile {
for _, tag := range p.Tags {
if tag.Key == appImagePathTagKey {
return profile.New(&profile.Profile{
Source: profile.SourceLocal,
Name: osdetail.GenerateBinaryNameFromPath(tag.Value),
PresentationPath: p.Path,
UsePresentationPath: true,
Fingerprints: []profile.Fingerprint{
{
Type: profile.FingerprintTypePathID,
Operation: profile.FingerprintOperationEqualsID,
Value: tag.Value, // Value of appImagePathTagKey.
},
if tag, ok := p.GetTag(appImagePathTagKey); ok {
return profile.New(&profile.Profile{
Source: profile.SourceLocal,
Name: osdetail.GenerateBinaryNameFromPath(tag.Value),
PresentationPath: p.Path,
UsePresentationPath: true,
Fingerprints: []profile.Fingerprint{
{
Type: profile.FingerprintTypePathID,
Operation: profile.FingerprintOperationEqualsID,
Value: tag.Value, // Value of appImagePathTagKey.
},
})
}
},
})
}
return nil
}

View File

@@ -0,0 +1,220 @@
package tags
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"unicode/utf8"
"github.com/google/shlex"
"github.com/safing/portmaster/process"
"github.com/safing/portmaster/profile"
)
// init registers the interpreter tag handler with the process package.
// A failed registration aborts start-up with a panic.
func init() {
	err := process.RegisterTagHandler(&InterpHandler{})
	if err != nil {
		panic(err)
	}
}
// interpType describes a known script interpreter: the tag description that
// is advertised for it and the pattern used to recognize its binary.
type interpType struct {
process.TagDescription
// Regex is matched against the process path and against the first command
// line argument to detect the interpreter binary.
Regex *regexp.Regexp
}
// knownInterperters lists the interpreters recognized by InterpHandler. Each
// entry carries the tag description reported for the interpreter and a regular
// expression that matches the path of the interpreter binary in /bin or
// /usr/bin.
//
// NOTE(review): the Python pattern requires a minor version suffix (for
// example /usr/bin/python3.10); a plain /usr/bin/python3 does not match.
// Presumably the process path is already symlink-resolved - confirm.
var knownInterperters = []interpType{
{
TagDescription: process.TagDescription{
ID: "python-script",
Name: "Python Script",
},
Regex: regexp.MustCompile(`^(/usr)?/bin/python[23]\.[0-9]+$`),
},
{
TagDescription: process.TagDescription{
ID: "shell-script",
Name: "Shell Script",
},
Regex: regexp.MustCompile(`^(/usr)?/bin/(ba|k|z|a)?sh$`),
},
{
TagDescription: process.TagDescription{
ID: "perl-script",
Name: "Perl Script",
},
Regex: regexp.MustCompile(`^(/usr)?/bin/perl$`),
},
{
TagDescription: process.TagDescription{
ID: "ruby-script",
Name: "Ruby Script",
},
Regex: regexp.MustCompile(`^(/usr)?/bin/ruby$`),
},
{
TagDescription: process.TagDescription{
ID: "nodejs-script",
Name: "NodeJS Script",
},
Regex: regexp.MustCompile(`^(/usr)?/bin/node(js)?$`),
},
/*
While similar to nodejs, electron is a bit harder as it uses multiple processes
like Chromium and thus an interpreter match on them will put those processes into
different groups.
I'm still not sure how this could work in the future. Maybe processes should try to
inherit the profile of the parents if there is no profile that matches the current one....
{
TagDescription: process.TagDescription{
ID: "electron-app",
Name: "Electron App",
},
Regex: regexp.MustCompile(`^(/usr)?/bin/electron([0-9]+)?$`),
},
*/
}
// fileMustBeUTF8 reports whether the file at path looks like a UTF-8 encoded
// text file. Only the first 128 bytes of the file are inspected.
//
// It returns false if the file cannot be opened, if it is empty, or if the
// inspected chunk contains an invalid UTF-8 sequence. A multi-byte rune that
// is merely cut off by the end of the inspected chunk is not treated as
// invalid, and a correctly encoded U+FFFD replacement character is accepted.
func fileMustBeUTF8(path string) bool {
	f, err := os.Open(path)
	if err != nil {
		return false
	}
	defer f.Close()

	// Number of leading bytes that are inspected.
	const sniffLen = 128

	// Read the first chunk of bytes.
	buf := new(bytes.Buffer)
	size, copyErr := io.CopyN(buf, f, sniffLen)
	if size == 0 {
		return false
	}

	// io.CopyN only returns a nil error if the full chunk was read. In that
	// case the file may continue and the last rune of the chunk may be cut
	// in half by the chunk boundary.
	chunkFull := copyErr == nil

	b := buf.Bytes()[:size]
	for len(b) > 0 {
		r, runeSize := utf8.DecodeRune(b)
		// A width of 1 together with RuneError signals an encoding error;
		// a properly encoded U+FFFD has a width of 3 and is valid.
		if r == utf8.RuneError && runeSize == 1 {
			// utf8.FullRune is only false for a valid but incomplete
			// prefix of a multi-byte rune, which can only occur at the
			// very end of the chunk.
			return chunkFull && !utf8.FullRune(b)
		}
		b = b[runeSize:]
	}

	return true
}
// InterpHandler is a tag handler that tags processes executed by a known
// script interpreter and creates profiles based on those tags.
type InterpHandler struct{}
// Name returns the name of the handler.
func (h *InterpHandler) Name() string {
return "Interpreter"
}
// TagDescriptions returns the tag descriptions of all known interpreters, in
// the order in which they are listed in knownInterperters.
func (h *InterpHandler) TagDescriptions() []process.TagDescription {
	descriptions := make([]process.TagDescription, 0, len(knownInterperters))
	for i := range knownInterperters {
		descriptions = append(descriptions, knownInterperters[i].TagDescription)
	}
	return descriptions
}
// CreateProfile creates a profile based on the interpreter tags of the process.
// Returns nil to skip.
//
// The profile is created for the first known interpreter for which the
// process carries a tag. The profile name is built from the interpreter name
// and the first command line argument that is not the interpreter itself; the
// fingerprint matches the tag key and value.
func (h *InterpHandler) CreateProfile(p *process.Process) *profile.Profile {
	for _, it := range knownInterperters {
		tag, ok := p.GetTag(it.ID)
		if !ok {
			continue
		}

		// The command line was already split successfully when the tag was
		// added, so a failure here is not expected. An empty, blank or
		// comment-only command line yields no arguments without an error,
		// so guard against that as well before indexing into args.
		args, err := shlex.Split(p.CmdLine)
		if err != nil || len(args) == 0 {
			return nil
		}

		// If arg0 is the interpreter itself, strip it away and use the
		// next argument for the profile name.
		if len(args) > 1 && it.Regex.MatchString(args[0]) {
			args = args[1:]
		}

		return profile.New(&profile.Profile{
			Source:              profile.SourceLocal,
			Name:                fmt.Sprintf("%s: %s", it.Name, args[0]),
			PresentationPath:    tag.Value,
			UsePresentationPath: true,
			Fingerprints: []profile.Fingerprint{
				{
					Type:      profile.FingerprintTypeTagID,
					Operation: profile.FingerprintOperationEqualsID,
					Key:       it.ID,
					Value:     tag.Value,
				},
			},
		})
	}

	return nil
}
// AddTags adds an interpreter tag to the process if its path matches one of
// the known interpreters.
//
// If the first command line argument is the interpreter itself, the second
// argument is taken as the script path (resolved against the working
// directory of the process when relative) and the tag is only added if that
// file looks like UTF-8 text. Otherwise the first argument is used as the
// tag value as-is. No tag is added if the command line cannot be parsed or
// contains no arguments.
func (h *InterpHandler) AddTags(p *process.Process) {
	// Check if we have a matching interpreter. If several patterns match,
	// the last one in knownInterperters wins.
	var matched interpType
	for _, it := range knownInterperters {
		if it.Regex.MatchString(p.Path) {
			matched = it
		}
	}

	// Zero value means we did not find any interpreter matches.
	if matched.ID == "" {
		return
	}

	// Give up if we failed to parse the command line or if it does not
	// contain any argument; shlex.Split returns an empty slice without an
	// error for an empty, blank or comment-only command line.
	args, err := shlex.Split(p.CmdLine)
	if err != nil || len(args) == 0 {
		return
	}

	// We know that this process is interpreted by some known interpreter
	// but args[0] does not contain the path to the interpreter.
	if !matched.Regex.MatchString(args[0]) {
		p.Tags = append(p.Tags, profile.Tag{
			Key:   matched.ID,
			Value: args[0],
		})
		return
	}

	// args[0] is the interpreter, so we expect the second argument to be a
	// file name. Without one this is likely an interactive interpreter
	// session.
	if len(args) == 1 {
		return
	}

	filePath := args[1]
	if !filepath.IsAbs(filePath) {
		filePath = filepath.Join(p.Cwd, filePath)
	}

	// TODO(ppacher): there could be some other arguments as well
	// so it may be better to scan the whole command line for a path to a UTF8
	// file and use that one.
	if !fileMustBeUTF8(filePath) {
		return
	}

	p.Tags = append(p.Tags, profile.Tag{
		Key:   matched.ID,
		Value: filePath,
	})
}

View File

@@ -77,24 +77,23 @@ func (h *SVCHostTagHandler) AddTags(p *process.Process) {
// CreateProfile creates a profile based on the tags of the process.
// Returns nil to skip.
func (h *SVCHostTagHandler) CreateProfile(p *process.Process) *profile.Profile {
for _, tag := range p.Tags {
if tag.Key == svchostTagKey {
return profile.New(&profile.Profile{
Source: profile.SourceLocal,
Name: "Windows Service: " + osdetail.GenerateBinaryNameFromPath(tag.Value),
Icon: `C:\Windows\System32\@WLOGO_48x48.png`,
IconType: profile.IconTypeFile,
UsePresentationPath: false,
Fingerprints: []profile.Fingerprint{
profile.Fingerprint{
Type: profile.FingerprintTypeTagID,
Key: tag.Key,
Operation: profile.FingerprintOperationEqualsID,
Value: tag.Value,
},
if tag, ok := p.GetTag(svchostTagKey); ok {
return profile.New(&profile.Profile{
Source: profile.SourceLocal,
Name: "Windows Service: " + osdetail.GenerateBinaryNameFromPath(tag.Value),
Icon: `C:\Windows\System32\@WLOGO_48x48.png`,
IconType: profile.IconTypeFile,
UsePresentationPath: false,
Fingerprints: []profile.Fingerprint{
profile.Fingerprint{
Type: profile.FingerprintTypeTagID,
Key: tag.Key,
Operation: profile.FingerprintOperationEqualsID,
Value: tag.Value,
},
})
}
},
})
}
return nil
}

View File

@@ -101,23 +101,22 @@ func (h *WinStoreHandler) AddTags(p *process.Process) {
// CreateProfile creates a profile based on the tags of the process.
// Returns nil to skip.
func (h *WinStoreHandler) CreateProfile(p *process.Process) *profile.Profile {
for _, tag := range p.Tags {
if tag.Key == winStoreAppNameTagKey {
return profile.New(&profile.Profile{
Source: profile.SourceLocal,
Name: osdetail.GenerateBinaryNameFromPath(tag.Value),
PresentationPath: p.Path,
UsePresentationPath: true,
Fingerprints: []profile.Fingerprint{
{
Type: profile.FingerprintTypeTagID,
Key: tag.Key,
Operation: profile.FingerprintOperationEqualsID,
Value: tag.Value, // Value of appImagePathTagKey.
},
if tag, ok := p.GetTag(winStoreAppNameTagKey); ok {
return profile.New(&profile.Profile{
Source: profile.SourceLocal,
Name: osdetail.GenerateBinaryNameFromPath(tag.Value),
PresentationPath: p.Path,
UsePresentationPath: true,
Fingerprints: []profile.Fingerprint{
{
Type: profile.FingerprintTypeTagID,
Key: tag.Key,
Operation: profile.FingerprintOperationEqualsID,
Value: tag.Value, // Value of appImagePathTagKey.
},
})
}
},
})
}
return nil
}

View File

@@ -11,10 +11,11 @@ import (
// There are three levels:
//
// 1. Type: What matched?
// 1. Tag: 40.000 points
// 2. Env: 30.000 points
// 3. MatchingPath: 20.000 points
// 4. Path: 10.000 points
// 1. Tag: 50.000 points
// 2. Cmdline: 40.000 points
// 3. Env: 30.000 points
// 4. MatchingPath: 20.000 points
// 5. Path: 10.000 points
// 2. Operation: How was it matched?
// 1. Equals: 3.000 points
// 2. Prefix: 2.000 points
@@ -32,15 +33,17 @@ import (
// Fingerprint Type IDs.
const (
FingerprintTypeTagID = "tag"
FingerprintTypeEnvID = "env"
FingerprintTypePathID = "path" // Matches both MatchingPath and Path.
FingerprintTypeTagID = "tag"
FingerprintTypeCmdlineID = "cmdline"
FingerprintTypeEnvID = "env"
FingerprintTypePathID = "path" // Matches both MatchingPath and Path.
FingerprintOperationEqualsID = "equals"
FingerprintOperationPrefixID = "prefix"
FingerprintOperationRegexID = "regex"
tagMatchBaseScore = 40_000
tagMatchBaseScore = 50_000
cmdlineMatchBaseScore = 40_000
envMatchBaseScore = 30_000
matchingPathMatchBaseScore = 20_000
pathMatchBaseScore = 10_000
@@ -75,6 +78,7 @@ type (
Env() map[string]string
Path() string
MatchingPath() string
Cmdline() string
}
matchingFingerprint interface {
@@ -155,9 +159,10 @@ func (fp fingerprintRegex) Match(value string) (score int) {
}
type parsedFingerprints struct {
tagPrints []matchingFingerprint
envPrints []matchingFingerprint
pathPrints []matchingFingerprint
tagPrints []matchingFingerprint
envPrints []matchingFingerprint
pathPrints []matchingFingerprint
cmdlinePrints []matchingFingerprint
}
func parseFingerprints(raw []Fingerprint, deprecatedLinkedPath string) (parsed *parsedFingerprints, firstErr error) {
@@ -187,7 +192,7 @@ func parseFingerprints(raw []Fingerprint, deprecatedLinkedPath string) (parsed *
}
continue
}
case FingerprintTypePathID:
case FingerprintTypePathID, FingerprintTypeCmdlineID:
// Don't need a key.
default:
// Unknown type.
@@ -236,6 +241,8 @@ func (parsed *parsedFingerprints) addMatchingFingerprint(fp Fingerprint, matchin
parsed.envPrints = append(parsed.envPrints, matchingPrint)
case FingerprintTypePathID:
parsed.pathPrints = append(parsed.pathPrints, matchingPrint)
case FingerprintTypeCmdlineID:
parsed.cmdlinePrints = append(parsed.cmdlinePrints, matchingPrint)
default:
// This should never happen, as the types are checked already.
panic(fmt.Sprintf("unknown fingerprint type: %q", fp.Type))
@@ -265,6 +272,17 @@ func MatchFingerprints(prints *parsedFingerprints, md MatchingData) (highestScor
return tagMatchBaseScore + highestScore
}
cmdline := md.Cmdline()
for _, cmdlinePrint := range prints.cmdlinePrints {
if score := cmdlinePrint.Match(cmdline); score > highestScore {
highestScore = score
}
}
if highestScore > 0 {
return cmdlineMatchBaseScore + highestScore
}
// Check env.
for _, envPrint := range prints.envPrints {
for key, value := range md.Env() {