wip: migrate to mono-repo. SPN has already been moved to spn/

This commit is contained in:
Patrick Pacher
2024-03-15 11:55:13 +01:00
parent b30fd00ccf
commit 8579430db9
577 changed files with 35981 additions and 818 deletions

View File

@@ -0,0 +1,103 @@
package intel
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/miekg/dns"
"github.com/safing/portbase/log"
"github.com/safing/portmaster/service/nameserver/nsutil"
)
// ListMatch represents an entity that has been
// matched against filterlists.
type ListMatch struct {
	// Entity is the matched entity value, e.g. a domain, IP, ASN or country.
	Entity string
	// ActiveLists holds IDs of matching lists that are enabled.
	ActiveLists []string
	// InactiveLists holds IDs of matching lists that are disabled.
	InactiveLists []string
}

// String returns a human-readable description of the list match.
func (lm *ListMatch) String() string {
	inactive := ""
	if len(lm.InactiveLists) > 0 {
		inactive = " and in deactivated lists " + strings.Join(lm.InactiveLists, ", ")
	}
	return fmt.Sprintf(
		"%s in activated lists %s%s",
		lm.Entity,
		// Join with ", " for consistency with the inactive list separator above.
		strings.Join(lm.ActiveLists, ", "),
		inactive,
	)
}
// ListBlockReason is a list of list matches.
type ListBlockReason []ListMatch

// String joins the descriptions of all matches with " and ".
func (br ListBlockReason) String() string {
	if len(br) == 0 {
		return ""
	}

	parts := make([]string, 0, len(br))
	for _, match := range br {
		parts = append(parts, match.String())
	}
	return strings.Join(parts, " and ")
}

// Context returns br wrapped into a map. It implements
// the endpoints.Reason interface.
func (br ListBlockReason) Context() interface{} {
	return br
}

// MarshalJSON marshals the list block reason into a map
// prefixed with filterlists.
func (br ListBlockReason) MarshalJSON() ([]byte, error) {
	// Marshal as []ListMatch so this method is not invoked recursively.
	return json.Marshal(map[string]interface{}{
		"filterlists": []ListMatch(br),
	})
}
// GetExtraRRs implements the nsutil.RRProvider interface
// and adds additional TXT records justifying the reason
// the request was blocked.
func (br ListBlockReason) GetExtraRRs(ctx context.Context, _ *dns.Msg) []dns.RR {
	// appendMsg builds a TXT record for msg and appends it, logging on failure.
	appendMsg := func(rrs []dns.RR, msg string) []dns.RR {
		rr, err := nsutil.MakeMessageRecord(log.InfoLevel, msg)
		if err != nil {
			log.Tracer(ctx).Errorf("intel: failed to create TXT RR for block reason: %s", err)
			return rrs
		}
		return append(rrs, rr)
	}

	rrs := make([]dns.RR, 0, len(br))
	for _, match := range br {
		rrs = appendMsg(rrs, fmt.Sprintf(
			"%s is blocked by filter lists %s",
			match.Entity,
			strings.Join(match.ActiveLists, ", "),
		))

		if len(match.InactiveLists) > 0 {
			rrs = appendMsg(rrs, fmt.Sprintf(
				"%s would be blocked by filter lists %s",
				match.Entity,
				strings.Join(match.InactiveLists, ", "),
			))
		}
	}
	return rrs
}

// Compile-time interface satisfaction check.
var _ nsutil.RRProvider = ListBlockReason(nil)

View File

@@ -0,0 +1,55 @@
package customlists
import (
"github.com/safing/portbase/config"
)
var (
	// CfgOptionCustomListFileKey is the config key for custom filter list file.
	CfgOptionCustomListFileKey = "filter/customListFile"

	// cfgOptionCustomListFileOrder controls where the option appears in the UI.
	cfgOptionCustomListFileOrder = 35

	// cfgOptionCustomListCategoryAnnotation is the UI category of the option.
	cfgOptionCustomListCategoryAnnotation = "Filter Lists"
)

// getFilePath returns the configured custom filter list file path.
// It is assigned in registerConfig.
var getFilePath config.StringOption
// registerConfig registers the custom filter list file setting in the UI
// and sets up the getFilePath config getter.
func registerConfig() error {
	help := `The file (.txt) is checked every couple minutes and will be automatically reloaded when it has changed.
Entries (one per line) may be one of:
- Domain: "example.com"
- IP Address: "10.0.0.1"
- Country Code (based on IP): "US"
- AS (Autonomous System): "AS1234"
Everything after the first element of a line, comments starting with a '#', and empty lines are ignored.
The settings "Block Subdomains of Filter List Entries" and "Block Domain Aliases" also apply to the custom filter list.
Lists in the "Hosts" format are not supported.
Please note that the custom filter list is fully loaded into memory. This can have a negative impact on your device if big lists are loaded.`

	// Register a setting for the file path in the ui
	if err := config.Register(&config.Option{
		Name:            "Custom Filter List",
		Key:             CfgOptionCustomListFileKey,
		Description:     "Specify the file path to a custom filter list (.txt), which will be automatically refreshed. Any connections matching a domain, IP address, Country or ASN in the file will be blocked.",
		Help:            help,
		OptType:         config.OptTypeString,
		ExpertiseLevel:  config.ExpertiseLevelExpert,
		ReleaseLevel:    config.ReleaseLevelStable,
		DefaultValue:    "",
		RequiresRestart: false,
		Annotations: config.Annotations{
			config.DisplayOrderAnnotation: cfgOptionCustomListFileOrder,
			config.CategoryAnnotation:     cfgOptionCustomListCategoryAnnotation,
			config.DisplayHintAnnotation:  config.DisplayHintFilePicker,
		},
	}); err != nil {
		return err
	}

	getFilePath = config.GetAsString(CfgOptionCustomListFileKey, "")
	return nil
}

View File

@@ -0,0 +1,195 @@
package customlists
import (
"bufio"
"fmt"
"net"
"os"
"strconv"
"strings"
"github.com/miekg/dns"
"github.com/safing/portbase/log"
"github.com/safing/portbase/notifications"
"github.com/safing/portmaster/service/network/netutils"
)
// Package-level filter sets, filled by parseFile and guarded by
// filterListLock (declared in this package's module file).
var (
	countryCodesFilterList      map[string]struct{}
	ipAddressesFilterList       map[string]struct{}
	autonomousSystemsFilterList map[uint]struct{}
	domainsFilterList           map[string]struct{}
)

const (
	// NOTE(review): "ration" is a typo for "ratio"; renaming would touch
	// other code in this package, so it is only flagged here.
	rationForInvalidLinesUntilWarning = 0.1

	// Notification IDs used to report parsing status to the user.
	parseStatusNotificationID  = "customlists:parse-status"
	parseWarningNotificationID = "customlists:parse-warning"
	zeroIPNotificationID       = "customlists:too-many-zero-ips"
)
// initFilterLists (re)creates all custom filter list sets as empty maps.
func initFilterLists() {
	countryCodesFilterList = map[string]struct{}{}
	ipAddressesFilterList = map[string]struct{}{}
	autonomousSystemsFilterList = map[uint]struct{}{}
	domainsFilterList = map[string]struct{}{}
}
// IsLoaded returns whether a custom filter list is loaded.
func IsLoaded() bool {
	filterListLock.RLock()
	defer filterListLock.RUnlock()

	// Loaded means at least one of the filter sets has entries.
	return len(domainsFilterList) > 0 ||
		len(ipAddressesFilterList) > 0 ||
		len(countryCodesFilterList) > 0 ||
		len(autonomousSystemsFilterList) > 0
}
// parseFile reads the custom filter list file at filePath and refills the
// package-level filter sets (domains, IPs, ASNs, country codes).
// An empty filePath only clears the sets and returns nil.
// Callers are expected to hold filterListLock for writing.
func parseFile(filePath string) error {
	// Reset all maps, previous (if any) settings will be lost.
	for key := range countryCodesFilterList {
		delete(countryCodesFilterList, key)
	}
	for key := range ipAddressesFilterList {
		delete(ipAddressesFilterList, key)
	}
	for key := range autonomousSystemsFilterList {
		delete(autonomousSystemsFilterList, key)
	}
	for key := range domainsFilterList {
		delete(domainsFilterList, key)
	}

	// Ignore empty file path.
	if filePath == "" {
		return nil
	}

	// Open the file if possible
	file, err := os.Open(filePath)
	if err != nil {
		log.Warningf("intel/customlists: failed to parse file %s", err)
		module.Warning(parseWarningNotificationID, "Failed to open custom filter list", err.Error())
		return err
	}
	defer func() { _ = file.Close() }()

	var allLinesCount uint64
	var invalidLinesCount uint64

	// Read filter file line by line.
	scanner := bufio.NewScanner(file)
	// The scanner will error out if the line is greater than 64K, in this case it is enough.
	for scanner.Scan() {
		allLinesCount++
		// Parse and count invalid lines (comment, empty lines, zero IPs...)
		if !parseLine(scanner.Text()) {
			invalidLinesCount++
		}
	}

	// Check for scanner error.
	if err := scanner.Err(); err != nil {
		return err
	}

	// Warn when the share of invalid lines exceeds the threshold.
	// NOTE(review): for an empty file this divides 0 by 0 (NaN); the
	// comparison below is then false, so no warning is raised — confirm
	// this is the intended behavior.
	invalidLinesRation := float32(invalidLinesCount) / float32(allLinesCount)
	if invalidLinesRation > rationForInvalidLinesUntilWarning {
		log.Warning("intel/customlists: Too many invalid lines")
		module.Warning(zeroIPNotificationID, "Custom filter list has many invalid lines",
			fmt.Sprintf(`%d out of %d lines are invalid.
Check if you are using the correct file format and if the path to the custom filter list is correct.`, invalidLinesCount, allLinesCount))
	} else {
		module.Resolve(zeroIPNotificationID)
	}

	// Report the total number of loaded entries to the user.
	allEntriesCount := len(domainsFilterList) + len(ipAddressesFilterList) + len(autonomousSystemsFilterList) + len(countryCodesFilterList)
	log.Infof("intel/customlists: loaded %d entries from %s", allEntriesCount, filePath)
	notifications.NotifyInfo(parseStatusNotificationID,
		"Custom filter list loaded successfully.",
		fmt.Sprintf(`Custom filter list loaded from file %s:
%d Domains
%d IPs
%d Autonomous Systems
%d Countries`,
			filePath,
			len(domainsFilterList),
			len(ipAddressesFilterList),
			len(autonomousSystemsFilterList),
			len(countryCodesFilterList)))
	module.Resolve(parseWarningNotificationID)

	return nil
}
// parseLine parses one line of the custom filter list file and records the
// entry in the matching filter set. It reports whether the line was valid;
// comments and empty lines count as valid but add no entry.
func parseLine(line string) (valid bool) {
	// Everything after the first field will be ignored.
	fields := strings.Fields(line)

	// Ignore empty lines.
	if len(fields) == 0 {
		return true // Not an entry, but a valid line.
	}
	entry := fields[0]

	// Ignore comments
	if strings.HasPrefix(entry, "#") {
		return true // Not an entry, but a valid line.
	}

	// Go through all possible field types.
	// Parsing is ordered by
	// 1. Parsing options (ie. the domain has most variation and goes last.)
	// 2. Speed

	// Check if it'a a country code.
	if isCountryCode(entry) {
		countryCodesFilterList[entry] = struct{}{}
		return true
	}

	// Check if it's a Autonomous system (example AS123).
	if isAutonomousSystem(entry) {
		asNumber, err := strconv.ParseUint(entry[2:], 10, 32)
		if err != nil {
			return false
		}
		autonomousSystemsFilterList[uint(asNumber)] = struct{}{}
		return true
	}

	// Try to parse IP address.
	if ip := net.ParseIP(entry); ip != nil {
		// Check for zero ip.
		if ip.Equal(net.IPv4zero) || ip.Equal(net.IPv6zero) {
			return false
		}
		ipAddressesFilterList[ip.String()] = struct{}{}
		return true
	}

	// Check if it's a domain.
	if domain := dns.Fqdn(entry); netutils.IsValidFqdn(domain) {
		domainsFilterList[domain] = struct{}{}
		return true
	}

	return false
}

View File

@@ -0,0 +1,207 @@
package customlists
import (
"context"
"errors"
"net"
"os"
"regexp"
"strings"
"sync"
"time"
"golang.org/x/net/publicsuffix"
"github.com/safing/portbase/api"
"github.com/safing/portbase/modules"
)
// module is the customlists module instance, registered in init.
var module *modules.Module

const (
	// configModuleName and configChangeEvent identify the event this module
	// subscribes to in order to reload the list after config changes.
	configModuleName  = "config"
	configChangeEvent = "config change"
)

// Helper variables for parsing the input file.
var (
	// isCountryCode matches exactly two uppercase letters, e.g. "US".
	isCountryCode = regexp.MustCompile("^[A-Z]{2}$").MatchString
	// isAutonomousSystem matches entries such as "AS1234".
	isAutonomousSystem = regexp.MustCompile(`^AS[0-9]+$`).MatchString
)

var (
	// filterListFilePath and filterListFileModifiedTime track the last
	// successfully parsed file; guarded by filterListLock.
	filterListFilePath         string
	filterListFileModifiedTime time.Time

	// filterListLock guards the filter sets and the fields above.
	filterListLock sync.RWMutex

	// parserTask periodically re-checks the file for changes.
	parserTask *modules.Task

	// ErrNotConfigured is returned when updating the custom filter list, but it
	// is not configured.
	ErrNotConfigured = errors.New("custom filter list not configured")
)

// init registers the customlists module with its lifecycle functions.
func init() {
	module = modules.Register("customlists", prep, start, nil, "base")
}
// prep initializes the filter sets, registers the config option and the
// API endpoint used to trigger a manual reload.
func prep() error {
	initFilterLists()

	// Register the config in the ui.
	if err := registerConfig(); err != nil {
		return err
	}

	// Register api endpoint for updating the filter list.
	return api.RegisterEndpoint(api.Endpoint{
		Path:      "customlists/update",
		Write:     api.PermitUser,
		BelongsTo: module,
		ActionFunc: func(ar *api.Request) (msg string, err error) {
			if updateErr := checkAndUpdateFilterList(); updateErr != nil {
				return "", updateErr
			}
			return "Custom filter list loaded successfully.", nil
		},
		Name:        "Update custom filter list",
		Description: "Reload the filter list from the configured file.",
	})
}
// start hooks into config changes and schedules the periodic file check.
func start() error {
	// Register to hook to update after config change.
	err := module.RegisterEventHook(
		configModuleName,
		configChangeEvent,
		"update custom filter list",
		func(ctx context.Context, obj interface{}) error {
			updateErr := checkAndUpdateFilterList()
			// An unconfigured list is not an error here.
			if errors.Is(updateErr, ErrNotConfigured) {
				return nil
			}
			return updateErr
		},
	)
	if err != nil {
		return err
	}

	// Create parser task and enqueue for execution. "checkAndUpdateFilterList" will schedule the next execution.
	parserTask = module.NewTask("intel/customlists:file-update-check", func(context.Context, *modules.Task) error {
		_ = checkAndUpdateFilterList()
		return nil
	}).Schedule(time.Now().Add(20 * time.Second))

	return nil
}
// checkAndUpdateFilterList re-parses the custom filter list if the
// configured file path or the file's modification time has changed.
// It returns ErrNotConfigured when no file path is set, and schedules
// the next periodic check in one minute.
func checkAndUpdateFilterList() error {
	filterListLock.Lock()
	defer filterListLock.Unlock()

	// Get path and return error if empty
	filePath := getFilePath()
	if filePath == "" {
		return ErrNotConfigured
	}

	// Schedule next update check
	parserTask.Schedule(time.Now().Add(1 * time.Minute))

	// Try to get file info
	// NOTE(review): if os.Stat fails (e.g. file missing), modifiedTime
	// falls back to time.Now(), which differs on every run and therefore
	// triggers a parse attempt on each check — confirm this is intended.
	modifiedTime := time.Now()
	if fileInfo, err := os.Stat(filePath); err == nil {
		modifiedTime = fileInfo.ModTime()
	}

	// Check if file path has changed or if modified time has changed
	if filterListFilePath != filePath || !filterListFileModifiedTime.Equal(modifiedTime) {
		err := parseFile(filePath)
		if err != nil {
			return err
		}
		filterListFileModifiedTime = modifiedTime
		filterListFilePath = filePath
	}
	return nil
}
// LookupIP checks if the IP address is in a custom filter list.
func LookupIP(ip net.IP) bool {
	filterListLock.RLock()
	defer filterListLock.RUnlock()

	_, found := ipAddressesFilterList[ip.String()]
	return found
}

// LookupDomain checks if the Domain is in a custom filter list.
// When filterSubdomains is set, parent domains down to the public suffix
// are checked as well. The second return value is the matching entry.
func LookupDomain(fullDomain string, filterSubdomains bool) (bool, string) {
	filterListLock.RLock()
	defer filterListLock.RUnlock()

	if !filterSubdomains {
		// Check only if the domain is in the list
		_, found := domainsFilterList[fullDomain]
		return found, fullDomain
	}

	// Check if domain is in the list and all its subdomains.
	for _, candidate := range splitDomain(fullDomain) {
		if _, found := domainsFilterList[candidate]; found {
			return true, candidate
		}
	}
	return false, ""
}

// LookupASN checks if the Autonomous system number is in a custom filter list.
func LookupASN(number uint) bool {
	filterListLock.RLock()
	defer filterListLock.RUnlock()

	_, found := autonomousSystemsFilterList[number]
	return found
}

// LookupCountry checks if the country code is in a custom filter list.
func LookupCountry(countryCode string) bool {
	filterListLock.RLock()
	defer filterListLock.RUnlock()

	_, found := countryCodesFilterList[countryCode]
	return found
}
// splitDomain returns the domain and all parent domains down to (and
// including) the public suffix, each in FQDN form with a trailing dot.
func splitDomain(domain string) []string {
	domain = strings.Trim(domain, ".")

	suffix, _ := publicsuffix.PublicSuffix(domain)
	if suffix == domain {
		return []string{domain}
	}

	prefix := strings.Trim(domain[:len(domain)-len(suffix)], ".")
	labels := strings.FieldsFunc(prefix, func(r rune) bool {
		return r == '.'
	})

	result := make([]string, 0, len(labels))
	for idx := range labels {
		candidate := strings.Join(labels[idx:], ".") + "." + suffix
		// Ensure FQDN form.
		if candidate[len(candidate)-1] != '.' {
			candidate += "."
		}
		result = append(result, candidate)
	}
	return result
}

614
service/intel/entity.go Normal file
View File

@@ -0,0 +1,614 @@
package intel
import (
"context"
"fmt"
"net"
"sort"
"strings"
"sync"
"golang.org/x/net/publicsuffix"
"github.com/safing/portbase/log"
"github.com/safing/portmaster/service/intel/filterlists"
"github.com/safing/portmaster/service/intel/geoip"
"github.com/safing/portmaster/service/network/netutils"
)
// Entity describes a remote endpoint in many different ways.
// It embeds a sync.Mutex but none of the Entity's own
// functions performs locking. The caller MUST ENSURE
// proper locking and synchronization when accessing
// any properties of Entity.
type Entity struct { //nolint:maligned
	sync.Mutex

	// lists exist for most entity information and
	// we need to know which one we loaded
	domainListLoaded  bool
	ipListLoaded      bool
	countryListLoaded bool
	asnListLoaded     bool

	// reverseResolveEnabled allows resolving ReverseDomain from IP on demand.
	reverseResolveEnabled bool
	// resolveSubDomainLists also checks parent domains against the lists.
	resolveSubDomainLists bool
	// checkCNAMEs also checks resolved CNAMEs against the lists.
	checkCNAMEs bool

	// IP is the IP address of the connection. If domain is
	// set, IP has been resolved by following all CNAMEs.
	IP net.IP

	// IPScope holds the network scope of the IP.
	// For DNS requests, this signifies in which scope the DNS request was resolved.
	IPScope netutils.IPScope

	// Protocol is the protocol number used by the connection.
	Protocol uint8

	// Port is the remote port of the connection
	Port uint16

	// dstPort is the destination port of the connection
	dstPort uint16

	// Domain is the target domain of the connection.
	Domain string

	// ReverseDomain is the domain the IP address points to. This is only
	// resolved and populated when needed.
	ReverseDomain string

	// CNAME is a list of domain names that have been
	// resolved for Domain.
	CNAME []string

	// Country holds the country the IP address (ASN) is
	// located in.
	Country string

	// Coordinates holds the approximate coordinates of the IP address.
	Coordinates *geoip.Coordinates

	// ASN holds the autonomous system number of the IP.
	ASN uint

	// ASOrg holds the owner's name of the autonomous system.
	ASOrg string

	// LocationError holds an error message if fetching the location failed.
	LocationError string

	// location caches the full geoip record once fetched.
	location *geoip.Location

	// BlockedByLists holds list source IDs that
	// are used to block the entity.
	BlockedByLists []string

	// BlockedEntities holds a list of entities that
	// have been blocked. Values can be used as a key
	// for the ListOccurences map.
	BlockedEntities []string

	// ListOccurences is a map that matches an entity (Domain, IPs, ASN, Country, Sub-domain)
	// to a list of sources where the entity has been observed in.
	ListOccurences map[string][]string

	// ListsError holds an error message if fetching the lists failed.
	ListsError string

	// we only load each data above at most once
	fetchLocationOnce   sync.Once
	reverseResolveOnce  sync.Once
	loadDomainListOnce  sync.Once
	loadIPListOnce      sync.Once
	loadCountryListOnce sync.Once
	loadAsnListOnce     sync.Once
}
// Init initializes internal metadata about the entity.
// If the entity does not describe a destination, you can supply a different
// destination port for endpoint matching.
// It returns the entity itself for single line formatting.
func (e *Entity) Init(dstPort uint16) *Entity {
	// Get IP scope.
	e.IPScope = netutils.Undefined
	if e.IP != nil {
		e.IPScope = netutils.GetIPScope(e.IP)
	}

	// Set dst port to given value or fall back to entity.
	e.dstPort = e.Port
	if dstPort > 0 {
		e.dstPort = dstPort
	}

	return e
}

// DstPort returns the destination port.
func (e *Entity) DstPort() uint16 {
	return e.dstPort
}
// FetchData fetches additional information, meant to be called before persisting an entity record.
func (e *Entity) FetchData(ctx context.Context) {
	e.getLocation(ctx)
	e.getLists(ctx)
}

// ResetLists resets the current list data and forces
// all list sources to be re-acquired when calling GetLists().
func (e *Entity) ResetLists() {
	// TODO(ppacher): our actual goal is to reset the domain
	// list right now so we could be more efficient by keeping
	// the other lists around.
	e.BlockedByLists = nil
	e.BlockedEntities = nil
	e.ListOccurences = nil
	e.domainListLoaded = false
	e.ipListLoaded = false
	e.countryListLoaded = false
	e.asnListLoaded = false
	e.resolveSubDomainLists = false
	e.checkCNAMEs = false
	// Replace the sync.Once values so each list can be loaded again.
	e.loadDomainListOnce = sync.Once{}
	e.loadIPListOnce = sync.Once{}
	e.loadCountryListOnce = sync.Once{}
	e.loadAsnListOnce = sync.Once{}
}
// ResolveSubDomainLists enables or disables list lookups for
// sub-domains. Changing this after the domain lists have been
// fetched has no effect and is logged as a warning.
func (e *Entity) ResolveSubDomainLists(ctx context.Context, enabled bool) {
	if e.domainListLoaded && enabled != e.resolveSubDomainLists {
		log.Tracer(ctx).Warningf("intel/filterlists: tried to change sub-domain resolving for %s but lists are already fetched", e.Domain)
	}
	e.resolveSubDomainLists = enabled
}

// EnableCNAMECheck enables or disables list lookups for
// entity CNAMEs. Changing this after the domain lists have been
// fetched has no effect and is logged as a warning.
func (e *Entity) EnableCNAMECheck(ctx context.Context, enabled bool) {
	if e.domainListLoaded && enabled != e.checkCNAMEs {
		log.Tracer(ctx).Warningf("intel/filterlists: tried to change CNAME resolving for %s but lists are already fetched", e.Domain)
	}
	e.checkCNAMEs = enabled
}

// CNAMECheckEnabled returns true if the entities CNAMEs should
// also be checked.
func (e *Entity) CNAMECheckEnabled() bool {
	return e.checkCNAMEs
}
// Domain and IP

// EnableReverseResolving enables reverse resolving the domain from the IP on demand.
func (e *Entity) EnableReverseResolving() {
	e.reverseResolveEnabled = true
}

// reverseResolve resolves the entity's IP to a domain exactly once and
// stores the result in ReverseDomain. Failures are logged and leave
// ReverseDomain empty.
func (e *Entity) reverseResolve(ctx context.Context) {
	e.reverseResolveOnce.Do(func() {
		// need IP!
		if e.IP == nil {
			return
		}
		// reverse resolve
		// NOTE(review): reverseResolver is declared elsewhere in this
		// package and may be nil when no resolver is registered.
		if reverseResolver == nil {
			return
		}
		// TODO: security level
		domain, err := reverseResolver(ctx, e.IP.String())
		if err != nil {
			log.Tracer(ctx).Warningf("intel: failed to resolve IP %s: %s", e.IP, err)
			return
		}
		e.ReverseDomain = domain
	})
}
// GetDomain returns the domain and whether it is set.
func (e *Entity) GetDomain(ctx context.Context, mayUseReverseDomain bool) (string, bool) {
	if mayUseReverseDomain && e.reverseResolveEnabled {
		e.reverseResolve(ctx)

		if e.ReverseDomain != "" {
			return e.ReverseDomain, true
		}
		return "", false
	}

	if e.Domain != "" {
		return e.Domain, true
	}
	return "", false
}

// GetIP returns the IP and whether it is set.
func (e *Entity) GetIP() (net.IP, bool) {
	if e.IP != nil {
		return e.IP, true
	}
	return nil, false
}
// Location

// getLocation fetches geoip data for the entity's IP exactly once and
// fills Country, Coordinates, ASN and ASOrg. On failure the error text
// is stored in LocationError and location stays nil. Entities without
// a global IP address are skipped silently.
func (e *Entity) getLocation(ctx context.Context) {
	e.fetchLocationOnce.Do(func() {
		// Only check if we have a global IP address.
		if e.IP == nil || !e.IPScope.IsGlobal() {
			return
		}

		// get location data
		loc, err := geoip.GetLocation(e.IP)
		if err != nil {
			log.Tracer(ctx).Warningf("intel: failed to get location data for %s: %s", e.IP, err)
			e.LocationError = err.Error()
			return
		}
		e.location = loc
		e.Country = loc.Country.Code
		e.Coordinates = &loc.Coordinates
		e.ASN = loc.AutonomousSystemNumber
		e.ASOrg = loc.AutonomousSystemOrganization

		// Log result.
		if log.GetLogLevel() == log.TraceLevel {
			// Build flags
			var flags string
			if loc.IsAnycast {
				flags += " anycast"
			}
			if loc.IsSatelliteProvider {
				flags += " satellite"
			}
			if loc.IsAnonymousProxy {
				flags += " anonymous"
			}
			// Log location
			log.Tracer(ctx).Tracef(
				"intel: located %s in %s (%s), as part of AS%d by %s%s",
				e.IP,
				loc.Country.Name,
				loc.Country.Code,
				loc.AutonomousSystemNumber,
				loc.AutonomousSystemOrganization,
				flags,
			)
		}
	})
}
// GetLocation returns the raw location data and whether it is set.
func (e *Entity) GetLocation(ctx context.Context) (*geoip.Location, bool) {
	e.getLocation(ctx)

	if loc := e.location; loc != nil {
		return loc, true
	}
	return nil, false
}

// GetCountry returns the two letter ISO country code and whether it is set.
func (e *Entity) GetCountry(ctx context.Context) (string, bool) {
	e.getLocation(ctx)

	if e.LocationError != "" {
		return "", false
	}
	return e.Country, true
}
// GetCountryInfo returns the country information of the entity's location,
// or nil when the location is not available.
func (e *Entity) GetCountryInfo(ctx context.Context) *geoip.CountryInfo {
	e.getLocation(ctx)

	// Also guard against a nil location: getLocation leaves e.location
	// unset without recording a LocationError when the entity has no
	// global IP address; dereferencing it here would panic.
	if e.LocationError != "" || e.location == nil {
		return nil
	}
	return &e.location.Country
}
// GetASN returns the AS number and whether it is set.
func (e *Entity) GetASN(ctx context.Context) (uint, bool) {
	e.getLocation(ctx)

	if e.LocationError != "" {
		return 0, false
	}
	return e.ASN, true
}
// Lists

// getLists loads all filter list data for the entity.
func (e *Entity) getLists(ctx context.Context) {
	e.getDomainLists(ctx)
	e.getASNLists(ctx)
	e.getIPLists(ctx)
	e.getCountryLists(ctx)
}

// mergeList records the given list sources under key in ListOccurences.
func (e *Entity) mergeList(key string, list []string) {
	if len(list) == 0 {
		return
	}

	if e.ListOccurences == nil {
		e.ListOccurences = map[string][]string{}
	}
	e.ListOccurences[key] = mergeStringList(e.ListOccurences[key], list)
}
// getDomainLists loads filter list occurrences for the entity's domain
// (and optionally its CNAMEs and parent domains) exactly once.
func (e *Entity) getDomainLists(ctx context.Context) {
	if e.domainListLoaded {
		return
	}

	domain, ok := e.GetDomain(ctx, false /* mayUseReverseDomain */)
	if !ok {
		return
	}

	e.loadDomainListOnce.Do(func() {
		domainsToInspect := []string{domain}

		// Include resolved CNAMEs when enabled.
		if e.checkCNAMEs && len(e.CNAME) > 0 {
			log.Tracer(ctx).Tracef("intel: CNAME filtering enabled, checking %v too", e.CNAME)
			domainsToInspect = append(domainsToInspect, e.CNAME...)
		}

		// Expand each domain to its parent domains when enabled.
		var domains []string
		if e.resolveSubDomainLists {
			for _, domain := range domainsToInspect {
				subdomains := splitDomain(domain)
				domains = append(domains, subdomains...)
			}
		} else {
			domains = domainsToInspect
		}
		domains = makeDistinct(domains)

		for _, d := range domains {
			list, err := filterlists.LookupDomain(d)
			if err != nil {
				log.Tracer(ctx).Errorf("intel: failed to get domain blocklists for %s: %s", d, err)
				e.ListsError = err.Error()
				// NOTE(review): returning here leaves domainListLoaded false,
				// but the sync.Once has already fired, so the lookup is never
				// retried for this entity — confirm this is intended.
				return
			}
			if len(list) > 0 {
				log.Tracer(ctx).Tracef("intel: loaded domain lists for %s: %s", d, strings.Join(list, ", "))
				e.mergeList(d, list)
			}
		}
		e.domainListLoaded = true
	})
}
// splitDomain returns the domain and all parent domains down to (and
// including) the public suffix, each in FQDN form with a trailing dot.
// NOTE(review): this duplicates splitDomain in the customlists package —
// consider sharing a single implementation.
func splitDomain(domain string) []string {
	domain = strings.Trim(domain, ".")
	suffix, _ := publicsuffix.PublicSuffix(domain)
	// A bare public suffix is returned as-is.
	if suffix == domain {
		return []string{domain}
	}

	domainWithoutSuffix := domain[:len(domain)-len(suffix)]
	domainWithoutSuffix = strings.Trim(domainWithoutSuffix, ".")

	splitted := strings.FieldsFunc(domainWithoutSuffix, func(r rune) bool {
		return r == '.'
	})

	domains := make([]string, 0, len(splitted))
	for idx := range splitted {
		d := strings.Join(splitted[idx:], ".") + "." + suffix
		// Ensure FQDN form with a trailing dot.
		if d[len(d)-1] != '.' {
			d += "."
		}
		domains = append(domains, d)
	}
	return domains
}
// getASNLists loads filter list occurrences for the entity's ASN exactly once.
func (e *Entity) getASNLists(ctx context.Context) {
	if e.asnListLoaded {
		return
	}

	asn, ok := e.GetASN(ctx)
	if !ok || asn == 0 {
		return
	}

	e.loadAsnListOnce.Do(func() {
		asnStr := fmt.Sprintf("%d", asn)
		list, err := filterlists.LookupASNString(asnStr)
		if err != nil {
			e.ListsError = err.Error()
			log.Tracer(ctx).Errorf("intel: failed to get ASN blocklist for %d: %s", asn, err)
			return
		}

		if len(list) != 0 {
			log.Tracer(ctx).Tracef("intel: loaded ASN lists for %s: %s", asnStr, strings.Join(list, ", "))
			e.mergeList(asnStr, list)
		}
		e.asnListLoaded = true
	})
}
// getCountryLists loads filter list occurrences for the entity's country
// exactly once.
func (e *Entity) getCountryLists(ctx context.Context) {
	if e.countryListLoaded {
		return
	}

	country, ok := e.GetCountry(ctx)
	if !ok || country == "" {
		return
	}

	e.loadCountryListOnce.Do(func() {
		list, err := filterlists.LookupCountry(country)
		if err != nil {
			e.ListsError = err.Error()
			log.Tracer(ctx).Errorf("intel: failed to load country blocklist for %s: %s", country, err)
			return
		}

		if len(list) != 0 {
			log.Tracer(ctx).Tracef("intel: loaded country lists for %s: %s", country, strings.Join(list, ", "))
			e.mergeList(country, list)
		}
		e.countryListLoaded = true
	})
}
// getIPLists loads filter list occurrences for the entity's IP exactly once.
// Only globally routable IP addresses are checked.
func (e *Entity) getIPLists(ctx context.Context) {
	if e.ipListLoaded {
		return
	}

	ip, ok := e.GetIP()
	if !ok || ip == nil {
		return
	}

	// only load lists for IP addresses that are classified as global.
	if !e.IPScope.IsGlobal() {
		return
	}

	e.loadIPListOnce.Do(func() {
		list, err := filterlists.LookupIP(ip)
		if err != nil {
			e.ListsError = err.Error()
			log.Tracer(ctx).Errorf("intel: failed to get IP blocklist for %s: %s", ip.String(), err)
			return
		}

		if len(list) != 0 {
			log.Tracer(ctx).Tracef("intel: loaded IP lists for %s: %s", ip.String(), strings.Join(list, ", "))
			e.mergeList(ip.String(), list)
		}
		e.ipListLoaded = true
	})
}
// LoadLists searches all filterlists for all occurrences of
// this entity.
func (e *Entity) LoadLists(ctx context.Context) {
	e.getLists(ctx)
}

// MatchLists matches the entity's list occurrences against a slice
// of source IDs and updates BlockedByLists, ListOccurences and
// BlockedEntities. It reports whether at least one list matched.
func (e *Entity) MatchLists(lists []string) bool {
	if len(lists) == 0 {
		return false
	}

	e.BlockedByLists = nil
	e.BlockedEntities = nil

	wanted := makeMap(lists)
	for entity, sources := range e.ListOccurences {
		for _, sourceID := range sources {
			if _, ok := wanted[sourceID]; ok {
				e.BlockedByLists = append(e.BlockedByLists, sourceID)
				e.BlockedEntities = append(e.BlockedEntities, entity)
			}
		}
	}

	e.BlockedByLists = makeDistinct(e.BlockedByLists)
	e.BlockedEntities = makeDistinct(e.BlockedEntities)

	return len(e.BlockedByLists) > 0
}
// ListBlockReason returns the block reason for this entity, classifying
// each matched list as active (blocking) or inactive per blocked entity.
func (e *Entity) ListBlockReason() ListBlockReason {
	blockedBy := make([]ListMatch, len(e.BlockedEntities))
	active := makeMap(e.BlockedByLists)

	for idx, blockedEntity := range e.BlockedEntities {
		entityLists, ok := e.ListOccurences[blockedEntity]
		if !ok {
			continue
		}

		match := ListMatch{Entity: blockedEntity}
		for _, listID := range entityLists {
			if _, isActive := active[listID]; isActive {
				match.ActiveLists = append(match.ActiveLists, listID)
			} else {
				match.InactiveLists = append(match.InactiveLists, listID)
			}
		}
		blockedBy[idx] = match
	}

	return blockedBy
}
// mergeStringList returns the sorted union of a and b without duplicates.
func mergeStringList(a, b []string) []string {
	seen := make(map[string]struct{}, len(a)+len(b))
	for _, list := range [][]string{a, b} {
		for _, entry := range list {
			seen[entry] = struct{}{}
		}
	}

	merged := make([]string, 0, len(seen))
	for entry := range seen {
		merged = append(merged, entry)
	}
	sort.Strings(merged)
	return merged
}
// makeDistinct removes duplicates from slice, keeping first-seen order.
func makeDistinct(slice []string) []string {
	seen := make(map[string]struct{}, len(slice))
	distinct := make([]string, 0, len(slice))
	for _, entry := range slice {
		if _, dup := seen[entry]; !dup {
			seen[entry] = struct{}{}
			distinct = append(distinct, entry)
		}
	}
	return distinct
}
// makeMap converts slice into a set-like map for fast membership tests.
func makeMap(slice []string) map[string]struct{} {
	// Pre-size the map to avoid rehashing while filling it.
	lm := make(map[string]struct{}, len(slice))
	for _, v := range slice {
		lm[v] = struct{}{}
	}
	return lm
}

View File

@@ -0,0 +1,218 @@
package filterlists
import (
"encoding/hex"
"fmt"
"strings"
"sync"
"github.com/tannerryan/ring"
"github.com/safing/portbase/database/record"
"github.com/safing/portbase/log"
)
// defaultFilter is the package-wide set of bloom filters used for lookups.
var defaultFilter = newScopedBloom()

// scopedBloom is a wrapper around a bloomfilter implementation
// providing scoped filters for different entity types.
type scopedBloom struct {
	// rw guards all filters below.
	rw sync.RWMutex

	// One bloom filter per entity scope.
	domain  *ring.Ring
	asn     *ring.Ring
	country *ring.Ring
	ipv4    *ring.Ring
	ipv6    *ring.Ring
}
// newScopedBloom allocates a scopedBloom with one filter per entity type.
func newScopedBloom() *scopedBloom {
	mustInit := func(size int) *ring.Ring {
		f, err := ring.Init(size, bfFalsePositiveRate)
		if err != nil {
			// we panic here as those values cannot be controlled
			// by the user and invalid values shouldn't be
			// in a release anyway.
			panic("Invalid bloom filter parameters!")
		}
		return f
	}

	bf := new(scopedBloom)
	bf.domain = mustInit(domainBfSize)
	bf.asn = mustInit(asnBfSize)
	bf.country = mustInit(countryBfSize)
	bf.ipv4 = mustInit(ipv4BfSize)
	bf.ipv6 = mustInit(ipv6BfSize)
	return bf
}
// getBloomForType returns the bloom filter for entityType, or an error
// when the type is unknown. The caller must hold bf.rw.
func (bf *scopedBloom) getBloomForType(entityType string) (*ring.Ring, error) {
	switch strings.ToLower(entityType) {
	case "domain":
		return bf.domain, nil
	case "asn":
		return bf.asn, nil
	case "ipv4":
		return bf.ipv4, nil
	case "ipv6":
		return bf.ipv6, nil
	case "country":
		return bf.country, nil
	}
	return nil, fmt.Errorf("unsupported filterlists entity type %q", entityType)
}
// add inserts value into the bloom filter for the given scope.
func (bf *scopedBloom) add(scope, value string) {
	bf.rw.Lock()
	defer bf.rw.Unlock()

	filter, err := bf.getBloomForType(scope)
	if err != nil {
		// If we don't have a bloom filter for that scope
		// we are probably running an older version that does
		// not have support for it. We just drop the value
		// as a call to Test() for that scope will always
		// return "true"
		log.Warningf("failed to add unknown entity type %q with value %q", scope, value)
		return
	}
	filter.Add([]byte(value))
}

// test reports whether value may be contained in the scope's filter.
// Unknown scopes always report a match.
func (bf *scopedBloom) test(scope, value string) bool {
	bf.rw.RLock()
	defer bf.rw.RUnlock()

	filter, err := bf.getBloomForType(scope)
	if err != nil {
		log.Warningf("testing for unknown entity type %q", scope)
		return true // simulate a match to the caller
	}
	return filter.Test([]byte(value))
}
// loadFromCache restores all scoped filters from the cache database.
func (bf *scopedBloom) loadFromCache() error {
	bf.rw.Lock()
	defer bf.rw.Unlock()

	for _, entry := range []struct {
		filter *ring.Ring
		scope  string
	}{
		{bf.domain, "domain"},
		{bf.asn, "asn"},
		{bf.country, "country"},
		{bf.ipv4, "ipv4"},
		{bf.ipv6, "ipv6"},
	} {
		if err := loadBloomFromCache(entry.filter, entry.scope); err != nil {
			return err
		}
	}
	return nil
}

// saveToCache persists all scoped filters to the cache database.
func (bf *scopedBloom) saveToCache() error {
	bf.rw.RLock()
	defer bf.rw.RUnlock()

	for _, entry := range []struct {
		filter *ring.Ring
		scope  string
	}{
		{bf.domain, "domain"},
		{bf.asn, "asn"},
		{bf.country, "country"},
		{bf.ipv4, "ipv4"},
		{bf.ipv6, "ipv6"},
	} {
		if err := saveBloomToCache(entry.filter, entry.scope); err != nil {
			return err
		}
	}
	return nil
}
// replaceWith swaps all scoped filters of bf for the ones of other,
// holding bf's write lock and other's read lock for the duration.
// NOTE(review): calling bf.replaceWith(bf) would deadlock (Lock followed
// by RLock on the same RWMutex) — callers must pass a distinct filter.
func (bf *scopedBloom) replaceWith(other *scopedBloom) {
	bf.rw.Lock()
	defer bf.rw.Unlock()

	other.rw.RLock()
	defer other.rw.RUnlock()

	bf.domain = other.domain
	bf.asn = other.asn
	bf.country = other.country
	bf.ipv4 = other.ipv4
	bf.ipv6 = other.ipv6
}
// bloomFilterRecord is the database record that stores one serialized
// bloom filter in the cache database.
type bloomFilterRecord struct {
	record.Base
	sync.Mutex

	// Filter holds the hex-encoded binary bitset of the bloom filter.
	Filter string
}
// loadBloomFromCache loads the bloom filter stored under scope
// into bf.
func loadBloomFromCache(bf *ring.Ring, scope string) error {
	r, err := cache.Get(makeBloomCacheKey(scope))
	if err != nil {
		return err
	}

	filterRecord := new(bloomFilterRecord)
	if r.IsWrapped() {
		if err := record.Unwrap(r, filterRecord); err != nil {
			return err
		}
	} else {
		typed, ok := r.(*bloomFilterRecord)
		if !ok {
			return fmt.Errorf("invalid type, expected bloomFilterRecord but got %T", r)
		}
		filterRecord = typed
	}

	// The bitset is stored hex-encoded.
	blob, err := hex.DecodeString(filterRecord.Filter)
	if err != nil {
		return err
	}

	return bf.UnmarshalBinary(blob)
}
// saveBloomToCache persists the bitset of the bloom filter bf
// in the cache database, hex-encoded, under the scope's cache key.
func saveBloomToCache(bf *ring.Ring, scope string) error {
	blob, err := bf.MarshalBinary()
	if err != nil {
		return err
	}

	rec := &bloomFilterRecord{
		Filter: hex.EncodeToString(blob),
	}
	rec.SetKey(makeBloomCacheKey(scope))
	return cache.Put(rec)
}

View File

@@ -0,0 +1,67 @@
package filterlists
import (
"fmt"
"sync"
"github.com/hashicorp/go-version"
"github.com/safing/portbase/database"
"github.com/safing/portbase/database/record"
)
// resetVersion marks the filterlists cache structure version. If the
// value stored in the cache does not match, the whole cache is treated
// as not found and rebuilt from scratch.
const resetVersion = "v0.6.0"

// cacheVersionRecord stores the version of the filterlists data that
// is currently persisted in the cache database.
type cacheVersionRecord struct {
	record.Base
	sync.Mutex

	// Version is the semver of the cached filterlists data.
	Version string
	// Reset holds the resetVersion active when the cache was written.
	Reset string
}
// getCacheDatabaseVersion reads and returns the cache database version
// record. It returns database.ErrNotFound when the stored reset marker
// does not match resetVersion, forcing a cache rebuild.
func getCacheDatabaseVersion() (*version.Version, error) {
	r, err := cache.Get(filterListCacheVersionKey)
	if err != nil {
		return nil, err
	}

	var rec *cacheVersionRecord
	if r.IsWrapped() {
		rec = new(cacheVersionRecord)
		if err := record.Unwrap(r, rec); err != nil {
			return nil, err
		}
	} else {
		var ok bool
		rec, ok = r.(*cacheVersionRecord)
		if !ok {
			return nil, fmt.Errorf("invalid type, expected cacheVersionRecord but got %T", r)
		}
	}

	// A mismatched reset marker invalidates the whole cache.
	if rec.Reset != resetVersion {
		return nil, database.ErrNotFound
	}

	ver, err := version.NewSemver(rec.Version)
	if err != nil {
		return nil, err
	}
	return ver, nil
}
// setCacheDatabaseVersion updates the cache database version record
// to ver, stamping it with the current resetVersion.
func setCacheDatabaseVersion(ver string) error {
	rec := &cacheVersionRecord{
		Version: ver,
		Reset:   resetVersion,
	}
	rec.SetKey(filterListCacheVersionKey)
	return cache.Put(rec)
}

View File

@@ -0,0 +1,240 @@
package filterlists
import (
"context"
"fmt"
"os"
"sort"
"strings"
"sync"
"time"
"golang.org/x/sync/errgroup"
"github.com/safing/portbase/database"
"github.com/safing/portbase/database/record"
"github.com/safing/portbase/log"
"github.com/safing/portbase/updater"
"github.com/safing/portmaster/service/updates"
)
// Paths of the filterlists resources inside the updates system.
const (
	baseListFilePath         = "intel/lists/base.dsdl"
	intermediateListFilePath = "intel/lists/intermediate.dsdl"
	urgentListFilePath       = "intel/lists/urgent.dsdl"
	listIndexFilePath        = "intel/lists/index.dsd"
)

// default bloomfilter element sizes (estimated).
const (
	domainBfSize  = 1000000
	asnBfSize     = 1000
	countryBfSize = 100
	ipv4BfSize    = 100
	ipv6BfSize    = 100
)

// bfFalsePositiveRate is the accepted false-positive rate of the bloom filters.
const bfFalsePositiveRate = 0.001

var (
	// filterListLock guards the package-level filterlists state.
	filterListLock sync.RWMutex

	// Updater files for tracking upgrades.
	baseFile         *updater.File
	intermediateFile *updater.File
	urgentFile       *updater.File

	// filterListsLoaded is closed once the filterlists are ready for use.
	filterListsLoaded chan struct{}
)
// cache is the database interface used for all filterlists cache
// operations (bloom filters, list index, entity records).
var cache = database.NewInterface(&database.Options{
	Local:    true,
	Internal: true,
	// BUG FIX: the previous value "2 ^ 8" used ^, which is bitwise XOR
	// in Go (2 ^ 8 == 10), not exponentiation. Use a shift for 256.
	CacheSize: 1 << 8,
})
// getFileFunc is the function used to get a file from
// the updater. It's basically updates.GetFile and used
// for unit testing.
type getFileFunc func(string) (*updater.File, error)

// getFile points to updates.GetFile but may be set to
// something different during unit testing.
var getFile getFileFunc = updates.GetFile

func init() {
	// filterListsLoaded is closed when loading finished; see isLoaded.
	filterListsLoaded = make(chan struct{})
}
// isLoaded reports whether the filterlists have been loaded, i.e.
// whether the filterListsLoaded channel has been closed.
func isLoaded() bool {
	select {
	case <-filterListsLoaded:
		return true
	default:
	}
	return false
}
// processListFile opens the latest version of file and decodes its DSDL
// content. It calls processEntry for each decoded filterlists entry.
// Decoding, entry processing and persisting run as a three-stage
// pipeline of goroutines managed by a single errgroup.
func processListFile(ctx context.Context, filter *scopedBloom, file *updater.File) error {
	f, err := os.Open(file.Path())
	if err != nil {
		return err
	}
	defer func() {
		_ = f.Close()
	}()

	// Pipeline channels: decoded entries and the records to persist.
	values := make(chan *listEntry, 100)
	records := make(chan record.Record, 100)

	g, ctx := errgroup.WithContext(ctx)

	// startSafe runs fn inside the error group but wrapped
	// in recovered function.
	startSafe := func(fn func() error) {
		g.Go(func() (err error) {
			defer func() {
				if x := recover(); x != nil {
					// Convert a panic into an ordinary errgroup error.
					if e, ok := x.(error); ok {
						err = e
					} else {
						err = fmt.Errorf("%v", x)
					}
				}
			}()
			err = fn()
			return err
		})
	}

	// Stage 1: decode the file, streaming entries into values.
	startSafe(func() (err error) {
		defer close(values)
		err = decodeFile(ctx, f, values)
		return
	})

	// Stage 2: turn entries into database records.
	startSafe(func() error {
		defer close(records)
		for entry := range values {
			if err := processEntry(ctx, filter, entry, records); err != nil {
				return err
			}
		}
		return nil
	})

	// Stage 3: persist records in batches (schedules its own jobs).
	persistRecords(startSafe, records)

	return g.Wait()
}
// persistRecords drains records into the cache database in batches of
// 1000. After each committed batch it schedules a new job via startJob
// so progress is persisted incrementally; progress is logged every
// 10000 processed entities.
func persistRecords(startJob func(func() error), records <-chan record.Record) {
	var cnt int
	start := time.Now()

	logProgress := func() {
		if cnt == 0 {
			// protection against panic (division by zero below)
			return
		}
		timePerEntity := time.Since(start) / time.Duration(cnt)
		speed := float64(time.Second) / float64(timePerEntity)
		log.Debugf("processed %d entities in %s with %s / entity (%.2f entities/second)", cnt, time.Since(start), timePerEntity, speed)
	}

	batch := database.NewInterface(&database.Options{Local: true, Internal: true})

	var processBatch func() error
	processBatch = func() error {
		batchPut := batch.PutMany("cache")
		for r := range records {
			if err := batchPut(r); err != nil {
				return err
			}
			cnt++
			if cnt%10000 == 0 {
				logProgress()
			}
			if cnt%1000 == 0 {
				// Commit this batch and schedule a fresh job to
				// continue draining the channel.
				if err := batchPut(nil); err != nil {
					return err
				}
				startJob(processBatch)
				return nil
			}
		}
		// Channel closed: log and commit the final (partial) batch.
		if cnt%10000 != 0 { // avoid duplicate logging
			logProgress()
		}
		return batchPut(nil)
	}
	startJob(processBatch)
}
// normalizeEntry brings an entry into canonical form before it is
// added to the bloom filter and the cache database. Domain entities
// are lower-cased and made fully qualified (trailing dot).
func normalizeEntry(entry *listEntry) {
	switch strings.ToLower(entry.Type) {
	case "domain":
		entry.Entity = strings.ToLower(entry.Entity)
		// BUG FIX: guard against an empty entity, which previously
		// caused an index-out-of-range panic on Entity[len-1].
		if entry.Entity == "" || entry.Entity[len(entry.Entity)-1] != '.' {
			// ensure domains from the filter list are fully qualified and end in dot.
			entry.Entity += "."
		}
	default:
	}
}
// processEntry normalizes entry, adds it to the bloom filter and sends
// a matching cache record to records for persisting. Whitelist entries
// are stored as deleted records so stale data is removed.
func processEntry(ctx context.Context, filter *scopedBloom, entry *listEntry, records chan<- record.Record) error {
	normalizeEntry(entry)

	// Only add the entry to the bloom filter if it has any sources.
	if len(entry.Resources) > 0 {
		filter.add(entry.Type, entry.Entity)
	}

	r := &entityRecord{
		Value:     entry.Entity,
		Type:      entry.Type,
		Sources:   entry.getSources(),
		UpdatedAt: time.Now().Unix(),
	}

	// If the entry is a "delete" update, actually delete it to save space.
	if entry.Whitelist {
		r.CreateMeta()
		r.Meta().Delete()
	}

	key := makeListCacheKey(strings.ToLower(r.Type), r.Value)
	r.SetKey(key)

	// Hand the record to the persisting stage, honoring cancellation.
	select {
	case records <- r:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// mapKeys returns the keys of m as a sorted slice.
func mapKeys(m map[string]struct{}) []string {
	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	return keys
}

View File

@@ -0,0 +1,143 @@
package filterlists
import (
"compress/gzip"
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/safing/portbase/formats/dsd"
"github.com/safing/portbase/utils"
)
// listEntry represents a single entity entry as decoded from a DSDL
// filterlists file.
type listEntry struct {
	// Type is the entity scope (e.g. "Domain", "IPv4"; matched
	// case-insensitively elsewhere).
	Type string `json:"type"`
	// Entity is the entity value itself.
	Entity string `json:"entity"`
	// Whitelist marks the entry as a delete/unblock update.
	Whitelist bool `json:"whitelist"`
	// Resources lists where the entity has been observed.
	Resources []entryResource `json:"resources"`
}
// entryResource identifies one source/resource pair in which a list
// entry has been observed.
type entryResource struct {
	SourceID   string `json:"sourceID"`
	ResourceID string `json:"resourceID"`
}
// getSources returns the distinct source IDs of all resources the
// entry has been observed in, preserving first-seen order.
func (entry *listEntry) getSources() (sourceIDs []string) {
	sourceIDs = make([]string, 0, len(entry.Resources))
	for _, res := range entry.Resources {
		if utils.StringInSlice(sourceIDs, res.SourceID) {
			continue
		}
		sourceIDs = append(sourceIDs, res.SourceID)
	}
	return sourceIDs
}
// decodeFile decodes a DSDL filterlists file and sends decoded entities to
// ch. It blocks until all list entries have been consumed or ctx is cancelled.
func decodeFile(ctx context.Context, r io.Reader, ch chan<- *listEntry) error {
	compressed, format, err := parseHeader(r)
	if err != nil {
		// FIX: error message typo ("parser" -> "parse").
		return fmt.Errorf("failed to parse header: %w", err)
	}
	if compressed {
		r, err = gzip.NewReader(r)
		if err != nil {
			return fmt.Errorf("failed to open gzip reader: %w", err)
		}
	}

	// we need a reader that supports io.ByteReader
	reader := &byteReader{r}

	// FIX: removed the dead entryCount variable that was incremented
	// but never read.
	for {
		// Each entry is prefixed with its byte length as an unsigned varint.
		length, readErr := binary.ReadUvarint(reader)
		if readErr != nil {
			if errors.Is(readErr, io.EOF) {
				// Clean end of file.
				return nil
			}
			return fmt.Errorf("failed to load varint entity length: %w", readErr)
		}

		blob := make([]byte, length)
		_, readErr = io.ReadFull(reader, blob)
		if readErr != nil {
			if errors.Is(readErr, io.EOF) {
				// there shouldn't be an EOF here because
				// we actually got a length above. Return
				// ErrUnexpectedEOF instead of just EOF.
				// io.ReadFull already returns ErrUnexpectedEOF
				// if it failed to read blob as a whole but may
				// return io.EOF if it read exactly 0 bytes.
				readErr = io.ErrUnexpectedEOF
			}
			return readErr
		}

		// we don't really care about the format here but it must be
		// something that can encode/decode complex structures like
		// JSON, BSON or GenCode. So LoadAsFormat MUST return the value
		// passed as the third parameter. String or RAW encoding IS AN
		// error here.
		entry := &listEntry{}
		if err := dsd.LoadAsFormat(blob, format, entry); err != nil {
			// FIX: error message typo ("decoded" -> "decode").
			return fmt.Errorf("failed to decode DSD encoded entity: %w", err)
		}

		select {
		case ch <- entry:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
// parseHeader reads and validates the two- or three-byte DSDL file
// header. It reports whether the payload is gzip compressed and
// returns the DSD format byte of the entries.
func parseHeader(r io.Reader) (compressed bool, format byte, err error) {
	// FIX: use io.ReadFull instead of a bare Read. Read may legally
	// return (0, nil), which would have left the header byte
	// unread and uninitialized without any error being reported.
	var listHeader [1]byte
	if _, err = io.ReadFull(r, listHeader[:]); err != nil {
		// if we have an error here we can safely abort because
		// the file must be broken
		return
	}

	if listHeader[0] != dsd.LIST {
		err = fmt.Errorf("unexpected file type: %d (%c), expected dsd list", listHeader[0], listHeader[0])
		return
	}

	var compression [1]byte
	if _, err = io.ReadFull(r, compression[:]); err != nil {
		// same here, a DSDL file must have at least 2 bytes header
		return
	}

	if compression[0] == dsd.GZIP {
		compressed = true
		// With compression, the format byte follows as third header byte.
		var formatSlice [1]byte
		if _, err = io.ReadFull(r, formatSlice[:]); err != nil {
			return
		}
		format = formatSlice[0]
		return
	}

	// Without compression, the second byte is already the format.
	format = compression[0]
	return // nolint:nakedret
}
// byteReader extends an io.Reader to implement the ByteReader interface.
type byteReader struct{ io.Reader }
func (br *byteReader) ReadByte() (byte, error) {
var b [1]byte
_, err := br.Read(b[:])
return b[0], err
}

View File

@@ -0,0 +1,288 @@
package filterlists
import (
"errors"
"fmt"
"os"
"strings"
"sync"
"github.com/safing/portbase/database"
"github.com/safing/portbase/database/record"
"github.com/safing/portbase/formats/dsd"
"github.com/safing/portbase/log"
"github.com/safing/portbase/updater"
"github.com/safing/portmaster/service/updates"
)
// the following definitions are copied from the intelhub repository
// and stripped down to only include data required by portmaster.

// Category is used to group different list sources by the type
// of entity they are blocking. Categories may be nested using
// the Parent field.
type Category struct {
	// ID is a unique ID for the category. For sub-categories
	// this ID must be used in the Parent field of any directly
	// nested categories.
	ID string `json:"id"`

	// Parent may hold the ID of another category. If set, this
	// category is made a sub-category of its parent.
	Parent string `json:"parent,omitempty"`

	// Name is a human readable name for the category and can
	// be used in user interfaces.
	Name string `json:"name"`

	// Description is a human readable description that may be
	// displayed in user interfaces.
	Description string `json:"description,omitempty"`
}
// Source defines an external filterlists source.
type Source struct {
	// ID is a unique ID for the source. Entities always reference the
	// sources they have been observed in using this ID. Refer to the
	// Entry struct for more information.
	ID string `json:"id"`

	// Name is a human readable name for the source and can be used
	// in user interfaces.
	Name string `json:"name"`

	// Description may hold a human readable description for the source.
	// It may be used in user interfaces.
	Description string `json:"description"`

	// Type describes the type of entities the source provides. Refer
	// to the Type definition for more information and well-known types.
	Type string `json:"type"`

	// URL points to the filterlists file.
	URL string `json:"url"`

	// Category holds the unique ID of a category the source belongs to. Since
	// categories can be nested the source is automatically part of all categories
	// in the hierarchy. Refer to the Category struct for more information.
	Category string `json:"category"`

	// Website may hold the URL of the source maintainers website.
	Website string `json:"website,omitempty"`

	// License holds the license that is used for the source.
	License string `json:"license"`

	// Contribute may hold an opaque string that informs a user on how to
	// contribute to the source. This may be a URL or mail address.
	Contribute string `json:"contribute"`
}
// ListIndexFile describes the structure of the released list
// index file.
type ListIndexFile struct {
	record.Base
	sync.RWMutex

	// Version is the version of the released index file.
	Version string `json:"version"`
	// SchemaVersion is the version of the index file schema itself.
	SchemaVersion string `json:"schemaVersion"`
	// Categories holds all defined (possibly nested) categories.
	Categories []Category `json:"categories"`
	// Sources holds all defined filterlists sources.
	Sources []Source `json:"sources"`
}
// getCategorySources returns the IDs of all sources that belong to
// the category id, including sources of all nested sub-categories.
func (index *ListIndexFile) getCategorySources(id string) []string {
	ids := make(map[string]struct{})

	// Collect sources that are direct members of the category.
	for _, src := range index.Sources {
		if src.Category == id {
			ids[src.ID] = struct{}{}
		}
	}

	// Recurse into all direct sub-categories.
	for _, cat := range index.Categories {
		if cat.Parent != id {
			continue
		}
		for _, sid := range index.getCategorySources(cat.ID) {
			ids[sid] = struct{}{}
		}
	}

	return mapKeys(ids)
}
// getSourcesMatching resolves id to source IDs: if id is itself a
// source ID it is returned directly, otherwise it is treated as a
// category and resolved through the category tree.
func (index *ListIndexFile) getSourcesMatching(id string) []string {
	for _, src := range index.Sources {
		if src.ID == id {
			return []string{src.ID}
		}
	}
	return index.getCategorySources(id)
}
// getDistictSourceIDs resolves a set of source or category IDs into
// the sorted set of distinct source IDs they cover.
// NOTE(review): the name is misspelled ("Distict"); kept for
// compatibility with existing callers.
func (index *ListIndexFile) getDistictSourceIDs(ids ...string) []string {
	index.RLock()
	defer index.RUnlock()

	distinct := make(map[string]struct{})
	for _, id := range ids {
		for _, sourceID := range index.getSourcesMatching(id) {
			distinct[sourceID] = struct{}{}
		}
	}
	return mapKeys(distinct)
}
// getListIndexFromCache loads the filterlists index from the cache
// database, unwrapping the record if necessary.
func getListIndexFromCache() (*ListIndexFile, error) {
	r, err := cache.Get(filterListIndexKey)
	if err != nil {
		return nil, err
	}

	if r.IsWrapped() {
		index := new(ListIndexFile)
		if err := record.Unwrap(r, index); err != nil {
			return nil, err
		}
		return index, nil
	}

	index, ok := r.(*ListIndexFile)
	if !ok {
		return nil, fmt.Errorf("invalid type, expected ListIndexFile but got %T", r)
	}
	return index, nil
}
var (
	// listIndexUpdate must only be used by updateListIndex.
	listIndexUpdate *updater.File
	// listIndexUpdateLock serializes calls to updateListIndex.
	listIndexUpdateLock sync.Mutex
)
// updateListIndex fetches the filterlists index file from the updates
// system and persists it in the cache database if the cached copy is
// missing or outdated. It also refreshes the resolved unbreak filter
// list IDs afterwards.
func updateListIndex() error {
	listIndexUpdateLock.Lock()
	defer listIndexUpdateLock.Unlock()

	// Check if an update is needed.
	switch {
	case listIndexUpdate == nil:
		// This is the first time this function is run, get updater file for index.
		var err error
		listIndexUpdate, err = updates.GetFile(listIndexFilePath)
		if err != nil {
			return err
		}
		// Check if the version in the cache is current.
		index, err := getListIndexFromCache()
		switch {
		case errors.Is(err, database.ErrNotFound):
			log.Info("filterlists: index not in cache, starting update")
		case err != nil:
			log.Warningf("filterlists: failed to load index from cache, starting update: %s", err)
		case !listIndexUpdate.EqualsVersion(strings.TrimPrefix(index.Version, "v")):
			log.Infof(
				"filterlists: index from cache is outdated, starting update (%s != %s)",
				strings.TrimPrefix(index.Version, "v"),
				listIndexUpdate.Version(),
			)
		default:
			// List is in cache and current, there is nothing to do.
			log.Debug("filterlists: index is up to date")
			// Update the unbreak filter list IDs on initial load.
			updateUnbreakFilterListIDs()
			return nil
		}
	case listIndexUpdate.UpgradeAvailable():
		log.Info("filterlists: index update available, starting update")
	default:
		// Index is loaded and no update is available, there is nothing to do.
		return nil
	}

	// Update list index from updates.
	blob, err := os.ReadFile(listIndexUpdate.Path())
	if err != nil {
		return err
	}

	index := &ListIndexFile{}
	_, err = dsd.Load(blob, index)
	if err != nil {
		return err
	}

	index.SetKey(filterListIndexKey)
	if err := cache.Put(index); err != nil {
		return err
	}
	log.Debugf("intel/filterlists: updated list index in cache to %s", index.Version)

	// Update the unbreak filter list IDs after an update.
	updateUnbreakFilterListIDs()

	return nil
}
// ResolveListIDs resolves a slice of source or category IDs into
// a slice of distinct source IDs. If the index is not yet cached,
// one index update is triggered and the resolution retried once.
func ResolveListIDs(ids []string) ([]string, error) {
	index, err := getListIndexFromCache()
	if err != nil {
		if errors.Is(err, database.ErrNotFound) {
			if err := updateListIndex(); err != nil {
				return nil, err
			}

			// retry resolving IDs; after a successful update the
			// index is expected to be in the cache.
			return ResolveListIDs(ids)
		}
		log.Errorf("failed to resolved ids %v: %s", ids, err)
		return nil, err
	}

	resolved := index.getDistictSourceIDs(ids...)
	log.Debugf("intel/filterlists: resolved ids %v to %v", ids, resolved)

	return resolved, nil
}
var (
	// unbreakCategoryIDs holds the category IDs that are resolved
	// into the unbreak filter list IDs.
	unbreakCategoryIDs = []string{"UNBREAK"}

	// unbreakIDs caches the resolved unbreak source IDs; guarded by
	// unbreakIDsLock.
	unbreakIDs     []string
	unbreakIDsLock sync.Mutex
)
// GetUnbreakFilterListIDs returns the resolved list of all unbreak filter lists.
// NOTE(review): the returned slice is shared internal state — callers
// must not modify it.
func GetUnbreakFilterListIDs() []string {
	unbreakIDsLock.Lock()
	defer unbreakIDsLock.Unlock()
	return unbreakIDs
}
// updateUnbreakFilterListIDs refreshes the cached unbreak source IDs
// from the current list index. On failure the previous value is kept
// and a warning is logged.
func updateUnbreakFilterListIDs() {
	unbreakIDsLock.Lock()
	defer unbreakIDsLock.Unlock()

	resolved, err := ResolveListIDs(unbreakCategoryIDs)
	if err != nil {
		log.Warningf("filter: failed to resolve unbreak filter list IDs: %s", err)
		return
	}
	unbreakIDs = resolved
}

View File

@@ -0,0 +1,26 @@
package filterlists
// Cache database key layout for all filterlists data.
const (
	cacheDBPrefix = "cache:intel/filterlists"

	// filterListCacheVersionKey is used to store the highest version
	// of a filterlists file (base, intermediate or urgent) in the
	// cache database. It's used to decide if the cache database and
	// bloomfilters need to be reset and rebuilt.
	filterListCacheVersionKey = cacheDBPrefix + "/version"

	// filterListIndexKey is used to store the filterlists index.
	filterListIndexKey = cacheDBPrefix + "/index"

	// filterListKeyPrefix is the prefix inside that cache database
	// used for filter list entries.
	filterListKeyPrefix = cacheDBPrefix + "/lists/"
)
// makeBloomCacheKey returns the cache database key under which the
// bloom filter for scope is stored.
func makeBloomCacheKey(scope string) string {
	return cacheDBPrefix + "/bloom/" + scope
}
// makeListCacheKey returns the cache database key for a filter list
// entity of the given scope (e.g. "domain", "ipv4") and value.
func makeListCacheKey(scope, key string) string {
	return filterListKeyPrefix + scope + "/" + key
}

View File

@@ -0,0 +1,129 @@
package filterlists
import (
"errors"
"net"
"github.com/safing/portbase/database"
"github.com/safing/portbase/log"
)
// lookupBlockLists loads the entity record for key from
// cache and returns the list of blocklist sources the
// key is part of. It is not considered an error if
// key does not exist, instead, an empty slice is
// returned. The bloom filter is consulted first to skip
// database lookups for entities that cannot be listed.
func lookupBlockLists(entity, value string) ([]string, error) {
	key := makeListCacheKey(entity, value)
	if !isLoaded() {
		log.Warningf("intel/filterlists: not searching for %s because filterlists not loaded", key)
		// filterLists have not yet been loaded so
		// there's no point querying into the cache
		// database.
		return nil, nil
	}

	filterListLock.RLock()
	defer filterListLock.RUnlock()

	// Negative bloom filter result is definitive: not listed.
	if !defaultFilter.test(entity, value) {
		return nil, nil
	}

	// log.Debugf("intel/filterlists: searching for entries with %s", key)
	entry, err := getEntityRecordByKey(key)
	if err != nil {
		if errors.Is(err, database.ErrNotFound) {
			// Bloom filter false positive; entity is not listed.
			return nil, nil
		}
		log.Errorf("intel/filterlists: failed to get entries for key %s: %s", key, err)
		return nil, err
	}

	return entry.Sources, nil
}
// LookupCountry returns a list of sources that mark the country
// as blocked. If country is not stored in the cache database
// a nil slice is returned.
func LookupCountry(country string) ([]string, error) {
	return lookupBlockLists("country", country)
}
// LookupDomain returns a list of sources that mark the domain
// as blocked. If domain is not stored in the cache database
// a nil slice is returned. The caller is responsible for making
// sure that the given domain is valid and canonical.
func LookupDomain(domain string) ([]string, error) {
	// Return no lists for empty domains and the root zone.
	if domain == "" || domain == "." {
		return nil, nil
	}
	return lookupBlockLists("domain", domain)
}
// LookupASNString returns a list of sources that mark the ASN
// as blocked. If ASN is not stored in the cache database
// a nil slice is returned.
func LookupASNString(asn string) ([]string, error) {
	return lookupBlockLists("asn", asn)
}
// LookupIP returns a list of block sources that contain
// a reference to ip. LookupIP automatically checks the IPv4 or
// IPv6 lists respectively.
func LookupIP(ip net.IP) ([]string, error) {
if ip.To4() == nil {
return LookupIPv6(ip)
}
return LookupIPv4(ip)
}
// LookupIPString is like LookupIP but accepts an IPv4 or
// IPv6 address in their string representations.
func LookupIPString(ipStr string) ([]string, error) {
	parsed := net.ParseIP(ipStr)
	if parsed == nil {
		return nil, errors.New("invalid IP")
	}
	return LookupIP(parsed)
}
// LookupIPv4String returns a list of block sources that
// contain a reference to ip. If the IP is not stored in the
// cache database a nil slice is returned.
func LookupIPv4String(ipv4 string) ([]string, error) {
	return lookupBlockLists("ipv4", ipv4)
}
// LookupIPv4 is like LookupIPv4String but accepts a net.IP.
func LookupIPv4(ipv4 net.IP) ([]string, error) {
	v4 := ipv4.To4()
	if v4 == nil {
		return nil, errors.New("invalid IPv4")
	}
	return LookupIPv4String(v4.String())
}
// LookupIPv6String returns a list of block sources that
// contain a reference to ip. If the IP is not stored in the
// cache database a nil slice is returned.
func LookupIPv6String(ipv6 string) ([]string, error) {
	return lookupBlockLists("ipv6", ipv6)
}
// LookupIPv6 is like LookupIPv6String but accepts a net.IP.
func LookupIPv6(ipv6 net.IP) ([]string, error) {
	v6 := ipv6.To16()
	if v6 == nil {
		return nil, errors.New("invalid IPv6")
	}
	return LookupIPv6String(v6.String())
}

View File

@@ -0,0 +1,110 @@
package filterlists
import (
"context"
"fmt"
"github.com/tevino/abool"
"github.com/safing/portbase/log"
"github.com/safing/portbase/modules"
"github.com/safing/portmaster/service/netenv"
"github.com/safing/portmaster/service/updates"
)
// module is the filterlists module instance.
var module *modules.Module

// Failure state IDs used with module.Warning / module.Resolve.
const (
	filterlistsDisabled          = "filterlists:disabled"
	filterlistsUpdateFailed      = "filterlists:update-failed"
	filterlistsStaleDataSurvived = "filterlists:staledata"
)

// booleans mainly used to decouple the module
// during testing.
var (
	ignoreUpdateEvents = abool.New()
	ignoreNetEnvEvents = abool.New()
)

func init() {
	// NOTE(review): netenv events are ignored by default (set here in
	// init) — confirm where/whether this flag is cleared in production.
	ignoreNetEnvEvents.Set()

	module = modules.Register("filterlists", prep, start, stop, "base", "updates")
}
// prep registers the event hooks that trigger filterlists updates on
// resource updates and on regaining network connectivity.
func prep() error {
	if err := module.RegisterEventHook(
		updates.ModuleName,
		updates.ResourceUpdateEvent,
		"Check for blocklist updates",
		func(ctx context.Context, _ interface{}) error {
			// Hook can be disabled for testing via ignoreUpdateEvents.
			if ignoreUpdateEvents.IsSet() {
				return nil
			}
			return tryListUpdate(ctx)
		},
	); err != nil {
		return fmt.Errorf("failed to register resource update event handler: %w", err)
	}

	if err := module.RegisterEventHook(
		netenv.ModuleName,
		netenv.OnlineStatusChangedEvent,
		"Check for blocklist updates",
		func(ctx context.Context, _ interface{}) error {
			// Hook can be disabled for testing via ignoreNetEnvEvents.
			if ignoreNetEnvEvents.IsSet() {
				return nil
			}
			// Nothing to do if we went offline.
			if !netenv.Online() {
				return nil
			}
			return tryListUpdate(ctx)
		},
	); err != nil {
		return fmt.Errorf("failed to register online status changed event handler: %w", err)
	}

	return nil
}
// start loads the bloom filters from the cache database if a cache of
// the expected version exists. Otherwise filterlists stay disabled
// (with a user-visible warning) until the next successful list update.
func start() error {
	filterListLock.Lock()
	defer filterListLock.Unlock()

	ver, err := getCacheDatabaseVersion()
	if err == nil {
		log.Debugf("intel/filterlists: cache database has version %s", ver.String())

		if err = defaultFilter.loadFromCache(); err != nil {
			err = fmt.Errorf("failed to initialize bloom filters: %w", err)
		}
	}

	if err != nil {
		log.Debugf("intel/filterlists: blocklists disabled, waiting for update (%s)", err)
		warnAboutDisabledFilterLists()
	} else {
		log.Debugf("intel/filterlists: using cache database")
		// Signal that filterlists are ready; see isLoaded.
		close(filterListsLoaded)
	}

	return nil
}
// stop resets the loaded-signal channel so a later start waits for the
// filterlists to be loaded again.
// NOTE(review): the reassignment is not synchronized with concurrent
// readers of filterListsLoaded — presumably the module framework
// guarantees no concurrent access during stop; confirm.
func stop() error {
	filterListsLoaded = make(chan struct{})
	return nil
}
// warnAboutDisabledFilterLists sets a module warning informing the
// user that filter lists are still initializing.
func warnAboutDisabledFilterLists() {
	module.Warning(
		filterlistsDisabled,
		"Filter Lists Are Initializing",
		"Filter lists are being downloaded and set up in the background. They will be activated as configured when finished.",
	)
}

View File

@@ -0,0 +1,86 @@
package filterlists
/*
func TestMain(m *testing.M) {
// we completely ignore netenv events during testing.
ignoreNetEnvEvents.Set()
if err := updates.DisableUpdateSchedule(); err != nil {
fmt.Fprintf(os.Stderr, "failed to disable update schedule: %s", err)
os.Exit(1)
}
pmtesting.TestMainWithHooks(m, module, loadOnStart, nil)
}
func loadOnStart() error {
log.SetLogLevel(log.TraceLevel)
ch := make(chan struct{})
defer close(ch)
if err := updates.TriggerUpdate(); err != nil {
return fmt.Errorf("failed to trigger update: %w", err)
}
var err error
go func() {
select {
case <-ch:
return
case <-time.After(time.Minute):
err = fmt.Errorf("timeout loading")
close(filterListsLoaded) // let waitUntilLoaded() return
}
}()
waitUntilLoaded()
time.Sleep(time.Second * 10)
if err != nil {
return err
}
failureStatus, failureID, failureMsg := module.FailureStatus()
if failureStatus == modules.FailureError || failureStatus == modules.FailureWarning {
return fmt.Errorf("module in failure state: %s %q", failureID, failureMsg)
}
// ignore update events from now on during testing.
ignoreUpdateEvents.Set()
testSources := []string{"TEST"}
testEntries := []*listEntry{
{
Entity: "example.com",
Sources: testSources,
Type: "Domain",
},
{
Entity: "1.1.1.1",
Sources: testSources,
Type: "IPv4",
},
{
Entity: "AT",
Sources: testSources,
Type: "Country",
},
{
Entity: "123",
Sources: testSources,
Type: "ASN",
},
}
for _, e := range testEntries {
// add some test entries
if err := processEntry(e); err != nil {
return err
}
}
return nil
}
*/

View File

@@ -0,0 +1,40 @@
package filterlists
import (
"fmt"
"sync"
"github.com/safing/portbase/database/record"
)
// entityRecord is the cache database record for a single filter list
// entity and the sources that list it.
type entityRecord struct {
	record.Base `json:"-"`
	sync.Mutex  `json:"-"`

	// Value is the entity value (e.g. a domain or IP string).
	Value string
	// Sources lists the IDs of the filter list sources listing Value.
	Sources []string
	// Type is the entity scope (e.g. "Domain", "IPv4").
	Type string
	// UpdatedAt is the unix timestamp of the last update; used for
	// purging obsolete entries.
	UpdatedAt int64
}
// getEntityRecordByKey loads the entityRecord stored under key from
// the cache database, unwrapping the record if necessary.
func getEntityRecordByKey(key string) (*entityRecord, error) {
	r, err := cache.Get(key)
	if err != nil {
		return nil, err
	}

	if r.IsWrapped() {
		entity := &entityRecord{}
		if err := record.Unwrap(r, entity); err != nil {
			return nil, err
		}
		return entity, nil
	}

	entity, ok := r.(*entityRecord)
	if !ok {
		return nil, fmt.Errorf("record not of type *entityRecord, but %T", r)
	}
	return entity, nil
}

View File

@@ -0,0 +1,270 @@
package filterlists
import (
"context"
"errors"
"fmt"
"sort"
"time"
"github.com/hashicorp/go-version"
"github.com/tevino/abool"
"github.com/safing/portbase/database"
"github.com/safing/portbase/database/query"
"github.com/safing/portbase/log"
"github.com/safing/portbase/modules"
"github.com/safing/portbase/updater"
)
// updateInProgress guards against concurrent list updates.
var updateInProgress = abool.New()

// tryListUpdate wraps performUpdate but ensures the module's
// error state is correctly set or resolved.
func tryListUpdate(ctx context.Context) error {
	err := performUpdate(ctx)
	if err != nil {
		// Check if we are shutting down.
		if module.IsStopping() {
			return nil
		}

		// Check if the module already has a failure status set. If not, set a
		// generic one with the returned error.
		failureStatus, _, _ := module.FailureStatus()
		if failureStatus < modules.FailureWarning {
			module.Warning(
				filterlistsUpdateFailed,
				"Filter Lists Update Failed",
				fmt.Sprintf("The Portmaster failed to process a filter lists update. Filtering capabilities are currently either impaired or not available at all. Error: %s", err.Error()),
			)
		}

		return err
	}

	return nil
}
// performUpdate applies all pending filterlists file upgrades in order:
// it refreshes the list index, processes each upgradable file into the
// bloom filters and cache database, persists the filters, removes
// obsolete entries after a base update, and records the new cache
// version. Only one update may run at a time.
func performUpdate(ctx context.Context) error {
	if !updateInProgress.SetToIf(false, true) {
		log.Debugf("intel/filterlists: upgrade already in progress")
		return nil
	}
	defer updateInProgress.UnSet()

	// First, update the list index.
	err := updateListIndex()
	if err != nil {
		// Index update failure is logged but does not abort the
		// list file upgrade below.
		log.Errorf("intel/filterlists: failed update list index: %s", err)
	}

	upgradables, err := getUpgradableFiles()
	if err != nil {
		return err
	}
	log.Debugf("intel/filterlists: resources to update: %v", upgradables)

	if len(upgradables) == 0 {
		log.Debugf("intel/filterlists: ignoring update, latest version is already used")
		return nil
	}

	cleanupRequired := false
	filterToUpdate := defaultFilter

	// perform the actual upgrade by processing each file
	// in the returned order.
	for idx, file := range upgradables {
		log.Debugf("intel/filterlists: applying update (%d) %s version %s", idx, file.Identifier(), file.Version())

		if file == baseFile {
			if idx != 0 {
				log.Warningf("intel/filterlists: upgrade order is wrong, base file needs to be updated first not at idx %d", idx)
				// we still continue because after processing the base
				// file everything is correct again, we just used some
				// CPU and IO resources for nothing when processing
				// the previous files.
			}
			cleanupRequired = true

			// since we are processing a base update we will create our
			// bloom filters from scratch.
			filterToUpdate = newScopedBloom()
		}

		if err := processListFile(ctx, filterToUpdate, file); err != nil {
			return fmt.Errorf("failed to process upgrade %s: %w", file.Identifier(), err)
		}
	}

	if filterToUpdate != defaultFilter {
		// replace the bloom filters in our default
		// filter.
		defaultFilter.replaceWith(filterToUpdate)
	}

	// from now on, the database is ready and can be used if
	// it wasn't loaded yet.
	if !isLoaded() {
		close(filterListsLoaded)
	}

	if err := defaultFilter.saveToCache(); err != nil {
		// just handle the error by logging as it's only consequence
		// is that we will need to reprocess all files during the next
		// start.
		log.Errorf("intel/filterlists: failed to persist bloom filters in cache database: %s", err)
	}

	// if we processed the base file we need to perform
	// some cleanup on filterlists entities that have not
	// been updated now. Once we are done, start a worker
	// for that purpose.
	if cleanupRequired {
		if err := module.RunWorker("filterlists:cleanup", removeAllObsoleteFilterEntries); err != nil {
			// if we failed to remove all stale cache entries
			// we abort now WITHOUT updating the database version. This means
			// we'll try again during the next update.
			module.Warning(
				filterlistsStaleDataSurvived,
				"Filter Lists May Overblock",
				fmt.Sprintf("The Portmaster failed to delete outdated filter list data. Filtering capabilities are fully available, but overblocking may occur. Error: %s", err.Error()), //nolint:misspell // overblocking != overclocking
			)
			return fmt.Errorf("failed to cleanup stale cache records: %w", err)
		}
	}

	// try to save the highest version of our files.
	highestVersion := upgradables[len(upgradables)-1]
	if err := setCacheDatabaseVersion(highestVersion.Version()); err != nil {
		log.Errorf("intel/filterlists: failed to save cache database version: %s", err)
	} else {
		log.Infof("intel/filterlists: successfully migrated cache database to %s", highestVersion.Version())
	}

	// The list update succeeded, resolve any states.
	module.Resolve("")

	return nil
}
// removeAllObsoleteFilterEntries purges filter list entries from the
// cache database that have not been touched by the current update run.
func removeAllObsoleteFilterEntries(ctx context.Context) error {
	log.Debugf("intel/filterlists: cleanup task started, removing obsolete filter list entries ...")
	n, err := cache.Purge(ctx, query.New(filterListKeyPrefix).Where(
		// TODO(ppacher): remember the timestamp we started the last update
		// and use that rather than "one hour ago"
		query.Where("UpdatedAt", query.LessThan, time.Now().Add(-time.Hour).Unix()),
	))
	if err != nil {
		return err
	}

	log.Debugf("intel/filterlists: successfully removed %d obsolete entries", n)
	return nil
}
// getUpgradableFiles returns a slice of filterlists files
// that should be updated. The files MUST be updated and
// processed in the returned order!
// It also refreshes the package-level baseFile, intermediateFile and
// urgentFile references. Intermediate and urgent files are optional
// (updater.ErrNotFound is tolerated); the base file is required.
func getUpgradableFiles() ([]*updater.File, error) {
	var updateOrder []*updater.File

	cacheDBInUse := isLoaded()

	if baseFile == nil || baseFile.UpgradeAvailable() || !cacheDBInUse {
		var err error
		baseFile, err = getFile(baseListFilePath)
		if err != nil {
			return nil, err
		}
		log.Tracef("intel/filterlists: base file needs update, selected version %s", baseFile.Version())
		updateOrder = append(updateOrder, baseFile)
	}

	if intermediateFile == nil || intermediateFile.UpgradeAvailable() || !cacheDBInUse {
		var err error
		intermediateFile, err = getFile(intermediateListFilePath)
		if err != nil && !errors.Is(err, updater.ErrNotFound) {
			return nil, err
		}

		if err == nil {
			log.Tracef("intel/filterlists: intermediate file needs update, selected version %s", intermediateFile.Version())
			updateOrder = append(updateOrder, intermediateFile)
		}
	}

	if urgentFile == nil || urgentFile.UpgradeAvailable() || !cacheDBInUse {
		var err error
		urgentFile, err = getFile(urgentListFilePath)
		if err != nil && !errors.Is(err, updater.ErrNotFound) {
			return nil, err
		}

		if err == nil {
			log.Tracef("intel/filterlists: urgent file needs update, selected version %s", urgentFile.Version())
			updateOrder = append(updateOrder, urgentFile)
		}
	}

	return resolveUpdateOrder(updateOrder)
}
// resolveUpdateOrder sorts updateOrder by ascending version and drops every
// file that is already covered by the current cache database version. It
// returns nil, nil when nothing needs to be processed.
func resolveUpdateOrder(updateOrder []*updater.File) ([]*updater.File, error) {
	// sort the update order by ascending version
	sort.Sort(byAscVersion(updateOrder))
	log.Tracef("intel/filterlists: order of updates: %v", updateOrder)

	// Determine the cache database version; fall back to v0.0.0
	// (i.e. "process everything") when it is not loaded or unknown.
	var cacheDBVersion *version.Version
	if !isLoaded() {
		cacheDBVersion, _ = version.NewSemver("v0.0.0")
	} else {
		var err error
		cacheDBVersion, err = getCacheDatabaseVersion()
		if err != nil {
			if !errors.Is(err, database.ErrNotFound) {
				log.Errorf("intel/filterlists: failed to get cache database version: %s", err)
			}
			cacheDBVersion, _ = version.NewSemver("v0.0.0")
		}
	}

	// Find the first file to process: normally the first file newer than
	// the cache database, but a newer base file always restarts the
	// sequence at itself.
	startAtIdx := -1
	for idx, file := range updateOrder {
		ver, _ := version.NewSemver(file.Version())
		log.Tracef("intel/filterlists: checking file with version %s against %s", ver, cacheDBVersion)
		if ver.GreaterThan(cacheDBVersion) && (startAtIdx == -1 || file == baseFile) {
			startAtIdx = idx
		}
	}

	// if startAtIdx == -1 we don't have any upgradables to
	// process.
	if startAtIdx == -1 {
		log.Tracef("intel/filterlists: nothing to process, latest version %s already in use", cacheDBVersion)
		return nil, nil
	}

	// skip any files that are lower than the current cache db version
	// or after which a base upgrade would be performed.
	return updateOrder[startAtIdx:], nil
}
// byAscVersion implements sort.Interface and orders updater files by
// ascending semantic version.
type byAscVersion []*updater.File

// Len returns the number of files.
func (fs byAscVersion) Len() int { return len(fs) }

// Less reports whether the file at i has a lower version than the one at j.
func (fs byAscVersion) Less(i, j int) bool {
	left, _ := version.NewSemver(fs[i].Version())
	right, _ := version.NewSemver(fs[j].Version())
	return left.LessThan(right)
}

// Swap exchanges the files at i and j.
func (fs byAscVersion) Swap(i, j int) { fs[i], fs[j] = fs[j], fs[i] }

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,54 @@
package geoip
import (
"strings"
"testing"
)
// TestCountryInfo validates the static country dataset: every entry must be
// keyed by its own country code, carry complete name/continent/coordinate
// data, and have a continent code matching its region prefix.
func TestCountryInfo(t *testing.T) {
	t.Parallel()

	for key, country := range countries {
		if key != country.Code {
			t.Errorf("%s has a wrong country code of %q", key, country.Code)
		}
		if country.Name == "" {
			t.Errorf("%s is missing name", key)
		}
		if country.Continent.Code == "" {
			t.Errorf("%s is missing continent", key)
		}
		if country.Continent.Region == "" {
			t.Errorf("%s is missing continent region", key)
		}
		if country.Continent.Name == "" {
			t.Errorf("%s is missing continent name", key)
		}
		// The continent code must equal the region's prefix before the dash.
		generatedContinentCode, _, _ := strings.Cut(country.Continent.Region, "-")
		if country.Continent.Code != generatedContinentCode {
			// Fixed grammar of the failure message ("is has" -> "has").
			t.Errorf("%s has wrong continent code or region", key)
		}
		if country.Center.Latitude == 0 && country.Center.Longitude == 0 {
			t.Errorf("%s is missing coords", key)
		}
		if country.Center.AccuracyRadius == 0 {
			t.Errorf("%s is missing accuracy radius", key)
		}

		// Generate map source from data:
		// fmt.Printf(
		// 	`"%s": {Name:%q,Region:%q,ContinentCode:%q,Center:Coordinates{AccuracyRadius:%d,Latitude:%f,Longitude:%f},},`,
		// 	key,
		// 	country.Name,
		// 	country.Region,
		// 	country.ContinentCode,
		// 	country.Center.AccuracyRadius,
		// 	country.Center.Latitude,
		// 	country.Center.Longitude,
		// )
		// fmt.Println()
	}

	// Sanity check that the dataset was not accidentally truncated.
	if len(countries) < 247 {
		t.Errorf("dataset only includes %d countries", len(countries))
	}
}

View File

@@ -0,0 +1,213 @@
package geoip
import (
"context"
"fmt"
"sync"
"time"
maxminddb "github.com/oschwald/maxminddb-golang"
"github.com/safing/portbase/log"
"github.com/safing/portbase/updater"
"github.com/safing/portmaster/service/updates"
)
// worker is the package-level update worker shared by all lookups.
var worker *updateWorker

// init prepares the update worker; its background service worker is only
// started lazily on first use (see updateWorker.start).
func init() {
	worker = &updateWorker{
		trigger: make(chan struct{}),
	}
}
// Updater resource paths of the gzipped IPv4 and IPv6 MMDB databases.
const (
	v4MMDBResource = "intel/geoip/geoipv4.mmdb.gz"
	v6MMDBResource = "intel/geoip/geoipv6.mmdb.gz"
)
// geoIPDB bundles an open MMDB reader with the updater file it was loaded
// from, so update checks can ask the file whether an upgrade is available.
type geoIPDB struct {
	*maxminddb.Reader
	file *updater.File
}
// updateBroadcaster stores a geoIPDB and provides synchronized
// access to the MMDB reader. It also supports broadcasting to
// multiple waiters when a new database becomes available.
type updateBroadcaster struct {
	rw sync.RWMutex
	db *geoIPDB

	// waiter is created lazily by getWaiter and closed (then reset to
	// nil) when a new database becomes available.
	waiter chan struct{}
}
// NeedsUpdate reports whether the broadcaster holds no database yet or
// whether an upgrade is available for the one currently in use.
func (ub *updateBroadcaster) NeedsUpdate() bool {
	ub.rw.RLock()
	defer ub.rw.RUnlock()

	if ub.db == nil {
		return true
	}
	return ub.db.file.UpgradeAvailable()
}
// ReplaceDatabase replaces (or initially sets) the mmdb database.
// It also notifies all waiters about the availability of the new
// database.
func (ub *updateBroadcaster) ReplaceDatabase(db *geoIPDB) {
	ub.rw.Lock()
	defer ub.rw.Unlock()

	// Close the previous reader before swapping in the new database.
	if ub.db != nil {
		_ = ub.db.Close()
	}
	ub.db = db
	ub.notifyWaiters()
}
// notifyWaiters wakes every waiter by closing the shared waiter channel and
// then discards it. Must be called with ub.rw locked.
func (ub *updateBroadcaster) notifyWaiters() {
	if ub.waiter != nil {
		close(ub.waiter)
		ub.waiter = nil
	}
}
// getWaiter returns the channel that is closed once a new database version
// becomes available, lazily creating it on first use. Must be called with
// ub.rw locked.
func (ub *updateBroadcaster) getWaiter() chan struct{} {
	if ub.waiter == nil {
		ub.waiter = make(chan struct{})
	}
	return ub.waiter
}
// updateWorker loads and refreshes the IPv4 and IPv6 geoip databases from a
// single background service worker.
type updateWorker struct {
	trigger chan struct{} // signals the worker loop to run an update check
	once    sync.Once     // guards the one-time start of the worker

	v4 updateBroadcaster // IPv4 database
	v6 updateBroadcaster // IPv6 database
}
// GetReader returns a MMDB reader for either the IPv4 or the IPv6 database.
// If wait is true GetReader will wait at most 1 second for the database to
// become available. If no database is available or GetReader times-out while
// waiting nil is returned.
func (upd *updateWorker) GetReader(v6 bool, wait bool) *maxminddb.Reader {
	// check which updateBroadcaster we need to use
	ub := &upd.v4
	if v6 {
		ub = &upd.v6
	}

	// lock the updateBroadcaster and - if we are allowed to wait -
	// create a new waiter channel, trigger an update and wait for at
	// least 1 second for the update to complete.
	ub.rw.Lock()
	if ub.db == nil {
		if wait {
			// Get the waiter while still holding the lock, then release
			// the lock before blocking.
			waiter := ub.getWaiter()
			ub.rw.Unlock()
			upd.triggerUpdate()
			select {
			case <-waiter:
				// call this method again but this time we don't allow
				// it to wait since there must be an open database anyway ...
				return upd.GetReader(v6, false)
			case <-time.After(time.Second):
				// we tried hard but failed so give up here
				return nil
			}
		}
		ub.rw.Unlock()
		return nil
	}
	// Copy the reader out before unlocking.
	rd := ub.db.Reader
	ub.rw.Unlock()
	return rd
}
// triggerUpdate lazily starts the worker and signals it to run an update
// check. The signal is dropped if a check is already pending (non-blocking
// send).
func (upd *updateWorker) triggerUpdate() {
	upd.start()
	select {
	case upd.trigger <- struct{}{}:
	default:
	}
}
// start launches the background service worker exactly once.
func (upd *updateWorker) start() {
	upd.once.Do(func() {
		module.StartServiceWorker("geoip-updater", time.Second*10, upd.run)
	})
}
// run is the service worker loop: it replaces the v4/v6 databases whenever
// an update is needed and can be loaded, then blocks until the next trigger
// or until ctx is canceled.
func (upd *updateWorker) run(ctx context.Context) error {
	for {
		if upd.v4.NeedsUpdate() {
			if v4, err := getGeoIPDB(v4MMDBResource); err == nil {
				upd.v4.ReplaceDatabase(v4)
			} else {
				// Keep serving the old database on failure.
				log.Warningf("geoip: failed to get v4 database: %s", err)
			}
		}
		if upd.v6.NeedsUpdate() {
			if v6, err := getGeoIPDB(v6MMDBResource); err == nil {
				upd.v6.ReplaceDatabase(v6)
			} else {
				log.Warningf("geoip: failed to get v6 database: %s", err)
			}
		}
		select {
		case <-ctx.Done():
			return nil
		case <-upd.trigger:
		}
	}
}
// getGeoIPDB fetches and unpacks the given geoip database resource and
// returns an open MMDB reader together with its updater file handle.
func getGeoIPDB(resource string) (*geoIPDB, error) {
	log.Debugf("geoip: opening database %s", resource)

	file, unpackedPath, err := openAndUnpack(resource)
	if err != nil {
		return nil, err
	}

	reader, err := maxminddb.Open(unpackedPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open: %w", err)
	}
	log.Debugf("geoip: successfully opened database %s", resource)

	db := &geoIPDB{
		Reader: reader,
		file:   file,
	}
	return db, nil
}
// openAndUnpack fetches the given update resource and gunzips it, returning
// the file handle and the path of the unpacked database.
func openAndUnpack(resource string) (*updater.File, string, error) {
	file, err := updates.GetFile(resource)
	if err != nil {
		return nil, "", fmt.Errorf("getting file: %w", err)
	}

	path, err := file.Unpack(".gz", updater.UnpackGZIP)
	if err != nil {
		return nil, "", fmt.Errorf("unpacking file: %w", err)
	}
	return file, path, nil
}

View File

@@ -0,0 +1,212 @@
package geoip
import (
"encoding/binary"
"net"
"strings"
"github.com/umahmood/haversine"
"github.com/safing/portbase/utils"
)
const (
	earthCircumferenceInKm = 40100 // earth circumference in km, used to normalize coordinate distances
	defaultLocationAccuracy = 100 // fallback accuracy radius when the database reports none
)
// Location holds information regarding the geographical and network location of an IP address.
// Fields are populated from an MMDB lookup via the maxminddb struct tags.
// TODO: We are currently re-using the Continent-Code for the region. Update this and all dependencies.
type Location struct {
	Country                      CountryInfo `maxminddb:"country"`
	Coordinates                  Coordinates `maxminddb:"location"`
	AutonomousSystemNumber       uint        `maxminddb:"autonomous_system_number"` // 0 means unknown
	AutonomousSystemOrganization string      `maxminddb:"autonomous_system_organization"`
	IsAnycast                    bool        `maxminddb:"is_anycast"`
	IsSatelliteProvider          bool        `maxminddb:"is_satellite_provider"`
	IsAnonymousProxy             bool        `maxminddb:"is_anonymous_proxy"`
}
// Coordinates holds geographic coordinates and their estimated accuracy.
type Coordinates struct {
	AccuracyRadius uint16  `maxminddb:"accuracy_radius"` // see "About the Accuracy Radius" note below
	Latitude       float64 `maxminddb:"latitude"`  // 0 together with Longitude 0 is treated as "no coordinates"
	Longitude      float64 `maxminddb:"longitude"`
}
/*
Location Estimation

Distance Value

- 0: Other side of the Internet.
- 100: Very near, up to same network / datacenter.

Weighting Goal

- Exposure to different networks shall be limited as much as possible.
- A single network should not see a connection over a large distance.
- Latency should be low.

Weighting Intentions

- Being on the same continent is better than being in the same AS.
- Being in the same country is better than having low coordinate distance.
- Coordinate distance is only a tie breaker, as accuracy varies heavily.
- Same AS with lower coordinate distance beats being on the same continent.

Weighting Configuration
*/
// The weights below add up to 100, the maximum proximity value.
const (
	weightCountryMatch = 10
	weightRegionMatch = 10
	weightRegionalNeighborMatch = 10
	weightASNMatch = 10
	weightASOrgMatch = 10
	weightCoordinateDistance = 50
)
/*
About the Accuracy Radius
- Range: 1-1000
- Seen values (estimation): 1,5,10,20,50,100,200,500,1000
- The default seems to be 100.
Examples
- 1.1.1/24 has 1000: Anycast
- 8.8.0/19 has 1000: Anycast
- 8.8.52/22 has 1: City of Westfield
Conclusion
- Ignore or penalize high accuracy radius.
*/
// EstimateNetworkProximity aims to calculate the distance between two network locations. Returns a proximity value between 0 (far away) and 100 (nearby).
func (l *Location) EstimateNetworkProximity(to *Location) (proximity float32) {
	// Geographic weights: same country beats same region beats neighboring region.
	switch {
	case l.Country.Code != "" && l.Country.Code == to.Country.Code:
		proximity += weightCountryMatch + weightRegionMatch + weightRegionalNeighborMatch
	case l.Country.Continent.Region != "" && l.Country.Continent.Region == to.Country.Continent.Region:
		proximity += weightRegionMatch + weightRegionalNeighborMatch
	case l.IsRegionalNeighbor(to):
		proximity += weightRegionalNeighborMatch
	}

	// Autonomous-system weights.
	switch {
	case l.AutonomousSystemNumber == to.AutonomousSystemNumber &&
		l.AutonomousSystemNumber != 0:
		// Rely more on the ASN data, as it is more accurate than the ASOrg data,
		// especially when combining location data from multiple sources.
		proximity += weightASNMatch + weightASOrgMatch
	case l.AutonomousSystemOrganization == to.AutonomousSystemOrganization &&
		l.AutonomousSystemNumber != 0 && // Check if an ASN is set. If the ASOrg is known, the ASN must be too.
		!ASOrgUnknown(l.AutonomousSystemOrganization): // Check if the ASOrg name is valid.
		proximity += weightASOrgMatch
	}

	// Check coordinates and adjust accuracy value.
	accuracy := l.Coordinates.AccuracyRadius
	switch {
	case l.Coordinates.Latitude == 0 && l.Coordinates.Longitude == 0:
		fallthrough
	case to.Coordinates.Latitude == 0 && to.Coordinates.Longitude == 0:
		// If we don't have any coordinates, return.
		return proximity
	case to.Coordinates.AccuracyRadius > accuracy:
		// If the destination accuracy is worse, use that one.
		accuracy = to.Coordinates.AccuracyRadius
	}
	// Apply the default location accuracy if there is none.
	if accuracy == 0 {
		accuracy = defaultLocationAccuracy
	}

	// Calculate coordinate distance in kilometers.
	fromCoords := haversine.Coord{Lat: l.Coordinates.Latitude, Lon: l.Coordinates.Longitude}
	toCoords := haversine.Coord{Lat: to.Coordinates.Latitude, Lon: to.Coordinates.Longitude}
	_, km := haversine.Distance(fromCoords, toCoords)

	if km <= 100 && accuracy <= 100 {
		// Give the full value for highly accurate coordinates within 100km.
		proximity += weightCoordinateDistance
	} else {
		// Else, take a percentage.
		proximityInPercent := (earthCircumferenceInKm - km) / earthCircumferenceInKm
		// Apply penalty for locations with low accuracy (targeting accuracy radius >100).
		// Take away at most 50% of the weight through inaccuracy.
		accuracyModifier := 1 - float64(accuracy)/2000
		// Add proximity weight.
		proximity += float32(
			weightCoordinateDistance * // Maximum weight for this data point.
				proximityInPercent * // Range: 0-1
				accuracyModifier, // Range: 0.5-1
		)
	}

	return proximity
}
// PrimitiveNetworkProximity calculates the numerical distance between two IP addresses. Returns a proximity value between 0 (far away) and 100 (nearby).
func PrimitiveNetworkProximity(from net.IP, to net.IP, ipVersion uint8) int {
var diff float64
switch ipVersion {
case 4:
// TODO: use ip.To4() and :4
a := binary.BigEndian.Uint32(from[12:])
b := binary.BigEndian.Uint32(to[12:])
if a > b {
diff = float64(a - b)
} else {
diff = float64(b - a)
}
case 6:
a := binary.BigEndian.Uint64(from[:8])
b := binary.BigEndian.Uint64(to[:8])
if a > b {
diff = float64(a - b)
} else {
diff = float64(b - a)
}
default:
return 0
}
switch ipVersion {
case 4:
diff /= 256
return int((1 - diff/16777216) * 100)
case 6:
return int((1 - diff/18446744073709552000) * 100)
default:
return 0
}
}
// unknownASOrgNames lists lowercased AS Org values that are treated as
// placeholders for "unknown" by ASOrgUnknown.
var unknownASOrgNames = []string{
	"", // Expected default for unknown.
	"not routed", // Observed as "Not routed" in data set.
	"unknown", // Observed as "UNKNOWN" in online data set.
	"nil", // Programmatic unknown value.
	"null", // Programmatic unknown value.
	"undef", // Programmatic unknown value.
	"undefined", // Programmatic unknown value.
}
// ASOrgUnknown reports whether the given AS Org string is actually a
// placeholder meaning that the AS Org is unknown.
func ASOrgUnknown(asOrg string) bool {
	normalized := strings.ToLower(asOrg)
	return utils.StringInSlice(unknownASOrgNames, normalized)
}

View File

@@ -0,0 +1,42 @@
package geoip
import (
"net"
"testing"
)
// TestPrimitiveNetworkProximity checks that numerically close addresses get
// a high proximity value and numerically distant ones a low value, for both
// IPv4 and IPv6.
func TestPrimitiveNetworkProximity(t *testing.T) {
	t.Parallel()

	// IPv4: neighboring addresses vs. opposite ends of the address space.
	ip4_1 := net.ParseIP("1.1.1.1")
	ip4_2 := net.ParseIP("1.1.1.2")
	ip4_3 := net.ParseIP("255.255.255.0")

	dist := PrimitiveNetworkProximity(ip4_1, ip4_2, 4)
	t.Logf("primitive proximity %s <> %s: %d", ip4_1, ip4_2, dist)
	if dist < 90 {
		t.Fatalf("unexpected distance between ip4_1 and ip4_2: %d", dist)
	}

	dist = PrimitiveNetworkProximity(ip4_1, ip4_3, 4)
	t.Logf("primitive proximity %s <> %s: %d", ip4_1, ip4_3, dist)
	if dist > 10 {
		t.Fatalf("unexpected distance between ip4_1 and ip4_3: %d", dist)
	}

	// IPv6: only the upper 64 bits are compared by the implementation.
	ip6_1 := net.ParseIP("2a02::1")
	ip6_2 := net.ParseIP("2a02::2")
	ip6_3 := net.ParseIP("ffff::1")

	dist = PrimitiveNetworkProximity(ip6_1, ip6_2, 6)
	t.Logf("primitive proximity %s <> %s: %d", ip6_1, ip6_2, dist)
	if dist < 90 {
		t.Fatalf("unexpected distance between ip6_1 and ip6_2: %d", dist)
	}

	dist = PrimitiveNetworkProximity(ip6_1, ip6_3, 6)
	t.Logf("primitive proximity %s <> %s: %d", ip6_1, ip6_3, dist)
	if dist > 20 {
		t.Fatalf("unexpected distance between ip6_1 and ip6_3: %d", dist)
	}
}

View File

@@ -0,0 +1,33 @@
package geoip
import (
"fmt"
"net"
"github.com/oschwald/maxminddb-golang"
)
// getReader selects the correct database (IPv4 or IPv6) for the given IP
// and returns its reader, waiting for initialization if necessary.
func getReader(ip net.IP) *maxminddb.Reader {
	useV6 := ip.To4() == nil
	return worker.GetReader(useV6, true)
}
// GetLocation returns Location data of an IP address.
func GetLocation(ip net.IP) (*Location, error) {
	reader := getReader(ip)
	if reader == nil {
		return nil, fmt.Errorf("geoip database not available")
	}

	location := &Location{}
	if err := reader.Lookup(ip, location); err != nil {
		return nil, err
	}
	location.AddCountryInfo()
	return location, nil
}
// IsInitialized returns whether the geoip database has been initialized.
func IsInitialized(v6, wait bool) bool {
	reader := worker.GetReader(v6, wait)
	return reader != nil
}

View File

@@ -0,0 +1,66 @@
package geoip
import (
"net"
"testing"
)
func TestLocationLookup(t *testing.T) {
// Skip in CI.
if testing.Short() {
t.Skip()
}
t.Parallel()
ip1 := net.ParseIP("81.2.69.142")
loc1, err := GetLocation(ip1)
if err != nil {
t.Fatal(err)
}
t.Logf("%v", loc1)
ip2 := net.ParseIP("1.1.1.1")
loc2, err := GetLocation(ip2)
if err != nil {
t.Fatal(err)
}
t.Logf("%v", loc2)
ip3 := net.ParseIP("8.8.8.8")
loc3, err := GetLocation(ip3)
if err != nil {
t.Fatal(err)
}
t.Logf("%v", loc3)
ip4 := net.ParseIP("81.2.70.142")
loc4, err := GetLocation(ip4)
if err != nil {
t.Fatal(err)
}
t.Logf("%v", loc4)
ip5 := net.ParseIP("194.232.1.1")
loc5, err := GetLocation(ip5)
if err != nil {
t.Fatal(err)
}
t.Logf("%v", loc5)
ip6 := net.ParseIP("151.101.1.164")
loc6, err := GetLocation(ip6)
if err != nil {
t.Fatal(err)
}
t.Logf("%v", loc6)
dist1 := loc1.EstimateNetworkProximity(loc2)
dist2 := loc2.EstimateNetworkProximity(loc3)
dist3 := loc1.EstimateNetworkProximity(loc3)
dist4 := loc1.EstimateNetworkProximity(loc4)
t.Logf("proximity %s <> %s: %.2f", ip1, ip2, dist1)
t.Logf("proximity %s <> %s: %.2f", ip2, ip3, dist2)
t.Logf("proximity %s <> %s: %.2f", ip1, ip3, dist3)
t.Logf("proximity %s <> %s: %.2f", ip1, ip4, dist4)
}

View File

@@ -0,0 +1,40 @@
package geoip
import (
"context"
"github.com/safing/portbase/api"
"github.com/safing/portbase/modules"
"github.com/safing/portmaster/service/updates"
)
// module is the geoip module instance.
var module *modules.Module

// init registers the geoip module; prep wires up the API endpoint and the
// update event hook.
func init() {
	module = modules.Register("geoip", prep, nil, nil, "base", "updates")
}
// prep registers the country information API endpoint and hooks database
// update checks into the updates module's resource-update event.
func prep() error {
	if err := api.RegisterEndpoint(api.Endpoint{
		Path: "intel/geoip/countries",
		Read: api.PermitUser,
		// Do not attach to module, as the data is always available anyway.
		StructFunc: func(ar *api.Request) (i interface{}, err error) {
			return countries, nil
		},
		Name: "Get Country Information",
		Description: "Returns a map of country information centers indexed by ISO-A2 country code",
	}); err != nil {
		return err
	}

	// Re-check the geoip databases whenever update resources change.
	return module.RegisterEventHook(
		updates.ModuleName,
		updates.ResourceUpdateEvent,
		"Check for GeoIP database updates",
		func(c context.Context, i interface{}) error {
			worker.triggerUpdate()
			return nil
		},
	)
}

View File

@@ -0,0 +1,11 @@
package geoip
import (
"testing"
"github.com/safing/portmaster/service/core/pmtesting"
)
// TestMain hands control to pmtesting.TestMain with this package's module
// so tests run with the module environment set up.
func TestMain(m *testing.M) {
	pmtesting.TestMain(m, module)
}

View File

@@ -0,0 +1,245 @@
package geoip
import (
"github.com/safing/portbase/utils"
)
// IsRegionalNeighbor returns whether the supplied location is a regional neighbor.
func (l *Location) IsRegionalNeighbor(other *Location) bool {
	ownRegion := l.Country.Continent.Region
	otherRegion := other.Country.Continent.Region
	if ownRegion == "" || otherRegion == "" {
		return false
	}

	region, ok := regions[ownRegion]
	if !ok {
		return false
	}
	return utils.StringInSlice(region.Neighbors, otherRegion)
}
// Region defines a geographic region and neighboring regions.
type Region struct {
	ID string // region identifier, e.g. "EU-W"; equals the map key in regions
	Name string // human readable name
	Neighbors []string // IDs of neighboring regions; expected to be symmetric (see TestRegions)
}
var regions = map[string]*Region{
"AF-C": {
ID: "AF-C",
Name: "Africa, Sub-Saharan Africa, Middle Africa",
Neighbors: []string{
"AF-E",
"AF-N",
"AF-S",
"AF-W",
},
},
"AF-E": {
ID: "AF-E",
Name: "Africa, Sub-Saharan Africa, Eastern Africa",
Neighbors: []string{
"AF-C",
"AF-N",
"AF-S",
},
},
"AF-N": {
ID: "AF-N",
Name: "Africa, Northern Africa",
Neighbors: []string{
"AF-C",
"AF-E",
"AF-W",
"AS-W",
"EU-S",
},
},
"AF-S": {
ID: "AF-S",
Name: "Africa, Sub-Saharan Africa, Southern Africa",
Neighbors: []string{
"AF-C",
"AF-E",
"AF-W",
},
},
"AF-W": {
ID: "AF-W",
Name: "Africa, Sub-Saharan Africa, Western Africa",
Neighbors: []string{
"AF-C",
"AF-N",
"AF-S",
},
},
"AN": {
ID: "AN",
Name: "Antarctica",
Neighbors: []string{},
},
"AS-C": {
ID: "AS-C",
Name: "Asia, Central Asia",
Neighbors: []string{
"AS-E",
"AS-S",
"AS-SE",
"AS-W",
},
},
"AS-E": {
ID: "AS-E",
Name: "Asia, Eastern Asia",
Neighbors: []string{
"AS-C",
"AS-S",
"AS-SE",
},
},
"AS-S": {
ID: "AS-S",
Name: "Asia, Southern Asia",
Neighbors: []string{
"AS-C",
"AS-E",
"AS-SE",
"AS-W",
},
},
"AS-SE": {
ID: "AS-SE",
Name: "Asia, South-eastern Asia",
Neighbors: []string{
"AS-C",
"AS-E",
"AS-S",
"OC-C",
"OC-E",
"OC-N",
"OC-S",
},
},
"AS-W": {
ID: "AS-W",
Name: "Asia, Western Asia",
Neighbors: []string{
"AF-N",
"AS-C",
"AS-S",
"EU-E",
},
},
"EU-E": {
ID: "EU-E",
Name: "Europe, Eastern Europe",
Neighbors: []string{
"AS-W",
"EU-N",
"EU-S",
"EU-W",
},
},
"EU-N": {
ID: "EU-N",
Name: "Europe, Northern Europe",
Neighbors: []string{
"EU-E",
"EU-S",
"EU-W",
},
},
"EU-S": {
ID: "EU-S",
Name: "Europe, Southern Europe",
Neighbors: []string{
"AF-N",
"EU-E",
"EU-N",
"EU-W",
},
},
"EU-W": {
ID: "EU-W",
Name: "Europe, Western Europe",
Neighbors: []string{
"EU-E",
"EU-N",
"EU-S",
},
},
"NA-E": {
ID: "NA-E",
Name: "North America, Caribbean",
Neighbors: []string{
"NA-N",
"NA-S",
"SA",
},
},
"NA-N": {
ID: "NA-N",
Name: "North America, Northern America",
Neighbors: []string{
"NA-E",
"NA-N",
"NA-S",
},
},
"NA-S": {
ID: "NA-S",
Name: "North America, Central America",
Neighbors: []string{
"NA-E",
"NA-N",
"NA-S",
"SA",
},
},
"OC-C": {
ID: "OC-C",
Name: "Oceania, Melanesia",
Neighbors: []string{
"AS-SE",
"OC-E",
"OC-N",
"OC-S",
},
},
"OC-E": {
ID: "OC-E",
Name: "Oceania, Polynesia",
Neighbors: []string{
"AS-SE",
"OC-C",
"OC-N",
"OC-S",
},
},
"OC-N": {
ID: "OC-N",
Name: "Oceania, Micronesia",
Neighbors: []string{
"AS-SE",
"OC-C",
"OC-E",
"OC-S",
},
},
"OC-S": {
ID: "OC-S",
Name: "Oceania, Australia and New Zealand",
Neighbors: []string{
"AS-SE",
"OC-C",
"OC-E",
"OC-N",
},
},
"SA": { // TODO: Split up
ID: "SA",
Name: "South America",
Neighbors: []string{
"NA-E",
"NA-S",
},
},
}

View File

@@ -0,0 +1,27 @@
package geoip
import (
"testing"
"github.com/safing/portbase/utils"
)
// TestRegions validates the static region dataset: every map key must match
// the entry's ID and every neighbor relation must be symmetric.
func TestRegions(t *testing.T) {
	t.Parallel()

	// Check if all neighbors are also linked back.
	for key, region := range regions {
		if key != region.ID {
			t.Errorf("region has different key than ID: %s != %s", key, region.ID)
		}
		for _, neighborID := range region.Neighbors {
			if otherRegion, ok := regions[neighborID]; ok {
				if !utils.StringInSlice(otherRegion.Neighbors, region.ID) {
					t.Errorf("region %s has neighbor %s, but is not linked back", region.ID, neighborID)
				}
			} else {
				t.Errorf("region %s does not exist", neighborID)
			}
		}
	}
}

13
service/intel/module.go Normal file
View File

@@ -0,0 +1,13 @@
package intel
import (
"github.com/safing/portbase/modules"
_ "github.com/safing/portmaster/service/intel/customlists"
)
// Module of this package. Export needed for testing of the endpoints package.
var Module *modules.Module

// init registers the intel module with its dependency modules.
func init() {
	Module = modules.Register("intel", nil, nil, nil, "geoip", "filterlists", "customlists")
}

14
service/intel/resolver.go Normal file
View File

@@ -0,0 +1,14 @@
package intel
import (
"context"
)
// reverseResolver holds the function used to reverse resolve IPs to domains;
// it is nil until a resolver registers itself via SetReverseResolver.
var reverseResolver func(ctx context.Context, ip string) (domain string, err error)

// SetReverseResolver allows the resolver module to register a function to allow reverse resolving IPs to domains.
// Only the first registered function is kept; later calls are silently ignored.
func SetReverseResolver(fn func(ctx context.Context, ip string) (domain string, err error)) {
	if reverseResolver == nil {
		reverseResolver = fn
	}
}