From 89bad684cf34cb69b70f046bd003605a8e822fd5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 24 Apr 2023 13:13:24 +0200 Subject: [PATCH] Suggest using stale DNS cache when queries are slow --- resolver/config.go | 2 +- resolver/main.go | 2 + resolver/metrics.go | 105 +++++++++++++++++++++++++++++++++++++ resolver/resolver-https.go | 12 +++++ resolver/resolver-plain.go | 8 ++- resolver/resolver-tcp.go | 5 ++ 6 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 resolver/metrics.go diff --git a/resolver/config.go b/resolver/config.go index a42eefd9..275ed021 100644 --- a/resolver/config.go +++ b/resolver/config.go @@ -208,7 +208,7 @@ The format is: "protocol://ip:port?parameter=value¶meter=value" err = config.Register(&config.Option{ Name: "Always Use DNS Cache", Key: CfgOptionUseStaleCacheKey, - Description: "Always use stale entries from the DNS cache and refresh expired entries afterwards. This can improve DNS resolving performance a lot, but may lead to occasional connection errors due to the outdated DNS records.", + Description: "Always use the DNS cache, even if entries have expired. Expired entries are refreshed afterwards in the background. This can improve DNS resolving performance a lot, but may lead to occasional connection errors due to outdated DNS records.", OptType: config.OptTypeBool, ExpertiseLevel: config.ExpertiseLevelUser, ReleaseLevel: config.ReleaseLevelStable, diff --git a/resolver/main.go b/resolver/main.go index 9170e6e3..6b2d5fa7 100644 --- a/resolver/main.go +++ b/resolver/main.go @@ -96,6 +96,8 @@ func start() error { return err } + module.NewTask("suggest using stale cache", suggestUsingStaleCacheTask).Repeat(2 * time.Minute) + module.StartServiceWorker( "mdns handler", 5*time.Second, diff --git a/resolver/metrics.go b/resolver/metrics.go new file mode 100644 index 00000000..6d59a289 --- /dev/null +++ b/resolver/metrics.go @@ -0,0 +1,105 @@ +package resolver + +import ( + "context" + "sync/atomic" + "time" + + "github.com/safing/portbase/log" + "github.com/safing/portbase/modules" + "github.com/safing/portbase/notifications" +) + +var ( + slowQueriesSensorCnt atomic.Int64 + slowQueriesSensorSum atomic.Int64 +) + +// reportRequestDuration reports successful query request duration. +func reportRequestDuration(started time.Time, resolver *Resolver) { + // TODO: Record prometheus metrics for all resolvers separately. + + // Add query times from system and configured resolvers to slow queries sensor. + switch resolver.Info.Source { + case ServerSourceConfigured, ServerSourceOperatingSystem: + slowQueriesSensorCnt.Add(1) + slowQueriesSensorSum.Add(int64(time.Since(started))) + default: + } +} + +// getSlowQueriesSensorValue returns the current avg query time recorded by the +// slow queries sensor. +func getSlowQueriesSensorValue() (avgQueryTime time.Duration) { + return time.Duration( + slowQueriesSensorSum.Load() / slowQueriesSensorCnt.Load(), + ) +} + +// getAndResetSlowQueriesSensorValue returns the current avg query time +// recorded by the slow queries sensor and reset the sensor values. + +// resetSlowQueriesSensorValue reset the slow queries sensor values. +func resetSlowQueriesSensorValue() { + slowQueriesSensorCnt.Store(0) + slowQueriesSensorSum.Store(0) +} + +var suggestUsingStaleCacheNotification *notifications.Notification + +func suggestUsingStaleCacheTask(ctx context.Context, t *modules.Task) error { + switch { + case useStaleCache(): + // If setting is already active, disable task repeating. + t.Repeat(0) + + // Delete local reference, if used. + if suggestUsingStaleCacheNotification != nil { + suggestUsingStaleCacheNotification.Delete() + suggestUsingStaleCacheNotification = nil + } + + case suggestUsingStaleCacheNotification != nil: + // Check if notification is already active. + + suggestUsingStaleCacheNotification.Lock() + defer suggestUsingStaleCacheNotification.Unlock() + if suggestUsingStaleCacheNotification.Meta().IsDeleted() { + // Reset local reference if notification was deleted. + suggestUsingStaleCacheNotification = nil + } + + case getSlowQueriesSensorValue() > 100*time.Millisecond: + log.Warningf( + "resolver: suggesting user to use stale dns cache with avg query time of %s for config and system resolvers", + getSlowQueriesSensorValue().Round(time.Millisecond), + ) + + // Notify user. + suggestUsingStaleCacheNotification = ¬ifications.Notification{ + EventID: "resolver:suggest-using-stale-cache", + Type: notifications.Info, + Title: "Speed Up Website Loading", + Message: "Portmaster has detected that websites may load slower because DNS queries are currently slower than expected. You may want to switch your DNS provider or enable using expired DNS cache entries for better performance.", + ShowOnSystem: getSlowQueriesSensorValue() > 500*time.Millisecond, + Expires: time.Now().Add(10 * time.Minute).Unix(), + AvailableActions: []*notifications.Action{ + { + Text: "Open Setting", + Type: notifications.ActionTypeOpenSetting, + Payload: ¬ifications.ActionTypeOpenSettingPayload{ + Key: CfgOptionUseStaleCacheKey, + }, + }, + { + ID: "ack", + Text: "Got it!", + }, + }, + } + notifications.Notify(suggestUsingStaleCacheNotification) + } + + resetSlowQueriesSensorValue() + return nil +} diff --git a/resolver/resolver-https.go b/resolver/resolver-https.go index ba007eb7..2d40aac0 100644 --- a/resolver/resolver-https.go +++ b/resolver/resolver-https.go @@ -14,6 +14,7 @@ import ( "github.com/miekg/dns" "github.com/safing/portbase/log" + "github.com/safing/portmaster/netenv" ) // HTTPSResolver is a resolver using just a single tcp connection with pipelining. @@ -56,6 +57,8 @@ func NewHTTPSResolver(resolver *Resolver) *HTTPSResolver { // Query executes the given query against the resolver. func (hr *HTTPSResolver) Query(ctx context.Context, q *Query) (*RRCache, error) { + queryStarted := time.Now() + dnsQuery := new(dns.Msg) dnsQuery.SetQuestion(q.FQDN, uint16(q.QType)) @@ -88,6 +91,9 @@ func (hr *HTTPSResolver) Query(ctx context.Context, q *Query) (*RRCache, error) resp, err := hr.client.Do(request) if err != nil { + // Hint network environment at failed connection. + netenv.ReportFailedConnection() + return nil, err } defer func() { @@ -111,6 +117,12 @@ func (hr *HTTPSResolver) Query(ctx context.Context, q *Query) (*RRCache, error) return nil, err } + // Hint network environment at successful connection. + netenv.ReportSuccessfulConnection() + + // Report request duration for metrics. + reportRequestDuration(queryStarted, hr.resolver) + newRecord := &RRCache{ Domain: q.FQDN, Question: q.QType, diff --git a/resolver/resolver-plain.go b/resolver/resolver-plain.go index 992366b4..614f30b2 100644 --- a/resolver/resolver-plain.go +++ b/resolver/resolver-plain.go @@ -37,6 +37,8 @@ func NewPlainResolver(resolver *Resolver) *PlainResolver { // Query executes the given query against the resolver. func (pr *PlainResolver) Query(ctx context.Context, q *Query) (*RRCache, error) { + queryStarted := time.Now() + // create query dnsQuery := new(dns.Msg) dnsQuery.SetQuestion(q.FQDN, uint16(q.QType)) @@ -54,6 +56,7 @@ func (pr *PlainResolver) Query(ctx context.Context, q *Query) (*RRCache, error) // create client dnsClient := &dns.Client{ + UDPSize: 1024, Timeout: timeout, Dialer: &net.Dialer{ Timeout: timeout, @@ -80,9 +83,12 @@ func (pr *PlainResolver) Query(ctx context.Context, q *Query) (*RRCache, error) return nil, &BlockedUpstreamError{pr.resolver.Info.DescriptiveName()} } - // hint network environment at successful connection + // Hint network environment at successful connection. netenv.ReportSuccessfulConnection() + // Report request duration for metrics. + reportRequestDuration(queryStarted, pr.resolver) + newRecord := &RRCache{ Domain: q.FQDN, Question: q.QType, diff --git a/resolver/resolver-tcp.go b/resolver/resolver-tcp.go index f61779b1..4a1c65f8 100644 --- a/resolver/resolver-tcp.go +++ b/resolver/resolver-tcp.go @@ -185,6 +185,8 @@ func (tr *TCPResolver) getOrCreateResolverConn(ctx context.Context) (*tcpResolve // Query executes the given query against the resolver. func (tr *TCPResolver) Query(ctx context.Context, q *Query) (*RRCache, error) { + queryStarted := time.Now() + // Get resolver connection. resolverConn, err := tr.getOrCreateResolverConn(ctx) if err != nil { @@ -232,6 +234,9 @@ func (tr *TCPResolver) Query(ctx context.Context, q *Query) (*RRCache, error) { return nil, &BlockedUpstreamError{tr.resolver.Info.DescriptiveName()} } + // Report request duration for metrics. + reportRequestDuration(queryStarted, tr.resolver) + // Create RRCache from reply and return it. return tq.MakeCacheRecord(reply, tr.resolver.Info), nil }