From 16c756144aadb76307c3508d4286690d78f5830a Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 20 Apr 2023 12:52:42 +0200 Subject: [PATCH] Add config option to always use stale DNS cache entries --- resolver/config.go | 32 ++++++++++++--- resolver/resolve.go | 95 ++++++++++++++++++++++++++++----------------- resolver/rrcache.go | 3 +- 3 files changed, 88 insertions(+), 42 deletions(-) diff --git a/resolver/config.go b/resolver/config.go index 19f299d3..a42eefd9 100644 --- a/resolver/config.go +++ b/resolver/config.go @@ -62,13 +62,17 @@ var ( noAssignedNameservers status.SecurityLevelOptionFunc cfgOptionNoAssignedNameserversOrder = 1 + CfgOptionUseStaleCacheKey = "dns/useStaleCache" + useStaleCache config.BoolOption + cfgOptionUseStaleCacheOrder = 2 + CfgOptionNoMulticastDNSKey = "dns/noMulticastDNS" noMulticastDNS status.SecurityLevelOptionFunc - cfgOptionNoMulticastDNSOrder = 2 + cfgOptionNoMulticastDNSOrder = 3 CfgOptionNoInsecureProtocolsKey = "dns/noInsecureProtocols" noInsecureProtocols status.SecurityLevelOptionFunc - cfgOptionNoInsecureProtocolsOrder = 3 + cfgOptionNoInsecureProtocolsOrder = 4 CfgOptionDontResolveSpecialDomainsKey = "dns/dontResolveSpecialDomains" dontResolveSpecialDomains status.SecurityLevelOptionFunc @@ -161,11 +165,11 @@ The format is: "protocol://ip:port?parameter=value&parameter=value" configuredNameServers = config.Concurrent.GetAsStringArray(CfgOptionNameServersKey, defaultNameServers) err = config.Register(&config.Option{ - Name: "Retry Timeout", + Name: "Ignore Failing DNS Servers Duration", Key: CfgOptionNameserverRetryRateKey, - Description: "Timeout between retries when a DNS server fails.", + Description: "Duration in seconds how long a failing DNS server should not be retried.", OptType: config.OptTypeInt, - ExpertiseLevel: config.ExpertiseLevelExpert, + ExpertiseLevel: config.ExpertiseLevelDeveloper, ReleaseLevel: config.ReleaseLevelStable, DefaultValue: 300, Annotations: config.Annotations{ @@ -201,6 +205,24 @@ The format is: 
"protocol://ip:port?parameter=value&parameter=value" } noAssignedNameservers = status.SecurityLevelOption(CfgOptionNoAssignedNameserversKey) + err = config.Register(&config.Option{ + Name: "Always Use DNS Cache", + Key: CfgOptionUseStaleCacheKey, + Description: "Always use stale entries from the DNS cache and refresh expired entries afterwards. This can improve DNS resolving performance a lot, but may lead to occasional connection errors due to the outdated DNS records.", + OptType: config.OptTypeBool, + ExpertiseLevel: config.ExpertiseLevelUser, + ReleaseLevel: config.ReleaseLevelStable, + DefaultValue: false, + Annotations: config.Annotations{ + config.DisplayOrderAnnotation: cfgOptionUseStaleCacheOrder, + config.CategoryAnnotation: "Resolving", + }, + }) + if err != nil { + return err + } + useStaleCache = config.Concurrent.GetAsBool(CfgOptionUseStaleCacheKey, false) + err = config.Register(&config.Option{ Name: "Ignore Multicast DNS", Key: CfgOptionNoMulticastDNSKey, diff --git a/resolver/resolve.go b/resolver/resolve.go index e72c3e46..2fae0b95 100644 --- a/resolver/resolve.go +++ b/resolver/resolve.go @@ -179,8 +179,21 @@ func Resolve(ctx context.Context, q *Query) (rrCache *RRCache, err error) { // check the cache if !q.NoCaching { rrCache = checkCache(ctx, q) - if rrCache != nil && !rrCache.Expired() { - return rrCache, nil + if rrCache != nil { + switch { + case !rrCache.Expired(): + // Return non-expired cached entry immediately. + return rrCache, nil + case useStaleCache(): + // Return expired cache if we should use stale cache entries, + // but start an async query instead. + log.Tracer(ctx).Tracef( + "resolver: using stale cache entry that expired %s ago", + time.Since(time.Unix(rrCache.Expires, 0)).Round(time.Second), + ) + startAsyncQuery(ctx, q, rrCache) + return rrCache, nil + } } // dedupe! 
@@ -188,7 +201,9 @@ func Resolve(ctx context.Context, q *Query) (rrCache *RRCache, err error) { if markRequestFinished == nil { // we waited for another request, recheck the cache! rrCache = checkCache(ctx, q) - if rrCache != nil && !rrCache.Expired() { + if rrCache != nil && (!rrCache.Expired() || useStaleCache()) { + // Return non-expired or expired entry if we should use stale cache entries. + // There just was a request, so do not trigger an async query. return rrCache, nil } log.Tracer(ctx).Debugf("resolver: waited for another %s%s query, but cache missed!", q.FQDN, q.QType) @@ -232,63 +247,71 @@ func checkCache(ctx context.Context, q *Query) *RRCache { return nil } - // Check if we want to reset the cache for this entry. - if shouldResetCache(q) { + switch { + case shouldResetCache(q): + // Check if we want to reset the cache for this entry. err := ResetCachedRecord(q.FQDN, q.QType.String()) switch { case err == nil: - log.Tracer(ctx).Tracef("resolver: cache for %s%s was reset", q.FQDN, q.QType) + log.Tracer(ctx).Infof("resolver: cache for %s%s was reset", q.FQDN, q.QType) case errors.Is(err, database.ErrNotFound): log.Tracer(ctx).Tracef("resolver: cache for %s%s was already reset (is empty)", q.FQDN, q.QType) default: log.Tracer(ctx).Warningf("resolver: failed to reset cache for %s%s: %s", q.FQDN, q.QType, err) } return nil - } - // Check if the cache has already expired. - // We still return the cache, if it isn't NXDomain, as it will be used if the - // new query fails. - if rrCache.Expired() { + case rrCache.Expired(): + // Check if the cache has already expired. + // We still return the cache, if it isn't NXDomain, as it will be used if the + // new query fails. if rrCache.RCode == dns.RcodeSuccess { return rrCache } return nil - } - // Check if the cache will expire soon and start an async request. - if rrCache.ExpiresSoon() { - // Set flag that we are refreshing this entry. 
- rrCache.RequestingNew = true + case rrCache.ExpiresSoon(): + // Check if the cache will expire soon and start an async request. + startAsyncQuery(ctx, q, rrCache) + return rrCache + default: + // Return still valid cache entry. log.Tracer(ctx).Tracef( - "resolver: cache for %s will expire in %s, refreshing async now", - q.ID(), + "resolver: using cached RR (expires in %s)", time.Until(time.Unix(rrCache.Expires, 0)).Round(time.Second), ) - - // resolve async - module.StartWorker("resolve async", func(asyncCtx context.Context) error { - tracingCtx, tracer := log.AddTracer(asyncCtx) - defer tracer.Submit() - tracer.Tracef("resolver: resolving %s async", q.ID()) - _, err := resolveAndCache(tracingCtx, q, nil) - if err != nil { - tracer.Warningf("resolver: async query for %s failed: %s", q.ID(), err) - } else { - tracer.Infof("resolver: async query for %s succeeded", q.ID()) - } - return nil - }) - return rrCache } +} +func startAsyncQuery(ctx context.Context, q *Query, currentRRCache *RRCache) { + // Check if an async query was already started. + if currentRRCache.RequestingNew { + return + } + + // Set flag and log that we are refreshing this entry. 
+ currentRRCache.RequestingNew = true log.Tracer(ctx).Tracef( - "resolver: using cached RR (expires in %s)", - time.Until(time.Unix(rrCache.Expires, 0)).Round(time.Second), + "resolver: cache for %s will expire in %s, refreshing async now", + q.ID(), + time.Until(time.Unix(currentRRCache.Expires, 0)).Round(time.Second), ) - return rrCache + + // resolve async + module.StartWorker("resolve async", func(asyncCtx context.Context) error { + tracingCtx, tracer := log.AddTracer(asyncCtx) + defer tracer.Submit() + tracer.Tracef("resolver: resolving %s async", q.ID()) + _, err := resolveAndCache(tracingCtx, q, nil) + if err != nil { + tracer.Warningf("resolver: async query for %s failed: %s", q.ID(), err) + } else { + tracer.Infof("resolver: async query for %s succeeded", q.ID()) + } + return nil + }) } func deduplicateRequest(ctx context.Context, q *Query) (finishRequest func()) { diff --git a/resolver/rrcache.go b/resolver/rrcache.go index d14179c2..1b6fdc3d 100644 --- a/resolver/rrcache.go +++ b/resolver/rrcache.go @@ -55,7 +55,8 @@ func (rrCache *RRCache) Expired() bool { return rrCache.Expires <= time.Now().Unix() } -// ExpiresSoon returns whether the record will expire soon and should already be refreshed. +// ExpiresSoon returns whether the record will expire soon (or already has) and +// should already be refreshed. func (rrCache *RRCache) ExpiresSoon() bool { return rrCache.Expires <= time.Now().Unix()+refreshTTL }