wip: migrate to mono-repo. SPN has already been moved to spn/
This commit is contained in:
13
spn/unit/doc.go
Normal file
13
spn/unit/doc.go
Normal file
@@ -0,0 +1,13 @@
|
||||
// Package unit provides a "work unit" scheduling system for handling data sets that traverse multiple workers / goroutines.
// The aim is to bind priority to a data set instead of a goroutine and split resources fairly among requests.
//
// Every "work" Unit is assigned an ever increasing ID and can be marked as "paused" or "high priority".
// The Scheduler always gives a clearance up to a certain ID. All units below this ID may be processed.
// High priority Units may always be processed.
//
// The Scheduler works with short slots and measures how many Units were finished in a slot.
// The "slot pace" holds an indication of the current Unit finishing speed per slot. It is only changed slowly (but boosts if too far away) in order to keep the system stable.
// The Scheduler then calculates the next unit ID limit to give clearance to for the next slot:
//
//	"finished units" + "slot pace" + "paused units" - "fraction of high priority units"
package unit
|
||||
358
spn/unit/scheduler.go
Normal file
358
spn/unit/scheduler.go
Normal file
@@ -0,0 +1,358 @@
|
||||
package unit
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"math"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tevino/abool"
|
||||
)
|
||||
|
||||
const (
	// defaultSlotDuration is the length of one scheduling slot.
	defaultSlotDuration = 10 * time.Millisecond // 100 slots per second
	// defaultMinSlotPace is the lower bound for the slot pace.
	defaultMinSlotPace = 100 // 10 000 pps

	// defaultWorkSlotPercentage is the share of a slot scheduled with work;
	// the rest is breathing room for catching up.
	defaultWorkSlotPercentage = 0.7 // 70%
	// defaultSlotChangeRatePerStreak is how much the slot pace changes per streak.
	defaultSlotChangeRatePerStreak = 0.02 // 2%

	// defaultStatCycleDuration is how often stats are cycled.
	defaultStatCycleDuration = 1 * time.Minute
)
|
||||
|
||||
// Scheduler creates and schedules units.
// Must be created using NewScheduler().
type Scheduler struct { //nolint:maligned
	// Configuration.
	config SchedulerConfig

	// Units IDs Limit / Thresholds.

	// currentUnitID holds the last assigned Unit ID.
	currentUnitID atomic.Int64
	// clearanceUpTo holds the current threshold up to which Unit ID Units may be processed.
	clearanceUpTo atomic.Int64
	// slotPace holds the current pace. This is the base value for clearance
	// calculation, not the value of the current cleared Units itself.
	slotPace atomic.Int64
	// finished holds the amount of units that were finished within the current slot.
	finished atomic.Int64

	// Slot management.
	// Two signal channels are alternated: the active one is closed to release
	// waiting units when a new slot starts, while the other one is re-created
	// for the slot after that. slotSignalSwitch selects the active channel.
	slotSignalA      chan struct{}
	slotSignalB      chan struct{}
	slotSignalSwitch bool
	slotSignalsLock  sync.RWMutex

	// stopping is set via Stop() and asks the SlotScheduler loop to exit.
	stopping abool.AtomicBool
	// unitDebugger tracks units for debug output; only set via StartDebugLog().
	unitDebugger *UnitDebugger

	// Stats.
	stats struct {
		// Working Values.
		// These accumulate during the current stat cycle and are moved into
		// "current" (and reset) by cycleStats().
		progress struct {
			maxPace           atomic.Int64
			maxLeveledPace    atomic.Int64
			avgPaceSum        atomic.Int64
			avgPaceCnt        atomic.Int64
			avgUnitLifeSum    atomic.Int64
			avgUnitLifeCnt    atomic.Int64
			avgWorkSlotSum    atomic.Int64
			avgWorkSlotCnt    atomic.Int64
			avgCatchUpSlotSum atomic.Int64
			avgCatchUpSlotCnt atomic.Int64
		}

		// Calculated Values.
		// Snapshot of the previous stat cycle, exposed via the Get* methods.
		current struct {
			maxPace        atomic.Int64
			maxLeveledPace atomic.Int64
			avgPace        atomic.Int64
			avgUnitLife    atomic.Int64
			avgWorkSlot    atomic.Int64
			avgCatchUpSlot atomic.Int64
		}
	}
}
||||
|
||||
// SchedulerConfig holds scheduler configuration.
type SchedulerConfig struct {
	// SlotDuration defines the duration of one slot.
	SlotDuration time.Duration

	// MinSlotPace defines the minimum slot pace.
	// The slot pace will never fall below this value.
	MinSlotPace int64

	// WorkSlotPercentage defines how much of a slot should be scheduled with work.
	// The remainder is for catching up and breathing room for other tasks.
	// Must be between 55% (0.55) and 95% (0.95).
	// The default value is 0.7 (70%).
	WorkSlotPercentage float64

	// SlotChangeRatePerStreak defines how many percent (0-1) the slot pace
	// should change per streak.
	// Is enforced to be able to change the minimum slot pace by at least 1.
	// The default value is 0.02 (2%).
	SlotChangeRatePerStreak float64

	// StatCycleDuration defines how often stats are calculated.
	// The default value is 1 minute.
	StatCycleDuration time.Duration
}
|
||||
|
||||
// NewScheduler returns a new scheduler.
|
||||
func NewScheduler(config *SchedulerConfig) *Scheduler {
|
||||
// Fallback to empty config if none is given.
|
||||
if config == nil {
|
||||
config = &SchedulerConfig{}
|
||||
}
|
||||
|
||||
// Create new scheduler.
|
||||
s := &Scheduler{
|
||||
config: *config,
|
||||
slotSignalA: make(chan struct{}),
|
||||
slotSignalB: make(chan struct{}),
|
||||
}
|
||||
|
||||
// Fill in defaults.
|
||||
if s.config.SlotDuration == 0 {
|
||||
s.config.SlotDuration = defaultSlotDuration
|
||||
}
|
||||
if s.config.MinSlotPace == 0 {
|
||||
s.config.MinSlotPace = defaultMinSlotPace
|
||||
}
|
||||
if s.config.WorkSlotPercentage == 0 {
|
||||
s.config.WorkSlotPercentage = defaultWorkSlotPercentage
|
||||
}
|
||||
if s.config.SlotChangeRatePerStreak == 0 {
|
||||
s.config.SlotChangeRatePerStreak = defaultSlotChangeRatePerStreak
|
||||
}
|
||||
if s.config.StatCycleDuration == 0 {
|
||||
s.config.StatCycleDuration = defaultStatCycleDuration
|
||||
}
|
||||
|
||||
// Check boundaries of WorkSlotPercentage.
|
||||
switch {
|
||||
case s.config.WorkSlotPercentage < 0.55:
|
||||
s.config.WorkSlotPercentage = 0.55
|
||||
case s.config.WorkSlotPercentage > 0.95:
|
||||
s.config.WorkSlotPercentage = 0.95
|
||||
}
|
||||
|
||||
// The slot change rate must be able to change the slot pace by at least 1.
|
||||
if s.config.SlotChangeRatePerStreak < (1 / float64(s.config.MinSlotPace)) {
|
||||
s.config.SlotChangeRatePerStreak = (1 / float64(s.config.MinSlotPace))
|
||||
|
||||
// Debug logging:
|
||||
// fmt.Printf("--- increased SlotChangeRatePerStreak to %f\n", s.config.SlotChangeRatePerStreak)
|
||||
}
|
||||
|
||||
// Initialize scheduler fields.
|
||||
s.clearanceUpTo.Store(s.config.MinSlotPace)
|
||||
s.slotPace.Store(s.config.MinSlotPace)
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *Scheduler) nextSlotSignal() chan struct{} {
|
||||
s.slotSignalsLock.RLock()
|
||||
defer s.slotSignalsLock.RUnlock()
|
||||
|
||||
if s.slotSignalSwitch {
|
||||
return s.slotSignalA
|
||||
}
|
||||
return s.slotSignalB
|
||||
}
|
||||
|
||||
func (s *Scheduler) announceNextSlot() {
|
||||
s.slotSignalsLock.Lock()
|
||||
defer s.slotSignalsLock.Unlock()
|
||||
|
||||
// Close new slot signal and refresh previous one.
|
||||
if s.slotSignalSwitch {
|
||||
close(s.slotSignalA)
|
||||
s.slotSignalB = make(chan struct{})
|
||||
} else {
|
||||
close(s.slotSignalB)
|
||||
s.slotSignalA = make(chan struct{})
|
||||
}
|
||||
|
||||
// Switch to next slot.
|
||||
s.slotSignalSwitch = !s.slotSignalSwitch
|
||||
}
|
||||
|
||||
// SlotScheduler manages the slot and schedules units.
// Must only be started once.
//
// The loop ticks twice per slot: in the first half-slot ("work slot") it
// raises the clearance and releases waiting units; in the second half-slot
// ("catch-up slot") it adjusts the slot pace based on how many units finished.
func (s *Scheduler) SlotScheduler(ctx context.Context) error {
	// Start slot ticker at twice the slot frequency, as every slot consists
	// of two half-slots.
	ticker := time.NewTicker(s.config.SlotDuration / 2)
	defer ticker.Stop()

	// Give clearance to all when stopping, so no unit blocks forever.
	// (Slightly below MaxInt64 — presumably headroom against over-/underflow
	// from later clearance adjustments; confirm.)
	defer s.clearanceUpTo.Store(math.MaxInt64 - math.MaxInt32)

	var (
		halfSlotID        uint64
		halfSlotStartedAt = time.Now()
		halfSlotEndedAt   time.Time
		halfSlotDuration  = float64(s.config.SlotDuration / 2)

		// Consecutive same-direction pace adjustments amplify the next step.
		increaseStreak float64
		decreaseStreak float64
		// oneStreaks counts consecutive direction flips after a single step,
		// i.e. how long the pace has stayed level.
		oneStreaks int

		// Number of half-slots after which stats are cycled.
		cycleStatsAt = uint64(s.config.StatCycleDuration / (s.config.SlotDuration / 2))
	)

	for range ticker.C {
		halfSlotEndedAt = time.Now()

		switch {
		case halfSlotID%2 == 0:

			// First Half-Slot: Work Slot

			// Calculate time taken in previous (catch-up) half-slot.
			catchUpSlotDuration := halfSlotEndedAt.Sub(halfSlotStartedAt).Nanoseconds()

			// Add current slot duration to avg calculation.
			s.stats.progress.avgCatchUpSlotCnt.Add(1)
			if s.stats.progress.avgCatchUpSlotSum.Add(catchUpSlotDuration) < 0 {
				// Sum overflowed: reset and restart with this sample.
				s.stats.progress.avgCatchUpSlotCnt.Store(1)
				s.stats.progress.avgCatchUpSlotSum.Store(catchUpSlotDuration)
			}

			// Reset slot counters.
			s.finished.Store(0)

			// Raise clearance according to the current slot pace: allow the
			// work-slot share of one slot's pace on top of the latest unit ID.
			s.clearanceUpTo.Store(
				s.currentUnitID.Load() +
					int64(
						float64(s.slotPace.Load())*s.config.WorkSlotPercentage,
					),
			)

			// Announce start of new slot.
			s.announceNextSlot()

		default:

			// Second Half-Slot: Catch-Up Slot

			// Calculate time taken in previous (work) half-slot.
			workSlotDuration := halfSlotEndedAt.Sub(halfSlotStartedAt).Nanoseconds()

			// Add current slot duration to avg calculation.
			s.stats.progress.avgWorkSlotCnt.Add(1)
			if s.stats.progress.avgWorkSlotSum.Add(workSlotDuration) < 0 {
				// Sum overflowed: reset and restart with this sample.
				s.stats.progress.avgWorkSlotCnt.Store(1)
				s.stats.progress.avgWorkSlotSum.Store(workSlotDuration)
			}

			// Calculate slot duration skew correction, as slots will not run in the
			// exact specified duration.
			slotDurationSkewCorrection := halfSlotDuration / float64(workSlotDuration)

			// Calculate slot pace with performance of first half-slot.
			// Get current slot pace as float64.
			currentSlotPace := float64(s.slotPace.Load())
			// Calculate current raw slot pace. Finished units are doubled as
			// they were counted over only half a slot.
			newRawSlotPace := float64(s.finished.Load()*2) * slotDurationSkewCorrection

			// Move slot pace in the trending direction.
			if newRawSlotPace >= currentSlotPace {
				// Adjust based on streak: the longer the upward streak, the
				// bigger the step.
				increaseStreak++
				decreaseStreak = 0
				s.slotPace.Add(int64(
					currentSlotPace * s.config.SlotChangeRatePerStreak * increaseStreak,
				))

				// Count one-streaks.
				if increaseStreak == 1 {
					oneStreaks++
				} else {
					oneStreaks = 0
				}

				// Debug logging:
				// fmt.Printf("+++ slot pace: %.0f (current raw pace: %.0f, increaseStreak: %.0f, clearanceUpTo: %d)\n", currentSlotPace, newRawSlotPace, increaseStreak, s.clearanceUpTo.Load())
			} else {
				// Adjust based on streak: the longer the downward streak, the
				// bigger the step.
				decreaseStreak++
				increaseStreak = 0
				s.slotPace.Add(int64(
					-currentSlotPace * s.config.SlotChangeRatePerStreak * decreaseStreak,
				))

				// Enforce minimum pace; also reset the streak so the next
				// decrease starts small again.
				if s.slotPace.Load() < s.config.MinSlotPace {
					s.slotPace.Store(s.config.MinSlotPace)
					decreaseStreak = 0
				}

				// Count one-streaks.
				if decreaseStreak == 1 {
					oneStreaks++
				} else {
					oneStreaks = 0
				}

				// Debug logging:
				// fmt.Printf("--- slot pace: %.0f (current raw pace: %.0f, decreaseStreak: %.0f, clearanceUpTo: %d)\n", currentSlotPace, newRawSlotPace, decreaseStreak, s.clearanceUpTo.Load())
			}

			// Record Stats

			// Add current pace to avg calculation.
			s.stats.progress.avgPaceCnt.Add(1)
			if s.stats.progress.avgPaceSum.Add(s.slotPace.Load()) < 0 {
				// Sum overflowed: reset and restart with this sample.
				s.stats.progress.avgPaceCnt.Store(1)
				s.stats.progress.avgPaceSum.Store(s.slotPace.Load())
			}

			// Check if current pace is new max.
			if s.slotPace.Load() > s.stats.progress.maxPace.Load() {
				s.stats.progress.maxPace.Store(s.slotPace.Load())
			}

			// Check if current pace is new leveled max, i.e. a max that was
			// held for at least three one-streaks.
			if oneStreaks >= 3 && s.slotPace.Load() > s.stats.progress.maxLeveledPace.Load() {
				s.stats.progress.maxLeveledPace.Store(s.slotPace.Load())
			}
		}
		// Switch to other slot-half.
		halfSlotID++
		halfSlotStartedAt = halfSlotEndedAt

		// Cycle stats after defined time period.
		if halfSlotID%cycleStatsAt == 0 {
			s.cycleStats()
		}

		// Check if we are stopping. Checked once per half-slot, so shutdown
		// may lag by up to half a slot duration.
		select {
		case <-ctx.Done():
			return nil
		default:
		}
		if s.stopping.IsSet() {
			return nil
		}
	}

	// We should never get here, as the ticker channel is never closed.
	// If we do, trigger a worker restart via the service worker.
	return errors.New("unexpected end of scheduler")
}
|
||||
|
||||
// Stop stops the scheduler and gives clearance to all units.
// It only signals the SlotScheduler loop; the loop itself raises the
// clearance to (near) max on its way out.
func (s *Scheduler) Stop() {
	s.stopping.Set()
}
|
||||
87
spn/unit/scheduler_stats.go
Normal file
87
spn/unit/scheduler_stats.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package unit
|
||||
|
||||
// Stats are somewhat racy, as one value of sum or count might already be
|
||||
// updated with the latest slot data, while the other has been not.
|
||||
// This is not so much of a problem, as slots are really short and the impact
|
||||
// is very low.
|
||||
|
||||
// cycleStats calculates the new values and cycles the current values.
|
||||
func (s *Scheduler) cycleStats() {
|
||||
// Get and reset max pace.
|
||||
s.stats.current.maxPace.Store(s.stats.progress.maxPace.Load())
|
||||
s.stats.progress.maxPace.Store(0)
|
||||
|
||||
// Get and reset max leveled pace.
|
||||
s.stats.current.maxLeveledPace.Store(s.stats.progress.maxLeveledPace.Load())
|
||||
s.stats.progress.maxLeveledPace.Store(0)
|
||||
|
||||
// Get and reset avg slot pace.
|
||||
avgPaceCnt := s.stats.progress.avgPaceCnt.Load()
|
||||
if avgPaceCnt > 0 {
|
||||
s.stats.current.avgPace.Store(s.stats.progress.avgPaceSum.Load() / avgPaceCnt)
|
||||
} else {
|
||||
s.stats.current.avgPace.Store(0)
|
||||
}
|
||||
s.stats.progress.avgPaceCnt.Store(0)
|
||||
s.stats.progress.avgPaceSum.Store(0)
|
||||
|
||||
// Get and reset avg unit life.
|
||||
avgUnitLifeCnt := s.stats.progress.avgUnitLifeCnt.Load()
|
||||
if avgUnitLifeCnt > 0 {
|
||||
s.stats.current.avgUnitLife.Store(s.stats.progress.avgUnitLifeSum.Load() / avgUnitLifeCnt)
|
||||
} else {
|
||||
s.stats.current.avgUnitLife.Store(0)
|
||||
}
|
||||
s.stats.progress.avgUnitLifeCnt.Store(0)
|
||||
s.stats.progress.avgUnitLifeSum.Store(0)
|
||||
|
||||
// Get and reset avg work slot duration.
|
||||
avgWorkSlotCnt := s.stats.progress.avgWorkSlotCnt.Load()
|
||||
if avgWorkSlotCnt > 0 {
|
||||
s.stats.current.avgWorkSlot.Store(s.stats.progress.avgWorkSlotSum.Load() / avgWorkSlotCnt)
|
||||
} else {
|
||||
s.stats.current.avgWorkSlot.Store(0)
|
||||
}
|
||||
s.stats.progress.avgWorkSlotCnt.Store(0)
|
||||
s.stats.progress.avgWorkSlotSum.Store(0)
|
||||
|
||||
// Get and reset avg catch up slot duration.
|
||||
avgCatchUpSlotCnt := s.stats.progress.avgCatchUpSlotCnt.Load()
|
||||
if avgCatchUpSlotCnt > 0 {
|
||||
s.stats.current.avgCatchUpSlot.Store(s.stats.progress.avgCatchUpSlotSum.Load() / avgCatchUpSlotCnt)
|
||||
} else {
|
||||
s.stats.current.avgCatchUpSlot.Store(0)
|
||||
}
|
||||
s.stats.progress.avgCatchUpSlotCnt.Store(0)
|
||||
s.stats.progress.avgCatchUpSlotSum.Store(0)
|
||||
}
|
||||
|
||||
// GetMaxSlotPace returns the current maximum slot pace.
// The value is a snapshot that is refreshed once per stat cycle.
func (s *Scheduler) GetMaxSlotPace() int64 {
	return s.stats.current.maxPace.Load()
}
|
||||
|
||||
// GetMaxLeveledSlotPace returns the current maximum leveled slot pace,
// i.e. the highest pace that was held level (not just a spike).
// The value is a snapshot that is refreshed once per stat cycle.
func (s *Scheduler) GetMaxLeveledSlotPace() int64 {
	return s.stats.current.maxLeveledPace.Load()
}
|
||||
|
||||
// GetAvgSlotPace returns the current average slot pace.
// The value is a snapshot that is refreshed once per stat cycle.
func (s *Scheduler) GetAvgSlotPace() int64 {
	return s.stats.current.avgPace.Load()
}
|
||||
|
||||
// GetAvgUnitLife returns the current average unit lifetime until it is finished,
// in nanoseconds (convertible via time.Duration).
// The value is a snapshot that is refreshed once per stat cycle.
func (s *Scheduler) GetAvgUnitLife() int64 {
	return s.stats.current.avgUnitLife.Load()
}
|
||||
|
||||
// GetAvgWorkSlotDuration returns the current average work slot duration,
// in nanoseconds (convertible via time.Duration).
// The value is a snapshot that is refreshed once per stat cycle.
func (s *Scheduler) GetAvgWorkSlotDuration() int64 {
	return s.stats.current.avgWorkSlot.Load()
}
|
||||
|
||||
// GetAvgCatchUpSlotDuration returns the current average catch up slot duration,
// in nanoseconds (convertible via time.Duration).
// The value is a snapshot that is refreshed once per stat cycle.
func (s *Scheduler) GetAvgCatchUpSlotDuration() int64 {
	return s.stats.current.avgCatchUpSlot.Load()
}
|
||||
51
spn/unit/scheduler_test.go
Normal file
51
spn/unit/scheduler_test.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package unit
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkScheduler(b *testing.B) {
|
||||
workers := 10
|
||||
|
||||
// Create and start scheduler.
|
||||
s := NewScheduler(&SchedulerConfig{})
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
go func() {
|
||||
err := s.SlotScheduler(ctx)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}()
|
||||
defer cancel()
|
||||
|
||||
// Init control structures.
|
||||
done := make(chan struct{})
|
||||
finishedCh := make(chan struct{})
|
||||
|
||||
// Start workers.
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
for {
|
||||
u := s.NewUnit()
|
||||
u.WaitForSlot()
|
||||
u.Finish()
|
||||
select {
|
||||
case finishedCh <- struct{}{}:
|
||||
case <-done:
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Start benchmark.
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
<-finishedCh
|
||||
}
|
||||
b.StopTimer()
|
||||
|
||||
// Cleanup.
|
||||
close(done)
|
||||
}
|
||||
103
spn/unit/unit.go
Normal file
103
spn/unit/unit.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package unit
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/tevino/abool"
|
||||
)
|
||||
|
||||
// Unit describes a "work unit" and is meant to be embedded into another struct
// used for passing data moving through multiple processing steps.
type Unit struct {
	id           int64            // Ever increasing ID assigned by the scheduler.
	scheduler    *Scheduler       // Scheduler this unit belongs to.
	created      time.Time        // Creation time; used for the unit life stats.
	finished     abool.AtomicBool // Guards against double-counting Finish().
	highPriority abool.AtomicBool // High priority units bypass slot clearance.
}
|
||||
|
||||
// NewUnit returns a new unit within the scheduler.
|
||||
func (s *Scheduler) NewUnit() *Unit {
|
||||
return &Unit{
|
||||
id: s.currentUnitID.Add(1),
|
||||
scheduler: s,
|
||||
created: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// ReUse re-initialized the unit to be able to reuse already allocated structs.
|
||||
func (u *Unit) ReUse() {
|
||||
// Finish previous unit.
|
||||
u.Finish()
|
||||
|
||||
// Get new ID and unset finish flag.
|
||||
u.id = u.scheduler.currentUnitID.Add(1)
|
||||
u.finished.UnSet()
|
||||
}
|
||||
|
||||
// WaitForSlot blocks until the unit may be processed.
func (u *Unit) WaitForSlot() {
	// High priority units may always process.
	if u.highPriority.IsSet() {
		return
	}

	for {
		// Check if we are allowed to process in the current slot.
		if u.id <= u.scheduler.clearanceUpTo.Load() {
			return
		}

		// Debug logging:
		// fmt.Printf("unit %d waiting for clearance at %d\n", u.id, u.scheduler.clearanceUpTo.Load())

		// Wait for next slot, then re-check the clearance.
		// NOTE(review): if the clearance rises between the check above and
		// fetching the signal channel, the unit waits one extra slot before
		// re-checking — a short delay, not a deadlock.
		<-u.scheduler.nextSlotSignal()
	}
}
|
||||
|
||||
// Finish signals the unit scheduler that this unit has finished processing.
// Will no-op if called on a nil Unit.
func (u *Unit) Finish() {
	if u == nil {
		return
	}

	// Count the finish only once per unit use; the flag guards against
	// double counting. The finished counter is increased even if the unit
	// is from a previous slot.
	if u.finished.SetToIf(false, true) {
		u.scheduler.finished.Add(1)

		// Record the time this unit took from creation to finish.
		timeTaken := time.Since(u.created).Nanoseconds()
		u.scheduler.stats.progress.avgUnitLifeCnt.Add(1)
		if u.scheduler.stats.progress.avgUnitLifeSum.Add(timeTaken) < 0 {
			// Sum overflowed: reset and restart with this sample.
			u.scheduler.stats.progress.avgUnitLifeCnt.Store(1)
			u.scheduler.stats.progress.avgUnitLifeSum.Store(timeTaken)
		}
	}
}
|
||||
|
||||
// MakeHighPriority marks the unit as high priority.
// High priority units are not bound to the slot clearance and may always be
// processed (see WaitForSlot).
// Note: only the first matching case of the switch runs.
func (u *Unit) MakeHighPriority() {
	switch {
	case u.finished.IsSet():
		// Unit is already finished, nothing to prioritize.
	case !u.highPriority.SetToIf(false, true):
		// Unit is already set to high priority.
		// Else: High Priority flag was just set by SetToIf.
	case u.id > u.scheduler.clearanceUpTo.Load():
		// Unit is outside current clearance, reduce clearance by one —
		// this unit will now process anyway, so one regular unit less is
		// cleared for this slot.
		u.scheduler.clearanceUpTo.Add(-1)
	}
}
|
||||
|
||||
// IsHighPriority returns whether the unit has high priority.
func (u *Unit) IsHighPriority() bool {
	return u.highPriority.IsSet()
}
|
||||
|
||||
// RemovePriority removes the high priority mark.
// It does not undo a clearance reduction made by MakeHighPriority.
func (u *Unit) RemovePriority() {
	u.highPriority.UnSet()
}
|
||||
86
spn/unit/unit_debug.go
Normal file
86
spn/unit/unit_debug.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package unit
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/safing/portbase/log"
|
||||
)
|
||||
|
||||
// UnitDebugger is used to debug unit leaks.
// It keeps a registry of live units by ID; finished units are pruned on
// every debug step.
type UnitDebugger struct { //nolint:golint
	units     map[int64]*UnitDebugData
	unitsLock sync.Mutex
}
|
||||
|
||||
// UnitDebugData represents a unit that is being debugged.
type UnitDebugData struct { //nolint:golint
	unit       *Unit
	unitSource string // Caller-supplied label of where the unit originated.
}
|
||||
|
||||
// DebugUnit registers the given unit for debug output with the given source.
|
||||
// Additional calls on the same unit update the unit source.
|
||||
// StartDebugLog() must be called before calling DebugUnit().
|
||||
func (s *Scheduler) DebugUnit(u *Unit, unitSource string) {
|
||||
// Check if scheduler and unit debugger are created.
|
||||
if s == nil || s.unitDebugger == nil {
|
||||
return
|
||||
}
|
||||
|
||||
s.unitDebugger.unitsLock.Lock()
|
||||
defer s.unitDebugger.unitsLock.Unlock()
|
||||
|
||||
s.unitDebugger.units[u.id] = &UnitDebugData{
|
||||
unit: u,
|
||||
unitSource: unitSource,
|
||||
}
|
||||
}
|
||||
|
||||
// StartDebugLog logs the scheduler state every second.
// It also forces the stat cycle to one second, so the logged stats match the
// log interval.
// NOTE(review): the logging goroutine has no stop signal and runs for the
// remaining lifetime of the process; also, calling StartDebugLog twice starts
// a second goroutine — confirm this is acceptable for a debug-only facility.
func (s *Scheduler) StartDebugLog() {
	s.unitDebugger = &UnitDebugger{
		units: make(map[int64]*UnitDebugData),
	}

	// Force StatCycleDuration to match the debug log output.
	s.config.StatCycleDuration = time.Second

	go func() {
		for {
			s.debugStep()
			time.Sleep(time.Second)
		}
	}()
}
|
||||
|
||||
func (s *Scheduler) debugStep() {
|
||||
s.unitDebugger.unitsLock.Lock()
|
||||
defer s.unitDebugger.unitsLock.Unlock()
|
||||
|
||||
// Go through debugging units and clear finished ones, count sources.
|
||||
sources := make(map[string]int)
|
||||
for id, debugUnit := range s.unitDebugger.units {
|
||||
if debugUnit.unit.finished.IsSet() {
|
||||
delete(s.unitDebugger.units, id)
|
||||
} else {
|
||||
cnt := sources[debugUnit.unitSource]
|
||||
sources[debugUnit.unitSource] = cnt + 1
|
||||
}
|
||||
}
|
||||
|
||||
// Print current state.
|
||||
log.Debugf(
|
||||
`scheduler: state: slotPace=%d avgPace=%d maxPace=%d maxLeveledPace=%d currentUnitID=%d clearanceUpTo=%d unitLife=%s slotDurations=%s/%s`,
|
||||
s.slotPace.Load(),
|
||||
s.GetAvgSlotPace(),
|
||||
s.GetMaxSlotPace(),
|
||||
s.GetMaxLeveledSlotPace(),
|
||||
s.currentUnitID.Load(),
|
||||
s.clearanceUpTo.Load(),
|
||||
time.Duration(s.GetAvgUnitLife()).Round(10*time.Microsecond),
|
||||
time.Duration(s.GetAvgWorkSlotDuration()).Round(10*time.Microsecond),
|
||||
time.Duration(s.GetAvgCatchUpSlotDuration()).Round(10*time.Microsecond),
|
||||
)
|
||||
log.Debugf("scheduler: unit sources: %+v", sources)
|
||||
}
|
||||
104
spn/unit/unit_test.go
Normal file
104
spn/unit/unit_test.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package unit
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestUnit pushes one million units through the scheduler with 100 workers
// (1% of units high priority), then checks IDs, clearance and shutdown.
func TestUnit(t *testing.T) { //nolint:paralleltest
	// Ignore deprecation, as the given alternative is not safe for concurrent use.
	// The global rand methods use a locked seed, which is not available from outside.
	rand.Seed(time.Now().UnixNano()) //nolint

	size := 1000000
	workers := 100

	// Create and start scheduler.
	s := NewScheduler(&SchedulerConfig{})
	s.StartDebugLog()
	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		err := s.SlotScheduler(ctx)
		if err != nil {
			panic(err)
		}
	}()
	defer cancel()

	// Create workers that each push their share of units through the scheduler.
	var wg sync.WaitGroup
	wg.Add(workers)
	sizePerWorker := size / workers
	for i := 0; i < workers; i++ {
		go func() {
			for i := 0; i < sizePerWorker; i++ {
				u := s.NewUnit()

				// Make 1% high priority.
				if rand.Int()%100 == 0 { //nolint:gosec // This is a test.
					u.MakeHighPriority()
				}

				u.WaitForSlot()
				time.Sleep(10 * time.Microsecond)
				u.Finish()
			}
			wg.Done()
		}()
	}

	// Wait for workers to finish.
	wg.Wait()

	// Wait for two slot durations for values to update.
	time.Sleep(s.config.SlotDuration * 2)

	// Print current state.
	s.cycleStats()
	fmt.Printf(`scheduler state:
currentUnitID = %d
slotPace = %d
clearanceUpTo = %d
finished = %d
maxPace = %d
maxLeveledPace = %d
avgPace = %d
avgUnitLife = %s
avgWorkSlot = %s
avgCatchUpSlot = %s
`,
		s.currentUnitID.Load(),
		s.slotPace.Load(),
		s.clearanceUpTo.Load(),
		s.finished.Load(),
		s.GetMaxSlotPace(),
		s.GetMaxLeveledSlotPace(),
		s.GetAvgSlotPace(),
		time.Duration(s.GetAvgUnitLife()),
		time.Duration(s.GetAvgWorkSlotDuration()),
		time.Duration(s.GetAvgCatchUpSlotDuration()),
	)

	// Check if everything seems good.
	assert.Equal(t, size, int(s.currentUnitID.Load()), "currentUnitID must match size")
	assert.GreaterOrEqual(
		t,
		int(s.clearanceUpTo.Load()),
		size+int(float64(s.config.MinSlotPace)*s.config.SlotChangeRatePerStreak),
		"clearanceUpTo must be at least size+minSlotPace",
	)

	// Shutdown.
	cancel()
	time.Sleep(s.config.SlotDuration * 10)

	// Check if scheduler shut down correctly: the exiting scheduler must
	// have given clearance to all units.
	assert.Equal(t, math.MaxInt64-math.MaxInt32, int(s.clearanceUpTo.Load()), "clearance must be near MaxInt64")
}
|
||||
Reference in New Issue
Block a user