From cc9793c5340e31c93566628a9ed4e7a6f6a01a09 Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Tue, 19 Nov 2024 19:23:08 -0800 Subject: [PATCH] Use a custom clock interface and fake clock implementation The fake clock provided as part of k8s.io/utils has some limitations: it does not conform to the specifications of the standard library time package when it comes to timers (e.g., Stop() does not return false if the timer has already been stopped), and it deadlocks if clock methods such as Now() (or the Stop() method of timers) are called while executing a timer function (timer created with AfterFunc). To avoid non-intuitive workarounds in our code caused by these limitations, we define our own clock interface and fake clock implementation. This does not represent too much code, as the CollectingProcess only uses a limited number of clock functions. Signed-off-by: Antonin Bas --- go.mod | 2 +- pkg/collector/clock.go | 156 ++++++++++++++++++++++++++++++++++ pkg/collector/process.go | 24 +++--- pkg/collector/process_test.go | 48 ++--------- 4 files changed, 174 insertions(+), 56 deletions(-) create mode 100644 pkg/collector/clock.go diff --git a/go.mod b/go.mod index 818dab8..a9f199c 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( k8s.io/apimachinery v0.31.0 k8s.io/component-base v0.31.0 k8s.io/klog/v2 v2.130.1 - k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 ) require ( @@ -63,6 +62,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/pkg/collector/clock.go b/pkg/collector/clock.go new file mode 100644 index 0000000..60f00ae --- /dev/null +++ b/pkg/collector/clock.go @@ -0,0 +1,156 @@ +// Copyright 2024 VMware, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package collector
+
+import (
+	"sync"
+	"time"
+)
+
+// timer allows for injecting fake or real timers into code that needs to do arbitrary things based
+// on time. We do not include the C() method, as we only support timers created with AfterFunc.
+//
+// The method set is a subset of *time.Timer, which is what realClock hands out.
+type timer interface {
+	// Stop cancels the pending function call. Following the time.Timer contract, implementations
+	// are expected to return true if the call stops the timer, and false if the timer has
+	// already fired or been stopped.
+	Stop() bool
+	// Reset reschedules the timer to fire after duration d. Following the time.Timer contract,
+	// implementations are expected to return true if the timer was still pending, and false if
+	// it had already fired or been stopped.
+	Reset(d time.Duration) bool
+}
+
+// clock allows for injecting fake or real clocks into code that needs to do arbitrary things based
+// on time. We only support a very limited interface at the moment, with only the methods required
+// by CollectingProcess.
+type clock interface {
+	// Now returns the current time according to this clock.
+	Now() time.Time
+	// AfterFunc arranges for f to be called once duration d has elapsed on this clock, and
+	// returns a timer which can be used to cancel or reschedule that call.
+	AfterFunc(d time.Duration, f func()) timer
+}
+
+// realClock implements the clock interface using functions from the time package.
+type realClock struct{}
+
+// Now returns the current wall-clock time, as reported by time.Now.
+func (realClock) Now() time.Time {
+	return time.Now()
+}
+
+// AfterFunc schedules f with time.AfterFunc and returns the resulting *time.Timer.
+func (realClock) AfterFunc(d time.Duration, f func()) timer {
+	return time.AfterFunc(d, f)
+}
+
+// fakeTimer is the timer implementation handed out by fakeClock.AfterFunc. A fakeTimer is pending
+// for as long as it is present in clock.timers.
+type fakeTimer struct {
+	// targetTime is the virtual time at which f becomes due. It is read and written while
+	// holding clock.m.
+	targetTime time.Time
+	// f is the function to run when the timer fires.
+	f func()
+	// clock is the fake clock which owns this timer.
+	clock *fakeClock
+}
+
+// Stop removes the timer from the list of pending timers of its clock. As with time.Timer.Stop, it
+// returns true if the call stops the timer, and false if the timer has already fired or been
+// stopped.
+func (t *fakeTimer) Stop() bool {
+	clock := t.clock
+	clock.m.Lock()
+	defer clock.m.Unlock()
+	newTimers := make([]*fakeTimer, 0, len(clock.timers))
+	pending := false
+	for _, other := range clock.timers {
+		if other == t {
+			// timer is found so it hasn't been fired yet
+			pending = true
+			continue
+		}
+		// Every other pending timer must be preserved as is: only t is dropped.
+		newTimers = append(newTimers, other)
+	}
+	clock.timers = newTimers
+	return pending
+}
+
+// Reset changes the expiry time of a pending timer to d after the current virtual time, and returns
+// true in that case. It returns false if the timer has already fired or been stopped.
+//
+// NOTE(review): unlike time.Timer.Reset, a timer which has already fired or been stopped is NOT
+// re-armed by this method: nothing is scheduled when false is returned.
+func (t *fakeTimer) Reset(d time.Duration) bool {
+	clock := t.clock
+	clock.m.Lock()
+	defer clock.m.Unlock()
+	for _, other := range clock.timers {
+		if other == t {
+			// timer is found so it hasn't been fired yet
+			t.targetTime = clock.now.Add(d)
+			return true
+		}
+	}
+	return false
+}
+
+// fakeClock implements the clock interface as a virtual clock meant to be used in tests. Time can
+// be advanced arbitrarily, but does not change on its own.
+type fakeClock struct {
+	// m protects all the fields below.
+	m sync.RWMutex
+	// isAdvancing is true while a call to Step is in progress; it is used to reject
+	// overlapping calls to Step.
+	isAdvancing bool
+	// now is the current virtual time.
+	now time.Time
+	// timers holds the timers created with AfterFunc which are still pending.
+	timers []*fakeTimer
+}
+
+// newFakeClock returns a fake clock whose virtual time is initially t, with no pending timer.
+func newFakeClock(t time.Time) *fakeClock {
+	return &fakeClock{
+		now: t,
+	}
+}
+
+// Now returns the current virtual time. It only changes when Step is called.
+func (c *fakeClock) Now() time.Time {
+	c.m.RLock()
+	defer c.m.RUnlock()
+	return c.now
+}
+
+// AfterFunc registers f to be run by the first call to Step which brings the virtual time to at
+// least d after the current virtual time, and returns the corresponding timer. It panics if d is
+// not strictly positive.
+func (c *fakeClock) AfterFunc(d time.Duration, f func()) timer {
+	if d <= 0 {
+		panic("non-positive duration not supported")
+	}
+	c.m.Lock()
+	defer c.m.Unlock()
+	t := &fakeTimer{
+		targetTime: c.now.Add(d),
+		f:          f,
+		clock:      c,
+	}
+	c.timers = append(c.timers, t)
+	return t
+}
+
+// Step advances the virtual time by d, then synchronously runs, in the calling goroutine, the
+// function of every timer which is due at the new time. Due timers are run in the order in which
+// they appear in the list of pending timers, not in the order of their expiry times. It panics if
+// d is negative, or if another call to Step is in progress.
+func (c *fakeClock) Step(d time.Duration) {
+	if d < 0 {
+		panic("invalid duration")
+	}
+	var timerFuncs []func()
+	func() {
+		c.m.Lock()
+		defer c.m.Unlock()
+		if c.isAdvancing {
+			panic("concurrent calls to Step() not allowed")
+		}
+		c.isAdvancing = true
+		c.now = c.now.Add(d)
+		// Collect timer functions to run and remove them from list.
+		newTimers := make([]*fakeTimer, 0, len(c.timers))
+		for _, t := range c.timers {
+			if !t.targetTime.After(c.now) {
+				timerFuncs = append(timerFuncs, t.f)
+			} else {
+				newTimers = append(newTimers, t)
+			}
+		}
+		c.timers = newTimers
+	}()
+	// Only registered once this call has set isAdvancing, and deferred so that the flag is
+	// cleared even if one of the timer functions panics. Otherwise, every subsequent call to
+	// Step would be rejected as a concurrent call.
+	defer func() {
+		c.m.Lock()
+		defer c.m.Unlock()
+		c.isAdvancing = false
+	}()
+	// Run the timer functions, without holding a lock. This allows these functions to call
+	// clock.Now(), but also timer.Stop().
+	for _, f := range timerFuncs {
+		f()
+	}
+}
diff --git a/pkg/collector/process.go b/pkg/collector/process.go index 6a129e1..0b2fdab 100644 --- a/pkg/collector/process.go +++ b/pkg/collector/process.go @@ -25,7 +25,6 @@ import ( "time" "k8s.io/klog/v2" - "k8s.io/utils/clock" "github.com/vmware/go-ipfix/pkg/entities" "github.com/vmware/go-ipfix/pkg/registry" @@ -54,7 +53,7 @@ const ( type template struct { ies []*entities.InfoElement expiryTime time.Time - expiryTimer clock.Timer + expiryTimer timer } type CollectingProcess struct { @@ -93,7 +92,7 @@ type CollectingProcess struct { wg sync.WaitGroup numOfRecordsReceived uint64 // clock implementation: enables injecting a fake clock for testing - clock clock.WithDelayedExecution + clock clock } type CollectorInput struct { @@ -122,7 +121,7 @@ type clientHandler struct { closeClientChan chan struct{} } -func initCollectingProcess(input CollectorInput, clock clock.WithDelayedExecution) (*CollectingProcess, error) { +func initCollectingProcess(input CollectorInput, clock clock) (*CollectingProcess, error) { templateTTLSeconds := input.TemplateTTL if input.Protocol == "udp" && templateTTLSeconds == 0 { templateTTLSeconds = entities.TemplateTTL @@ -159,7 +158,7 @@ func initCollectingProcess(input CollectorInput, clock clock.WithDelayedExecutio } func InitCollectingProcess(input CollectorInput) (*CollectingProcess, error) { - return initCollectingProcess(input, clock.RealClock{}) + return initCollectingProcess(input, realClock{}) } func (cp *CollectingProcess) Start() { @@ -410,10 +409,7 @@ func (cp *CollectingProcess) addTemplate(obsDomainID uint32, templateID uint16, // In our case, when f executes, we have to verify that the record is indeed // scheduled for deletion by checking expiryTime. We cannot just // automatically delete the template.
- - // No reason to try to stop the timer in this case, even though it would be - // technically correct, so we pass false for stopTimer. - cp.deleteTemplateWithConds(obsDomainID, templateID, false, func(tpl *template) bool { + cp.deleteTemplateWithConds(obsDomainID, templateID, func(tpl *template) bool { // lock will be held when this executes return !tpl.expiryTime.After(now) }) @@ -425,11 +421,11 @@ func (cp *CollectingProcess) addTemplate(obsDomainID uint32, templateID uint16, // deleteTemplate returns true iff a template was actually deleted. func (cp *CollectingProcess) deleteTemplate(obsDomainID uint32, templateID uint16) bool { - return cp.deleteTemplateWithConds(obsDomainID, templateID, true) + return cp.deleteTemplateWithConds(obsDomainID, templateID) } // deleteTemplateWithConds returns true iff a template was actually deleted. -func (cp *CollectingProcess) deleteTemplateWithConds(obsDomainID uint32, templateID uint16, stopTimer bool, condFns ...func(*template) bool) bool { +func (cp *CollectingProcess) deleteTemplateWithConds(obsDomainID uint32, templateID uint16, condFns ...func(*template) bool) bool { cp.mutex.Lock() defer cp.mutex.Unlock() template, ok := cp.templatesMap[obsDomainID][templateID] @@ -441,7 +437,11 @@ func (cp *CollectingProcess) deleteTemplateWithConds(obsDomainID uint32, templat return false } } - if stopTimer && template.expiryTimer != nil { + // expiryTimer will be nil when the protocol is UDP. + if template.expiryTimer != nil { + // expiryTimer may have been stopped already (if the timer + // expired and is the reason why the template is being deleted), + // but it is safe to call Stop() on an expired timer. 
template.expiryTimer.Stop() } delete(cp.templatesMap[obsDomainID], templateID) diff --git a/pkg/collector/process_test.go b/pkg/collector/process_test.go index 2663c75..360cf31 100644 --- a/pkg/collector/process_test.go +++ b/pkg/collector/process_test.go @@ -31,7 +31,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/util/wait" - clocktesting "k8s.io/utils/clock/testing" "github.com/vmware/go-ipfix/pkg/entities" "github.com/vmware/go-ipfix/pkg/exporter" @@ -426,41 +425,8 @@ func TestCollectingProcess_DecodeDataRecord(t *testing.T) { assert.NotNil(t, err, "Error should be logged for malformed data record") } -// testClock is a wrapper around clocktesting.FakeClock. Unfortunately, FakeClock does not support -// calling clock.Now() when executing an AfterFunc() function, which is require in our case. So we -// have to define this wrapper which lets us do it. It may not have the same guarantees as -// FakeClock, as waiters are not executed "atomically" with time advances, but it should be -// sufficient for our use case. 
-type testClock struct { - *clocktesting.FakeClock - mutex sync.Mutex - now time.Time -} - -func newTestClock(now time.Time) *testClock { - return &testClock{ - FakeClock: clocktesting.NewFakeClock(now), - now: now, - } -} - -func (c *testClock) Now() time.Time { - c.mutex.Lock() - defer c.mutex.Unlock() - return c.now -} - -func (c *testClock) Step(d time.Duration) { - func() { - c.mutex.Lock() - defer c.mutex.Unlock() - c.now = c.now.Add(d) - }() - c.FakeClock.Step(d) -} - func TestUDPCollectingProcess_TemplateExpire(t *testing.T) { - clock := newTestClock(time.Now()) + clock := newFakeClock(time.Now()) input := CollectorInput{ Address: hostPortIPv4, Protocol: udpTransport, @@ -505,18 +471,14 @@ func TestUDPCollectingProcess_TemplateAddAndDelete(t *testing.T) { templateID = 100 obsDomainID = 0xabcd ) + clock := newFakeClock(time.Now()) input := CollectorInput{ Address: hostPortIPv4, Protocol: udpTransport, MaxBufferSize: 1024, TemplateTTL: 1, } - // We should be using the fake clock for this test, but unfortunately - // the behavior of the Stop method for fake timers do not conform to - // https://pkg.go.dev/time#Timer.Stop. - // Stop should return false if the timer has already been stopped, which - // is not the case of the fake timer. 
- cp, err := InitCollectingProcess(input) + cp, err := initCollectingProcess(input, clock) require.NoError(t, err) cp.addTemplate(obsDomainID, templateID, elementsWithValueIPv4) // Get a copy of the stored template @@ -542,7 +504,7 @@ func TestUDPCollectingProcess_TemplateUpdate(t *testing.T) { obsDomainID = 0xabcd ) now := time.Now() - clock := newTestClock(now) + clock := newFakeClock(now) input := CollectorInput{ Address: hostPortIPv4, Protocol: udpTransport, @@ -589,7 +551,7 @@ func BenchmarkAddTemplateUDP(b *testing.B) { ServerCert: nil, ServerKey: nil, } - cp, err := initCollectingProcess(input, clocktesting.NewFakeClock(time.Now())) + cp, err := initCollectingProcess(input, newFakeClock(time.Now())) require.NoError(b, err) obsDomainID := uint32(1) b.ResetTimer()