Skip to content

Commit

Permalink
Added histogram test (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
akshayvadher authored Jul 19, 2024
1 parent c00f22c commit 4ba52bf
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 3 deletions.
14 changes: 11 additions & 3 deletions cuid2_collision_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,18 @@ func TestCollision(t *testing.T) {
checkHistogram(t, n/numPools, v.Histogram)
m.Unlock()
}
CheckCollision(t, ids)
fmt.Printf("Sample ids %v\n", ids[:10])
}

func CheckCollision(t *testing.T, ids []string) {
set := make(map[string]struct{}, len(ids))
for _, id := range ids {
set[id] = struct{}{}
}
if len(set) < len(ids) {
t.Errorf("Collision detected. len(set) %d, len(ids) %d", len(set), len(ids))
}
fmt.Printf("Sample ids %v\n", ids[:10])
}

func checkHistogram(t *testing.T, numberOfIds int, histogram []int64) {
Expand All @@ -63,7 +67,11 @@ type IdPoolResponse struct {
}

func createIdPool(t *testing.T, max int, poolId int, idPoolResponseChan chan *IdPoolResponse, wg *sync.WaitGroup) {
defer wg.Done()
idPoolResponseChan <- CreateIdPool(t, max, poolId)
wg.Done()
}

func CreateIdPool(t *testing.T, max int, poolId int) *IdPoolResponse {
set := make(map[string]struct{}, max)
for i := 0; i < max; i++ {
id := CreateId()
Expand Down Expand Up @@ -94,7 +102,7 @@ func createIdPool(t *testing.T, max int, poolId int, idPoolResponseChan chan *Id
bucketCount := 20
histogram := buildHistogram(numbers, bucketCount)
fmt.Printf("Histogram created for pool %d\n", poolId)
idPoolResponseChan <- &IdPoolResponse{
return &IdPoolResponse{
Ids: ids,
Numbers: numbers,
Histogram: histogram,
Expand Down
83 changes: 83 additions & 0 deletions cuid2_histogram_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package cuid2

import (
"fmt"
"math"
"math/rand/v2"
"strings"
"testing"
)

// TestHistogram generates a single pool of ids and runs three checks on it as
// subtests: collision detection, per-character frequency, and the distribution
// of the pool's histogram.
func TestHistogram(t *testing.T) {
	const idCount = 100000
	fmt.Printf("Testing %d unique ids\n", idCount)

	// The pool id is picked at random in [0, 100).
	pool := CreateIdPool(t, idCount, rand.IntN(100))
	fmt.Printf("Sample ids %v\n", pool.Ids[:10])

	// Subtests run sequentially, in the order listed.
	subtests := []struct {
		name string
		run  func(t *testing.T)
	}{
		{"Test collision", func(t *testing.T) { CheckCollision(t, pool.Ids) }},
		{"Test char frequency", func(t *testing.T) { testCharFrequency(t, idCount, pool.Ids) }},
		{"Test histogram", func(t *testing.T) { testHistogram(t, pool, idCount) }},
	}
	for _, st := range subtests {
		t.Run(st.name, st.run)
	}
}

// testCharFrequency verifies that, ignoring the first character of every id,
// each of the 36 base-36 characters occurs about equally often across ids.
//
// n is the number of ids used to compute the expected count per character and
// ids holds the ids to inspect. A character fails the check when its count
// differs from the expected count by more than 10% (bounds rounded to the
// nearest integer). The test also fails when fewer or more than 36 distinct
// characters are seen, or when an id is empty.
func testCharFrequency(t *testing.T, n int, ids []string) {
	tolerance := 0.1
	// Characters counted per id.
	// NOTE(review): presumably the id length minus the dropped first
	// character; confirm against the generator's default length.
	idLength := 23
	totalLetters := idLength * n
	base := 36
	expectedBinSize := math.Ceil(float64(totalLetters) / float64(base))
	minBinSize := math.Round(expectedBinSize * (1 - tolerance))
	maxBinSize := math.Round(expectedBinSize * (1 + tolerance))

	charFrequencies := make(map[string]int, base)
	for _, id := range ids {
		// An empty id cannot be sliced; report it instead of panicking.
		if id == "" {
			t.Errorf("Empty id found while counting char frequency")
			continue
		}
		// Drop the first character because it will always be a letter, making
		// the letter frequency skewed.
		for _, char := range strings.Split(id[1:], "") {
			charFrequencies[char]++
		}
	}

	fmt.Println("Testing character frequency...")
	fmt.Printf("expectedBinSize %v\n", expectedBinSize)
	fmt.Printf("minBinSize %v\n", minBinSize)
	fmt.Printf("maxBinSize %v\n", maxBinSize)
	fmt.Printf("charFrequencies %v\n", charFrequencies)

	for char, count := range charFrequencies {
		if float64(count) < minBinSize || float64(count) > maxBinSize {
			t.Errorf("The char %v is out of the expected bin size with value %v", char, count)
		}
	}
	// A character that never occurs has no map entry, so the range loop above
	// cannot flag it; the distinct-character count catches that case.
	if len(charFrequencies) != base {
		t.Errorf("Not all of the chars are present in ids. Got only %v", len(charFrequencies))
	}
}

// testHistogram checks that every bucket of poolResponse.Histogram holds
// roughly the same number of entries.
//
// The expected bucket size is ceil(n / number of buckets), and each bucket
// must lie within 10% of it (bounds rounded to the nearest integer). An empty
// histogram is reported as a failure: without that guard the division yields
// infinite bounds, the loop has nothing to inspect, and the check would pass
// without verifying anything.
func testHistogram(t *testing.T, poolResponse *IdPoolResponse, n int) {
	histogram := poolResponse.Histogram
	if len(histogram) == 0 {
		t.Errorf("Histogram is empty, cannot check the distribution of %d ids", n)
		return
	}

	tolerance := 0.1
	expectedBinSize := math.Ceil(float64(n) / float64(len(histogram)))
	minBinSize := math.Round(expectedBinSize * (1 - tolerance))
	maxBinSize := math.Round(expectedBinSize * (1 + tolerance))

	fmt.Printf("Histogram %v\n", histogram)
	fmt.Printf("expectedBinSize %v\n", expectedBinSize)
	fmt.Printf("minBinSize %v\n", minBinSize)
	fmt.Printf("maxBinSize %v\n", maxBinSize)

	for bucket, count := range histogram {
		if float64(count) < minBinSize || float64(count) > maxBinSize {
			// Name the offending bucket so a failure can be diagnosed
			// without re-reading the printed histogram.
			t.Errorf("Histogram is out of distribution tolerance: bucket %d holds %v, want between %v and %v",
				bucket, count, minBinSize, maxBinSize)
		}
	}
}

0 comments on commit 4ba52bf

Please sign in to comment.