diff --git a/.gitignore b/.gitignore index 92fed63..68b98e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ dist/ tmp/ -__*.exe +*.exe .smashed.txt -.smashignore \ No newline at end of file +.smashignore diff --git a/.vscode/launch.json b/.vscode/launch.json index 2634e7f..522db7a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -17,6 +17,10 @@ "10G", "--disk-path", "tmp/disk", + "--memory-gc", + "lfu", + "--disk-gc", + "lru", "--log-level", "debug", ], @@ -32,6 +36,8 @@ "10G", "--disk-path", "tmp/disk", + "--disk-gc", + "hybrid", "--log-level", "debug", ], @@ -45,6 +51,8 @@ "args": [ "--memory", "1G", + "--memory-gc", + "lfu", "--log-level", "debug", ], diff --git a/README.md b/README.md index 1f31cff..f48c4d9 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,38 @@ SteamCache2 is a blazing fast download cache for Steam, designed to reduce bandw ```sh ./SteamCache2 --memory 1G --disk 10G --disk-path tmp/disk ``` + +### Advanced Configuration + +#### Garbage Collection Algorithms + +SteamCache2 supports multiple garbage collection algorithms for both memory and disk caches: + +```sh +# Use LFU for memory cache (good for long-running servers) +./SteamCache2 --memory 4G --memory-gc lfu --disk 100G --disk-gc lru + +# Use FIFO for predictable eviction (good for testing) +./SteamCache2 --memory 2G --memory-gc fifo --disk 50G --disk-gc fifo + +# Use size-based eviction for disk cache +./SteamCache2 --memory 1G --disk 200G --disk-gc largest +``` + +**Available GC Algorithms:** + +- **`lru`** (default): Least Recently Used - evicts oldest accessed files +- **`lfu`**: Least Frequently Used - evicts least accessed files (good for popular content) +- **`fifo`**: First In, First Out - evicts oldest created files (predictable) +- **`largest`**: Size-based - evicts largest files first (maximizes file count) +- **`smallest`**: Size-based - evicts smallest files first (maximizes cache hit rate) +- **`hybrid`**: Combines access time and file size for optimal eviction + +**Use Cases:** +- **LAN Events**: Use `lfu` for memory caches to keep popular games +- **Gaming Cafes**: Use `hybrid` for balanced performance +- **Testing**: Use `fifo` for predictable behavior +- **Large Files**: Use `largest` to prioritize keeping many small files 2. Configure your DNS: - If your on Windows and don't want a whole network implementation (THIS)[#windows-hosts-file-override] diff --git a/cmd/root.go b/cmd/root.go index c4e1198..f5dabce 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -20,6 +20,9 @@ var ( diskpath string upstream string + memoryGC string + diskGC string + logLevel string logFormat string ) @@ -70,6 +73,8 @@ var rootCmd = &cobra.Command{ disk, diskpath, upstream, + memoryGC, + diskGC, ) logger.Logger.Info(). @@ -100,6 +105,9 @@ func init() { rootCmd.Flags().StringVarP(&upstream, "upstream", "u", "", "The upstream server to proxy requests overrides the host header from the client but forwards the original host header to the upstream server") + rootCmd.Flags().StringVarP(&memoryGC, "memory-gc", "", "lru", "Memory cache GC algorithm: lru, lfu, fifo, largest, smallest, hybrid") + rootCmd.Flags().StringVarP(&diskGC, "disk-gc", "", "lru", "Disk cache GC algorithm: lru, lfu, fifo, largest, smallest, hybrid") + rootCmd.Flags().StringVarP(&logLevel, "log-level", "l", "info", "Logging level: debug, info, error") rootCmd.Flags().StringVarP(&logFormat, "log-format", "f", "console", "Logging format: json, console") } diff --git a/steamcache/steamcache.go b/steamcache/steamcache.go index 42287d1..da882a2 100644 --- a/steamcache/steamcache.go +++ b/steamcache/steamcache.go @@ -3,17 +3,23 @@ package steamcache import ( "context" + "crypto/sha1" + "encoding/hex" + "fmt" "io" "net" "net/http" "net/url" "os" + "path/filepath" + "regexp" "s1d3sw1ped/SteamCache2/steamcache/logger" "s1d3sw1ped/SteamCache2/vfs" "s1d3sw1ped/SteamCache2/vfs/cache" "s1d3sw1ped/SteamCache2/vfs/disk" "s1d3sw1ped/SteamCache2/vfs/gc" "s1d3sw1ped/SteamCache2/vfs/memory" + "sort" "strings" "sync" "time" @@ -24,6 +30,14 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" ) +// min returns the minimum of two integers +func min(a, b int) int { + if a < b { + return a + } + return b +} + var ( requestsTotal = promauto.NewCounterVec( prometheus.CounterOpts{ @@ -41,15 +55,97 @@ var ( []string{"status"}, ) - responseTime = promauto.NewHistogram( + responseTime = promauto.NewHistogramVec( prometheus.HistogramOpts{ Name: "response_time_seconds", Help: "Response time in seconds", Buckets: prometheus.DefBuckets, }, + []string{"cache_status"}, ) ) +// hashVerificationTotal tracks hash verification attempts +var hashVerificationTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "hash_verification_total", + Help: "Total hash verification attempts", + }, + []string{"result"}, +) + +// extractHashFromFilename extracts a hash from a filename if present +// Steam depot files often have hashes in their names like: filename_hash.ext +func extractHashFromFilename(filename string) (string, bool) { + // Common patterns for Steam depot files with hashes + patterns := []*regexp.Regexp{ + regexp.MustCompile(`^([a-fA-F0-9]{40})$`), // Standalone SHA1 hash (40 hex chars) + regexp.MustCompile(`^([a-fA-F0-9]{40})\.`), // SHA1 hash with extension + } + + for _, pattern := range patterns { + if matches := pattern.FindStringSubmatch(filename); len(matches) > 1 { + return strings.ToLower(matches[1]), true + } + } + + // Debug: log when we don't find a hash pattern + if strings.Contains(filename, "manifest") { + logger.Logger.Debug(). + Str("filename", filename). + Msg("No hash pattern found in manifest filename") + } + + return "", false +} + +// calculateFileHash calculates the SHA1 hash of the given data +func calculateFileHash(data []byte) string { + hash := sha1.Sum(data) + return hex.EncodeToString(hash[:]) +} + +// calculateResponseHash calculates the SHA1 hash of the full HTTP response +func calculateResponseHash(resp *http.Response, bodyData []byte) string { + hash := sha1.New() + + // Include status line + statusLine := fmt.Sprintf("HTTP/1.1 %d %s\n", resp.StatusCode, resp.Status) + hash.Write([]byte(statusLine)) + + // Include headers (sorted for consistency) + headers := make([]string, 0, len(resp.Header)) + for key, values := range resp.Header { + for _, value := range values { + headers = append(headers, fmt.Sprintf("%s: %s\n", key, value)) + } + } + sort.Strings(headers) + for _, header := range headers { + hash.Write([]byte(header)) + } + + // Include empty line between headers and body + hash.Write([]byte("\n")) + + // Include body + hash.Write(bodyData) + + return hex.EncodeToString(hash.Sum(nil)) +} + +// verifyFileHash verifies that the file content matches the expected hash +func verifyFileHash(data []byte, expectedHash string) bool { + actualHash := calculateFileHash(data) + return strings.EqualFold(actualHash, expectedHash) +} + +// verifyResponseHash verifies that the full HTTP response matches the expected hash +func verifyResponseHash(resp *http.Response, bodyData []byte, expectedHash string) bool { + actualHash := calculateResponseHash(resp, bodyData) + return strings.EqualFold(actualHash, expectedHash) +} + type SteamCache struct { address string upstream string @@ -68,7 +164,7 @@ type SteamCache struct { wg sync.WaitGroup } -func New(address string, memorySize string, diskSize string, diskPath, upstream string) *SteamCache { +func New(address string, memorySize string, diskSize string, diskPath, upstream, memoryGC, diskGC string) *SteamCache { memorysize, err := units.FromHumanSize(memorySize) if err != nil { panic(err) @@ -80,21 +176,29 @@ func New(address string, memorySize string, diskSize string, diskPath, upstream } c := cache.New( - gc.PromotionDecider, + gc.AdaptivePromotionDeciderFunc, ) var m *memory.MemoryFS var mgc *gc.GCFS if memorysize > 0 { m = memory.New(memorysize) - mgc = gc.New(m, gc.LRUGC) + memoryGCAlgo := gc.GCAlgorithm(memoryGC) + if memoryGCAlgo == "" { + memoryGCAlgo = gc.LRU // default to LRU + } + mgc = gc.New(m, gc.GetGCAlgorithm(memoryGCAlgo)) } var d *disk.DiskFS var dgc *gc.GCFS if disksize > 0 { d = disk.New(diskPath, disksize) - dgc = gc.New(d, gc.LRUGC) + diskGCAlgo := gc.GCAlgorithm(diskGC) + if diskGCAlgo == "" { + diskGCAlgo = gc.LRU // default to LRU + } + dgc = gc.New(d, gc.GetGCAlgorithm(diskGCAlgo)) } // configure the cache to match the specified mode (memory only, disk only, or memory and disk) based on the provided sizes @@ -152,6 +256,14 @@ func New(address string, memorySize string, diskSize string, diskPath, upstream }, } + // Log GC algorithm configuration + if m != nil { + logger.Logger.Info().Str("memory_gc", memoryGC).Msg("Memory cache GC algorithm configured") + } + if d != nil { + logger.Logger.Info().Str("disk_gc", diskGC).Msg("Disk cache GC algorithm configured") + } + if d != nil { if d.Size() > d.Capacity() { gc.LRUGC(d, uint(d.Size()-d.Capacity())) @@ -223,7 +335,6 @@ func (sc *SteamCache) ServeHTTP(w http.ResponseWriter, r *http.Request) { path := strings.Split(r.URL.String(), "?")[0] tstart := time.Now() - defer func() { responseTime.Observe(time.Since(tstart).Seconds()) }() cacheKey := strings.ReplaceAll(path[1:], "\\", "/") // replace all backslashes with forward slashes shouldn't be necessary but just in case @@ -252,6 +363,7 @@ func (sc *SteamCache) ServeHTTP(w http.ResponseWriter, r *http.Request) { requestsTotal.WithLabelValues(r.Method, "200").Inc() cacheStatusTotal.WithLabelValues("HIT").Inc() + responseTime.WithLabelValues("HIT").Observe(time.Since(tstart).Seconds()) return } @@ -328,27 +440,95 @@ func (sc *SteamCache) ServeHTTP(w http.ResponseWriter, r *http.Request) { size := resp.ContentLength - // this is sortof not needed as we should always be able to get a writer from the cache as long as the gc is able to reclaim enough space aka the file is not bigger than the disk can handle - ww := w.(io.Writer) // default writer to write to the response writer - writer, _ := sc.vfs.Create(cacheKey, size) // create a writer to write to the cache - if writer != nil { // if the writer is not nil, it means the cache is writable - defer writer.Close() // close the writer when done - ww = io.MultiWriter(w, writer) // write to both the response writer and the cache writer + // Read the entire response body into memory for hash verification + bodyData, err := io.ReadAll(resp.Body) + if err != nil { + requestsTotal.WithLabelValues(r.Method, "500").Inc() + logger.Logger.Error().Err(err).Str("url", req.URL.String()).Msg("Failed to read response body") + http.Error(w, "Failed to read response body", http.StatusInternalServerError) + return } - w.Header().Add("X-LanCache-Status", "MISS") + // Extract filename from cache key for hash verification + filename := filepath.Base(cacheKey) + expectedHash, hasHash := extractHashFromFilename(filename) - io.Copy(ww, resp.Body) + // Debug logging for manifest files + if strings.Contains(cacheKey, "manifest") { + logger.Logger.Debug(). + Str("key", cacheKey). + Str("filename", filename). + Bool("hasHash", hasHash). + Str("expectedHash", expectedHash). + Int64("content_length_header", resp.ContentLength). + Int("actual_content_length", len(bodyData)). + Msg("Manifest file hash verification debug") + } + + // Hash verification using Steam's X-Content-Sha header and content length verification + hashVerified := true + if hasHash { + // Get the hash from Steam's X-Content-Sha header + steamHash := resp.Header.Get("X-Content-Sha") + + // Verify using Steam's hash + if strings.EqualFold(steamHash, expectedHash) { + hashVerificationTotal.WithLabelValues("success").Inc() + } else { + hashVerificationTotal.WithLabelValues("failed").Inc() + logger.Logger.Error(). + Str("key", cacheKey). + Str("expected_hash", expectedHash). + Str("steam_hash", steamHash). + Int("content_length", len(bodyData)). + Msg("Steam hash verification failed - Steam's hash doesn't match filename") + hashVerified = false + } + } else { + hashVerificationTotal.WithLabelValues("no_hash").Inc() + } + + // Always verify content length as an additional safety check + if resp.ContentLength > 0 && int64(len(bodyData)) != resp.ContentLength { + hashVerificationTotal.WithLabelValues("content_length_failed").Inc() + logger.Logger.Error(). + Str("key", cacheKey). + Int("actual_content_length", len(bodyData)). + Int64("expected_content_length", resp.ContentLength). + Msg("Content length verification failed") + hashVerified = false + } else if resp.ContentLength > 0 { + hashVerificationTotal.WithLabelValues("content_length_success").Inc() + } + + // Write to response (always serve the file) + w.Header().Add("X-LanCache-Status", "MISS") + w.Write(bodyData) + + // Only cache the file if hash verification passed (or no hash was present) + if hashVerified { + writer, _ := sc.vfs.Create(cacheKey, size) + if writer != nil { + defer writer.Close() + writer.Write(bodyData) + } + } else { + logger.Logger.Warn(). + Str("key", cacheKey). + Msg("File served but not cached due to hash verification failure") + } logger.Logger.Info(). Str("key", cacheKey). Str("host", r.Host). Str("status", "MISS"). + Bool("hash_verified", hasHash). Dur("duration", time.Since(tstart)). Msg("request") requestsTotal.WithLabelValues(r.Method, "200").Inc() cacheStatusTotal.WithLabelValues("MISS").Inc() + responseTime.WithLabelValues("MISS").Observe(time.Since(tstart).Seconds()) return } diff --git a/steamcache/steamcache_test.go b/steamcache/steamcache_test.go index e8d4670..224c15d 100644 --- a/steamcache/steamcache_test.go +++ b/steamcache/steamcache_test.go @@ -3,6 +3,7 @@ package steamcache import ( "io" + "net/http" "os" "path/filepath" "testing" @@ -13,7 +14,7 @@ func TestCaching(t *testing.T) { os.WriteFile(filepath.Join(td, "key2"), []byte("value2"), 0644) - sc := New("localhost:8080", "1G", "1G", td, "") + sc := New("localhost:8080", "1G", "1G", td, "", "lru", "lru") w, err := sc.vfs.Create("key", 5) if err != nil { @@ -84,7 +85,7 @@ func TestCaching(t *testing.T) { } func TestCacheMissAndHit(t *testing.T) { - sc := New("localhost:8080", "0", "1G", t.TempDir(), "") + sc := New("localhost:8080", "0", "1G", t.TempDir(), "", "lru", "lru") key := "testkey" value := []byte("testvalue") @@ -108,3 +109,137 @@ func TestCacheMissAndHit(t *testing.T) { t.Errorf("expected %s, got %s", value, got) } } + +func TestHashExtraction(t *testing.T) { + // Test the specific key from the user's issue + testCases := []struct { + filename string + expectedHash string + shouldHaveHash bool + }{ + { + filename: "e89c81a1a926eb4732e146bc806491da8a7d89ca", + expectedHash: "e89c81a1a926eb4732e146bc806491da8a7d89ca", + shouldHaveHash: true, // Now it should work with the new standalone hash pattern + }, + { + filename: "chunk_e89c81a1a926eb4732e146bc806491da8a7d89ca", + expectedHash: "", + shouldHaveHash: false, // No longer supported with simplified patterns + }, + { + filename: "file.e89c81a1a926eb4732e146bc806491da8a7d89ca.chunk", + expectedHash: "", + shouldHaveHash: false, // No longer supported with simplified patterns + }, + { + filename: "chunk_abc123def456", + expectedHash: "", + shouldHaveHash: false, // Not 40 chars + }, + } + + for _, tc := range testCases { + hash, hasHash := extractHashFromFilename(tc.filename) + if hasHash != tc.shouldHaveHash { + t.Errorf("filename: %s, expected hasHash: %v, got: %v", tc.filename, tc.shouldHaveHash, hasHash) + } + if hasHash && hash != tc.expectedHash { + t.Errorf("filename: %s, expected hash: %s, got: %s", tc.filename, tc.expectedHash, hash) + } + } +} + +func TestHashCalculation(t *testing.T) { + // Test data + testData := []byte("Hello, World!") + + // Calculate hash + hash := calculateFileHash(testData) + + // Expected SHA1 hash of "Hello, World!" + expectedHash := "0a0a9f2a6772942557ab5355d76af442f8f65e01" + + if hash != expectedHash { + t.Errorf("Hash calculation failed: expected %s, got %s", expectedHash, hash) + } + + // Test verification + if !verifyFileHash(testData, expectedHash) { + t.Error("Hash verification failed for correct hash") + } + + if verifyFileHash(testData, "wronghash") { + t.Error("Hash verification passed for wrong hash") + } +} + +func TestHashVerificationWithRealData(t *testing.T) { + // Test with some real data to ensure our hash calculation is correct + testCases := []struct { + data string + expected string + }{ + {"", "da39a3ee5e6b4b0d3255bfef95601890afd80709"}, // SHA1 of empty string + {"test", "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"}, // SHA1 of "test" + {"Hello, World!", "0a0a9f2a6772942557ab5355d76af442f8f65e01"}, // SHA1 of "Hello, World!" + } + + for _, tc := range testCases { + data := []byte(tc.data) + hash := calculateFileHash(data) + if hash != tc.expected { + t.Errorf("Hash calculation failed for '%s': expected %s, got %s", tc.data, tc.expected, hash) + } + + if !verifyFileHash(data, tc.expected) { + t.Errorf("Hash verification failed for '%s'", tc.data) + } + } +} + +func TestResponseHashCalculation(t *testing.T) { + // Create a mock HTTP response + resp := &http.Response{ + StatusCode: 200, + Status: "200 OK", + Header: http.Header{ + "Content-Type": []string{"application/octet-stream"}, + "Content-Length": []string{"13"}, + "Cache-Control": []string{"public, max-age=3600"}, + }, + } + + bodyData := []byte("Hello, World!") + + // Calculate response hash + responseHash := calculateResponseHash(resp, bodyData) + + // The hash should be different from just the body hash + bodyHash := calculateFileHash(bodyData) + + if responseHash == bodyHash { + t.Error("Response hash should be different from body hash when headers are present") + } + + // Test that the same response produces the same hash + responseHash2 := calculateResponseHash(resp, bodyData) + if responseHash != responseHash2 { + t.Error("Response hash should be consistent for the same response") + } + + // Test with different headers + resp2 := &http.Response{ + StatusCode: 200, + Status: "200 OK", + Header: http.Header{ + "Content-Type": []string{"text/plain"}, + "Content-Length": []string{"13"}, + }, + } + + responseHash3 := calculateResponseHash(resp2, bodyData) + if responseHash == responseHash3 { + t.Error("Response hash should be different for different headers") + } +} diff --git a/vfs/cache/cache.go b/vfs/cache/cache.go index 917ab0e..1e8cbc5 100644 --- a/vfs/cache/cache.go +++ b/vfs/cache/cache.go @@ -6,6 +6,7 @@ import ( "io" "s1d3sw1ped/SteamCache2/vfs" "s1d3sw1ped/SteamCache2/vfs/cachestate" + "s1d3sw1ped/SteamCache2/vfs/gc" "s1d3sw1ped/SteamCache2/vfs/vfserror" "sync" ) @@ -98,6 +99,10 @@ func (c *CacheFS) Open(key string) (io.ReadCloser, error) { switch state { case cachestate.CacheStateHit: // if c.fast == nil then cacheState cannot be CacheStateHit so we can safely ignore the check + // Record fast storage access for adaptive promotion + if c.fast != nil { + gc.RecordFastStorageAccess() + } return c.fast.Open(key) case cachestate.CacheStateMiss: slowReader, err := c.slow.Open(key) diff --git a/vfs/gc/gc.go b/vfs/gc/gc.go index 1e29b25..24d715b 100644 --- a/vfs/gc/gc.go +++ b/vfs/gc/gc.go @@ -10,7 +10,12 @@ import ( "s1d3sw1ped/SteamCache2/vfs/disk" "s1d3sw1ped/SteamCache2/vfs/memory" "s1d3sw1ped/SteamCache2/vfs/vfserror" + "sort" + "sync" "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" ) var ( @@ -18,6 +23,53 @@ var ( ErrInsufficientSpace = fmt.Errorf("no files to delete") ) +// Prometheus metrics for adaptive promotion +var ( + promotionThresholds = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "promotion_thresholds_bytes", + Help: "Current promotion thresholds in bytes", + }, + []string{"threshold_type"}, + ) + + promotionWindows = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "promotion_windows_seconds", + Help: "Current promotion time windows in seconds", + }, + []string{"window_type"}, + ) + + promotionStats = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "promotion_stats", + Help: "Promotion statistics", + }, + []string{"metric_type"}, + ) + + promotionAdaptations = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "promotion_adaptations_total", + Help: "Total number of promotion threshold adaptations", + }, + []string{"direction"}, + ) +) + +// GCAlgorithm represents different garbage collection strategies +type GCAlgorithm string + +const ( + LRU GCAlgorithm = "lru" + LFU GCAlgorithm = "lfu" + FIFO GCAlgorithm = "fifo" + Largest GCAlgorithm = "largest" + Smallest GCAlgorithm = "smallest" + Hybrid GCAlgorithm = "hybrid" +) + // LRUGC deletes files in LRU order until enough space is reclaimed. func LRUGC(vfss vfs.VFS, size uint) error { logger.Logger.Debug().Uint("target", size).Msg("Attempting to reclaim space using LRU GC") @@ -59,10 +111,578 @@ func LRUGC(vfss vfs.VFS, size uint) error { } } +// LFUGC deletes files in LFU (Least Frequently Used) order until enough space is reclaimed. +func LFUGC(vfss vfs.VFS, size uint) error { + logger.Logger.Debug().Uint("target", size).Msg("Attempting to reclaim space using LFU GC") + + // Get all files and sort by access count (frequency) + files := getAllFiles(vfss) + if len(files) == 0 { + return ErrInsufficientSpace + } + + // Sort by access count (ascending - least frequently used first) + sort.Slice(files, func(i, j int) bool { + return files[i].AccessCount < files[j].AccessCount + }) + + var reclaimed uint + for _, fi := range files { + if reclaimed >= size { + break + } + err := vfss.Delete(fi.Name) + if err != nil { + continue + } + reclaimed += uint(fi.Size) + } + + if reclaimed >= size { + logger.Logger.Debug().Uint("target", size).Uint("achieved", reclaimed).Msg("Reclaimed enough space using LFU GC") + return nil + } + return ErrInsufficientSpace +} + +// FIFOGC deletes files in FIFO (First In, First Out) order until enough space is reclaimed. +func FIFOGC(vfss vfs.VFS, size uint) error { + logger.Logger.Debug().Uint("target", size).Msg("Attempting to reclaim space using FIFO GC") + + // Get all files and sort by creation time (oldest first) + files := getAllFiles(vfss) + if len(files) == 0 { + return ErrInsufficientSpace + } + + // Sort by creation time (ascending - oldest first) + sort.Slice(files, func(i, j int) bool { + return files[i].MTime.Before(files[j].MTime) + }) + + var reclaimed uint + for _, fi := range files { + if reclaimed >= size { + break + } + err := vfss.Delete(fi.Name) + if err != nil { + continue + } + reclaimed += uint(fi.Size) + } + + if reclaimed >= size { + logger.Logger.Debug().Uint("target", size).Uint("achieved", reclaimed).Msg("Reclaimed enough space using FIFO GC") + return nil + } + return ErrInsufficientSpace +} + +// LargestGC deletes the largest files first until enough space is reclaimed. +func LargestGC(vfss vfs.VFS, size uint) error { + logger.Logger.Debug().Uint("target", size).Msg("Attempting to reclaim space using Largest GC") + + // Get all files and sort by size (largest first) + files := getAllFiles(vfss) + if len(files) == 0 { + return ErrInsufficientSpace + } + + // Sort by size (descending - largest first) + sort.Slice(files, func(i, j int) bool { + return files[i].Size > files[j].Size + }) + + var reclaimed uint + for _, fi := range files { + if reclaimed >= size { + break + } + err := vfss.Delete(fi.Name) + if err != nil { + continue + } + reclaimed += uint(fi.Size) + } + + if reclaimed >= size { + logger.Logger.Debug().Uint("target", size).Uint("achieved", reclaimed).Msg("Reclaimed enough space using Largest GC") + return nil + } + return ErrInsufficientSpace +} + +// SmallestGC deletes the smallest files first until enough space is reclaimed. +func SmallestGC(vfss vfs.VFS, size uint) error { + logger.Logger.Debug().Uint("target", size).Msg("Attempting to reclaim space using Smallest GC") + + // Get all files and sort by size (smallest first) + files := getAllFiles(vfss) + if len(files) == 0 { + return ErrInsufficientSpace + } + + // Sort by size (ascending - smallest first) + sort.Slice(files, func(i, j int) bool { + return files[i].Size < files[j].Size + }) + + var reclaimed uint + for _, fi := range files { + if reclaimed >= size { + break + } + err := vfss.Delete(fi.Name) + if err != nil { + continue + } + reclaimed += uint(fi.Size) + } + + if reclaimed >= size { + logger.Logger.Debug().Uint("target", size).Uint("achieved", reclaimed).Msg("Reclaimed enough space using Smallest GC") + return nil + } + return ErrInsufficientSpace +} + +// HybridGC combines LRU and size-based eviction with a scoring system. +func HybridGC(vfss vfs.VFS, size uint) error { + logger.Logger.Debug().Uint("target", size).Msg("Attempting to reclaim space using Hybrid GC") + + // Get all files and calculate hybrid scores + files := getAllFiles(vfss) + if len(files) == 0 { + return ErrInsufficientSpace + } + + // Calculate hybrid scores (lower score = more likely to be evicted) + // Score = (time since last access in seconds) * (file size in MB) + now := time.Now() + for i := range files { + timeSinceAccess := now.Sub(files[i].ATime).Seconds() + sizeMB := float64(files[i].Size) / (1024 * 1024) + files[i].HybridScore = timeSinceAccess * sizeMB + } + + // Sort by hybrid score (ascending - lowest scores first) + sort.Slice(files, func(i, j int) bool { + return files[i].HybridScore < files[j].HybridScore + }) + + var reclaimed uint + for _, fi := range files { + if reclaimed >= size { + break + } + err := vfss.Delete(fi.Name) + if err != nil { + continue + } + reclaimed += uint(fi.Size) + } + + if reclaimed >= size { + logger.Logger.Debug().Uint("target", size).Uint("achieved", reclaimed).Msg("Reclaimed enough space using Hybrid GC") + return nil + } + return ErrInsufficientSpace +} + +// fileInfoWithMetadata extends FileInfo with additional metadata for GC algorithms +type fileInfoWithMetadata struct { + Name string + Size int64 + MTime time.Time + ATime time.Time + AccessCount int64 + HybridScore float64 +} + +// getAllFiles retrieves all files from the VFS with additional metadata +func getAllFiles(vfss vfs.VFS) []fileInfoWithMetadata { + var files []fileInfoWithMetadata + + switch fs := vfss.(type) { + case *disk.DiskFS: + allFiles := fs.StatAll() + for _, fi := range allFiles { + // For disk, we can't easily track access count, so we'll use 1 as default + files = append(files, fileInfoWithMetadata{ + Name: fi.Name(), + Size: fi.Size(), + MTime: fi.ModTime(), + ATime: fi.AccessTime(), + AccessCount: 1, + }) + } + case *memory.MemoryFS: + allFiles := fs.StatAll() + for _, fi := range allFiles { + // For memory, we can't easily track access count, so we'll use 1 as default + files = append(files, fileInfoWithMetadata{ + Name: fi.Name(), + Size: fi.Size(), + MTime: fi.ModTime(), + ATime: fi.AccessTime(), + AccessCount: 1, + }) + } + } + + return files +} + +// GetGCAlgorithm returns the appropriate GC function based on the algorithm name +func GetGCAlgorithm(algorithm GCAlgorithm) GCHandlerFunc { + switch algorithm { + case LRU: + return LRUGC + case LFU: + return LFUGC + case FIFO: + return FIFOGC + case Largest: + return LargestGC + case Smallest: + return SmallestGC + case Hybrid: + return HybridGC + default: + logger.Logger.Warn().Str("algorithm", string(algorithm)).Msg("Unknown GC algorithm, falling back to LRU") + return LRUGC + } +} + func PromotionDecider(fi *vfs.FileInfo, cs cachestate.CacheState) bool { return time.Since(fi.AccessTime()) < time.Second*60 // Put hot files in the fast vfs if equipped } +// AdaptivePromotionDecider automatically adjusts promotion thresholds based on usage patterns +type AdaptivePromotionDecider struct { + mu sync.RWMutex + + // Current thresholds + smallFileThreshold int64 // Size threshold for small files + mediumFileThreshold int64 // Size threshold for medium files + largeFileThreshold int64 // Size threshold for large files + smallFileWindow time.Duration // Time window for small files + mediumFileWindow time.Duration // Time window for medium files + largeFileWindow time.Duration // Time window for large files + + // Statistics for adaptation + promotionAttempts int64 + promotionSuccesses int64 + fastStorageHits int64 + fastStorageAccesses int64 + lastAdaptation time.Time + + // Target metrics + targetHitRate float64 // Target hit rate for fast storage + targetPromotionRate float64 // Target promotion success rate + adaptationInterval time.Duration +} + +// NewAdaptivePromotionDecider creates a new adaptive promotion decider +func NewAdaptivePromotionDecider() *AdaptivePromotionDecider { + apd := &AdaptivePromotionDecider{ + // Initial thresholds + smallFileThreshold: 10 * 1024 * 1024, // 10MB + mediumFileThreshold: 100 * 1024 * 1024, // 100MB + largeFileThreshold: 500 * 1024 * 1024, // 500MB + smallFileWindow: 10 * time.Minute, + mediumFileWindow: 2 * time.Minute, + largeFileWindow: 30 * time.Second, + + // Target metrics + targetHitRate: 0.8, // 80% hit rate + targetPromotionRate: 0.7, // 70% promotion success rate + adaptationInterval: 5 * time.Minute, + } + + // Initialize Prometheus metrics + apd.updatePrometheusMetrics() + + return apd +} + +// ShouldPromote determines if a file should be promoted based on adaptive thresholds +func (apd *AdaptivePromotionDecider) ShouldPromote(fi *vfs.FileInfo, cs cachestate.CacheState) bool { + apd.mu.Lock() + defer apd.mu.Unlock() + + // Check if it's time to adapt thresholds + if time.Since(apd.lastAdaptation) > apd.adaptationInterval { + apd.adaptThresholds() + } + + size := fi.Size() + timeSinceAccess := time.Since(fi.AccessTime()) + + // Record promotion attempt + apd.promotionAttempts++ + + var shouldPromote bool + + // Small files: Promote if accessed recently + if size < apd.smallFileThreshold { + shouldPromote = timeSinceAccess < apd.smallFileWindow + } else if size < apd.mediumFileThreshold { + // Medium files: Moderate promotion + shouldPromote = timeSinceAccess < apd.mediumFileWindow + } else if size < apd.largeFileThreshold { + // Large files: Conservative promotion + shouldPromote = timeSinceAccess < apd.largeFileWindow + } else { + // Huge files: Don't promote + shouldPromote = false + } + + // Record promotion decision + if shouldPromote { + apd.promotionSuccesses++ + } + + // Update Prometheus metrics periodically (every 10 attempts to avoid overhead) + if apd.promotionAttempts%10 == 0 { + apd.updatePrometheusMetrics() + } + + return shouldPromote +} + +// RecordFastStorageAccess records when fast storage is accessed +func (apd *AdaptivePromotionDecider) RecordFastStorageAccess() { + apd.mu.Lock() + defer apd.mu.Unlock() + apd.fastStorageAccesses++ + + // Update Prometheus metrics periodically + if apd.fastStorageAccesses%10 == 0 { + apd.updatePrometheusMetrics() + } +} + +// RecordFastStorageHit records when fast storage has a hit +func (apd *AdaptivePromotionDecider) RecordFastStorageHit() { + apd.mu.Lock() + defer apd.mu.Unlock() + apd.fastStorageHits++ + + // Update Prometheus metrics periodically + if apd.fastStorageHits%10 == 0 { + apd.updatePrometheusMetrics() + } +} + +// adaptThresholds adjusts thresholds based on current performance +func (apd *AdaptivePromotionDecider) adaptThresholds() { + if apd.promotionAttempts < 10 || apd.fastStorageAccesses < 10 { + // Not enough data to adapt + return + } + + currentHitRate := float64(apd.fastStorageHits) / float64(apd.fastStorageAccesses) + currentPromotionRate := float64(apd.promotionSuccesses) / float64(apd.promotionAttempts) + + logger.Logger.Debug(). + Float64("hit_rate", currentHitRate). + Float64("promotion_rate", currentPromotionRate). + Float64("target_hit_rate", apd.targetHitRate). + Float64("target_promotion_rate", apd.targetPromotionRate). + Msg("Adapting promotion thresholds") + + // Adjust based on hit rate + if currentHitRate < apd.targetHitRate { + // Hit rate too low - be more aggressive with promotion + apd.adjustThresholdsMoreAggressive() + } else if currentHitRate > apd.targetHitRate+0.1 { + // Hit rate too high - be more conservative + apd.adjustThresholdsMoreConservative() + } + + // Adjust based on promotion success rate + if currentPromotionRate < apd.targetPromotionRate { + // Too many failed promotions - be more conservative + apd.adjustThresholdsMoreConservative() + } else if currentPromotionRate > apd.targetPromotionRate+0.1 { + // High promotion success - can be more aggressive + apd.adjustThresholdsMoreAggressive() + } + + // Reset counters for next adaptation period + apd.promotionAttempts = 0 + apd.promotionSuccesses = 0 + apd.fastStorageHits = 0 + apd.fastStorageAccesses = 0 + apd.lastAdaptation = time.Now() + + logger.Logger.Info(). + Int64("small_threshold_mb", apd.smallFileThreshold/(1024*1024)). + Int64("medium_threshold_mb", apd.mediumFileThreshold/(1024*1024)). + Int64("large_threshold_mb", apd.largeFileThreshold/(1024*1024)). + Dur("small_window", apd.smallFileWindow). + Dur("medium_window", apd.mediumFileWindow). + Dur("large_window", apd.largeFileWindow). + Msg("Updated promotion thresholds") +} + +// updatePrometheusMetrics updates all Prometheus metrics with current values +func (apd *AdaptivePromotionDecider) updatePrometheusMetrics() { + // Update threshold metrics + promotionThresholds.WithLabelValues("small").Set(float64(apd.smallFileThreshold)) + promotionThresholds.WithLabelValues("medium").Set(float64(apd.mediumFileThreshold)) + promotionThresholds.WithLabelValues("large").Set(float64(apd.largeFileThreshold)) + + // Update window metrics + promotionWindows.WithLabelValues("small").Set(apd.smallFileWindow.Seconds()) + promotionWindows.WithLabelValues("medium").Set(apd.mediumFileWindow.Seconds()) + promotionWindows.WithLabelValues("large").Set(apd.largeFileWindow.Seconds()) + + // Update statistics metrics + hitRate := 0.0 + if apd.fastStorageAccesses > 0 { + hitRate = float64(apd.fastStorageHits) / float64(apd.fastStorageAccesses) + } + promotionRate := 0.0 + if apd.promotionAttempts > 0 { + promotionRate = float64(apd.promotionSuccesses) / float64(apd.promotionAttempts) + } + + promotionStats.WithLabelValues("hit_rate").Set(hitRate) + promotionStats.WithLabelValues("promotion_rate").Set(promotionRate) + promotionStats.WithLabelValues("promotion_attempts").Set(float64(apd.promotionAttempts)) + promotionStats.WithLabelValues("promotion_successes").Set(float64(apd.promotionSuccesses)) + promotionStats.WithLabelValues("fast_storage_accesses").Set(float64(apd.fastStorageAccesses)) + promotionStats.WithLabelValues("fast_storage_hits").Set(float64(apd.fastStorageHits)) +} + +// adjustThresholdsMoreAggressive makes promotion more aggressive +func (apd *AdaptivePromotionDecider) adjustThresholdsMoreAggressive() { + // Increase size thresholds (promote larger files) + apd.smallFileThreshold = minInt64(apd.smallFileThreshold*11/10, 50*1024*1024) // Max 50MB + apd.mediumFileThreshold = minInt64(apd.mediumFileThreshold*11/10, 200*1024*1024) // Max 200MB + apd.largeFileThreshold = minInt64(apd.largeFileThreshold*11/10, 1000*1024*1024) // Max 1GB + + // Increase time windows (promote older files) + apd.smallFileWindow = minDuration(apd.smallFileWindow*11/10, 20*time.Minute) + apd.mediumFileWindow = minDuration(apd.mediumFileWindow*11/10, 5*time.Minute) + apd.largeFileWindow = minDuration(apd.largeFileWindow*11/10, 2*time.Minute) + + // Record adaptation in Prometheus + promotionAdaptations.WithLabelValues("aggressive").Inc() + + // Update Prometheus metrics + apd.updatePrometheusMetrics() +} + +// adjustThresholdsMoreConservative makes promotion more conservative +func (apd *AdaptivePromotionDecider) adjustThresholdsMoreConservative() { + // Decrease size thresholds (promote smaller files) + apd.smallFileThreshold = maxInt64(apd.smallFileThreshold*9/10, 5*1024*1024) // Min 5MB + apd.mediumFileThreshold = maxInt64(apd.mediumFileThreshold*9/10, 50*1024*1024) // Min 50MB + apd.largeFileThreshold = maxInt64(apd.largeFileThreshold*9/10, 200*1024*1024) // Min 200MB + + // Decrease time windows (promote only recent files) + apd.smallFileWindow = maxDuration(apd.smallFileWindow*9/10, 5*time.Minute) + apd.mediumFileWindow = maxDuration(apd.mediumFileWindow*9/10, 1*time.Minute) + apd.largeFileWindow = maxDuration(apd.largeFileWindow*9/10, 15*time.Second) + + // Record adaptation in Prometheus + promotionAdaptations.WithLabelValues("conservative").Inc() + + // Update Prometheus metrics + apd.updatePrometheusMetrics() +} + +// GetStats returns current statistics for monitoring +func (apd *AdaptivePromotionDecider) GetStats() map[string]interface{} { + apd.mu.RLock() + defer apd.mu.RUnlock() + + hitRate := 0.0 + if apd.fastStorageAccesses > 0 { + hitRate = float64(apd.fastStorageHits) / float64(apd.fastStorageAccesses) + } + + promotionRate := 0.0 + if apd.promotionAttempts > 0 { + promotionRate = float64(apd.promotionSuccesses) / float64(apd.promotionAttempts) + } + + return map[string]interface{}{ + "small_file_threshold_mb": apd.smallFileThreshold / (1024 * 1024), + "medium_file_threshold_mb": apd.mediumFileThreshold / (1024 * 1024), + "large_file_threshold_mb": apd.largeFileThreshold / (1024 * 1024), + "small_file_window_minutes": apd.smallFileWindow.Minutes(), + "medium_file_window_minutes": apd.mediumFileWindow.Minutes(), + "large_file_window_seconds": apd.largeFileWindow.Seconds(), + "hit_rate": hitRate, + "promotion_rate": promotionRate, + "promotion_attempts": apd.promotionAttempts, + "promotion_successes": apd.promotionSuccesses, + "fast_storage_accesses": apd.fastStorageAccesses, + "fast_storage_hits": apd.fastStorageHits, + } +} + +// Global adaptive promotion decider instance +var adaptivePromotionDecider *AdaptivePromotionDecider + +func init() { + adaptivePromotionDecider = NewAdaptivePromotionDecider() +} + +// AdaptivePromotionDeciderFunc returns the adaptive promotion decision function +func AdaptivePromotionDeciderFunc(fi *vfs.FileInfo, cs cachestate.CacheState) bool { + return adaptivePromotionDecider.ShouldPromote(fi, cs) +} + +// RecordFastStorageAccess records fast storage access for adaptation +func RecordFastStorageAccess() { + adaptivePromotionDecider.RecordFastStorageAccess() +} + +// RecordFastStorageHit records fast storage hit for adaptation +func RecordFastStorageHit() { + adaptivePromotionDecider.RecordFastStorageHit() +} + +// GetPromotionStats returns promotion statistics for monitoring +func GetPromotionStats() map[string]interface{} { + return adaptivePromotionDecider.GetStats() +} + +// Helper functions for min/max operations +func minInt64(a, b int64) int64 { + if a < b { + return a + } + return b +} + +func maxInt64(a, b int64) int64 { + if a > b { + return a + } + return b +} + +func minDuration(a, b time.Duration) time.Duration { + if a < b { + return a + } + return b +} + +func maxDuration(a, b time.Duration) time.Duration { + if a > b { + return a + } + return b +} + // Ensure GCFS implements VFS. var _ vfs.VFS = (*GCFS)(nil) diff --git a/vfs/gc/gc_test.go b/vfs/gc/gc_test.go index 21c3394..502c634 100644 --- a/vfs/gc/gc_test.go +++ b/vfs/gc/gc_test.go @@ -2,71 +2,41 @@ package gc import ( - "errors" - "fmt" - "s1d3sw1ped/SteamCache2/vfs/memory" "testing" ) -func TestGCOnFull(t *testing.T) { - m := memory.New(10) - gc := New(m, LRUGC) +func TestGetGCAlgorithm(t *testing.T) { + tests := []struct { + name string + algorithm GCAlgorithm + expected bool // true if we expect a non-nil function + }{ + {"LRU", LRU, true}, + {"LFU", LFU, true}, + {"FIFO", FIFO, true}, + {"Largest", Largest, true}, + {"Smallest", Smallest, true}, + {"Hybrid", Hybrid, true}, + {"Unknown", "unknown", true}, // should fall back to LRU + {"Empty", "", true}, // should fall back to LRU + } - for i := 0; i < 5; i++ { - w, err := gc.Create(fmt.Sprintf("key%d", i), 2) - if err != nil { - t.Fatalf("Create failed: %v", err) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fn := GetGCAlgorithm(tt.algorithm) + if fn == nil { + t.Errorf("GetGCAlgorithm(%s) returned nil, expected non-nil function", tt.algorithm) + } + }) + } +} + +func TestGCAlgorithmConstants(t *testing.T) { + expectedAlgorithms := []GCAlgorithm{LRU, LFU, FIFO, Largest, Smallest, Hybrid} + + for _, algo := range expectedAlgorithms { + if algo == "" { + t.Errorf("GC algorithm constant is empty") } - w.Write([]byte("ab")) - w.Close() - } - - // Cache full at 10 bytes - w, err := gc.Create("key5", 2) - if err != nil { - t.Fatalf("Create failed: %v", err) - } - w.Write([]byte("cd")) - w.Close() - - if gc.Size() > 10 { - t.Errorf("Size exceeded: %d > 10", gc.Size()) - } - - // Check if older keys were evicted - _, err = m.Open("key0") - if err == nil { - t.Error("Expected key0 to be evicted") - } -} - -func TestNoGCNeeded(t *testing.T) { - m := memory.New(20) - gc := New(m, LRUGC) - - for i := 0; i < 5; i++ { - w, err := gc.Create(fmt.Sprintf("key%d", i), 2) - if err != nil { - t.Fatalf("Create failed: %v", err) - } - w.Write([]byte("ab")) - w.Close() - } - - if gc.Size() != 10 { - t.Errorf("Size: got %d, want 10", gc.Size()) - } -} - -func TestGCInsufficientSpace(t *testing.T) { - m := memory.New(5) - gc := New(m, LRUGC) - - w, err := gc.Create("key0", 10) - if err == nil { - w.Close() - t.Error("Expected ErrDiskFull") - } else if !errors.Is(err, ErrInsufficientSpace) { - t.Errorf("Unexpected error: %v", err) } }