Refactor caching and memory management components

- Updated the caching logic to use a predictive cache warmer that prefetches content based on observed access patterns.
- Replaced the legacy warming system with the more efficient predictive approach, improving performance and resource usage.
- Refactored memory management to adjust the cache size dynamically based on system memory usage.
- Simplified the VFS interface and reduced lock contention with sharded per-key locks for multi-threaded workloads (see the sketches after this list).
- Extended the tests to validate the new caching and memory management behaviors.
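
The per-key locking now lives in a shared `vfs/locks` package (imported in the diff below and used via `locks.NumLockShards`, `locks.GetShardIndex`, and `locks.GetKeyLock`). That package itself is not part of this file's diff, so the following is only a minimal sketch reconstructed from the helpers removed from `DiskFS`; everything beyond those three exported names is an assumption.

```go
// Package locks — a minimal sketch of the shared sharded-lock helpers,
// reconstructed from the helpers removed from DiskFS; the real package
// in the repository may differ.
package locks

import "sync"

// NumLockShards is the number of lock shards used to reduce contention.
const NumLockShards = 32

// GetShardIndex hashes a key with FNV-1a and maps it to a shard index.
func GetShardIndex(key string) int {
	var h uint32 = 2166136261 // FNV offset basis
	for i := 0; i < len(key); i++ {
		h ^= uint32(key[i])
		h *= 16777619 // FNV prime
	}
	return int(h % NumLockShards)
}

// GetKeyLock returns the per-key RWMutex stored in the key's shard,
// creating it on first use.
func GetKeyLock(shards []sync.Map, key string) *sync.RWMutex {
	shard := &shards[GetShardIndex(key)]
	keyLock, _ := shard.LoadOrStore(key, &sync.RWMutex{})
	return keyLock.(*sync.RWMutex)
}
```

With this in place, `DiskFS` only has to allocate `make([]sync.Map, locks.NumLockShards)` and delegate its `getKeyLock` to `locks.GetKeyLock`, which is exactly what the diff below shows.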
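
The diff below also drops the local `lruList` (a `container/list` plus a key-to-element map) in favour of a generic `lru.LRUList[*vfs.FileInfo]`. The `vfs/lru` package is not shown in this commit either; the sketch below reconstructs it from the removed `lruList` methods and the calls visible in the diff (`NewLRUList`, `Add`, `MoveToFront`, `Remove`, `Back`, `Len`). Note the repository's `MoveToFront` also threads a batched time updater through, which this sketch omits for brevity.

```go
// Package lru — a minimal sketch of the generic LRU list DiskFS now uses,
// reconstructed from the removed lruList; the repository's version may differ.
package lru

import "container/list"

// LRUList keeps values of type T in most-recently-used order,
// with O(1) lookup of each key's list element.
type LRUList[T any] struct {
	list *list.List
	elem map[string]*list.Element
}

// NewLRUList creates an empty list.
func NewLRUList[T any]() *LRUList[T] {
	return &LRUList[T]{
		list: list.New(),
		elem: make(map[string]*list.Element),
	}
}

// Add inserts a value at the most-recently-used end.
func (l *LRUList[T]) Add(key string, v T) {
	l.elem[key] = l.list.PushFront(v)
}

// MoveToFront marks a key as most recently used.
func (l *LRUList[T]) MoveToFront(key string) {
	if e, ok := l.elem[key]; ok {
		l.list.MoveToFront(e)
	}
}

// Remove drops a key and returns its value (the zero value if absent).
func (l *LRUList[T]) Remove(key string) (v T) {
	if e, ok := l.elem[key]; ok {
		delete(l.elem, key)
		v = l.list.Remove(e).(T)
	}
	return v
}

// Back returns the least-recently-used element, or nil when empty.
func (l *LRUList[T]) Back() *list.Element { return l.list.Back() }

// Len reports how many keys are tracked.
func (l *LRUList[T]) Len() int { return l.list.Len() }
```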
2025-09-22 01:59:15 -05:00
parent 9b2affe95a
commit bfe29dea75
13 changed files with 612 additions and 1215 deletions


@@ -2,13 +2,14 @@
package disk
import (
"container/list"
"fmt"
"io"
"os"
"path/filepath"
"s1d3sw1ped/steamcache2/steamcache/logger"
"s1d3sw1ped/steamcache2/vfs"
"s1d3sw1ped/steamcache2/vfs/locks"
"s1d3sw1ped/steamcache2/vfs/lru"
"s1d3sw1ped/steamcache2/vfs/vfserror"
"sort"
"strings"
@@ -32,55 +33,10 @@ type DiskFS struct {
size int64
mu sync.RWMutex
keyLocks []sync.Map // Sharded lock pools for better concurrency
LRU *lruList
LRU *lru.LRUList[*vfs.FileInfo]
timeUpdater *vfs.BatchedTimeUpdate // Batched time updates for better performance
}
// Number of lock shards for reducing contention
const numLockShards = 32
// lruList for time-decayed LRU eviction
type lruList struct {
list *list.List
elem map[string]*list.Element
}
func newLruList() *lruList {
return &lruList{
list: list.New(),
elem: make(map[string]*list.Element),
}
}
func (l *lruList) Add(key string, fi *vfs.FileInfo) {
elem := l.list.PushFront(fi)
l.elem[key] = elem
}
func (l *lruList) MoveToFront(key string, timeUpdater *vfs.BatchedTimeUpdate) {
if elem, exists := l.elem[key]; exists {
l.list.MoveToFront(elem)
// Update the FileInfo in the element with new access time
if fi := elem.Value.(*vfs.FileInfo); fi != nil {
fi.UpdateAccessBatched(timeUpdater)
}
}
}
func (l *lruList) Remove(key string) *vfs.FileInfo {
if elem, exists := l.elem[key]; exists {
delete(l.elem, key)
if fi := l.list.Remove(elem).(*vfs.FileInfo); fi != nil {
return fi
}
}
return nil
}
func (l *lruList) Len() int {
return l.list.Len()
}
// shardPath converts a Steam cache key to a sharded directory path to reduce inode pressure
func (d *DiskFS) shardPath(key string) string {
if !strings.HasPrefix(key, "steam/") {
@@ -105,43 +61,6 @@ func (d *DiskFS) shardPath(key string) string {
return filepath.Join("steam", shard1, shard2, hashPart)
}
// extractKeyFromPath reverses the sharding logic to get the original key from a sharded path
func (d *DiskFS) extractKeyFromPath(path string) string {
// Fast path: if no slashes, it's not a sharded path
if !strings.Contains(path, "/") {
return path
}
parts := strings.SplitN(path, "/", 5)
numParts := len(parts)
if numParts >= 4 && parts[0] == "steam" {
lastThree := parts[numParts-3:]
shard1 := lastThree[0]
shard2 := lastThree[1]
filename := lastThree[2]
// Verify sharding is correct
if len(filename) >= 4 && filename[:2] == shard1 && filename[2:4] == shard2 {
return "steam/" + filename
}
}
// Handle single-level sharding for short hashes: steam/shard1/filename
if numParts >= 3 && parts[0] == "steam" {
lastTwo := parts[numParts-2:]
shard1 := lastTwo[0]
filename := lastTwo[1]
if len(filename) >= 2 && filename[:2] == shard1 {
return "steam/" + filename
}
}
// Fallback: return as-is for any unrecognized format
return path
}
// New creates a new DiskFS.
func New(root string, capacity int64) *DiskFS {
if capacity <= 0 {
@@ -152,7 +71,7 @@ func New(root string, capacity int64) *DiskFS {
os.MkdirAll(root, 0755)
// Initialize sharded locks
keyLocks := make([]sync.Map, numLockShards)
keyLocks := make([]sync.Map, locks.NumLockShards)
d := &DiskFS{
root: root,
@@ -160,7 +79,7 @@ func New(root string, capacity int64) *DiskFS {
capacity: capacity,
size: 0,
keyLocks: keyLocks,
LRU: newLruList(),
LRU: lru.NewLRUList[*vfs.FileInfo](),
timeUpdater: vfs.NewBatchedTimeUpdate(100 * time.Millisecond), // Update time every 100ms
}
@@ -168,15 +87,15 @@ func New(root string, capacity int64) *DiskFS {
return d
}
// init loads existing files from disk
// init loads existing files from disk with ultra-fast lazy initialization
func (d *DiskFS) init() {
tstart := time.Now()
// Use concurrent directory scanning for blazing fast initialization
fileInfos := d.scanDirectoryConcurrently()
// Ultra-fast initialization: only scan directory structure, defer file stats
d.scanDirectoriesOnly()
// Batch process all files to minimize lock contention
d.batchProcessFiles(fileInfos)
// Start background size calculation in a separate goroutine
go d.calculateSizeInBackground()
logger.Logger.Info().
Str("name", d.Name()).
@@ -188,25 +107,26 @@ func (d *DiskFS) init() {
Msg("init")
}
// fileInfo represents a file found during directory scanning
type fileInfo struct {
path string
relPath string
key string
size int64
modTime time.Time
isDepot bool
// scanDirectoriesOnly performs ultra-fast directory structure scanning without file stats
func (d *DiskFS) scanDirectoriesOnly() {
// Just ensure the root directory exists and is accessible
// No file scanning during init - files will be discovered on-demand
logger.Logger.Debug().
Str("root", d.root).
Msg("Directory structure scan completed (lazy file discovery enabled)")
}
// scanDirectoryConcurrently performs fast concurrent directory scanning
func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
// calculateSizeInBackground calculates the total size of all files in the background
func (d *DiskFS) calculateSizeInBackground() {
tstart := time.Now()
// Channel for collecting file information
fileChan := make(chan fileInfo, 1000)
fileChan := make(chan fileSizeInfo, 1000)
// Progress tracking
var totalFiles int64
var processedFiles int64
progressTicker := time.NewTicker(500 * time.Millisecond)
progressTicker := time.NewTicker(2 * time.Second)
defer progressTicker.Stop()
// Wait group for workers
@@ -217,11 +137,11 @@ func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
go func() {
defer wg.Done()
defer close(fileChan)
d.scanDirectoryRecursive(d.root, fileChan, &totalFiles)
d.scanFilesForSize(d.root, fileChan, &totalFiles)
}()
// Collect results with progress reporting
var fileInfos []fileInfo
var totalSize int64
// Use a separate goroutine to collect results
done := make(chan struct{})
@@ -233,15 +153,16 @@ func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
if !ok {
return
}
fileInfos = append(fileInfos, fi)
totalSize += fi.size
processedFiles++
case <-progressTicker.C:
if totalFiles > 0 {
logger.Logger.Debug().
Int64("processed", processedFiles).
Int64("total", totalFiles).
Int64("size", totalSize).
Float64("progress", float64(processedFiles)/float64(totalFiles)*100).
Msg("Directory scan progress")
Msg("Background size calculation progress")
}
}
}
@@ -251,16 +172,26 @@ func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
wg.Wait()
<-done
// Update the total size
d.mu.Lock()
d.size = totalSize
d.mu.Unlock()
logger.Logger.Info().
Int64("files_scanned", processedFiles).
Msg("Directory scan completed")
return fileInfos
Int64("total_size", totalSize).
Str("duration", time.Since(tstart).String()).
Msg("Background size calculation completed")
}
// scanDirectoryRecursive performs recursive directory scanning with early termination
func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo, totalFiles *int64) {
// Use ReadDir for faster directory listing (no stat calls)
// fileSizeInfo represents a file found during size calculation
type fileSizeInfo struct {
size int64
}
// scanFilesForSize performs recursive file scanning for size calculation only
func (d *DiskFS) scanFilesForSize(dirPath string, fileChan chan<- fileSizeInfo, totalFiles *int64) {
// Use ReadDir for faster directory listing
entries, err := os.ReadDir(dirPath)
if err != nil {
return
@@ -276,7 +207,7 @@ func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo
atomic.AddInt64(totalFiles, int64(fileCount))
// Process entries concurrently with limited workers
semaphore := make(chan struct{}, 8) // Limit concurrent processing
semaphore := make(chan struct{}, 16) // More workers for size calculation
var wg sync.WaitGroup
for _, entry := range entries {
@@ -289,103 +220,33 @@ func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo
defer wg.Done()
semaphore <- struct{}{} // Acquire semaphore
defer func() { <-semaphore }() // Release semaphore
d.scanDirectoryRecursive(path, fileChan, totalFiles)
d.scanFilesForSize(path, fileChan, totalFiles)
}(entryPath)
} else {
// Process file with lazy loading
// Process file for size only
wg.Add(1)
go func(path string, name string, entry os.DirEntry) {
go func(entry os.DirEntry) {
defer wg.Done()
semaphore <- struct{}{} // Acquire semaphore
defer func() { <-semaphore }() // Release semaphore
// Extract relative path and key first (no stat call)
rootPath := d.root
rootPath = strings.TrimPrefix(rootPath, "./")
relPath := strings.ReplaceAll(path[len(rootPath)+1:], "\\", "/")
key := d.extractKeyFromPath(relPath)
// Get file info only when needed (lazy loading)
// Get file info for size calculation
info, err := entry.Info()
if err != nil {
return
}
// Send file info
fileChan <- fileInfo{
path: path,
relPath: relPath,
key: key,
size: info.Size(),
modTime: info.ModTime(),
isDepot: false, // No longer tracking depot files
// Send file size info
fileChan <- fileSizeInfo{
size: info.Size(),
}
}(entryPath, entry.Name(), entry)
}(entry)
}
}
wg.Wait()
}
// batchProcessFiles processes all files in batches to minimize lock contention
func (d *DiskFS) batchProcessFiles(fileInfos []fileInfo) {
const batchSize = 1000 // Process files in batches
// Sort files by key for consistent ordering
sort.Slice(fileInfos, func(i, j int) bool {
return fileInfos[i].key < fileInfos[j].key
})
// Process in batches with progress reporting
totalBatches := (len(fileInfos) + batchSize - 1) / batchSize
for i := 0; i < len(fileInfos); i += batchSize {
end := i + batchSize
if end > len(fileInfos) {
end = len(fileInfos)
}
batch := fileInfos[i:end]
d.processBatch(batch)
// Log progress every 10 batches
if (i/batchSize+1)%10 == 0 || i+batchSize >= len(fileInfos) {
logger.Logger.Debug().
Int("batch", i/batchSize+1).
Int("total_batches", totalBatches).
Int("files_processed", end).
Int("total_files", len(fileInfos)).
Msg("Batch processing progress")
}
}
}
// processBatch processes a batch of files with a single lock acquisition
func (d *DiskFS) processBatch(batch []fileInfo) {
d.mu.Lock()
defer d.mu.Unlock()
for _, fi := range batch {
// Create FileInfo from batch data
fileInfo := &vfs.FileInfo{
Key: fi.key,
Size: fi.size,
CTime: fi.modTime,
ATime: fi.modTime,
AccessCount: 1,
}
// Add to maps
d.info[fi.key] = fileInfo
d.LRU.Add(fi.key, fileInfo)
// Initialize access time
fileInfo.UpdateAccessBatched(d.timeUpdater)
// Update total size
d.size += fi.size
}
}
// Name returns the name of this VFS
func (d *DiskFS) Name() string {
return "DiskFS"
@@ -403,24 +264,9 @@ func (d *DiskFS) Capacity() int64 {
return d.capacity
}
// getShardIndex returns the shard index for a given key
func getShardIndex(key string) int {
// Use FNV-1a hash for good distribution
var h uint32 = 2166136261 // FNV offset basis
for i := 0; i < len(key); i++ {
h ^= uint32(key[i])
h *= 16777619 // FNV prime
}
return int(h % numLockShards)
}
// getKeyLock returns a lock for the given key using sharding
func (d *DiskFS) getKeyLock(key string) *sync.RWMutex {
shardIndex := getShardIndex(key)
shard := &d.keyLocks[shardIndex]
keyLock, _ := shard.LoadOrStore(key, &sync.RWMutex{})
return keyLock.(*sync.RWMutex)
return locks.GetKeyLock(d.keyLocks, key)
}
// Create creates a new file
@@ -472,6 +318,7 @@ func (d *DiskFS) Create(key string, size int64) (io.WriteCloser, error) {
d.LRU.Add(key, fi)
// Initialize access time with current time
fi.UpdateAccessBatched(d.timeUpdater)
// Add to size for new files (not discovered files)
d.size += size
d.mu.Unlock()
@@ -517,7 +364,7 @@ func (dwc *diskWriteCloser) Close() error {
return dwc.file.Close()
}
// Open opens a file for reading
// Open opens a file for reading with lazy discovery
func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
if key == "" {
return nil, vfserror.ErrInvalidKey
@@ -533,16 +380,22 @@ func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
return nil, vfserror.ErrInvalidKey
}
keyMu := d.getKeyLock(key)
keyMu.RLock()
defer keyMu.RUnlock()
d.mu.Lock()
// First, try to get the file info
d.mu.RLock()
fi, exists := d.info[key]
d.mu.RUnlock()
if !exists {
d.mu.Unlock()
return nil, vfserror.ErrNotFound
// Try lazy discovery
var err error
fi, err = d.Stat(key)
if err != nil {
return nil, err
}
}
// Update access time and LRU
d.mu.Lock()
fi.UpdateAccessBatched(d.timeUpdater)
d.LRU.MoveToFront(key, d.timeUpdater)
d.mu.Unlock()
@@ -643,7 +496,7 @@ func (d *DiskFS) Delete(key string) error {
return nil
}
// Stat returns file information
// Stat returns file information with lazy discovery
func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
if key == "" {
return nil, vfserror.ErrInvalidKey
@@ -653,30 +506,49 @@ func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
}
keyMu := d.getKeyLock(key)
// First, try to get the file info with read lock
keyMu.RLock()
defer keyMu.RUnlock()
d.mu.RLock()
defer d.mu.RUnlock()
if fi, ok := d.info[key]; ok {
d.mu.RUnlock()
keyMu.RUnlock()
return fi, nil
}
d.mu.RUnlock()
keyMu.RUnlock()
// Check if file exists on disk but wasn't indexed (for migration)
// Lazy discovery: check if file exists on disk and index it
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
if info, err := os.Stat(path); err == nil {
// File exists in sharded location but not indexed, re-index it
fi := vfs.NewFileInfoFromOS(info, key)
// We can't modify the map here because we're in a read lock
// This is a simplified version - in production you'd need to handle this properly
info, err := os.Stat(path)
if err != nil {
return nil, vfserror.ErrNotFound
}
// File exists, add it to the index with write lock
keyMu.Lock()
defer keyMu.Unlock()
// Double-check after acquiring write lock
d.mu.Lock()
if fi, ok := d.info[key]; ok {
d.mu.Unlock()
return fi, nil
}
return nil, vfserror.ErrNotFound
// Create and add file info
fi := vfs.NewFileInfoFromOS(info, key)
d.info[key] = fi
d.LRU.Add(key, fi)
fi.UpdateAccessBatched(d.timeUpdater)
// Note: Don't add to d.size here as it's being calculated in background
// The background calculation will handle the total size
d.mu.Unlock()
return fi, nil
}
// EvictLRU evicts the least recently used files to free up space
@@ -689,7 +561,7 @@ func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
// Evict from LRU list until we free enough space
for d.size > d.capacity-int64(bytesNeeded) && d.LRU.Len() > 0 {
// Get the least recently used item
elem := d.LRU.list.Back()
elem := d.LRU.Back()
if elem == nil {
break
}
@@ -718,7 +590,7 @@ func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
shardIndex := locks.GetShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}
@@ -774,7 +646,7 @@ func (d *DiskFS) EvictBySize(bytesNeeded uint, ascending bool) uint {
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
shardIndex := locks.GetShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}
@@ -827,7 +699,7 @@ func (d *DiskFS) EvictFIFO(bytesNeeded uint) uint {
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
shardIndex := locks.GetShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}