Refactor caching and memory management components

- Updated the caching logic to use a predictive cache warmer that prefetches content based on observed access patterns.
- Replaced the legacy warming system with the more efficient predictive approach, improving performance and resource usage.
- Refactored memory management to adjust the cache size dynamically based on system memory usage.
- Simplified the VFS interface and reduced lock contention with sharded per-key locks for multi-threaded workloads (see the sketches after this list).
- Extended the tests to validate the new caching and memory management behaviors.
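
The per-key locking now lives in a shared `vfs/locks` package (imported in the diff below and used via `locks.NumLockShards`, `locks.GetShardIndex`, and `locks.GetKeyLock`). That package itself is not part of this file's diff, so the following is only a minimal sketch reconstructed from the helpers removed from `DiskFS`; everything beyond those three exported names is an assumption.

```go
// Package locks — a minimal sketch of the shared sharded-lock helpers,
// reconstructed from the helpers removed from DiskFS; the real package
// in the repository may differ.
package locks

import "sync"

// NumLockShards is the number of lock shards used to reduce contention.
const NumLockShards = 32

// GetShardIndex hashes a key with FNV-1a and maps it to a shard index.
func GetShardIndex(key string) int {
	var h uint32 = 2166136261 // FNV offset basis
	for i := 0; i < len(key); i++ {
		h ^= uint32(key[i])
		h *= 16777619 // FNV prime
	}
	return int(h % NumLockShards)
}

// GetKeyLock returns the per-key RWMutex stored in the key's shard,
// creating it on first use.
func GetKeyLock(shards []sync.Map, key string) *sync.RWMutex {
	shard := &shards[GetShardIndex(key)]
	keyLock, _ := shard.LoadOrStore(key, &sync.RWMutex{})
	return keyLock.(*sync.RWMutex)
}
```

With this in place, `DiskFS` only has to allocate `make([]sync.Map, locks.NumLockShards)` and delegate its `getKeyLock` to `locks.GetKeyLock`, which is exactly what the diff below shows.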
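
The diff below also drops the local `lruList` (a `container/list` plus a key-to-element map) in favour of a generic `lru.LRUList[*vfs.FileInfo]`. The `vfs/lru` package is not shown in this commit either; the sketch below reconstructs it from the removed `lruList` methods and the calls visible in the diff (`NewLRUList`, `Add`, `MoveToFront`, `Remove`, `Back`, `Len`). Note the repository's `MoveToFront` also threads a batched time updater through, which this sketch omits for brevity.

```go
// Package lru — a minimal sketch of the generic LRU list DiskFS now uses,
// reconstructed from the removed lruList; the repository's version may differ.
package lru

import "container/list"

// LRUList keeps values of type T in most-recently-used order,
// with O(1) lookup of each key's list element.
type LRUList[T any] struct {
	list *list.List
	elem map[string]*list.Element
}

// NewLRUList creates an empty list.
func NewLRUList[T any]() *LRUList[T] {
	return &LRUList[T]{
		list: list.New(),
		elem: make(map[string]*list.Element),
	}
}

// Add inserts a value at the most-recently-used end.
func (l *LRUList[T]) Add(key string, v T) {
	l.elem[key] = l.list.PushFront(v)
}

// MoveToFront marks a key as most recently used.
func (l *LRUList[T]) MoveToFront(key string) {
	if e, ok := l.elem[key]; ok {
		l.list.MoveToFront(e)
	}
}

// Remove drops a key and returns its value (the zero value if absent).
func (l *LRUList[T]) Remove(key string) (v T) {
	if e, ok := l.elem[key]; ok {
		delete(l.elem, key)
		v = l.list.Remove(e).(T)
	}
	return v
}

// Back returns the least-recently-used element, or nil when empty.
func (l *LRUList[T]) Back() *list.Element { return l.list.Back() }

// Len reports how many keys are tracked.
func (l *LRUList[T]) Len() int { return l.list.Len() }
```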
2025-09-22 01:59:15 -05:00
parent 9b2affe95a
commit bfe29dea75
13 changed files with 612 additions and 1215 deletions


@@ -2,13 +2,14 @@
package disk
import (
"container/list"
"fmt"
"io"
"os"
"path/filepath"
"s1d3sw1ped/steamcache2/steamcache/logger"
"s1d3sw1ped/steamcache2/vfs"
"s1d3sw1ped/steamcache2/vfs/locks"
"s1d3sw1ped/steamcache2/vfs/lru"
"s1d3sw1ped/steamcache2/vfs/vfserror"
"sort"
"strings"
@@ -32,55 +33,10 @@ type DiskFS struct {
size int64
mu sync.RWMutex
keyLocks []sync.Map // Sharded lock pools for better concurrency
LRU *lruList
LRU *lru.LRUList[*vfs.FileInfo]
timeUpdater *vfs.BatchedTimeUpdate // Batched time updates for better performance
}
// Number of lock shards for reducing contention
const numLockShards = 32
// lruList for time-decayed LRU eviction
type lruList struct {
list *list.List
elem map[string]*list.Element
}
func newLruList() *lruList {
return &lruList{
list: list.New(),
elem: make(map[string]*list.Element),
}
}
func (l *lruList) Add(key string, fi *vfs.FileInfo) {
elem := l.list.PushFront(fi)
l.elem[key] = elem
}
func (l *lruList) MoveToFront(key string, timeUpdater *vfs.BatchedTimeUpdate) {
if elem, exists := l.elem[key]; exists {
l.list.MoveToFront(elem)
// Update the FileInfo in the element with new access time
if fi := elem.Value.(*vfs.FileInfo); fi != nil {
fi.UpdateAccessBatched(timeUpdater)
}
}
}
func (l *lruList) Remove(key string) *vfs.FileInfo {
if elem, exists := l.elem[key]; exists {
delete(l.elem, key)
if fi := l.list.Remove(elem).(*vfs.FileInfo); fi != nil {
return fi
}
}
return nil
}
func (l *lruList) Len() int {
return l.list.Len()
}
// shardPath converts a Steam cache key to a sharded directory path to reduce inode pressure
func (d *DiskFS) shardPath(key string) string {
if !strings.HasPrefix(key, "steam/") {
@@ -105,43 +61,6 @@ func (d *DiskFS) shardPath(key string) string {
return filepath.Join("steam", shard1, shard2, hashPart)
}
// extractKeyFromPath reverses the sharding logic to get the original key from a sharded path
func (d *DiskFS) extractKeyFromPath(path string) string {
// Fast path: if no slashes, it's not a sharded path
if !strings.Contains(path, "/") {
return path
}
parts := strings.SplitN(path, "/", 5)
numParts := len(parts)
if numParts >= 4 && parts[0] == "steam" {
lastThree := parts[numParts-3:]
shard1 := lastThree[0]
shard2 := lastThree[1]
filename := lastThree[2]
// Verify sharding is correct
if len(filename) >= 4 && filename[:2] == shard1 && filename[2:4] == shard2 {
return "steam/" + filename
}
}
// Handle single-level sharding for short hashes: steam/shard1/filename
if numParts >= 3 && parts[0] == "steam" {
lastTwo := parts[numParts-2:]
shard1 := lastTwo[0]
filename := lastTwo[1]
if len(filename) >= 2 && filename[:2] == shard1 {
return "steam/" + filename
}
}
// Fallback: return as-is for any unrecognized format
return path
}
// New creates a new DiskFS.
func New(root string, capacity int64) *DiskFS {
if capacity <= 0 {
@@ -152,7 +71,7 @@ func New(root string, capacity int64) *DiskFS {
os.MkdirAll(root, 0755)
// Initialize sharded locks
keyLocks := make([]sync.Map, numLockShards)
keyLocks := make([]sync.Map, locks.NumLockShards)
d := &DiskFS{
root: root,
@@ -160,7 +79,7 @@ func New(root string, capacity int64) *DiskFS {
capacity: capacity,
size: 0,
keyLocks: keyLocks,
LRU: newLruList(),
LRU: lru.NewLRUList[*vfs.FileInfo](),
timeUpdater: vfs.NewBatchedTimeUpdate(100 * time.Millisecond), // Update time every 100ms
}
@@ -168,15 +87,15 @@ func New(root string, capacity int64) *DiskFS {
return d
}
// init loads existing files from disk
// init loads existing files from disk with ultra-fast lazy initialization
func (d *DiskFS) init() {
tstart := time.Now()
// Use concurrent directory scanning for blazing fast initialization
fileInfos := d.scanDirectoryConcurrently()
// Ultra-fast initialization: only scan directory structure, defer file stats
d.scanDirectoriesOnly()
// Batch process all files to minimize lock contention
d.batchProcessFiles(fileInfos)
// Start background size calculation in a separate goroutine
go d.calculateSizeInBackground()
logger.Logger.Info().
Str("name", d.Name()).
@@ -188,25 +107,26 @@ func (d *DiskFS) init() {
Msg("init")
}
// fileInfo represents a file found during directory scanning
type fileInfo struct {
path string
relPath string
key string
size int64
modTime time.Time
isDepot bool
// scanDirectoriesOnly performs ultra-fast directory structure scanning without file stats
func (d *DiskFS) scanDirectoriesOnly() {
// Just ensure the root directory exists and is accessible
// No file scanning during init - files will be discovered on-demand
logger.Logger.Debug().
Str("root", d.root).
Msg("Directory structure scan completed (lazy file discovery enabled)")
}
// scanDirectoryConcurrently performs fast concurrent directory scanning
func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
// calculateSizeInBackground calculates the total size of all files in the background
func (d *DiskFS) calculateSizeInBackground() {
tstart := time.Now()
// Channel for collecting file information
fileChan := make(chan fileInfo, 1000)
fileChan := make(chan fileSizeInfo, 1000)
// Progress tracking
var totalFiles int64
var processedFiles int64
progressTicker := time.NewTicker(500 * time.Millisecond)
progressTicker := time.NewTicker(2 * time.Second)
defer progressTicker.Stop()
// Wait group for workers
@@ -217,11 +137,11 @@ func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
go func() {
defer wg.Done()
defer close(fileChan)
d.scanDirectoryRecursive(d.root, fileChan, &totalFiles)
d.scanFilesForSize(d.root, fileChan, &totalFiles)
}()
// Collect results with progress reporting
var fileInfos []fileInfo
var totalSize int64
// Use a separate goroutine to collect results
done := make(chan struct{})
@@ -233,15 +153,16 @@ func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
if !ok {
return
}
fileInfos = append(fileInfos, fi)
totalSize += fi.size
processedFiles++
case <-progressTicker.C:
if totalFiles > 0 {
logger.Logger.Debug().
Int64("processed", processedFiles).
Int64("total", totalFiles).
Int64("size", totalSize).
Float64("progress", float64(processedFiles)/float64(totalFiles)*100).
Msg("Directory scan progress")
Msg("Background size calculation progress")
}
}
}
@@ -251,16 +172,26 @@ func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
wg.Wait()
<-done
// Update the total size
d.mu.Lock()
d.size = totalSize
d.mu.Unlock()
logger.Logger.Info().
Int64("files_scanned", processedFiles).
Msg("Directory scan completed")
return fileInfos
Int64("total_size", totalSize).
Str("duration", time.Since(tstart).String()).
Msg("Background size calculation completed")
}
// scanDirectoryRecursive performs recursive directory scanning with early termination
func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo, totalFiles *int64) {
// Use ReadDir for faster directory listing (no stat calls)
// fileSizeInfo represents a file found during size calculation
type fileSizeInfo struct {
size int64
}
// scanFilesForSize performs recursive file scanning for size calculation only
func (d *DiskFS) scanFilesForSize(dirPath string, fileChan chan<- fileSizeInfo, totalFiles *int64) {
// Use ReadDir for faster directory listing
entries, err := os.ReadDir(dirPath)
if err != nil {
return
@@ -276,7 +207,7 @@ func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo
atomic.AddInt64(totalFiles, int64(fileCount))
// Process entries concurrently with limited workers
semaphore := make(chan struct{}, 8) // Limit concurrent processing
semaphore := make(chan struct{}, 16) // More workers for size calculation
var wg sync.WaitGroup
for _, entry := range entries {
@@ -289,103 +220,33 @@ func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo
defer wg.Done()
semaphore <- struct{}{} // Acquire semaphore
defer func() { <-semaphore }() // Release semaphore
d.scanDirectoryRecursive(path, fileChan, totalFiles)
d.scanFilesForSize(path, fileChan, totalFiles)
}(entryPath)
} else {
// Process file with lazy loading
// Process file for size only
wg.Add(1)
go func(path string, name string, entry os.DirEntry) {
go func(entry os.DirEntry) {
defer wg.Done()
semaphore <- struct{}{} // Acquire semaphore
defer func() { <-semaphore }() // Release semaphore
// Extract relative path and key first (no stat call)
rootPath := d.root
rootPath = strings.TrimPrefix(rootPath, "./")
relPath := strings.ReplaceAll(path[len(rootPath)+1:], "\\", "/")
key := d.extractKeyFromPath(relPath)
// Get file info only when needed (lazy loading)
// Get file info for size calculation
info, err := entry.Info()
if err != nil {
return
}
// Send file info
fileChan <- fileInfo{
path: path,
relPath: relPath,
key: key,
size: info.Size(),
modTime: info.ModTime(),
isDepot: false, // No longer tracking depot files
// Send file size info
fileChan <- fileSizeInfo{
size: info.Size(),
}
}(entryPath, entry.Name(), entry)
}(entry)
}
}
wg.Wait()
}
// batchProcessFiles processes all files in batches to minimize lock contention
func (d *DiskFS) batchProcessFiles(fileInfos []fileInfo) {
const batchSize = 1000 // Process files in batches
// Sort files by key for consistent ordering
sort.Slice(fileInfos, func(i, j int) bool {
return fileInfos[i].key < fileInfos[j].key
})
// Process in batches with progress reporting
totalBatches := (len(fileInfos) + batchSize - 1) / batchSize
for i := 0; i < len(fileInfos); i += batchSize {
end := i + batchSize
if end > len(fileInfos) {
end = len(fileInfos)
}
batch := fileInfos[i:end]
d.processBatch(batch)
// Log progress every 10 batches
if (i/batchSize+1)%10 == 0 || i+batchSize >= len(fileInfos) {
logger.Logger.Debug().
Int("batch", i/batchSize+1).
Int("total_batches", totalBatches).
Int("files_processed", end).
Int("total_files", len(fileInfos)).
Msg("Batch processing progress")
}
}
}
// processBatch processes a batch of files with a single lock acquisition
func (d *DiskFS) processBatch(batch []fileInfo) {
d.mu.Lock()
defer d.mu.Unlock()
for _, fi := range batch {
// Create FileInfo from batch data
fileInfo := &vfs.FileInfo{
Key: fi.key,
Size: fi.size,
CTime: fi.modTime,
ATime: fi.modTime,
AccessCount: 1,
}
// Add to maps
d.info[fi.key] = fileInfo
d.LRU.Add(fi.key, fileInfo)
// Initialize access time
fileInfo.UpdateAccessBatched(d.timeUpdater)
// Update total size
d.size += fi.size
}
}
// Name returns the name of this VFS
func (d *DiskFS) Name() string {
return "DiskFS"
@@ -403,24 +264,9 @@ func (d *DiskFS) Capacity() int64 {
return d.capacity
}
// getShardIndex returns the shard index for a given key
func getShardIndex(key string) int {
// Use FNV-1a hash for good distribution
var h uint32 = 2166136261 // FNV offset basis
for i := 0; i < len(key); i++ {
h ^= uint32(key[i])
h *= 16777619 // FNV prime
}
return int(h % numLockShards)
}
// getKeyLock returns a lock for the given key using sharding
func (d *DiskFS) getKeyLock(key string) *sync.RWMutex {
shardIndex := getShardIndex(key)
shard := &d.keyLocks[shardIndex]
keyLock, _ := shard.LoadOrStore(key, &sync.RWMutex{})
return keyLock.(*sync.RWMutex)
return locks.GetKeyLock(d.keyLocks, key)
}
// Create creates a new file
@@ -472,6 +318,7 @@ func (d *DiskFS) Create(key string, size int64) (io.WriteCloser, error) {
d.LRU.Add(key, fi)
// Initialize access time with current time
fi.UpdateAccessBatched(d.timeUpdater)
// Add to size for new files (not discovered files)
d.size += size
d.mu.Unlock()
@@ -517,7 +364,7 @@ func (dwc *diskWriteCloser) Close() error {
return dwc.file.Close()
}
// Open opens a file for reading
// Open opens a file for reading with lazy discovery
func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
if key == "" {
return nil, vfserror.ErrInvalidKey
@@ -533,16 +380,22 @@ func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
return nil, vfserror.ErrInvalidKey
}
keyMu := d.getKeyLock(key)
keyMu.RLock()
defer keyMu.RUnlock()
d.mu.Lock()
// First, try to get the file info
d.mu.RLock()
fi, exists := d.info[key]
d.mu.RUnlock()
if !exists {
d.mu.Unlock()
return nil, vfserror.ErrNotFound
// Try lazy discovery
var err error
fi, err = d.Stat(key)
if err != nil {
return nil, err
}
}
// Update access time and LRU
d.mu.Lock()
fi.UpdateAccessBatched(d.timeUpdater)
d.LRU.MoveToFront(key, d.timeUpdater)
d.mu.Unlock()
@@ -643,7 +496,7 @@ func (d *DiskFS) Delete(key string) error {
return nil
}
// Stat returns file information
// Stat returns file information with lazy discovery
func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
if key == "" {
return nil, vfserror.ErrInvalidKey
@@ -653,30 +506,49 @@ func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
}
keyMu := d.getKeyLock(key)
// First, try to get the file info with read lock
keyMu.RLock()
defer keyMu.RUnlock()
d.mu.RLock()
defer d.mu.RUnlock()
if fi, ok := d.info[key]; ok {
d.mu.RUnlock()
keyMu.RUnlock()
return fi, nil
}
d.mu.RUnlock()
keyMu.RUnlock()
// Check if file exists on disk but wasn't indexed (for migration)
// Lazy discovery: check if file exists on disk and index it
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
if info, err := os.Stat(path); err == nil {
// File exists in sharded location but not indexed, re-index it
fi := vfs.NewFileInfoFromOS(info, key)
// We can't modify the map here because we're in a read lock
// This is a simplified version - in production you'd need to handle this properly
info, err := os.Stat(path)
if err != nil {
return nil, vfserror.ErrNotFound
}
// File exists, add it to the index with write lock
keyMu.Lock()
defer keyMu.Unlock()
// Double-check after acquiring write lock
d.mu.Lock()
if fi, ok := d.info[key]; ok {
d.mu.Unlock()
return fi, nil
}
return nil, vfserror.ErrNotFound
// Create and add file info
fi := vfs.NewFileInfoFromOS(info, key)
d.info[key] = fi
d.LRU.Add(key, fi)
fi.UpdateAccessBatched(d.timeUpdater)
// Note: Don't add to d.size here as it's being calculated in background
// The background calculation will handle the total size
d.mu.Unlock()
return fi, nil
}
// EvictLRU evicts the least recently used files to free up space
@@ -689,7 +561,7 @@ func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
// Evict from LRU list until we free enough space
for d.size > d.capacity-int64(bytesNeeded) && d.LRU.Len() > 0 {
// Get the least recently used item
elem := d.LRU.list.Back()
elem := d.LRU.Back()
if elem == nil {
break
}
@@ -718,7 +590,7 @@ func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
shardIndex := locks.GetShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}
@@ -774,7 +646,7 @@ func (d *DiskFS) EvictBySize(bytesNeeded uint, ascending bool) uint {
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
shardIndex := locks.GetShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}
@@ -827,7 +699,7 @@ func (d *DiskFS) EvictFIFO(bytesNeeded uint) uint {
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
shardIndex := locks.GetShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}