Files
steamcache2/vfs/disk/disk.go
Justin Harms bfe29dea75
All checks were successful
Release Tag / release (push) Successful in 9s
Refactor caching and memory management components
- Updated the caching logic to utilize a predictive cache warmer, enhancing content prefetching based on access patterns.
- Replaced the legacy warming system with a more efficient predictive approach, allowing for better performance and resource management.
- Refactored memory management to integrate dynamic cache size adjustments based on system memory usage, improving overall efficiency.
- Simplified the VFS interface and improved concurrency handling with sharded locks for better performance in multi-threaded environments.
- Enhanced tests to validate the new caching and memory management behaviors, ensuring reliability and performance improvements.
2025-09-22 01:59:15 -05:00

708 lines
16 KiB
Go

// vfs/disk/disk.go
package disk
import (
"fmt"
"io"
"os"
"path/filepath"
"s1d3sw1ped/steamcache2/steamcache/logger"
"s1d3sw1ped/steamcache2/vfs"
"s1d3sw1ped/steamcache2/vfs/locks"
"s1d3sw1ped/steamcache2/vfs/lru"
"s1d3sw1ped/steamcache2/vfs/vfserror"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/docker/go-units"
"github.com/edsrzf/mmap-go"
)
// Compile-time assertion that *DiskFS satisfies the vfs.VFS interface; the
// build fails here if a required method is missing or has the wrong signature.
var _ vfs.VFS = (*DiskFS)(nil)
// DiskFS is a virtual file system that stores files on disk.
//
// Files live under root, laid out according to shardPath. An in-memory index
// (info) and an LRU list track the files this instance knows about; both are
// populated when files are created and lazily when existing files are first
// looked up via Stat.
type DiskFS struct {
	// root is the directory under which every file is stored.
	root string
	// capacity is the configured maximum size; it is set once in New.
	capacity int64

	// mu guards info, size and LRU.
	mu sync.RWMutex
	// info indexes the known files by key.
	info map[string]*vfs.FileInfo
	// size is the running total of tracked file sizes.
	size int64
	// LRU orders the known files for EvictLRU.
	LRU *lru.LRUList[*vfs.FileInfo]

	// keyLocks holds the sharded per-key lock pools used by getKeyLock.
	keyLocks []sync.Map
	// timeUpdater supplies batched timestamps for access-time updates.
	timeUpdater *vfs.BatchedTimeUpdate
}
// shardPath maps a cache key to the relative path used on disk.
//
// Keys that do not start with "steam/" are returned unchanged. For Steam keys
// the part after the prefix is placed beneath up to two shard directories
// taken from its leading characters, which keeps individual directories small:
//
//	len >= 4: steam/<chars 0-1>/<chars 2-3>/<hash>
//	len 2-3:  steam/<chars 0-1>/<hash>
//	len < 2:  steam/<hash>
func (d *DiskFS) shardPath(key string) string {
	const prefix = "steam/"
	if !strings.HasPrefix(key, prefix) {
		return key
	}

	hash := key[len(prefix):]
	segments := []string{"steam"}
	switch n := len(hash); {
	case n >= 4:
		// Two shard levels for full-length hashes.
		segments = append(segments, hash[:2], hash[2:4])
	case n >= 2:
		// Only enough characters for a single shard level.
		segments = append(segments, hash[:2])
	}
	segments = append(segments, hash)

	return filepath.Join(segments...)
}
// New creates a new DiskFS rooted at root with the given capacity.
//
// It panics if capacity is not positive. The root directory is created if it
// does not exist; a failure to create it is logged rather than returned
// because the signature has no error result, and later file operations will
// report the underlying problem. New then runs init, which starts the
// background size calculation.
func New(root string, capacity int64) *DiskFS {
	if capacity <= 0 {
		panic("disk capacity must be greater than 0")
	}

	// Create root directory if it doesn't exist.
	if err := os.MkdirAll(root, 0755); err != nil {
		logger.Logger.Error().
			Err(err).
			Str("root", root).
			Msg("Failed to create disk cache root directory")
	}

	d := &DiskFS{
		root:        root,
		info:        make(map[string]*vfs.FileInfo),
		capacity:    capacity,
		size:        0,
		keyLocks:    make([]sync.Map, locks.NumLockShards), // sharded lock pools
		LRU:         lru.NewLRUList[*vfs.FileInfo](),
		timeUpdater: vfs.NewBatchedTimeUpdate(100 * time.Millisecond), // Update time every 100ms
	}
	d.init()
	return d
}
// init prepares a freshly constructed DiskFS. No files are indexed here: it
// calls scanDirectoriesOnly, starts calculateSizeInBackground in its own
// goroutine, and logs a summary of the instance. The size and file count in
// that log line reflect the state at the moment of logging, while the
// background calculation may still be running.
func (d *DiskFS) init() {
	started := time.Now()

	d.scanDirectoriesOnly()
	go d.calculateSizeInBackground()

	capacityText := units.HumanSize(float64(d.capacity))
	sizeText := units.HumanSize(float64(d.Size()))
	fileCount := fmt.Sprint(len(d.info))

	logger.Logger.Info().
		Str("name", d.Name()).
		Str("root", d.root).
		Str("capacity", capacityText).
		Str("size", sizeText).
		Str("files", fileCount).
		Str("duration", time.Since(started).String()).
		Msg("init")
}
// scanDirectoriesOnly is the startup "scan" step. It touches no files or
// directories; it only emits a debug log entry, because files are discovered
// on demand (see Stat) rather than during initialization.
func (d *DiskFS) scanDirectoriesOnly() {
	event := logger.Logger.Debug().Str("root", d.root)
	event.Msg("Directory structure scan completed (lazy file discovery enabled)")
}
// calculateSizeInBackground walks everything under d.root, sums the size of
// every file found, and stores the total in d.size. It blocks until the walk
// finishes, so callers run it in its own goroutine. Progress is logged at
// debug level every two seconds while the walk is running.
func (d *DiskFS) calculateSizeInBackground() {
	tstart := time.Now()

	// Buffered so the scanner goroutines rarely have to wait for the collector.
	fileChan := make(chan fileSizeInfo, 1000)

	// totalFiles is incremented by the scanners with atomic.AddInt64, so every
	// read below must be an atomic load as well.
	var totalFiles int64

	// The scanner closes fileChan once the whole tree has been walked, which
	// is what terminates the collection loop.
	go func() {
		defer close(fileChan)
		d.scanFilesForSize(d.root, fileChan, &totalFiles)
	}()

	progressTicker := time.NewTicker(2 * time.Second)
	defer progressTicker.Stop()

	// Only this goroutine touches the two accumulators.
	var totalSize, processedFiles int64

collect:
	for {
		select {
		case fi, ok := <-fileChan:
			if !ok {
				break collect
			}
			totalSize += fi.size
			processedFiles++
		case <-progressTicker.C:
			if total := atomic.LoadInt64(&totalFiles); total > 0 {
				logger.Logger.Debug().
					Int64("processed", processedFiles).
					Int64("total", total).
					Int64("size", totalSize).
					Float64("progress", float64(processedFiles)/float64(total)*100).
					Msg("Background size calculation progress")
			}
		}
	}

	// NOTE(review): this overwrites d.size, so adjustments made by Create,
	// Delete or eviction while the walk was running are replaced by the
	// scanned total — confirm that is acceptable.
	d.mu.Lock()
	d.size = totalSize
	d.mu.Unlock()

	logger.Logger.Info().
		Int64("files_scanned", processedFiles).
		Int64("total_size", totalSize).
		Str("duration", time.Since(tstart).String()).
		Msg("Background size calculation completed")
}
// fileSizeInfo is the message sent from scanFilesForSize to the collector in
// calculateSizeInBackground: one value per file found.
type fileSizeInfo struct {
// size is the value of Size() from the file's directory-entry info.
size int64
}
// scanFilesForSize recursively walks dirPath and sends one fileSizeInfo per
// non-directory entry on fileChan.
//
// totalFiles is incremented atomically by the number of non-directory entries
// in each directory as soon as that directory is listed, so it can be used as
// a progress denominator while the walk is still running. Directories that
// cannot be read and entries whose info cannot be obtained are skipped
// silently. The function returns once this directory and all of its
// subdirectories have been processed; it never closes fileChan.
func (d *DiskFS) scanFilesForSize(dirPath string, fileChan chan<- fileSizeInfo, totalFiles *int64) {
	entries, err := os.ReadDir(dirPath)
	if err != nil {
		// Best effort: an unreadable directory contributes nothing.
		return
	}

	// Publish the file count up front for progress tracking.
	var fileCount int64
	for _, entry := range entries {
		if !entry.IsDir() {
			fileCount++
		}
	}
	atomic.AddInt64(totalFiles, fileCount)

	// At most maxWorkers goroutines are alive for this directory at any time.
	// The slot is taken before the goroutine is started so that a directory
	// with many entries does not create one parked goroutine per entry. Each
	// recursion level has its own semaphore, so a worker that descends into a
	// subdirectory cannot starve that subdirectory's workers.
	const maxWorkers = 16
	semaphore := make(chan struct{}, maxWorkers)
	var wg sync.WaitGroup

	for _, entry := range entries {
		semaphore <- struct{}{} // Acquire before spawning.
		wg.Add(1)

		if entry.IsDir() {
			go func(path string) {
				defer wg.Done()
				defer func() { <-semaphore }()
				d.scanFilesForSize(path, fileChan, totalFiles)
			}(filepath.Join(dirPath, entry.Name()))
			continue
		}

		go func(entry os.DirEntry) {
			defer wg.Done()
			defer func() { <-semaphore }()

			info, err := entry.Info()
			if err != nil {
				// The entry may have vanished since the listing; skip it.
				return
			}
			fileChan <- fileSizeInfo{size: info.Size()}
		}(entry)
	}

	wg.Wait()
}
// Name returns the fixed identifier of this VFS implementation, "DiskFS".
func (d *DiskFS) Name() string {
	const name = "DiskFS"
	return name
}
// Size returns the currently tracked total size, read under the read lock.
func (d *DiskFS) Size() int64 {
	d.mu.RLock()
	current := d.size
	d.mu.RUnlock()
	return current
}
// Capacity returns the maximum capacity this DiskFS was constructed with.
// No lock is taken: capacity is assigned in New and not modified afterwards
// in this file.
func (d *DiskFS) Capacity() int64 {
	limit := d.capacity
	return limit
}
// getKeyLock returns the per-key lock for key, looked up in this instance's
// sharded lock pools via locks.GetKeyLock.
func (d *DiskFS) getKeyLock(key string) *sync.RWMutex {
	keyLock := locks.GetKeyLock(d.keyLocks, key)
	return keyLock
}
// Create creates (or truncates) the file for key and returns a writer for it.
//
// key must be non-empty, must not start with '/', and must not contain ".."
// after cleaning; otherwise vfserror.ErrInvalidKey is returned. If the key is
// already indexed, the old entry is removed from the index, the LRU list and
// the size total before the file is recreated. The new entry is registered
// with the declared size; the returned writer's Close corrects the accounting
// to the number of bytes actually on disk. Errors from creating the parent
// directories or the file are returned unchanged.
func (d *DiskFS) Create(key string, size int64) (io.WriteCloser, error) {
	if key == "" || key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal.
	key = strings.ReplaceAll(filepath.Clean(key), "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	// Serialize all operations on this key for the rest of the call.
	lock := d.getKeyLock(key)
	lock.Lock()
	defer lock.Unlock()

	// Overwrite: forget the previous entry and its contribution to size.
	d.mu.Lock()
	if previous, found := d.info[key]; found {
		d.size -= previous.Size
		d.LRU.Remove(key)
		delete(d.info, key)
	}
	d.mu.Unlock()

	target := strings.ReplaceAll(filepath.Join(d.root, d.shardPath(key)), "\\", "/")
	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
		return nil, err
	}
	out, err := os.Create(target)
	if err != nil {
		return nil, err
	}

	entry := vfs.NewFileInfo(key, size)

	d.mu.Lock()
	d.info[key] = entry
	d.LRU.Add(key, entry)
	// Initialize access time with current time.
	entry.UpdateAccessBatched(d.timeUpdater)
	// Newly created files count toward the total right away.
	d.size += size
	d.mu.Unlock()

	writer := &diskWriteCloser{
		file:         out,
		disk:         d,
		key:          key,
		declaredSize: size,
	}
	return writer, nil
}
// diskWriteCloser implements io.WriteCloser for disk files with size adjustment:
// writes go straight to the underlying file, and Close reconciles the owning
// DiskFS's size accounting with the file's actual size.
type diskWriteCloser struct {
// file is the open handle returned by os.Create in DiskFS.Create.
file *os.File
// disk is the DiskFS whose index and size total are updated on Close.
disk *DiskFS
// key is the sanitized cache key the file was created under.
key string
// declaredSize is the size passed to Create; it is stored but not read by
// the methods visible in this file.
declaredSize int64
}
// Write writes p to the underlying file and reports the result unchanged.
func (dwc *diskWriteCloser) Write(p []byte) (n int, err error) {
	n, err = dwc.file.Write(p)
	return n, err
}
// Close reconciles size accounting with what was actually written and then
// closes the file.
//
// The file is stat'ed first; if that fails the file is still closed and the
// stat error is returned. Otherwise, if the key is still indexed, the entry's
// Size is set to the on-disk size and the DiskFS total is adjusted by the
// difference. The result of closing the file is returned.
func (dwc *diskWriteCloser) Close() error {
	stat, statErr := dwc.file.Stat()
	if statErr != nil {
		dwc.file.Close()
		return statErr
	}
	written := stat.Size()

	owner := dwc.disk
	owner.mu.Lock()
	entry, tracked := owner.info[dwc.key]
	if tracked {
		// Replace the recorded size with the real one and shift the total
		// by the same amount.
		owner.size += written - entry.Size
		entry.Size = written
	}
	owner.mu.Unlock()

	return dwc.file.Close()
}
// Open opens the file for key for reading, discovering it on disk via Stat if
// it is not indexed yet.
//
// key must be non-empty, must not start with '/', and must not contain ".."
// after cleaning; otherwise vfserror.ErrInvalidKey is returned. A successful
// lookup refreshes the entry's access time and moves it to the front of the
// LRU list. Files whose recorded size exceeds 1 MiB are served from a
// read-only memory mapping; if mapping fails the already-open file is
// returned instead. Errors from Stat and os.Open are returned unchanged.
func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
	if key == "" || key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal.
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	// First, try to get the file info from the index.
	d.mu.RLock()
	fi, exists := d.info[key]
	d.mu.RUnlock()

	if !exists {
		// Try lazy discovery.
		var err error
		fi, err = d.Stat(key)
		if err != nil {
			return nil, err
		}
	}

	// Update access time and LRU. The size is captured under the same lock
	// because a writer's Close updates fi.Size while holding d.mu.
	d.mu.Lock()
	fi.UpdateAccessBatched(d.timeUpdater)
	d.LRU.MoveToFront(key, d.timeUpdater)
	size := fi.Size
	d.mu.Unlock()

	path := filepath.Join(d.root, d.shardPath(key))
	path = strings.ReplaceAll(path, "\\", "/")

	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	// Use memory mapping for large files (>1MB) to improve performance.
	const mmapThreshold = 1024 * 1024 // 1MB
	if size <= mmapThreshold {
		return file, nil
	}

	// Map the handle that is already open instead of reopening the file.
	mapped, err := mmap.Map(file, mmap.RDONLY, 0)
	if err != nil {
		// Fallback to regular file reading; nothing has been read from the
		// handle yet, so it is returned as is.
		return file, nil
	}

	return &mmapReadCloser{
		data:   mapped,
		file:   file,
		offset: 0,
	}, nil
}
// mmapReadCloser implements io.ReadCloser for memory-mapped files. Read copies
// sequentially out of the mapping; Close unmaps it and closes the file.
type mmapReadCloser struct {
// data is the mapping returned by mmap.Map for file.
data mmap.MMap
// file is the open handle the mapping was created from.
file *os.File
// offset is the index into data of the next byte Read will return.
offset int
}
// Read copies the next unread bytes of the mapping into p and advances the
// read position by the number of bytes copied. Once every byte has been
// consumed it returns 0 and io.EOF.
func (m *mmapReadCloser) Read(p []byte) (n int, err error) {
	if m.offset < len(m.data) {
		n = copy(p, m.data[m.offset:])
		m.offset += n
		return n, nil
	}
	return 0, io.EOF
}
// Close releases the memory mapping and closes the underlying file. Both
// steps are always attempted. If unmapping fails that error is returned;
// otherwise the result of closing the file is returned.
func (m *mmapReadCloser) Close() error {
	unmapErr := m.data.Unmap()
	closeErr := m.file.Close()
	if unmapErr != nil {
		return unmapErr
	}
	return closeErr
}
// Delete removes the file for key from the index and from disk.
//
// key is validated and normalized exactly as in Create and Open: it must be
// non-empty, must not start with '/', and must not contain ".." after
// cleaning, otherwise vfserror.ErrInvalidKey is returned. Without this a key
// such as "../x" could address a path outside the cache root, and a
// non-normalized spelling of a key would never match the index.
// vfserror.ErrNotFound is returned when the key is not indexed. The index
// entry is dropped before the file is removed; an error from os.Remove is
// returned unchanged.
func (d *DiskFS) Delete(key string) error {
	if key == "" || key[0] == '/' {
		return vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal.
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)
	keyMu.Lock()
	defer keyMu.Unlock()

	d.mu.Lock()
	fi, exists := d.info[key]
	if !exists {
		d.mu.Unlock()
		return vfserror.ErrNotFound
	}
	d.size -= fi.Size
	d.LRU.Remove(key)
	delete(d.info, key)
	d.mu.Unlock()

	path := filepath.Join(d.root, d.shardPath(key))
	path = strings.ReplaceAll(path, "\\", "/")

	return os.Remove(path)
}
// Stat returns the file information for key, discovering the file on disk if
// it is not indexed yet.
//
// key is validated and normalized exactly as in Create and Open: it must be
// non-empty, must not start with '/', and must not contain ".." after
// cleaning, otherwise vfserror.ErrInvalidKey is returned. This keeps lookups
// inside the cache root and ensures one file is never indexed under several
// spellings of its key.
//
// If the key is indexed its entry is returned. Otherwise the sharded path is
// stat'ed; when a regular entry exists there it is added to the index and the
// LRU list and returned. Any stat failure, or a path that turns out to be a
// directory, yields vfserror.ErrNotFound.
func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
	if key == "" || key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal.
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)

	// Fast path: the key is already indexed.
	keyMu.RLock()
	d.mu.RLock()
	fi, ok := d.info[key]
	d.mu.RUnlock()
	keyMu.RUnlock()
	if ok {
		return fi, nil
	}

	// Lazy discovery: check if the file exists on disk and index it.
	path := filepath.Join(d.root, d.shardPath(key))
	path = strings.ReplaceAll(path, "\\", "/")

	info, err := os.Stat(path)
	if err != nil || info.IsDir() {
		// Shard directories are not cache entries and must never be indexed.
		return nil, vfserror.ErrNotFound
	}

	keyMu.Lock()
	defer keyMu.Unlock()
	d.mu.Lock()
	defer d.mu.Unlock()

	// Another goroutine may have indexed the key while no lock was held.
	if existing, found := d.info[key]; found {
		return existing, nil
	}

	fi = vfs.NewFileInfoFromOS(info, key)
	d.info[key] = fi
	d.LRU.Add(key, fi)
	fi.UpdateAccessBatched(d.timeUpdater)
	// Note: d.size is not adjusted here; the background size calculation
	// accounts for files that already exist on disk.

	return fi, nil
}
// EvictLRU evicts least recently used files until the tracked size is at most
// capacity minus bytesNeeded, or the LRU list is empty. It returns the number
// of bytes removed from the size accounting.
//
// Each candidate is dropped from the LRU list and the index before its file
// is removed. A file that is already missing on disk is treated as evicted so
// that its bytes do not stay counted in d.size forever. If removal fails for
// any other reason the entry stays dropped from the index but its size is not
// subtracted, since the file may still be on disk.
func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	// Neither capacity nor bytesNeeded changes during the loop.
	target := d.capacity - int64(bytesNeeded)

	var evicted uint
	for d.size > target && d.LRU.Len() > 0 {
		// Get the least recently used item.
		elem := d.LRU.Back()
		if elem == nil {
			break
		}
		fi := elem.Value.(*vfs.FileInfo)
		key := fi.Key

		d.LRU.Remove(key)
		delete(d.info, key)

		path := filepath.Join(d.root, d.shardPath(key))
		path = strings.ReplaceAll(path, "\\", "/")
		if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
			// The file could not be removed; skip the accounting and move on.
			continue
		}

		d.size -= fi.Size
		evicted += uint(fi.Size)

		// Clean up key lock.
		shardIndex := locks.GetShardIndex(key)
		d.keyLocks[shardIndex].Delete(key)
	}
	return evicted
}
// EvictBySize evicts indexed files ordered by size (ascending = smallest
// first, otherwise largest first) until the tracked size is at most capacity
// minus bytesNeeded or no candidates remain. It returns the number of bytes
// removed from the size accounting.
//
// Each candidate is dropped from the LRU list and the index before its file
// is removed. A file that is already missing on disk is treated as evicted so
// that its bytes do not stay counted in d.size forever. If removal fails for
// any other reason the entry stays dropped from the index but its size is not
// subtracted, since the file may still be on disk.
func (d *DiskFS) EvictBySize(bytesNeeded uint, ascending bool) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	// Collect all files; the final length is known up front.
	candidates := make([]*vfs.FileInfo, 0, len(d.info))
	for _, fi := range d.info {
		candidates = append(candidates, fi)
	}

	sort.Slice(candidates, func(i, j int) bool {
		if ascending {
			return candidates[i].Size < candidates[j].Size
		}
		return candidates[i].Size > candidates[j].Size
	})

	// Neither capacity nor bytesNeeded changes during the loop.
	target := d.capacity - int64(bytesNeeded)

	var evicted uint
	for _, fi := range candidates {
		if d.size <= target {
			break
		}
		key := fi.Key

		d.LRU.Remove(key)
		delete(d.info, key)

		path := filepath.Join(d.root, d.shardPath(key))
		path = strings.ReplaceAll(path, "\\", "/")
		if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
			// The file could not be removed; skip the accounting and move on.
			continue
		}

		d.size -= fi.Size
		evicted += uint(fi.Size)

		// Clean up key lock.
		shardIndex := locks.GetShardIndex(key)
		d.keyLocks[shardIndex].Delete(key)
	}
	return evicted
}
// EvictFIFO evicts indexed files in order of creation time (oldest CTime
// first) until the tracked size is at most capacity minus bytesNeeded or no
// candidates remain. It returns the number of bytes removed from the size
// accounting.
//
// Each candidate is dropped from the LRU list and the index before its file
// is removed. A file that is already missing on disk is treated as evicted so
// that its bytes do not stay counted in d.size forever. If removal fails for
// any other reason the entry stays dropped from the index but its size is not
// subtracted, since the file may still be on disk.
func (d *DiskFS) EvictFIFO(bytesNeeded uint) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	// Collect all files; the final length is known up front.
	candidates := make([]*vfs.FileInfo, 0, len(d.info))
	for _, fi := range d.info {
		candidates = append(candidates, fi)
	}

	// Sort by creation time (oldest first).
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].CTime.Before(candidates[j].CTime)
	})

	// Neither capacity nor bytesNeeded changes during the loop.
	target := d.capacity - int64(bytesNeeded)

	var evicted uint
	for _, fi := range candidates {
		if d.size <= target {
			break
		}
		key := fi.Key

		d.LRU.Remove(key)
		delete(d.info, key)

		path := filepath.Join(d.root, d.shardPath(key))
		path = strings.ReplaceAll(path, "\\", "/")
		if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
			// The file could not be removed; skip the accounting and move on.
			continue
		}

		d.size -= fi.Size
		evicted += uint(fi.Size)

		// Clean up key lock.
		shardIndex := locks.GetShardIndex(key)
		d.keyLocks[shardIndex].Delete(key)
	}
	return evicted
}