Files
steamcache2/vfs/disk/disk.go
Justin Harms 9b2affe95a
All checks were successful
Release Tag / release (push) Successful in 9s
Refactor disk initialization and file processing in DiskFS
- Replaced legacy depot file migration logic with concurrent directory scanning for improved performance.
- Introduced batch processing of files to minimize lock contention during initialization.
- Simplified the init function by removing unnecessary complexity and focusing on efficient file handling.
- Enhanced logging to provide better insights into directory scan progress and completion.
2025-09-22 00:51:51 -05:00

836 lines
19 KiB
Go

// vfs/disk/disk.go
package disk
import (
"container/list"
"fmt"
"io"
"os"
"path/filepath"
"s1d3sw1ped/steamcache2/steamcache/logger"
"s1d3sw1ped/steamcache2/vfs"
"s1d3sw1ped/steamcache2/vfs/vfserror"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/docker/go-units"
"github.com/edsrzf/mmap-go"
)
// Ensure DiskFS implements VFS.
var _ vfs.VFS = (*DiskFS)(nil)
// DiskFS is a virtual file system that stores files on disk.
type DiskFS struct {
root string
info map[string]*vfs.FileInfo
capacity int64
size int64
mu sync.RWMutex
keyLocks []sync.Map // Sharded lock pools for better concurrency
LRU *lruList
timeUpdater *vfs.BatchedTimeUpdate // Batched time updates for better performance
}
// Number of lock shards for reducing contention
const numLockShards = 32
// lruList for time-decayed LRU eviction
type lruList struct {
list *list.List
elem map[string]*list.Element
}
func newLruList() *lruList {
return &lruList{
list: list.New(),
elem: make(map[string]*list.Element),
}
}
func (l *lruList) Add(key string, fi *vfs.FileInfo) {
elem := l.list.PushFront(fi)
l.elem[key] = elem
}
func (l *lruList) MoveToFront(key string, timeUpdater *vfs.BatchedTimeUpdate) {
if elem, exists := l.elem[key]; exists {
l.list.MoveToFront(elem)
// Update the FileInfo in the element with new access time
if fi := elem.Value.(*vfs.FileInfo); fi != nil {
fi.UpdateAccessBatched(timeUpdater)
}
}
}
func (l *lruList) Remove(key string) *vfs.FileInfo {
if elem, exists := l.elem[key]; exists {
delete(l.elem, key)
if fi := l.list.Remove(elem).(*vfs.FileInfo); fi != nil {
return fi
}
}
return nil
}
func (l *lruList) Len() int {
return l.list.Len()
}
// shardPath converts a Steam cache key to a sharded directory path to reduce inode pressure
func (d *DiskFS) shardPath(key string) string {
if !strings.HasPrefix(key, "steam/") {
return key
}
// Extract hash part
hashPart := key[6:] // Remove "steam/" prefix
if len(hashPart) < 4 {
// For very short hashes, single level sharding
if len(hashPart) >= 2 {
shard1 := hashPart[:2]
return filepath.Join("steam", shard1, hashPart)
}
return filepath.Join("steam", hashPart)
}
// Optimal 2-level sharding for Steam hashes (typically 40 chars)
shard1 := hashPart[:2] // First 2 chars
shard2 := hashPart[2:4] // Next 2 chars
return filepath.Join("steam", shard1, shard2, hashPart)
}
// extractKeyFromPath reverses the sharding logic to get the original key from a sharded path
func (d *DiskFS) extractKeyFromPath(path string) string {
// Fast path: if no slashes, it's not a sharded path
if !strings.Contains(path, "/") {
return path
}
parts := strings.SplitN(path, "/", 5)
numParts := len(parts)
if numParts >= 4 && parts[0] == "steam" {
lastThree := parts[numParts-3:]
shard1 := lastThree[0]
shard2 := lastThree[1]
filename := lastThree[2]
// Verify sharding is correct
if len(filename) >= 4 && filename[:2] == shard1 && filename[2:4] == shard2 {
return "steam/" + filename
}
}
// Handle single-level sharding for short hashes: steam/shard1/filename
if numParts >= 3 && parts[0] == "steam" {
lastTwo := parts[numParts-2:]
shard1 := lastTwo[0]
filename := lastTwo[1]
if len(filename) >= 2 && filename[:2] == shard1 {
return "steam/" + filename
}
}
// Fallback: return as-is for any unrecognized format
return path
}
// New creates a new DiskFS.
func New(root string, capacity int64) *DiskFS {
if capacity <= 0 {
panic("disk capacity must be greater than 0")
}
// Create root directory if it doesn't exist
os.MkdirAll(root, 0755)
// Initialize sharded locks
keyLocks := make([]sync.Map, numLockShards)
d := &DiskFS{
root: root,
info: make(map[string]*vfs.FileInfo),
capacity: capacity,
size: 0,
keyLocks: keyLocks,
LRU: newLruList(),
timeUpdater: vfs.NewBatchedTimeUpdate(100 * time.Millisecond), // Update time every 100ms
}
d.init()
return d
}
// init loads existing files from disk
func (d *DiskFS) init() {
tstart := time.Now()
// Use concurrent directory scanning for blazing fast initialization
fileInfos := d.scanDirectoryConcurrently()
// Batch process all files to minimize lock contention
d.batchProcessFiles(fileInfos)
logger.Logger.Info().
Str("name", d.Name()).
Str("root", d.root).
Str("capacity", units.HumanSize(float64(d.capacity))).
Str("size", units.HumanSize(float64(d.Size()))).
Str("files", fmt.Sprint(len(d.info))).
Str("duration", time.Since(tstart).String()).
Msg("init")
}
// fileInfo represents a file found during directory scanning
type fileInfo struct {
path string
relPath string
key string
size int64
modTime time.Time
isDepot bool
}
// scanDirectoryConcurrently performs fast concurrent directory scanning
func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
// Channel for collecting file information
fileChan := make(chan fileInfo, 1000)
// Progress tracking
var totalFiles int64
var processedFiles int64
progressTicker := time.NewTicker(500 * time.Millisecond)
defer progressTicker.Stop()
// Wait group for workers
var wg sync.WaitGroup
// Start directory scanner
wg.Add(1)
go func() {
defer wg.Done()
defer close(fileChan)
d.scanDirectoryRecursive(d.root, fileChan, &totalFiles)
}()
// Collect results with progress reporting
var fileInfos []fileInfo
// Use a separate goroutine to collect results
done := make(chan struct{})
go func() {
defer close(done)
for {
select {
case fi, ok := <-fileChan:
if !ok {
return
}
fileInfos = append(fileInfos, fi)
processedFiles++
case <-progressTicker.C:
if totalFiles > 0 {
logger.Logger.Debug().
Int64("processed", processedFiles).
Int64("total", totalFiles).
Float64("progress", float64(processedFiles)/float64(totalFiles)*100).
Msg("Directory scan progress")
}
}
}
}()
// Wait for scanning to complete
wg.Wait()
<-done
logger.Logger.Info().
Int64("files_scanned", processedFiles).
Msg("Directory scan completed")
return fileInfos
}
// scanDirectoryRecursive performs recursive directory scanning with early termination
func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo, totalFiles *int64) {
// Use ReadDir for faster directory listing (no stat calls)
entries, err := os.ReadDir(dirPath)
if err != nil {
return
}
// Count files first for progress tracking
fileCount := 0
for _, entry := range entries {
if !entry.IsDir() {
fileCount++
}
}
atomic.AddInt64(totalFiles, int64(fileCount))
// Process entries concurrently with limited workers
semaphore := make(chan struct{}, 8) // Limit concurrent processing
var wg sync.WaitGroup
for _, entry := range entries {
entryPath := filepath.Join(dirPath, entry.Name())
if entry.IsDir() {
// Recursively scan subdirectories
wg.Add(1)
go func(path string) {
defer wg.Done()
semaphore <- struct{}{} // Acquire semaphore
defer func() { <-semaphore }() // Release semaphore
d.scanDirectoryRecursive(path, fileChan, totalFiles)
}(entryPath)
} else {
// Process file with lazy loading
wg.Add(1)
go func(path string, name string, entry os.DirEntry) {
defer wg.Done()
semaphore <- struct{}{} // Acquire semaphore
defer func() { <-semaphore }() // Release semaphore
// Extract relative path and key first (no stat call)
rootPath := d.root
rootPath = strings.TrimPrefix(rootPath, "./")
relPath := strings.ReplaceAll(path[len(rootPath)+1:], "\\", "/")
key := d.extractKeyFromPath(relPath)
// Get file info only when needed (lazy loading)
info, err := entry.Info()
if err != nil {
return
}
// Send file info
fileChan <- fileInfo{
path: path,
relPath: relPath,
key: key,
size: info.Size(),
modTime: info.ModTime(),
isDepot: false, // No longer tracking depot files
}
}(entryPath, entry.Name(), entry)
}
}
wg.Wait()
}
// batchProcessFiles processes all files in batches to minimize lock contention
func (d *DiskFS) batchProcessFiles(fileInfos []fileInfo) {
const batchSize = 1000 // Process files in batches
// Sort files by key for consistent ordering
sort.Slice(fileInfos, func(i, j int) bool {
return fileInfos[i].key < fileInfos[j].key
})
// Process in batches with progress reporting
totalBatches := (len(fileInfos) + batchSize - 1) / batchSize
for i := 0; i < len(fileInfos); i += batchSize {
end := i + batchSize
if end > len(fileInfos) {
end = len(fileInfos)
}
batch := fileInfos[i:end]
d.processBatch(batch)
// Log progress every 10 batches
if (i/batchSize+1)%10 == 0 || i+batchSize >= len(fileInfos) {
logger.Logger.Debug().
Int("batch", i/batchSize+1).
Int("total_batches", totalBatches).
Int("files_processed", end).
Int("total_files", len(fileInfos)).
Msg("Batch processing progress")
}
}
}
// processBatch processes a batch of files with a single lock acquisition
func (d *DiskFS) processBatch(batch []fileInfo) {
d.mu.Lock()
defer d.mu.Unlock()
for _, fi := range batch {
// Create FileInfo from batch data
fileInfo := &vfs.FileInfo{
Key: fi.key,
Size: fi.size,
CTime: fi.modTime,
ATime: fi.modTime,
AccessCount: 1,
}
// Add to maps
d.info[fi.key] = fileInfo
d.LRU.Add(fi.key, fileInfo)
// Initialize access time
fileInfo.UpdateAccessBatched(d.timeUpdater)
// Update total size
d.size += fi.size
}
}
// Name returns the name of this VFS
func (d *DiskFS) Name() string {
return "DiskFS"
}
// Size returns the current size
func (d *DiskFS) Size() int64 {
d.mu.RLock()
defer d.mu.RUnlock()
return d.size
}
// Capacity returns the maximum capacity
func (d *DiskFS) Capacity() int64 {
return d.capacity
}
// getShardIndex returns the shard index for a given key
func getShardIndex(key string) int {
// Use FNV-1a hash for good distribution
var h uint32 = 2166136261 // FNV offset basis
for i := 0; i < len(key); i++ {
h ^= uint32(key[i])
h *= 16777619 // FNV prime
}
return int(h % numLockShards)
}
// getKeyLock returns a lock for the given key using sharding
func (d *DiskFS) getKeyLock(key string) *sync.RWMutex {
shardIndex := getShardIndex(key)
shard := &d.keyLocks[shardIndex]
keyLock, _ := shard.LoadOrStore(key, &sync.RWMutex{})
return keyLock.(*sync.RWMutex)
}
// Create creates a new file
func (d *DiskFS) Create(key string, size int64) (io.WriteCloser, error) {
if key == "" {
return nil, vfserror.ErrInvalidKey
}
if key[0] == '/' {
return nil, vfserror.ErrInvalidKey
}
// Sanitize key to prevent path traversal
key = filepath.Clean(key)
key = strings.ReplaceAll(key, "\\", "/")
if strings.Contains(key, "..") {
return nil, vfserror.ErrInvalidKey
}
keyMu := d.getKeyLock(key)
keyMu.Lock()
defer keyMu.Unlock()
d.mu.Lock()
// Check if file already exists and handle overwrite
if fi, exists := d.info[key]; exists {
d.size -= fi.Size
d.LRU.Remove(key)
delete(d.info, key)
}
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
d.mu.Unlock()
path = strings.ReplaceAll(path, "\\", "/")
dir := filepath.Dir(path)
if err := os.MkdirAll(dir, 0755); err != nil {
return nil, err
}
file, err := os.Create(path)
if err != nil {
return nil, err
}
fi := vfs.NewFileInfo(key, size)
d.mu.Lock()
d.info[key] = fi
d.LRU.Add(key, fi)
// Initialize access time with current time
fi.UpdateAccessBatched(d.timeUpdater)
d.size += size
d.mu.Unlock()
return &diskWriteCloser{
file: file,
disk: d,
key: key,
declaredSize: size,
}, nil
}
// diskWriteCloser implements io.WriteCloser for disk files with size adjustment
type diskWriteCloser struct {
file *os.File
disk *DiskFS
key string
declaredSize int64
}
func (dwc *diskWriteCloser) Write(p []byte) (n int, err error) {
return dwc.file.Write(p)
}
func (dwc *diskWriteCloser) Close() error {
// Get the actual file size
stat, err := dwc.file.Stat()
if err != nil {
dwc.file.Close()
return err
}
actualSize := stat.Size()
// Update the size in FileInfo if it differs from declared size
dwc.disk.mu.Lock()
if fi, exists := dwc.disk.info[dwc.key]; exists {
sizeDiff := actualSize - fi.Size
fi.Size = actualSize
dwc.disk.size += sizeDiff
}
dwc.disk.mu.Unlock()
return dwc.file.Close()
}
// Open opens a file for reading
func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
if key == "" {
return nil, vfserror.ErrInvalidKey
}
if key[0] == '/' {
return nil, vfserror.ErrInvalidKey
}
// Sanitize key to prevent path traversal
key = filepath.Clean(key)
key = strings.ReplaceAll(key, "\\", "/")
if strings.Contains(key, "..") {
return nil, vfserror.ErrInvalidKey
}
keyMu := d.getKeyLock(key)
keyMu.RLock()
defer keyMu.RUnlock()
d.mu.Lock()
fi, exists := d.info[key]
if !exists {
d.mu.Unlock()
return nil, vfserror.ErrNotFound
}
fi.UpdateAccessBatched(d.timeUpdater)
d.LRU.MoveToFront(key, d.timeUpdater)
d.mu.Unlock()
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
file, err := os.Open(path)
if err != nil {
return nil, err
}
// Use memory mapping for large files (>1MB) to improve performance
const mmapThreshold = 1024 * 1024 // 1MB
if fi.Size > mmapThreshold {
// Close the regular file handle
file.Close()
// Try memory mapping
mmapFile, err := os.Open(path)
if err != nil {
return nil, err
}
mapped, err := mmap.Map(mmapFile, mmap.RDONLY, 0)
if err != nil {
mmapFile.Close()
// Fallback to regular file reading
return os.Open(path)
}
return &mmapReadCloser{
data: mapped,
file: mmapFile,
offset: 0,
}, nil
}
return file, nil
}
// mmapReadCloser implements io.ReadCloser for memory-mapped files
type mmapReadCloser struct {
data mmap.MMap
file *os.File
offset int
}
func (m *mmapReadCloser) Read(p []byte) (n int, err error) {
if m.offset >= len(m.data) {
return 0, io.EOF
}
n = copy(p, m.data[m.offset:])
m.offset += n
return n, nil
}
func (m *mmapReadCloser) Close() error {
m.data.Unmap()
return m.file.Close()
}
// Delete removes a file
func (d *DiskFS) Delete(key string) error {
if key == "" {
return vfserror.ErrInvalidKey
}
if key[0] == '/' {
return vfserror.ErrInvalidKey
}
keyMu := d.getKeyLock(key)
keyMu.Lock()
defer keyMu.Unlock()
d.mu.Lock()
fi, exists := d.info[key]
if !exists {
d.mu.Unlock()
return vfserror.ErrNotFound
}
d.size -= fi.Size
d.LRU.Remove(key)
delete(d.info, key)
d.mu.Unlock()
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
err := os.Remove(path)
if err != nil {
return err
}
return nil
}
// Stat returns file information
func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
if key == "" {
return nil, vfserror.ErrInvalidKey
}
if key[0] == '/' {
return nil, vfserror.ErrInvalidKey
}
keyMu := d.getKeyLock(key)
keyMu.RLock()
defer keyMu.RUnlock()
d.mu.RLock()
defer d.mu.RUnlock()
if fi, ok := d.info[key]; ok {
return fi, nil
}
// Check if file exists on disk but wasn't indexed (for migration)
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
if info, err := os.Stat(path); err == nil {
// File exists in sharded location but not indexed, re-index it
fi := vfs.NewFileInfoFromOS(info, key)
// We can't modify the map here because we're in a read lock
// This is a simplified version - in production you'd need to handle this properly
return fi, nil
}
return nil, vfserror.ErrNotFound
}
// EvictLRU evicts the least recently used files to free up space
func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
d.mu.Lock()
defer d.mu.Unlock()
var evicted uint
// Evict from LRU list until we free enough space
for d.size > d.capacity-int64(bytesNeeded) && d.LRU.Len() > 0 {
// Get the least recently used item
elem := d.LRU.list.Back()
if elem == nil {
break
}
fi := elem.Value.(*vfs.FileInfo)
key := fi.Key
// Remove from LRU
d.LRU.Remove(key)
// Remove from map
delete(d.info, key)
// Remove file from disk
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
if err := os.Remove(path); err != nil {
// Log error but continue
continue
}
// Update size
d.size -= fi.Size
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}
return evicted
}
// EvictBySize evicts files by size (ascending = smallest first, descending = largest first)
func (d *DiskFS) EvictBySize(bytesNeeded uint, ascending bool) uint {
d.mu.Lock()
defer d.mu.Unlock()
var evicted uint
var candidates []*vfs.FileInfo
// Collect all files
for _, fi := range d.info {
candidates = append(candidates, fi)
}
// Sort by size
sort.Slice(candidates, func(i, j int) bool {
if ascending {
return candidates[i].Size < candidates[j].Size
}
return candidates[i].Size > candidates[j].Size
})
// Evict files until we free enough space
for _, fi := range candidates {
if d.size <= d.capacity-int64(bytesNeeded) {
break
}
key := fi.Key
// Remove from LRU
d.LRU.Remove(key)
// Remove from map
delete(d.info, key)
// Remove file from disk
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
if err := os.Remove(path); err != nil {
continue
}
// Update size
d.size -= fi.Size
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}
return evicted
}
// EvictFIFO evicts files using FIFO (oldest creation time first)
func (d *DiskFS) EvictFIFO(bytesNeeded uint) uint {
d.mu.Lock()
defer d.mu.Unlock()
var evicted uint
var candidates []*vfs.FileInfo
// Collect all files
for _, fi := range d.info {
candidates = append(candidates, fi)
}
// Sort by creation time (oldest first)
sort.Slice(candidates, func(i, j int) bool {
return candidates[i].CTime.Before(candidates[j].CTime)
})
// Evict oldest files until we free enough space
for _, fi := range candidates {
if d.size <= d.capacity-int64(bytesNeeded) {
break
}
key := fi.Key
// Remove from LRU
d.LRU.Remove(key)
// Remove from map
delete(d.info, key)
// Remove file from disk
shardedPath := d.shardPath(key)
path := filepath.Join(d.root, shardedPath)
path = strings.ReplaceAll(path, "\\", "/")
if err := os.Remove(path); err != nil {
continue
}
// Update size
d.size -= fi.Size
evicted += uint(fi.Size)
// Clean up key lock
shardIndex := getShardIndex(key)
d.keyLocks[shardIndex].Delete(key)
}
return evicted
}