steamcache2/vfs/disk/disk.go

// vfs/disk/disk.go
package disk

import (
	"container/list"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"s1d3sw1ped/steamcache2/steamcache/logger"
	"s1d3sw1ped/steamcache2/vfs"
	"s1d3sw1ped/steamcache2/vfs/vfserror"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/docker/go-units"
	"github.com/edsrzf/mmap-go"
)

// Compile-time assertion that *DiskFS satisfies the vfs.VFS interface.
var _ vfs.VFS = (*DiskFS)(nil)

// DiskFS is a virtual file system that stores files on disk.
//
// Files live under root (steam/ keys are spread over shard directories, see
// shardPath). Alongside the files, DiskFS keeps an in-memory index:
//
//   - info maps a key to its *vfs.FileInfo,
//   - size is the running byte total of the indexed files,
//   - LRU orders the same entries by recency for EvictLRU,
//   - capacity is the byte limit given to New that eviction compares size against.
//
// The methods in this file access info, size and LRU while holding mu.
// keyLocks holds per-key RWMutexes, spread over numLockShards sync.Maps, that
// Create, Open, Delete and Stat take for the key they operate on.
// timeUpdater is handed to FileInfo.UpdateAccessBatched whenever an entry is
// created or accessed.
type DiskFS struct {
	root string

	info        map[string]*vfs.FileInfo
	capacity    int64
	size        int64
	mu          sync.RWMutex
	keyLocks    []sync.Map // Sharded lock pools for better concurrency
	LRU         *lruList
	timeUpdater *vfs.BatchedTimeUpdate // Batched time updates for better performance
}

// numLockShards is the number of sync.Map shards allocated for
// DiskFS.keyLocks. getShardIndex reduces a key's hash modulo this value to
// pick the shard, which spreads per-key locks and reduces contention.
const numLockShards = 32

// lruList tracks recency of cached files for LRU eviction.
//
// list holds *vfs.FileInfo values with the most recently added or accessed
// entry at the front (Add pushes to the front, MoveToFront promotes, and
// EvictLRU takes victims from the back). elem maps a key to its list element
// so entries can be found without walking the list. lruList does no locking
// of its own.
type lruList struct {
	list *list.List
	elem map[string]*list.Element
}

// newLruList returns an empty lruList with both its linked list and its
// key lookup map allocated and ready for use.
func newLruList() *lruList {
	l := new(lruList)
	l.list = list.New()
	l.elem = map[string]*list.Element{}
	return l
}

// Add records fi under key as the most recently used entry.
//
// If key is already tracked, its previous list element is unlinked first so
// that the list and the lookup map never disagree: without this, the old
// element would stay in the list with no map entry pointing at it, inflating
// Len and confusing eviction.
func (l *lruList) Add(key string, fi *vfs.FileInfo) {
	if old, ok := l.elem[key]; ok {
		l.list.Remove(old)
	}
	l.elem[key] = l.list.PushFront(fi)
}

// MoveToFront promotes the entry stored under key to the front of the list
// and refreshes its access time through timeUpdater. Unknown keys are
// ignored.
func (l *lruList) MoveToFront(key string, timeUpdater *vfs.BatchedTimeUpdate) {
	e, tracked := l.elem[key]
	if !tracked {
		return
	}

	l.list.MoveToFront(e)

	// The element carries the FileInfo; refresh its access time if present.
	info := e.Value.(*vfs.FileInfo)
	if info == nil {
		return
	}
	info.UpdateAccessBatched(timeUpdater)
}

// Remove drops key from both the lookup map and the list and returns the
// FileInfo that was stored for it, or nil when key is not tracked.
func (l *lruList) Remove(key string) *vfs.FileInfo {
	e, tracked := l.elem[key]
	if !tracked {
		return nil
	}

	delete(l.elem, key)
	removed := l.list.Remove(e).(*vfs.FileInfo)
	return removed
}

// Len reports how many elements the underlying list currently holds.
func (l *lruList) Len() int {
	count := l.list.Len()
	return count
}

// shardPath maps a cache key to the relative path used on disk.
//
// Keys that do not start with "steam/" are returned unchanged. For steam keys
// the part after the prefix is placed under shard directories taken from its
// leading characters, which keeps any single directory from growing too
// large:
//
//   - 4 or more characters: steam/<chars 0-1>/<chars 2-3>/<hash>
//   - 2 or 3 characters:    steam/<chars 0-1>/<hash>
//   - fewer than 2:         steam/<hash>
func (d *DiskFS) shardPath(key string) string {
	const prefix = "steam/"
	if !strings.HasPrefix(key, prefix) {
		return key
	}
	hash := key[len(prefix):]

	segments := []string{"steam"}
	switch {
	case len(hash) >= 4:
		// Two shard levels for normal-length hashes.
		segments = append(segments, hash[:2], hash[2:4])
	case len(hash) >= 2:
		// Only enough characters for a single shard level.
		segments = append(segments, hash[:2])
	}
	segments = append(segments, hash)

	return filepath.Join(segments...)
}

// extractKeyFromPath turns a slash-separated path relative to the cache root
// back into the cache key, undoing the directory sharding of shardPath.
//
// A path is only rewritten when its first component is "steam" and its
// trailing components have a consistent shard layout (the shard directory
// names equal the leading characters of the file name). Anything else,
// including paths without a slash, is returned unchanged.
func (d *DiskFS) extractKeyFromPath(path string) string {
	if strings.IndexByte(path, '/') < 0 {
		return path
	}

	segs := strings.SplitN(path, "/", 5)
	n := len(segs)
	if segs[0] != "steam" {
		return path
	}
	name := segs[n-1]

	// Two-level layout: .../<shard1>/<shard2>/<name>
	if n >= 4 && len(name) >= 4 {
		if name[:2] == segs[n-3] && name[2:4] == segs[n-2] {
			return "steam/" + name
		}
	}

	// Single-level layout used for short hashes: .../<shard1>/<name>
	if n >= 3 && len(name) >= 2 {
		if name[:2] == segs[n-2] {
			return "steam/" + name
		}
	}

	return path
}

// New creates a DiskFS rooted at root that accounts for up to capacity bytes.
//
// The root directory is created when it does not exist, and every file
// already present under it is indexed before New returns. New panics when
// capacity is not positive.
func New(root string, capacity int64) *DiskFS {
	if capacity <= 0 {
		panic("disk capacity must be greater than 0")
	}

	// New has no error return, so a failure to create the root is surfaced
	// through the logger instead of being dropped. Create calls MkdirAll again
	// for each file's directory and returns the error to its caller.
	if err := os.MkdirAll(root, 0755); err != nil {
		logger.Logger.Info().
			Str("root", root).
			Str("error", err.Error()).
			Msg("failed to create cache root directory")
	}

	d := &DiskFS{
		root:        root,
		info:        make(map[string]*vfs.FileInfo),
		capacity:    capacity,
		size:        0,
		keyLocks:    make([]sync.Map, numLockShards), // one lock pool per shard
		LRU:         newLruList(),
		timeUpdater: vfs.NewBatchedTimeUpdate(100 * time.Millisecond), // Update time every 100ms
	}

	d.init()
	return d
}

// init rebuilds the in-memory index from the files already under the root
// directory and logs a one-line summary (capacity, size, file count and how
// long the scan took).
func (d *DiskFS) init() {
	began := time.Now()

	// Scan the tree, then fold the results into the index batch by batch.
	d.batchProcessFiles(d.scanDirectoryConcurrently())

	capacityText := units.HumanSize(float64(d.capacity))
	sizeText := units.HumanSize(float64(d.Size()))
	fileCount := fmt.Sprint(len(d.info))
	elapsed := time.Since(began).String()

	summary := logger.Logger.Info()
	summary = summary.Str("name", d.Name()).Str("root", d.root)
	summary = summary.Str("capacity", capacityText).Str("size", sizeText)
	summary = summary.Str("files", fileCount).Str("duration", elapsed)
	summary.Msg("init")
}

// fileInfo represents a file found during directory scanning.
//
// path is the file's location as joined from the scanned directory, relPath
// is that location relative to the cache root with forward slashes, and key
// is the cache key derived from relPath by extractKeyFromPath. size and
// modTime come from the directory entry's Info(). isDepot is always set to
// false by the scanner in this file.
type fileInfo struct {
	path    string
	relPath string
	key     string
	size    int64
	modTime time.Time
	isDepot bool
}

// scanDirectoryConcurrently walks the cache root and returns one fileInfo per
// regular entry found.
//
// The walk itself runs in a background goroutine (which fans out further, see
// scanDirectoryRecursive) and streams results over a channel; this function
// collects them and emits a debug progress line every 500ms while the walk is
// running. It returns once the walker has closed the channel.
func (d *DiskFS) scanDirectoryConcurrently() []fileInfo {
	fileChan := make(chan fileInfo, 1000)

	// totalFiles is bumped by the scanner goroutines with atomic.AddInt64, so
	// it must also be read atomically here. processedFiles is only touched by
	// this goroutine.
	var totalFiles int64
	var processedFiles int64

	progressTicker := time.NewTicker(500 * time.Millisecond)
	defer progressTicker.Stop()

	// The walker owns the channel: it closes it when the whole tree is done,
	// which is the signal for the collection loop below to stop.
	go func() {
		defer close(fileChan)
		d.scanDirectoryRecursive(d.root, fileChan, &totalFiles)
	}()

	var fileInfos []fileInfo
	for scanning := true; scanning; {
		select {
		case fi, ok := <-fileChan:
			if !ok {
				scanning = false
				break
			}
			fileInfos = append(fileInfos, fi)
			processedFiles++
		case <-progressTicker.C:
			if total := atomic.LoadInt64(&totalFiles); total > 0 {
				logger.Logger.Debug().
					Int64("processed", processedFiles).
					Int64("total", total).
					Float64("progress", float64(processedFiles)/float64(total)*100).
					Msg("Directory scan progress")
			}
		}
	}

	logger.Logger.Info().
		Int64("files_scanned", processedFiles).
		Msg("Directory scan completed")

	return fileInfos
}

// scanDirectoryRecursive lists dirPath, sends a fileInfo on fileChan for every
// non-directory entry, and descends into every subdirectory. It returns only
// after all work started for dirPath (including nested directories) has
// finished.
//
// totalFiles is incremented atomically by the number of non-directory entries
// found, for progress reporting by the caller. Directories that cannot be
// read and files whose Info() fails are skipped.
func (d *DiskFS) scanDirectoryRecursive(dirPath string, fileChan chan<- fileInfo, totalFiles *int64) {
	// ReadDir avoids a stat call per entry.
	entries, err := os.ReadDir(dirPath)
	if err != nil {
		logger.Logger.Debug().
			Str("path", dirPath).
			Str("error", err.Error()).
			Msg("Directory scan: cannot read directory")
		return
	}

	// Publish the file count up front so progress percentages make sense.
	fileCount := 0
	for _, entry := range entries {
		if !entry.IsDir() {
			fileCount++
		}
	}
	atomic.AddInt64(totalFiles, int64(fileCount))

	// At most 8 entries of this directory are processed at once. The slot is
	// taken BEFORE the goroutine is started so a directory with many entries
	// does not spawn one parked goroutine per entry. Each recursion level has
	// its own semaphore, so a parent waiting for a slot never blocks its
	// children.
	semaphore := make(chan struct{}, 8)
	var wg sync.WaitGroup

	for _, entry := range entries {
		entryPath := filepath.Join(dirPath, entry.Name())

		semaphore <- struct{}{}
		wg.Add(1)

		if entry.IsDir() {
			go func(path string) {
				defer wg.Done()
				defer func() { <-semaphore }()
				d.scanDirectoryRecursive(path, fileChan, totalFiles)
			}(entryPath)
			continue
		}

		go func(path string, entry os.DirEntry) {
			defer wg.Done()
			defer func() { <-semaphore }()

			// filepath.Rel cleans both sides, so roots such as ".", "./cache"
			// or "cache/" all yield the right relative path; slicing by
			// len(root)+1 would chop characters off the name in those cases.
			rel, err := filepath.Rel(d.root, path)
			if err != nil {
				return
			}
			relPath := strings.ReplaceAll(rel, "\\", "/")
			key := d.extractKeyFromPath(relPath)

			// Info() is deferred until here so the stat happens on a worker.
			info, err := entry.Info()
			if err != nil {
				return
			}

			fileChan <- fileInfo{
				path:    path,
				relPath: relPath,
				key:     key,
				size:    info.Size(),
				modTime: info.ModTime(),
				isDepot: false, // No longer tracking depot files
			}
		}(entryPath, entry)
	}

	wg.Wait()
}

// batchProcessFiles sorts the scanned files by key and feeds them to
// processBatch in slices of up to 1000 entries, so the index lock is taken
// once per slice rather than once per file. A debug progress line is logged
// after every tenth batch and after the final one.
func (d *DiskFS) batchProcessFiles(fileInfos []fileInfo) {
	const batchSize = 1000 // Process files in batches

	// Key order gives the same processing order on every start.
	sort.Slice(fileInfos, func(a, b int) bool {
		return fileInfos[a].key < fileInfos[b].key
	})

	total := len(fileInfos)
	totalBatches := (total + batchSize - 1) / batchSize

	for batchNo, start := 1, 0; start < total; batchNo, start = batchNo+1, start+batchSize {
		end := start + batchSize
		if end > total {
			end = total
		}

		d.processBatch(fileInfos[start:end])

		isLast := start+batchSize >= total
		if batchNo%10 != 0 && !isLast {
			continue
		}
		logger.Logger.Debug().
			Int("batch", batchNo).
			Int("total_batches", totalBatches).
			Int("files_processed", end).
			Int("total_files", total).
			Msg("Batch processing progress")
	}
}

// processBatch adds every scanned file in batch to the index, the LRU list
// and the size total under a single acquisition of d.mu.
//
// Both creation and access time are seeded from the file's modification time,
// then the access time is refreshed through the batched time updater.
//
// Two different on-disk paths can resolve to the same key (for example a file
// stored both unsharded and in its shard directory). In that case the earlier
// entry is replaced rather than stacked, so the size total and the LRU list
// keep exactly one entry per key.
func (d *DiskFS) processBatch(batch []fileInfo) {
	d.mu.Lock()
	defer d.mu.Unlock()

	for _, scanned := range batch {
		if prev, dup := d.info[scanned.key]; dup {
			d.size -= prev.Size
			d.LRU.Remove(scanned.key)
		}

		entry := &vfs.FileInfo{
			Key:         scanned.key,
			Size:        scanned.size,
			CTime:       scanned.modTime,
			ATime:       scanned.modTime,
			AccessCount: 1,
		}

		d.info[scanned.key] = entry
		d.LRU.Add(scanned.key, entry)

		// Initialize access time
		entry.UpdateAccessBatched(d.timeUpdater)

		d.size += scanned.size
	}
}

// Name returns the fixed identifier of this VFS implementation, "DiskFS".
func (d *DiskFS) Name() string {
	const name = "DiskFS"
	return name
}

// Size returns the total number of bytes currently accounted for in the
// index. The value is read under the read lock.
func (d *DiskFS) Size() int64 {
	d.mu.RLock()
	current := d.size
	d.mu.RUnlock()
	return current
}

// Capacity returns the byte limit this DiskFS was created with. The value is
// set once in New and read here without locking.
func (d *DiskFS) Capacity() int64 {
	limit := d.capacity
	return limit
}

// getShardIndex maps key to a lock shard in the range [0, numLockShards).
// The key's bytes are hashed with 32-bit FNV-1a and the result is reduced
// modulo the shard count.
func getShardIndex(key string) int {
	const (
		offsetBasis uint32 = 2166136261 // FNV offset basis
		prime       uint32 = 16777619   // FNV prime
	)

	hash := offsetBasis
	for _, b := range []byte(key) {
		hash = (hash ^ uint32(b)) * prime
	}
	return int(hash % numLockShards)
}

// getKeyLock returns the RWMutex dedicated to key, creating it on first use.
// Locks are kept in the shard selected by getShardIndex.
//
// The common case (lock already exists) is served by a plain Load so no mutex
// is allocated; LoadOrStore is only reached for a key seen for the first time
// and still guarantees that concurrent callers end up with the same lock.
func (d *DiskFS) getKeyLock(key string) *sync.RWMutex {
	shard := &d.keyLocks[getShardIndex(key)]

	if existing, ok := shard.Load(key); ok {
		return existing.(*sync.RWMutex)
	}

	lock, _ := shard.LoadOrStore(key, &sync.RWMutex{})
	return lock.(*sync.RWMutex)
}

// Create opens a file for key for writing and registers it in the index with
// the declared size. An existing entry for the same key is dropped from the
// index first, and the file on disk is truncated by os.Create.
//
// key must be non-empty, must not start with '/', and must not contain ".."
// after cleaning; otherwise vfserror.ErrInvalidKey is returned. Errors from
// creating the directory or the file are returned as-is.
//
// The returned writer corrects the recorded size to the real file size when
// it is closed. The per-key lock is held only for the duration of Create, not
// while the caller writes.
func (d *DiskFS) Create(key string, size int64) (io.WriteCloser, error) {
	if key == "" || key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Normalise the key and refuse anything that could climb out of root.
	key = strings.ReplaceAll(filepath.Clean(key), "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	lock := d.getKeyLock(key)
	lock.Lock()
	defer lock.Unlock()

	// Forget any previous entry so its size is not counted twice.
	d.mu.Lock()
	if previous, found := d.info[key]; found {
		d.size -= previous.Size
		d.LRU.Remove(key)
		delete(d.info, key)
	}
	d.mu.Unlock()

	target := strings.ReplaceAll(filepath.Join(d.root, d.shardPath(key)), "\\", "/")
	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
		return nil, err
	}

	out, err := os.Create(target)
	if err != nil {
		return nil, err
	}

	entry := vfs.NewFileInfo(key, size)

	d.mu.Lock()
	d.info[key] = entry
	d.LRU.Add(key, entry)
	// Initialize access time with current time
	entry.UpdateAccessBatched(d.timeUpdater)
	d.size += size
	d.mu.Unlock()

	writer := &diskWriteCloser{
		file:         out,
		disk:         d,
		key:          key,
		declaredSize: size,
	}
	return writer, nil
}

// diskWriteCloser implements io.WriteCloser for disk files with size adjustment.
//
// file is the open destination, disk and key identify the index entry to fix
// up on Close, and declaredSize is the size the caller passed to Create
// (recorded here; Close compares against the indexed size, not this field).
type diskWriteCloser struct {
	file         *os.File
	disk         *DiskFS
	key          string
	declaredSize int64
}

// Write passes p straight through to the underlying file and reports the
// file's own byte count and error.
func (dwc *diskWriteCloser) Write(p []byte) (n int, err error) {
	n, err = dwc.file.Write(p)
	return n, err
}

// Close reconciles the index with what was really written and then closes
// the file.
//
// The file's size is read via Stat; if the key is still indexed, the entry's
// size is replaced with it and the difference is applied to the total. When
// Stat fails the file is still closed and the Stat error is returned.
func (dwc *diskWriteCloser) Close() error {
	stat, statErr := dwc.file.Stat()
	if statErr != nil {
		dwc.file.Close()
		return statErr
	}
	written := stat.Size()

	fs := dwc.disk
	fs.mu.Lock()
	if entry, indexed := fs.info[dwc.key]; indexed {
		// Apply the delta between what was recorded and what exists.
		fs.size += written - entry.Size
		entry.Size = written
	}
	fs.mu.Unlock()

	return dwc.file.Close()
}

// Open returns a reader for the file stored under key and marks the entry as
// most recently used.
//
// key must be non-empty, must not start with '/', and must not contain ".."
// after cleaning; otherwise vfserror.ErrInvalidKey is returned. A key that is
// not indexed yields vfserror.ErrNotFound. Errors from opening the file are
// returned as-is.
//
// Files whose indexed size exceeds 1MB are served through a read-only memory
// mapping; if mapping fails, the plain file handle is returned instead.
func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
	if key == "" {
		return nil, vfserror.ErrInvalidKey
	}
	if key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)
	keyMu.RLock()
	defer keyMu.RUnlock()

	// The write lock is needed because the LRU list is reordered.
	d.mu.Lock()
	fi, exists := d.info[key]
	if !exists {
		d.mu.Unlock()
		return nil, vfserror.ErrNotFound
	}
	fi.UpdateAccessBatched(d.timeUpdater)
	d.LRU.MoveToFront(key, d.timeUpdater)
	// Snapshot the size while the lock is held: a writer's Close updates
	// fi.Size under d.mu, so reading it after Unlock would be a data race.
	size := fi.Size
	d.mu.Unlock()

	shardedPath := d.shardPath(key)
	path := filepath.Join(d.root, shardedPath)
	path = strings.ReplaceAll(path, "\\", "/")

	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	// Use memory mapping for large files (>1MB) to improve performance
	const mmapThreshold = 1024 * 1024 // 1MB
	if size <= mmapThreshold {
		return file, nil
	}

	// Map the handle that is already open instead of closing and reopening
	// the file. If mapping fails, the same handle serves as the fallback, so
	// no code path can return a nil *os.File wrapped in a non-nil interface.
	mapped, err := mmap.Map(file, mmap.RDONLY, 0)
	if err != nil {
		return file, nil
	}

	return &mmapReadCloser{
		data:   mapped,
		file:   file,
		offset: 0,
	}, nil
}

// mmapReadCloser implements io.ReadCloser for memory-mapped files.
//
// data is the mapping, file is the handle it was created from (closed
// together with the mapping in Close), and offset is the position in data of
// the next byte Read will return.
type mmapReadCloser struct {
	data   mmap.MMap
	file   *os.File
	offset int
}

// Read copies the next bytes of the mapping into p and advances the read
// position by the number of bytes copied. Once the whole mapping has been
// consumed it returns 0 and io.EOF.
func (m *mmapReadCloser) Read(p []byte) (n int, err error) {
	if len(m.data) <= m.offset {
		return 0, io.EOF
	}

	copied := copy(p, m.data[m.offset:])
	m.offset += copied
	return copied, nil
}

// Close releases the memory mapping and closes the underlying file.
//
// Both steps are always attempted. If unmapping fails, that error is returned
// (it was previously discarded); otherwise the result of closing the file is
// returned.
func (m *mmapReadCloser) Close() error {
	unmapErr := m.data.Unmap()
	closeErr := m.file.Close()
	if unmapErr != nil {
		return unmapErr
	}
	return closeErr
}

// Delete removes the file stored under key from the index and from disk.
//
// key is validated and normalised exactly as in Create and Open (non-empty,
// no leading '/', cleaned, no ".."), so a key is found under the same
// spelling it was stored with; invalid keys yield vfserror.ErrInvalidKey. A
// key that is not indexed yields vfserror.ErrNotFound. The index entry is
// dropped before the file is removed; an error from os.Remove is returned
// as-is.
func (d *DiskFS) Delete(key string) error {
	if key == "" {
		return vfserror.ErrInvalidKey
	}
	if key[0] == '/' {
		return vfserror.ErrInvalidKey
	}

	// Same normalisation as Create/Open: without it "a//b" would never match
	// the entry Create stored as "a/b".
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)
	keyMu.Lock()
	defer keyMu.Unlock()

	d.mu.Lock()
	fi, exists := d.info[key]
	if !exists {
		d.mu.Unlock()
		return vfserror.ErrNotFound
	}
	d.size -= fi.Size
	d.LRU.Remove(key)
	delete(d.info, key)
	d.mu.Unlock()

	shardedPath := d.shardPath(key)
	path := filepath.Join(d.root, shardedPath)
	path = strings.ReplaceAll(path, "\\", "/")

	return os.Remove(path)
}

// Stat returns the FileInfo for key.
//
// key is validated and normalised exactly as in Create and Open (non-empty,
// no leading '/', cleaned, no ".."); invalid keys yield
// vfserror.ErrInvalidKey. This matters here because Stat may consult the
// file system for keys that are not indexed, and an unchecked key could
// otherwise be used to probe paths outside the cache root.
//
// If the key is indexed, the indexed entry is returned. Otherwise, if a
// regular file exists at the key's sharded location (for example left over
// from a migration), a FileInfo built from the on-disk metadata is returned
// without adding it to the index. In every other case vfserror.ErrNotFound is
// returned.
func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
	if key == "" {
		return nil, vfserror.ErrInvalidKey
	}
	if key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)
	keyMu.RLock()
	defer keyMu.RUnlock()

	// Hold the index lock only for the map lookup; the disk probe below must
	// not stall every other index operation.
	d.mu.RLock()
	fi, ok := d.info[key]
	d.mu.RUnlock()
	if ok {
		return fi, nil
	}

	// Check if file exists on disk but wasn't indexed (for migration)
	shardedPath := d.shardPath(key)
	path := filepath.Join(d.root, shardedPath)
	path = strings.ReplaceAll(path, "\\", "/")

	info, err := os.Stat(path)
	if err != nil || info.IsDir() {
		// Shard directories are not cache entries.
		return nil, vfserror.ErrNotFound
	}

	// The entry is reported but deliberately not inserted into the index.
	return vfs.NewFileInfoFromOS(info, key), nil
}

// EvictLRU evicts least recently used files until the accounted size is at
// most capacity-bytesNeeded or nothing is left to evict. It returns the
// number of bytes released from the accounting.
func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	target := d.capacity - int64(bytesNeeded)

	var evicted uint
	for d.size > target {
		// The back of the list is the least recently used entry.
		elem := d.LRU.list.Back()
		if elem == nil {
			break
		}

		evicted += d.evictLocked(elem.Value.(*vfs.FileInfo))

		// evictLocked removes by key. If this element was not the one the key
		// map pointed at, it is still in the list; unlink it directly so the
		// loop always makes progress.
		if d.LRU.list.Back() == elem {
			d.LRU.list.Remove(elem)
		}
	}

	return evicted
}

// EvictBySize evicts files by size (ascending = smallest first, descending =
// largest first) until the accounted size is at most capacity-bytesNeeded or
// every file has been considered. It returns the number of bytes released
// from the accounting.
func (d *DiskFS) EvictBySize(bytesNeeded uint, ascending bool) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	if d.size <= d.capacity-int64(bytesNeeded) {
		return 0 // already enough room; skip collecting and sorting
	}

	candidates := d.candidatesLocked()
	sort.Slice(candidates, func(i, j int) bool {
		if ascending {
			return candidates[i].Size < candidates[j].Size
		}
		return candidates[i].Size > candidates[j].Size
	})

	return d.evictInOrderLocked(candidates, bytesNeeded)
}

// EvictFIFO evicts files in order of creation time, oldest first, until the
// accounted size is at most capacity-bytesNeeded or every file has been
// considered. It returns the number of bytes released from the accounting.
func (d *DiskFS) EvictFIFO(bytesNeeded uint) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	if d.size <= d.capacity-int64(bytesNeeded) {
		return 0 // already enough room; skip collecting and sorting
	}

	candidates := d.candidatesLocked()
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].CTime.Before(candidates[j].CTime)
	})

	return d.evictInOrderLocked(candidates, bytesNeeded)
}

// candidatesLocked returns every indexed FileInfo in unspecified order.
// The caller must hold d.mu.
func (d *DiskFS) candidatesLocked() []*vfs.FileInfo {
	candidates := make([]*vfs.FileInfo, 0, len(d.info))
	for _, fi := range d.info {
		candidates = append(candidates, fi)
	}
	return candidates
}

// evictInOrderLocked evicts candidates front to back until the accounted size
// is at most capacity-bytesNeeded, returning the bytes released.
// The caller must hold d.mu.
func (d *DiskFS) evictInOrderLocked(candidates []*vfs.FileInfo, bytesNeeded uint) uint {
	target := d.capacity - int64(bytesNeeded)

	var evicted uint
	for _, fi := range candidates {
		if d.size <= target {
			break
		}
		evicted += d.evictLocked(fi)
	}
	return evicted
}

// evictLocked drops fi from the index and the LRU list and deletes its file,
// returning the number of bytes released from the accounting.
// The caller must hold d.mu.
//
// A file that is already missing on disk counts as evicted, so its bytes do
// not stay in the size total forever. Any other removal error is logged; the
// entry is still dropped from the index but its bytes remain counted and 0 is
// returned, because the file continues to occupy disk space.
func (d *DiskFS) evictLocked(fi *vfs.FileInfo) uint {
	key := fi.Key

	d.LRU.Remove(key)
	delete(d.info, key)

	path := strings.ReplaceAll(filepath.Join(d.root, d.shardPath(key)), "\\", "/")
	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		logger.Logger.Info().
			Str("key", key).
			Str("error", err.Error()).
			Msg("eviction: failed to remove file")
		return 0
	}

	d.size -= fi.Size

	// Clean up key lock
	d.keyLocks[getShardIndex(key)].Delete(key)

	return uint(fi.Size)
}