steamcache2/vfs/disk/disk.go

// vfs/disk/disk.go
package disk

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"s1d3sw1ped/steamcache2/steamcache/logger"
	"s1d3sw1ped/steamcache2/vfs"
	"s1d3sw1ped/steamcache2/vfs/locks"
	"s1d3sw1ped/steamcache2/vfs/lru"
	"s1d3sw1ped/steamcache2/vfs/vfserror"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/docker/go-units"
	"github.com/edsrzf/mmap-go"
)

// Compile-time assertion that *DiskFS satisfies the vfs.VFS interface.
var _ vfs.VFS = (*DiskFS)(nil)

// DiskFS is a vfs.VFS implementation that keeps its files under a root
// directory on the local disk and tracks them with an in-memory index.
type DiskFS struct {
	root     string // directory under which all files are stored
	capacity int64  // size limit passed to New

	mu   sync.RWMutex                // guards info, size and LRU
	info map[string]*vfs.FileInfo    // index of known files, keyed by cache key
	size int64                       // running total of tracked file sizes
	LRU  *lru.LRUList[*vfs.FileInfo] // recency ordering consumed by EvictLRU

	keyLocks    []sync.Map             // sharded per-key lock pools
	timeUpdater *vfs.BatchedTimeUpdate // batched clock for access-time updates
}

// shardPath maps a cache key to its relative on-disk path.
//
// Keys that do not start with "steam/" are returned unchanged. For Steam keys
// the part after the prefix is fanned out into sub-directories so that no
// single directory collects every file:
//
//	4+ chars: steam/<chars 0-1>/<chars 2-3>/<hash>
//	2-3 chars: steam/<chars 0-1>/<hash>
//	0-1 chars: steam/<hash>
func (d *DiskFS) shardPath(key string) string {
	const prefix = "steam/"
	if !strings.HasPrefix(key, prefix) {
		return key
	}

	hash := key[len(prefix):]
	segments := []string{"steam"}

	switch {
	case len(hash) >= 4:
		// Two shard levels for regular (long) hashes.
		segments = append(segments, hash[:2], hash[2:4])
	case len(hash) >= 2:
		// Only enough characters for a single shard level.
		segments = append(segments, hash[:2])
	}

	segments = append(segments, hash)
	return filepath.Join(segments...)
}

// New creates a DiskFS that stores its files under root and is limited to
// capacity.
//
// It panics if capacity is not greater than zero. The root directory is
// created when missing; because New has no error return, a failure to create
// it is logged instead of being silently dropped.
func New(root string, capacity int64) *DiskFS {
	if capacity <= 0 {
		panic("disk capacity must be greater than 0")
	}

	// Create root directory if it doesn't exist. Create repeats MkdirAll for
	// each file's directory, so a persistent problem is reported again there.
	if err := os.MkdirAll(root, 0755); err != nil {
		logger.Logger.Error().
			Str("root", root).
			Str("error", err.Error()).
			Msg("failed to create disk cache root directory")
	}

	// Initialize sharded locks
	keyLocks := make([]sync.Map, locks.NumLockShards)

	d := &DiskFS{
		root:        root,
		info:        make(map[string]*vfs.FileInfo),
		capacity:    capacity,
		size:        0,
		keyLocks:    keyLocks,
		LRU:         lru.NewLRUList[*vfs.FileInfo](),
		timeUpdater: vfs.NewBatchedTimeUpdate(100 * time.Millisecond), // Update time every 100ms
	}

	d.init()
	return d
}

// init prepares a freshly constructed DiskFS and logs a summary line.
//
// No files are indexed here. The total size is computed by a goroutine that
// is started just before the summary is logged, so the logged size may not
// yet reflect what is on disk.
func (d *DiskFS) init() {
	began := time.Now()

	d.scanDirectoriesOnly()

	// Size accounting runs concurrently; it stores its result in d.size.
	go d.calculateSizeInBackground()

	capacityText := units.HumanSize(float64(d.capacity))
	sizeText := units.HumanSize(float64(d.Size()))
	fileCount := fmt.Sprint(len(d.info))
	elapsed := time.Since(began).String()

	logger.Logger.Info().
		Str("name", d.Name()).
		Str("root", d.root).
		Str("capacity", capacityText).
		Str("size", sizeText).
		Str("files", fileCount).
		Str("duration", elapsed).
		Msg("init")
}

// scanDirectoriesOnly is the startup "scan" step. It touches no files or
// directories; it only emits a debug log entry, because files are indexed
// on demand by Stat/Open.
func (d *DiskFS) scanDirectoriesOnly() {
	const msg = "Directory structure scan completed (lazy file discovery enabled)"

	logger.Logger.Debug().Str("root", d.root).Msg(msg)
}

// calculateSizeInBackground walks everything under the root directory, sums
// the sizes of the files it finds and stores the sum in d.size.
//
// It is meant to run on its own goroutine (init starts it with `go`). While
// the walk is in progress a debug line with the current progress is logged
// every two seconds.
func (d *DiskFS) calculateSizeInBackground() {
	tstart := time.Now()

	// Channel for collecting file information
	fileChan := make(chan fileSizeInfo, 1000)

	// totalFiles is incremented with atomic.AddInt64 by the scanner
	// goroutines, so it must only be read with atomic.LoadInt64 here.
	var totalFiles int64

	progressTicker := time.NewTicker(2 * time.Second)
	defer progressTicker.Stop()

	// Start directory scanner; closing fileChan signals the end of the walk.
	go func() {
		defer close(fileChan)
		d.scanFilesForSize(d.root, fileChan, &totalFiles)
	}()

	// Collect results on this goroutine. totalSize and processedFiles are
	// only touched here, so they need no synchronization.
	var totalSize, processedFiles int64

collect:
	for {
		select {
		case fi, ok := <-fileChan:
			if !ok {
				break collect
			}
			totalSize += fi.size
			processedFiles++
		case <-progressTicker.C:
			if total := atomic.LoadInt64(&totalFiles); total > 0 {
				logger.Logger.Debug().
					Int64("processed", processedFiles).
					Int64("total", total).
					Int64("size", totalSize).
					Float64("progress", float64(processedFiles)/float64(total)*100).
					Msg("Background size calculation progress")
			}
		}
	}

	// Update the total size.
	// NOTE(review): this overwrites d.size, so any adjustments made by
	// Create/Delete/evictions while the walk was running are replaced by the
	// scanned total. Fixing that needs changes outside this function.
	d.mu.Lock()
	d.size = totalSize
	d.mu.Unlock()

	logger.Logger.Info().
		Int64("files_scanned", processedFiles).
		Int64("total_size", totalSize).
		Str("duration", time.Since(tstart).String()).
		Msg("Background size calculation completed")
}

// fileSizeInfo is the message sent over the channel from scanFilesForSize to
// calculateSizeInBackground: one value per file found on disk.
type fileSizeInfo struct {
	size int64 // file size as reported by the directory entry's Info()
}

// scanFilesForSize recursively walks dirPath and sends one fileSizeInfo per
// file to fileChan. It returns once the whole subtree has been processed.
//
// totalFiles is increased atomically by the number of non-directory entries
// in each directory visited; it is used for progress reporting only.
// Directories that cannot be read and entries whose Info() fails are skipped
// silently.
func (d *DiskFS) scanFilesForSize(dirPath string, fileChan chan<- fileSizeInfo, totalFiles *int64) {
	// Use ReadDir for faster directory listing
	entries, err := os.ReadDir(dirPath)
	if err != nil {
		return
	}

	// Count files first for progress tracking
	fileCount := 0
	for _, entry := range entries {
		if !entry.IsDir() {
			fileCount++
		}
	}
	atomic.AddInt64(totalFiles, int64(fileCount))

	// Each directory level gets its own pool of 16 worker slots. Recursive
	// calls create their own pool, so a worker that is busy with a
	// subdirectory never waits on a slot of its parent.
	semaphore := make(chan struct{}, 16)
	var wg sync.WaitGroup

	for _, entry := range entries {
		// Acquire the slot BEFORE spawning, so at most cap(semaphore)
		// goroutines exist per directory instead of one per entry.
		semaphore <- struct{}{}
		wg.Add(1)

		if entry.IsDir() {
			// Recursively scan subdirectories
			go func(path string) {
				defer wg.Done()
				defer func() { <-semaphore }() // Release semaphore
				d.scanFilesForSize(path, fileChan, totalFiles)
			}(filepath.Join(dirPath, entry.Name()))
			continue
		}

		// Process file for size only
		go func(entry os.DirEntry) {
			defer wg.Done()
			defer func() { <-semaphore }() // Release semaphore

			info, err := entry.Info()
			if err != nil {
				return
			}

			fileChan <- fileSizeInfo{size: info.Size()}
		}(entry)
	}

	wg.Wait()
}

// Name reports the identifier of this VFS implementation, "DiskFS".
func (d *DiskFS) Name() string {
	const name = "DiskFS"
	return name
}

// Size reports the currently tracked total size. The value is read under
// the read lock of d.mu.
func (d *DiskFS) Size() int64 {
	d.mu.RLock()
	current := d.size
	d.mu.RUnlock()

	return current
}

// Capacity reports the size limit this DiskFS was constructed with.
func (d *DiskFS) Capacity() int64 {
	limit := d.capacity
	return limit
}

// getKeyLock looks up the lock for key in this DiskFS's sharded lock pools
// via locks.GetKeyLock.
func (d *DiskFS) getKeyLock(key string) *sync.RWMutex {
	keyMu := locks.GetKeyLock(d.keyLocks, key)
	return keyMu
}

// Create creates (or truncates) the file for key and returns a writer for it.
//
// key must be non-empty, must not start with '/', and must not contain ".."
// after cleaning; otherwise vfserror.ErrInvalidKey is returned. size is the
// declared size used for accounting until the returned writer is closed, at
// which point the actual on-disk size replaces it.
//
// Errors from creating the directory or the file are returned as-is. In that
// case the index and the tracked size are left untouched.
func (d *DiskFS) Create(key string, size int64) (io.WriteCloser, error) {
	if key == "" || key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)
	keyMu.Lock()
	defer keyMu.Unlock()

	path := filepath.Join(d.root, d.shardPath(key))
	path = strings.ReplaceAll(path, "\\", "/")
	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return nil, err
	}

	file, err := os.Create(path)
	if err != nil {
		return nil, err
	}

	fi := vfs.NewFileInfo(key, size)

	// Swap the index entry only after the file exists on disk, and do it in
	// one critical section: dropping the old entry up front would leave the
	// index and size out of sync with the disk whenever the create fails.
	d.mu.Lock()
	if old, exists := d.info[key]; exists {
		d.size -= old.Size
		d.LRU.Remove(key)
		delete(d.info, key)
	}
	d.info[key] = fi
	d.LRU.Add(key, fi)
	// Initialize access time with current time
	fi.UpdateAccessBatched(d.timeUpdater)
	// Account for the declared size; Close adjusts it to the real size.
	d.size += size
	d.mu.Unlock()

	return &diskWriteCloser{
		file:         file,
		disk:         d,
		key:          key,
		declaredSize: size,
	}, nil
}

// diskWriteCloser is the io.WriteCloser returned by DiskFS.Create. Writes go
// straight to the file; Close reconciles the tracked size with the number of
// bytes actually on disk.
type diskWriteCloser struct {
	disk         *DiskFS  // owning file system whose index is updated on Close
	key          string   // sanitized key under which the file is indexed
	declaredSize int64    // size that was passed to Create
	file         *os.File // open handle that receives the writes
}

// Write forwards p to the underlying file and returns its result unchanged.
func (dwc *diskWriteCloser) Write(p []byte) (n int, err error) {
	n, err = dwc.file.Write(p)
	return n, err
}

// Close records the real size of the written file and closes it.
//
// If the file cannot be stat'ed, the handle is still closed and the stat
// error is returned without touching the accounting. Otherwise the indexed
// entry for the key (if one still exists) gets the actual size, the tracked
// total is adjusted by the difference, and the result of closing the file is
// returned.
func (dwc *diskWriteCloser) Close() error {
	stat, statErr := dwc.file.Stat()
	if statErr != nil {
		dwc.file.Close()
		return statErr
	}

	written := stat.Size()
	fs := dwc.disk

	fs.mu.Lock()
	if entry, ok := fs.info[dwc.key]; ok {
		// Replace the previously recorded size by what is really on disk.
		fs.size += written - entry.Size
		entry.Size = written
	}
	fs.mu.Unlock()

	return dwc.file.Close()
}

// Open opens the file for key for reading.
//
// key is validated and cleaned first; invalid keys yield
// vfserror.ErrInvalidKey. Files not yet in the index are looked up on disk
// through Stat (lazy discovery). A successful lookup refreshes the entry's
// access time and moves it to the front of the LRU list.
//
// Files whose indexed size exceeds 1 MiB are served through a read-only
// memory mapping; if mapping fails the plain file handle is returned instead.
// The caller must Close the returned reader.
func (d *DiskFS) Open(key string) (io.ReadCloser, error) {
	if key == "" || key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	// First, try to get the file info
	d.mu.RLock()
	fi, exists := d.info[key]
	d.mu.RUnlock()

	if !exists {
		// Try lazy discovery
		var err error
		fi, err = d.Stat(key)
		if err != nil {
			return nil, err
		}
	}

	// Update access time and LRU. The size is read in the same critical
	// section because diskWriteCloser.Close modifies it under d.mu.
	d.mu.Lock()
	fi.UpdateAccessBatched(d.timeUpdater)
	d.LRU.MoveToFront(key, d.timeUpdater)
	size := fi.Size
	d.mu.Unlock()

	path := filepath.Join(d.root, d.shardPath(key))
	path = strings.ReplaceAll(path, "\\", "/")

	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	// Use memory mapping for large files (>1MB) to improve performance
	const mmapThreshold = 1024 * 1024 // 1MB
	if size <= mmapThreshold {
		return file, nil
	}

	// Map the handle that is already open instead of closing and reopening
	// the file. Nothing has been read yet, so if mapping fails the same
	// handle can simply be used for regular reads.
	mapped, err := mmap.Map(file, mmap.RDONLY, 0)
	if err != nil {
		return file, nil
	}

	return &mmapReadCloser{
		data:   mapped,
		file:   file,
		offset: 0,
	}, nil
}

// mmapReadCloser serves a memory-mapped file through the io.ReadCloser
// interface, handing out the mapped bytes sequentially.
type mmapReadCloser struct {
	file   *os.File  // handle backing the mapping; closed by Close
	data   mmap.MMap // mapped file contents
	offset int       // index in data of the next byte Read returns
}

// Read copies the next unread bytes of the mapping into p and advances the
// read position. It returns 0 and io.EOF once every byte has been consumed.
func (m *mmapReadCloser) Read(p []byte) (n int, err error) {
	if m.offset < len(m.data) {
		pending := m.data[m.offset:]
		n = copy(p, pending)
		m.offset += n
		return n, nil
	}

	return 0, io.EOF
}

// Close releases the memory mapping and closes the underlying file.
//
// Both steps are always attempted. If unmapping fails that error is
// returned; otherwise the result of closing the file is returned.
func (m *mmapReadCloser) Close() error {
	unmapErr := m.data.Unmap()
	closeErr := m.file.Close()

	if unmapErr != nil {
		return unmapErr
	}
	return closeErr
}

// Delete removes the file for key from the index and from disk.
//
// key is validated and normalized the same way Create and Open do it, so a
// key is found again regardless of how it was spelled when it was created,
// and keys containing ".." are rejected with vfserror.ErrInvalidKey before
// any path is built. vfserror.ErrNotFound is returned when the key is not in
// the index. An error from removing the file is returned as-is; the index
// entry has already been dropped at that point.
func (d *DiskFS) Delete(key string) error {
	if key == "" || key[0] == '/' {
		return vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)
	keyMu.Lock()
	defer keyMu.Unlock()

	d.mu.Lock()
	fi, exists := d.info[key]
	if !exists {
		d.mu.Unlock()
		return vfserror.ErrNotFound
	}
	d.size -= fi.Size
	d.LRU.Remove(key)
	delete(d.info, key)
	d.mu.Unlock()

	path := filepath.Join(d.root, d.shardPath(key))
	path = strings.ReplaceAll(path, "\\", "/")

	return os.Remove(path)
}

// Stat returns the FileInfo for key, discovering the file on disk if it is
// not indexed yet.
//
// key is validated and normalized the same way Create and Open do it;
// invalid keys yield vfserror.ErrInvalidKey. If the key is not in the index,
// the corresponding path is stat'ed: a regular hit is added to the index and
// the LRU list, anything else (missing file, stat error, directory) yields
// vfserror.ErrNotFound. Discovered files are not added to the tracked size.
func (d *DiskFS) Stat(key string) (*vfs.FileInfo, error) {
	if key == "" || key[0] == '/' {
		return nil, vfserror.ErrInvalidKey
	}

	// Sanitize key to prevent path traversal. Without this, a key such as
	// "../x" would be stat'ed outside the root and then indexed, where a
	// later eviction could remove it.
	key = filepath.Clean(key)
	key = strings.ReplaceAll(key, "\\", "/")
	if strings.Contains(key, "..") {
		return nil, vfserror.ErrInvalidKey
	}

	keyMu := d.getKeyLock(key)

	// First, try to get the file info with read lock
	keyMu.RLock()
	d.mu.RLock()
	fi, ok := d.info[key]
	d.mu.RUnlock()
	keyMu.RUnlock()
	if ok {
		return fi, nil
	}

	// Lazy discovery: check if file exists on disk and index it
	path := filepath.Join(d.root, d.shardPath(key))
	path = strings.ReplaceAll(path, "\\", "/")

	info, err := os.Stat(path)
	if err != nil || info.IsDir() {
		// Directories (e.g. the key "steam", or "." which resolves to the
		// root itself) are not cache entries and must never be indexed.
		return nil, vfserror.ErrNotFound
	}

	// File exists, add it to the index with write lock
	keyMu.Lock()
	defer keyMu.Unlock()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Double-check after acquiring write lock: another caller may have
	// indexed the key while no lock was held.
	if existing, ok := d.info[key]; ok {
		return existing, nil
	}

	// Create and add file info
	fi = vfs.NewFileInfoFromOS(info, key)
	d.info[key] = fi
	d.LRU.Add(key, fi)
	fi.UpdateAccessBatched(d.timeUpdater)
	// Note: Don't add to d.size here as it's being calculated in background
	// The background calculation will handle the total size

	return fi, nil
}

// EvictLRU evicts files starting from the back of the LRU list until the
// tracked size is at most capacity-bytesNeeded or the list is empty.
//
// It returns the number of bytes by which the tracked size was reduced. An
// entry whose file is already gone from disk counts as evicted, so that its
// size does not stay in the total forever. If removal fails for any other
// reason the entry is still dropped from the index (otherwise the loop would
// retry it endlessly), the failure is logged, and its size is not counted.
//
// d.mu is held for the whole call, including the file removals.
func (d *DiskFS) EvictLRU(bytesNeeded uint) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	var evicted uint

	// Evict from LRU list until we free enough space
	for d.size > d.capacity-int64(bytesNeeded) && d.LRU.Len() > 0 {
		// Get the least recently used item
		elem := d.LRU.Back()
		if elem == nil {
			break
		}

		fi := elem.Value.(*vfs.FileInfo)
		key := fi.Key

		// Remove from LRU and index first so the loop always makes progress.
		d.LRU.Remove(key)
		delete(d.info, key)

		// Remove file from disk
		path := filepath.Join(d.root, d.shardPath(key))
		path = strings.ReplaceAll(path, "\\", "/")

		if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
			logger.Logger.Warn().
				Str("key", key).
				Str("path", path).
				Str("error", err.Error()).
				Msg("failed to remove evicted file")
			continue
		}

		// Update size
		d.size -= fi.Size
		evicted += uint(fi.Size)

		// Clean up key lock
		shardIndex := locks.GetShardIndex(key)
		d.keyLocks[shardIndex].Delete(key)
	}

	return evicted
}

// EvictBySize evicts indexed files in order of size until the tracked size
// is at most capacity-bytesNeeded or no candidates remain. With ascending
// set the smallest files go first, otherwise the largest.
//
// It returns the number of bytes by which the tracked size was reduced. An
// entry whose file is already gone from disk counts as evicted, so that its
// size does not stay in the total forever. If removal fails for any other
// reason the entry is still dropped from the index, the failure is logged,
// and its size is not counted.
//
// d.mu is held for the whole call, including the file removals.
func (d *DiskFS) EvictBySize(bytesNeeded uint, ascending bool) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	var evicted uint

	// Collect all files
	candidates := make([]*vfs.FileInfo, 0, len(d.info))
	for _, fi := range d.info {
		candidates = append(candidates, fi)
	}

	// Sort by size
	sort.Slice(candidates, func(i, j int) bool {
		if ascending {
			return candidates[i].Size < candidates[j].Size
		}
		return candidates[i].Size > candidates[j].Size
	})

	// Evict files until we free enough space
	for _, fi := range candidates {
		if d.size <= d.capacity-int64(bytesNeeded) {
			break
		}

		key := fi.Key

		// Remove from LRU and index
		d.LRU.Remove(key)
		delete(d.info, key)

		// Remove file from disk
		path := filepath.Join(d.root, d.shardPath(key))
		path = strings.ReplaceAll(path, "\\", "/")

		if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
			logger.Logger.Warn().
				Str("key", key).
				Str("path", path).
				Str("error", err.Error()).
				Msg("failed to remove evicted file")
			continue
		}

		// Update size
		d.size -= fi.Size
		evicted += uint(fi.Size)

		// Clean up key lock
		shardIndex := locks.GetShardIndex(key)
		d.keyLocks[shardIndex].Delete(key)
	}

	return evicted
}

// EvictFIFO evicts indexed files in order of creation time (oldest CTime
// first) until the tracked size is at most capacity-bytesNeeded or no
// candidates remain.
//
// It returns the number of bytes by which the tracked size was reduced. An
// entry whose file is already gone from disk counts as evicted, so that its
// size does not stay in the total forever. If removal fails for any other
// reason the entry is still dropped from the index, the failure is logged,
// and its size is not counted.
//
// d.mu is held for the whole call, including the file removals.
func (d *DiskFS) EvictFIFO(bytesNeeded uint) uint {
	d.mu.Lock()
	defer d.mu.Unlock()

	var evicted uint

	// Collect all files
	candidates := make([]*vfs.FileInfo, 0, len(d.info))
	for _, fi := range d.info {
		candidates = append(candidates, fi)
	}

	// Sort by creation time (oldest first)
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].CTime.Before(candidates[j].CTime)
	})

	// Evict oldest files until we free enough space
	for _, fi := range candidates {
		if d.size <= d.capacity-int64(bytesNeeded) {
			break
		}

		key := fi.Key

		// Remove from LRU and index
		d.LRU.Remove(key)
		delete(d.info, key)

		// Remove file from disk
		path := filepath.Join(d.root, d.shardPath(key))
		path = strings.ReplaceAll(path, "\\", "/")

		if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
			logger.Logger.Warn().
				Str("key", key).
				Str("path", path).
				Str("error", err.Error()).
				Msg("failed to remove evicted file")
			continue
		}

		// Update size
		d.size -= fi.Size
		evicted += uint(fi.Size)

		// Clean up key lock
		shardIndex := locks.GetShardIndex(key)
		d.keyLocks[shardIndex].Delete(key)
	}

	return evicted
}