Enhance garbage collection and caching functionality
All checks were successful
PR Check / check-and-test (pull_request) Successful in 21s

- Updated .gitignore to ignore all .exe files while ensuring .smashignore remains tracked.
- Expanded README.md with advanced configuration options for garbage collection algorithms, detailing available algorithms and use cases.
- Modified launch.json to include memory and disk garbage collection flags for better configuration.
- Refactored root.go to introduce memoryGC and diskGC flags for garbage collection algorithms.
- Implemented hash extraction and verification in steamcache.go to ensure data integrity during caching.
- Added new tests in steamcache_test.go for hash extraction and verification, ensuring correctness of caching behavior.
- Enhanced garbage collection strategies in gc.go, introducing LFU, FIFO, Largest, Smallest, and Hybrid algorithms with corresponding metrics.
- Updated caching logic to conditionally cache responses based on hash verification results.
This commit is contained in:
2025-07-19 02:27:04 -05:00
parent 00792d87a5
commit 163e64790c
9 changed files with 1037 additions and 79 deletions

View File

@@ -3,17 +3,23 @@ package steamcache
import (
"context"
"crypto/sha1"
"encoding/hex"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
"s1d3sw1ped/SteamCache2/steamcache/logger"
"s1d3sw1ped/SteamCache2/vfs"
"s1d3sw1ped/SteamCache2/vfs/cache"
"s1d3sw1ped/SteamCache2/vfs/disk"
"s1d3sw1ped/SteamCache2/vfs/gc"
"s1d3sw1ped/SteamCache2/vfs/memory"
"sort"
"strings"
"sync"
"time"
@@ -24,6 +30,14 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// min reports the smaller of the two integers a and b.
// When both are equal that shared value is returned.
//
// NOTE(review): on Go 1.21+ this package-level function shadows the
// predeclared min builtin within this package.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
var (
requestsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
@@ -41,15 +55,97 @@ var (
[]string{"status"},
)
responseTime = promauto.NewHistogram(
responseTime = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "response_time_seconds",
Help: "Response time in seconds",
Buckets: prometheus.DefBuckets,
},
[]string{"cache_status"},
)
)
// hashVerificationTotal is a Prometheus counter vector, exported as
// "hash_verification_total", that counts hash verification attempts.
// Each increment is partitioned by the single "result" label, so one
// time series exists per distinct outcome string passed to WithLabelValues.
var hashVerificationTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "hash_verification_total",
Help: "Total hash verification attempts",
},
// Label names, in the order callers must supply values.
[]string{"result"},
)
// hashFilenamePatterns lists the filename shapes that are treated as carrying
// a SHA1 hash. The expressions are compiled once at package initialisation
// instead of on every call, because extractHashFromFilename is invoked while
// serving requests.
var hashFilenamePatterns = []*regexp.Regexp{
	regexp.MustCompile(`^([a-fA-F0-9]{40})$`),  // Standalone SHA1 hash (40 hex chars)
	regexp.MustCompile(`^([a-fA-F0-9]{40})\.`), // SHA1 hash followed by an extension
}

// extractHashFromFilename extracts a SHA1 hash from filename if one is present.
//
// A hash is recognised only when the name starts with exactly 40 hexadecimal
// characters and those characters are either the whole name ("<hash>") or are
// immediately followed by a dot ("<hash>.ext"). Names that merely contain a
// hash elsewhere (for example "chunk_<hash>") are not matched.
//
// It returns the hash normalised to lower case and true on a match, or the
// empty string and false otherwise.
func extractHashFromFilename(filename string) (string, bool) {
	for _, pattern := range hashFilenamePatterns {
		if matches := pattern.FindStringSubmatch(filename); len(matches) > 1 {
			return strings.ToLower(matches[1]), true
		}
	}

	// Debug aid: only manifest-looking names are logged when no hash is found.
	if strings.Contains(filename, "manifest") {
		logger.Logger.Debug().
			Str("filename", filename).
			Msg("No hash pattern found in manifest filename")
	}

	return "", false
}
// calculateFileHash returns the SHA1 digest of data, rendered as a
// lower-case hexadecimal string (40 characters).
func calculateFileHash(data []byte) string {
	digest := sha1.New()
	digest.Write(data) // hash.Hash writes never return an error
	return hex.EncodeToString(digest.Sum(nil))
}
// calculateResponseHash returns the hex-encoded SHA1 digest of a canonical
// text rendering of resp together with bodyData.
//
// The hashed byte stream is, in order:
//   - a status line "HTTP/1.1 <StatusCode> <Status>\n",
//   - one "Name: value\n" line per header value, sorted lexicographically so
//     that map iteration order does not influence the result,
//   - a single "\n" separator,
//   - the raw body bytes.
func calculateResponseHash(resp *http.Response, bodyData []byte) string {
	digest := sha1.New()

	// NOTE(review): net/http documents Status as e.g. "200 OK", so the numeric
	// code normally appears twice in this line. Left unchanged so that digests
	// stay identical to what this function has always produced.
	fmt.Fprintf(digest, "HTTP/1.1 %d %s\n", resp.StatusCode, resp.Status)

	// Flatten the header map into individual lines, then order them.
	lines := make([]string, 0, len(resp.Header))
	for name, vals := range resp.Header {
		for i := range vals {
			lines = append(lines, fmt.Sprintf("%s: %s\n", name, vals[i]))
		}
	}
	sort.Strings(lines)
	for i := range lines {
		digest.Write([]byte(lines[i]))
	}

	// Blank line separating headers from the body, then the body itself.
	digest.Write([]byte{'\n'})
	digest.Write(bodyData)

	return hex.EncodeToString(digest.Sum(nil))
}
// verifyFileHash reports whether the SHA1 digest of data, as produced by
// calculateFileHash, equals expectedHash. The comparison is case-insensitive,
// so upper- and lower-case hex spellings of the same digest both match.
func verifyFileHash(data []byte, expectedHash string) bool {
	return strings.EqualFold(expectedHash, calculateFileHash(data))
}
// verifyResponseHash reports whether the digest that calculateResponseHash
// produces for resp and bodyData equals expectedHash. The comparison is
// case-insensitive.
func verifyResponseHash(resp *http.Response, bodyData []byte, expectedHash string) bool {
	return strings.EqualFold(expectedHash, calculateResponseHash(resp, bodyData))
}
type SteamCache struct {
address string
upstream string
@@ -68,7 +164,7 @@ type SteamCache struct {
wg sync.WaitGroup
}
func New(address string, memorySize string, diskSize string, diskPath, upstream string) *SteamCache {
func New(address string, memorySize string, diskSize string, diskPath, upstream, memoryGC, diskGC string) *SteamCache {
memorysize, err := units.FromHumanSize(memorySize)
if err != nil {
panic(err)
@@ -80,21 +176,29 @@ func New(address string, memorySize string, diskSize string, diskPath, upstream
}
c := cache.New(
gc.PromotionDecider,
gc.AdaptivePromotionDeciderFunc,
)
var m *memory.MemoryFS
var mgc *gc.GCFS
if memorysize > 0 {
m = memory.New(memorysize)
mgc = gc.New(m, gc.LRUGC)
memoryGCAlgo := gc.GCAlgorithm(memoryGC)
if memoryGCAlgo == "" {
memoryGCAlgo = gc.LRU // default to LRU
}
mgc = gc.New(m, gc.GetGCAlgorithm(memoryGCAlgo))
}
var d *disk.DiskFS
var dgc *gc.GCFS
if disksize > 0 {
d = disk.New(diskPath, disksize)
dgc = gc.New(d, gc.LRUGC)
diskGCAlgo := gc.GCAlgorithm(diskGC)
if diskGCAlgo == "" {
diskGCAlgo = gc.LRU // default to LRU
}
dgc = gc.New(d, gc.GetGCAlgorithm(diskGCAlgo))
}
// configure the cache to match the specified mode (memory only, disk only, or memory and disk) based on the provided sizes
@@ -152,6 +256,14 @@ func New(address string, memorySize string, diskSize string, diskPath, upstream
},
}
// Log GC algorithm configuration
if m != nil {
logger.Logger.Info().Str("memory_gc", memoryGC).Msg("Memory cache GC algorithm configured")
}
if d != nil {
logger.Logger.Info().Str("disk_gc", diskGC).Msg("Disk cache GC algorithm configured")
}
if d != nil {
if d.Size() > d.Capacity() {
gc.LRUGC(d, uint(d.Size()-d.Capacity()))
@@ -223,7 +335,6 @@ func (sc *SteamCache) ServeHTTP(w http.ResponseWriter, r *http.Request) {
path := strings.Split(r.URL.String(), "?")[0]
tstart := time.Now()
defer func() { responseTime.Observe(time.Since(tstart).Seconds()) }()
cacheKey := strings.ReplaceAll(path[1:], "\\", "/") // replace all backslashes with forward slashes shouldn't be necessary but just in case
@@ -252,6 +363,7 @@ func (sc *SteamCache) ServeHTTP(w http.ResponseWriter, r *http.Request) {
requestsTotal.WithLabelValues(r.Method, "200").Inc()
cacheStatusTotal.WithLabelValues("HIT").Inc()
responseTime.WithLabelValues("HIT").Observe(time.Since(tstart).Seconds())
return
}
@@ -328,27 +440,95 @@ func (sc *SteamCache) ServeHTTP(w http.ResponseWriter, r *http.Request) {
size := resp.ContentLength
// this is sortof not needed as we should always be able to get a writer from the cache as long as the gc is able to reclaim enough space aka the file is not bigger than the disk can handle
ww := w.(io.Writer) // default writer to write to the response writer
writer, _ := sc.vfs.Create(cacheKey, size) // create a writer to write to the cache
if writer != nil { // if the writer is not nil, it means the cache is writable
defer writer.Close() // close the writer when done
ww = io.MultiWriter(w, writer) // write to both the response writer and the cache writer
// Read the entire response body into memory for hash verification
bodyData, err := io.ReadAll(resp.Body)
if err != nil {
requestsTotal.WithLabelValues(r.Method, "500").Inc()
logger.Logger.Error().Err(err).Str("url", req.URL.String()).Msg("Failed to read response body")
http.Error(w, "Failed to read response body", http.StatusInternalServerError)
return
}
w.Header().Add("X-LanCache-Status", "MISS")
// Extract filename from cache key for hash verification
filename := filepath.Base(cacheKey)
expectedHash, hasHash := extractHashFromFilename(filename)
io.Copy(ww, resp.Body)
// Debug logging for manifest files
if strings.Contains(cacheKey, "manifest") {
logger.Logger.Debug().
Str("key", cacheKey).
Str("filename", filename).
Bool("hasHash", hasHash).
Str("expectedHash", expectedHash).
Int64("content_length_header", resp.ContentLength).
Int("actual_content_length", len(bodyData)).
Msg("Manifest file hash verification debug")
}
// Hash verification using Steam's X-Content-Sha header and content length verification
hashVerified := true
if hasHash {
// Get the hash from Steam's X-Content-Sha header
steamHash := resp.Header.Get("X-Content-Sha")
// Verify using Steam's hash
if strings.EqualFold(steamHash, expectedHash) {
hashVerificationTotal.WithLabelValues("success").Inc()
} else {
hashVerificationTotal.WithLabelValues("failed").Inc()
logger.Logger.Error().
Str("key", cacheKey).
Str("expected_hash", expectedHash).
Str("steam_hash", steamHash).
Int("content_length", len(bodyData)).
Msg("Steam hash verification failed - Steam's hash doesn't match filename")
hashVerified = false
}
} else {
hashVerificationTotal.WithLabelValues("no_hash").Inc()
}
// Always verify content length as an additional safety check
if resp.ContentLength > 0 && int64(len(bodyData)) != resp.ContentLength {
hashVerificationTotal.WithLabelValues("content_length_failed").Inc()
logger.Logger.Error().
Str("key", cacheKey).
Int("actual_content_length", len(bodyData)).
Int64("expected_content_length", resp.ContentLength).
Msg("Content length verification failed")
hashVerified = false
} else if resp.ContentLength > 0 {
hashVerificationTotal.WithLabelValues("content_length_success").Inc()
}
// Write to response (always serve the file)
w.Header().Add("X-LanCache-Status", "MISS")
w.Write(bodyData)
// Only cache the file if hash verification passed (or no hash was present)
if hashVerified {
writer, _ := sc.vfs.Create(cacheKey, size)
if writer != nil {
defer writer.Close()
writer.Write(bodyData)
}
} else {
logger.Logger.Warn().
Str("key", cacheKey).
Msg("File served but not cached due to hash verification failure")
}
logger.Logger.Info().
Str("key", cacheKey).
Str("host", r.Host).
Str("status", "MISS").
Bool("hash_verified", hasHash).
Dur("duration", time.Since(tstart)).
Msg("request")
requestsTotal.WithLabelValues(r.Method, "200").Inc()
cacheStatusTotal.WithLabelValues("MISS").Inc()
responseTime.WithLabelValues("MISS").Observe(time.Since(tstart).Seconds())
return
}

View File

@@ -3,6 +3,7 @@ package steamcache
import (
"io"
"net/http"
"os"
"path/filepath"
"testing"
@@ -13,7 +14,7 @@ func TestCaching(t *testing.T) {
os.WriteFile(filepath.Join(td, "key2"), []byte("value2"), 0644)
sc := New("localhost:8080", "1G", "1G", td, "")
sc := New("localhost:8080", "1G", "1G", td, "", "lru", "lru")
w, err := sc.vfs.Create("key", 5)
if err != nil {
@@ -84,7 +85,7 @@ func TestCaching(t *testing.T) {
}
func TestCacheMissAndHit(t *testing.T) {
sc := New("localhost:8080", "0", "1G", t.TempDir(), "")
sc := New("localhost:8080", "0", "1G", t.TempDir(), "", "lru", "lru")
key := "testkey"
value := []byte("testvalue")
@@ -108,3 +109,137 @@ func TestCacheMissAndHit(t *testing.T) {
t.Errorf("expected %s, got %s", value, got)
}
}
// TestHashExtraction checks which filename shapes extractHashFromFilename
// accepts as carrying a SHA1 hash: a bare 40-character hex name and a
// 40-character hex name followed by an extension. It also checks that the
// returned hash is lower-cased and that non-matching names yield an empty hash.
func TestHashExtraction(t *testing.T) {
	const sha = "e89c81a1a926eb4732e146bc806491da8a7d89ca"

	testCases := []struct {
		name           string
		filename       string
		expectedHash   string
		shouldHaveHash bool
	}{
		{
			name:           "standalone hash",
			filename:       sha, // the specific key from the user's issue
			expectedHash:   sha,
			shouldHaveHash: true,
		},
		{
			name:           "hash with extension",
			filename:       sha + ".bin",
			expectedHash:   sha,
			shouldHaveHash: true,
		},
		{
			name:           "upper-case hash is normalised",
			filename:       "E89C81A1A926EB4732E146BC806491DA8A7D89CA",
			expectedHash:   sha,
			shouldHaveHash: true,
		},
		{
			name:           "prefixed hash",
			filename:       "chunk_" + sha,
			expectedHash:   "",
			shouldHaveHash: false, // No longer supported with simplified patterns
		},
		{
			name:           "hash in the middle",
			filename:       "file." + sha + ".chunk",
			expectedHash:   "",
			shouldHaveHash: false, // No longer supported with simplified patterns
		},
		{
			name:           "too short",
			filename:       "chunk_abc123def456",
			expectedHash:   "",
			shouldHaveHash: false, // Not 40 chars
		},
		{
			name:           "too long",
			filename:       sha + "0",
			expectedHash:   "",
			shouldHaveHash: false, // 41 hex chars is not a SHA1
		},
		{
			name:           "empty filename",
			filename:       "",
			expectedHash:   "",
			shouldHaveHash: false,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			hash, hasHash := extractHashFromFilename(tc.filename)

			if hasHash != tc.shouldHaveHash {
				t.Errorf("filename: %s, expected hasHash: %v, got: %v", tc.filename, tc.shouldHaveHash, hasHash)
			}
			// Checked on misses too: a miss must come back with an empty hash.
			if hash != tc.expectedHash {
				t.Errorf("filename: %s, expected hash: %s, got: %s", tc.filename, tc.expectedHash, hash)
			}
		})
	}
}
// TestHashCalculation checks calculateFileHash against the known SHA1 of
// "Hello, World!" and checks that verifyFileHash accepts that digest while
// rejecting an unrelated string.
func TestHashCalculation(t *testing.T) {
	// Known SHA1 digest of "Hello, World!".
	const want = "0a0a9f2a6772942557ab5355d76af442f8f65e01"
	payload := []byte("Hello, World!")

	if got := calculateFileHash(payload); got != want {
		t.Errorf("Hash calculation failed: expected %s, got %s", want, got)
	}

	if ok := verifyFileHash(payload, want); !ok {
		t.Error("Hash verification failed for correct hash")
	}

	if ok := verifyFileHash(payload, "wronghash"); ok {
		t.Error("Hash verification passed for wrong hash")
	}
}
// TestHashVerificationWithRealData runs calculateFileHash and verifyFileHash
// over a small table of inputs whose SHA1 digests are well known.
func TestHashVerificationWithRealData(t *testing.T) {
	type vector struct {
		input  string
		digest string
	}

	vectors := []vector{
		{input: "", digest: "da39a3ee5e6b4b0d3255bfef95601890afd80709"},              // SHA1 of empty string
		{input: "test", digest: "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"},          // SHA1 of "test"
		{input: "Hello, World!", digest: "0a0a9f2a6772942557ab5355d76af442f8f65e01"}, // SHA1 of "Hello, World!"
	}

	for i := range vectors {
		v := vectors[i]
		raw := []byte(v.input)

		if got := calculateFileHash(raw); got != v.digest {
			t.Errorf("Hash calculation failed for '%s': expected %s, got %s", v.input, v.digest, got)
		}

		if ok := verifyFileHash(raw, v.digest); !ok {
			t.Errorf("Hash verification failed for '%s'", v.input)
		}
	}
}
// TestResponseHashCalculation checks three properties of calculateResponseHash:
// the result differs from the plain body hash, it is repeatable for the same
// response, and it changes when the headers change.
func TestResponseHashCalculation(t *testing.T) {
	// okResponse builds a mock "200 OK" response carrying the given headers.
	okResponse := func(h http.Header) *http.Response {
		return &http.Response{
			StatusCode: 200,
			Status:     "200 OK",
			Header:     h,
		}
	}

	payload := []byte("Hello, World!")

	first := okResponse(http.Header{
		"Content-Type":   []string{"application/octet-stream"},
		"Content-Length": []string{"13"},
		"Cache-Control":  []string{"public, max-age=3600"},
	})
	firstHash := calculateResponseHash(first, payload)

	// Headers and status line take part in the digest, so it must not
	// collapse to the body-only hash.
	if firstHash == calculateFileHash(payload) {
		t.Error("Response hash should be different from body hash when headers are present")
	}

	// Hashing the same response again must give the same digest.
	if again := calculateResponseHash(first, payload); again != firstHash {
		t.Error("Response hash should be consistent for the same response")
	}

	// A response with a different header set must hash differently.
	second := okResponse(http.Header{
		"Content-Type":   []string{"text/plain"},
		"Content-Length": []string{"13"},
	})
	if other := calculateResponseHash(second, payload); other == firstHash {
		t.Error("Response hash should be different for different headers")
	}
}