Enhance logging and context handling in job management. Introduce logger initialization with configurable parameters in the manager and runner commands. Switch job context handling from tar.gz to plain tar files, and implement ETag generation for improved caching. Refactor API endpoints to support the new context file structure and improve error handling in job submissions. Add support for unhiding objects and for auto-execution options in job creation requests.

2025-11-24 21:48:05 -06:00
parent a029714e08
commit 4ac05d50a1
23 changed files with 4133 additions and 1311 deletions
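
The logger changes in the manager and runner commands are not part of the file shown below. As a rough sketch only, a configurable initialization along these lines is possible with the standard library; initLogger and its prefix/verbose parameters are assumptions, not the commit's actual API:

package main

import (
	"io"
	"log"
	"os"
)

// initLogger configures the standard logger from caller-supplied parameters.
// The prefix and verbose knobs are illustrative; the real commands' flags may differ.
func initLogger(prefix string, verbose bool, out io.Writer) {
	if out == nil {
		out = os.Stderr
	}
	flags := log.LstdFlags
	if verbose {
		flags |= log.Lmicroseconds | log.Lshortfile
	}
	log.SetOutput(out)
	log.SetPrefix(prefix)
	log.SetFlags(flags)
}

func main() {
	initLogger("[manager] ", true, nil)
	log.Printf("logger ready")
}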

@@ -1,12 +1,17 @@
package api
import (
"compress/gzip"
"database/sql"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
@@ -38,6 +43,15 @@ type Server struct {
// Mutexes for each frontend connection to serialize writes
frontendConnsWriteMu map[string]*sync.Mutex // key: "jobId:taskId"
frontendConnsWriteMuMu sync.RWMutex
// Job list WebSocket connections (key: userID)
jobListConns map[int64]*websocket.Conn
jobListConnsMu sync.RWMutex
// Single job WebSocket connections (key: "userId:jobId")
jobConns map[string]*websocket.Conn
jobConnsMu sync.RWMutex
// Mutexes for job WebSocket connections
jobConnsWriteMu map[string]*sync.Mutex
jobConnsWriteMuMu sync.RWMutex
// Throttling for progress updates (per job)
progressUpdateTimes map[int64]time.Time // key: jobID
progressUpdateTimesMu sync.RWMutex
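
The progressUpdateTimes map above implies per-job throttling of progress broadcasts. A minimal sketch of the check such a map enables, assuming a hypothetical helper and a 500ms interval (neither appears in this diff):

package api

import (
	"sync"
	"time"
)

// progressInterval is an assumed value; the server's real threshold is not shown.
const progressInterval = 500 * time.Millisecond

type progressThrottle struct {
	mu    sync.Mutex
	times map[int64]time.Time // key: jobID, mirroring Server.progressUpdateTimes
}

func newProgressThrottle() *progressThrottle {
	return &progressThrottle{times: make(map[int64]time.Time)}
}

// allow reports whether a progress update for jobID should be forwarded now,
// recording the send time when it is.
func (t *progressThrottle) allow(jobID int64) bool {
	t.mu.Lock()
	defer t.mu.Unlock()
	if last, ok := t.times[jobID]; ok && time.Since(last) < progressInterval {
		return false // too soon since the last broadcast; drop this one
	}
	t.times[jobID] = time.Now()
	return true
}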
@@ -66,6 +80,9 @@ func NewServer(db *database.DB, auth *authpkg.Auth, storage *storage.Storage) (*
runnerConns: make(map[int64]*websocket.Conn),
frontendConns: make(map[string]*websocket.Conn),
frontendConnsWriteMu: make(map[string]*sync.Mutex),
jobListConns: make(map[int64]*websocket.Conn),
jobConns: make(map[string]*websocket.Conn),
jobConnsWriteMu: make(map[string]*sync.Mutex),
progressUpdateTimes: make(map[int64]time.Time),
}
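
Gorilla-style websocket connections tolerate only one concurrent writer, which is why each job connection gets a dedicated write mutex above. A sketch of how a send helper might serialize writes; writeJobUpdate is a hypothetical name, and gorilla/websocket is an assumption about the library behind *websocket.Conn:

package api

import (
	"sync"

	"github.com/gorilla/websocket"
)

// jobHub mirrors the Server fields jobConns, jobConnsMu, jobConnsWriteMu,
// and jobConnsWriteMuMu from the struct above.
type jobHub struct {
	conns     map[string]*websocket.Conn // key: "userId:jobId"
	connsMu   sync.RWMutex
	writeMu   map[string]*sync.Mutex // one write mutex per connection
	writeMuMu sync.RWMutex
}

// writeJobUpdate sends one update to a job connection, holding that
// connection's mutex so concurrent broadcasts never interleave frames.
func (h *jobHub) writeJobUpdate(key string, payload any) error {
	h.connsMu.RLock()
	conn, ok := h.conns[key]
	h.connsMu.RUnlock()
	if !ok {
		return nil // no subscriber for this job
	}

	h.writeMuMu.RLock()
	mu := h.writeMu[key]
	h.writeMuMu.RUnlock()
	if mu == nil {
		return nil
	}

	mu.Lock()
	defer mu.Unlock()
	return conn.WriteJSON(payload)
}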
@@ -83,16 +100,62 @@ func (s *Server) setupMiddleware() {
// Note: Timeout middleware is NOT applied globally to avoid conflicts with WebSocket connections
// WebSocket connections are long-lived and should not have HTTP timeouts
// Add gzip compression for JSON responses
s.router.Use(gzipMiddleware)
s.router.Use(cors.Handler(cors.Options{
AllowedOrigins: []string{"*"},
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
-		AllowedHeaders:   []string{"Accept", "Authorization", "Content-Type", "Range"},
-		ExposedHeaders:   []string{"Link", "Content-Range", "Accept-Ranges", "Content-Length"},
+		AllowedHeaders:   []string{"Accept", "Authorization", "Content-Type", "Range", "If-None-Match"},
+		ExposedHeaders:   []string{"Link", "Content-Range", "Accept-Ranges", "Content-Length", "ETag"},
AllowCredentials: true,
MaxAge: 300,
}))
}
// gzipMiddleware compresses responses with gzip if client supports it
func gzipMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Skip compression for WebSocket upgrades
if strings.ToLower(r.Header.Get("Upgrade")) == "websocket" {
next.ServeHTTP(w, r)
return
}
// Check if client accepts gzip
if !strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") {
next.ServeHTTP(w, r)
return
}
// Create gzip writer
gz := gzip.NewWriter(w)
defer gz.Close()
w.Header().Set("Content-Encoding", "gzip")
w.Header().Set("Vary", "Accept-Encoding")
// Wrap response writer
gzw := &gzipResponseWriter{Writer: gz, ResponseWriter: w}
next.ServeHTTP(gzw, r)
})
}
// gzipResponseWriter wraps http.ResponseWriter to add gzip compression
type gzipResponseWriter struct {
io.Writer
http.ResponseWriter
}
func (w *gzipResponseWriter) Write(b []byte) (int, error) {
return w.Writer.Write(b)
}
func (w *gzipResponseWriter) WriteHeader(statusCode int) {
// Drop any pre-set Content-Length: it describes the uncompressed body,
// and the compressed length is unknown until the gzip stream is closed.
w.ResponseWriter.Header().Del("Content-Length")
w.ResponseWriter.WriteHeader(statusCode)
}
// setupRoutes configures routes
func (s *Server) setupRoutes() {
// Public routes
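
The If-None-Match and ETag headers added to the CORS configuration support the conditional requests the commit message describes. A sketch of the handler-side pattern they enable, assuming a content-hash ETag (the helper names are illustrative, not the commit's implementation):

package api

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"net/http"
)

// etagFor derives a strong ETag from the response body. Hashing the content
// is an assumption; the server could equally use a version or mtime.
func etagFor(body []byte) string {
	sum := sha256.Sum256(body)
	return fmt.Sprintf("%q", hex.EncodeToString(sum[:8]))
}

// serveWithETag answers 304 Not Modified when the client already holds the
// current representation, and sends the body with an ETag otherwise.
func serveWithETag(w http.ResponseWriter, r *http.Request, body []byte) {
	etag := etagFor(body)
	w.Header().Set("ETag", etag)
	if r.Header.Get("If-None-Match") == etag {
		w.WriteHeader(http.StatusNotModified)
		return
	}
	w.Write(body)
}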
@@ -118,16 +181,21 @@ func (s *Server) setupRoutes() {
r.Post("/", s.handleCreateJob)
r.Post("/upload", s.handleUploadFileForJobCreation) // Upload before job creation
r.Get("/", s.handleListJobs)
r.Get("/summary", s.handleListJobsSummary)
r.Post("/batch", s.handleBatchGetJobs)
r.Get("/{id}", s.handleGetJob)
r.Delete("/{id}", s.handleCancelJob)
r.Post("/{id}/delete", s.handleDeleteJob)
r.Post("/{id}/upload", s.handleUploadJobFile)
r.Get("/{id}/files", s.handleListJobFiles)
r.Get("/{id}/files/count", s.handleGetJobFilesCount)
r.Get("/{id}/context", s.handleListContextArchive)
r.Get("/{id}/files/{fileId}/download", s.handleDownloadJobFile)
r.Get("/{id}/video", s.handleStreamVideo)
r.Get("/{id}/metadata", s.handleGetJobMetadata)
r.Get("/{id}/tasks", s.handleListJobTasks)
r.Get("/{id}/tasks/summary", s.handleListJobTasksSummary)
r.Post("/{id}/tasks/batch", s.handleBatchGetTasks)
r.Get("/{id}/tasks/{taskId}/logs", s.handleGetTaskLogs)
// WebSocket route - no timeout middleware (long-lived connection)
r.With(func(next http.Handler) http.Handler {
@@ -138,6 +206,19 @@ func (s *Server) setupRoutes() {
}).Get("/{id}/tasks/{taskId}/logs/ws", s.handleStreamTaskLogsWebSocket)
r.Get("/{id}/tasks/{taskId}/steps", s.handleGetTaskSteps)
r.Post("/{id}/tasks/{taskId}/retry", s.handleRetryTask)
// WebSocket routes for real-time updates
r.With(func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Remove timeout middleware for WebSocket
next.ServeHTTP(w, r)
})
}).Get("/ws", s.handleJobsWebSocket)
r.With(func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Remove timeout middleware for WebSocket
next.ServeHTTP(w, r)
})
}).Get("/{id}/ws", s.handleJobWebSocket)
})
// Admin routes
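
The /ws and /{id}/ws routes skip the timeout middleware because the connections are long-lived. A sketch of how the jobs socket might be registered into jobListConns, based only on the struct fields above (the upgrader and the replace-on-reconnect behavior are assumptions):

package api

import (
	"net/http"
	"sync"

	"github.com/gorilla/websocket"
)

var upgrader = websocket.Upgrader{
	// Origin enforcement is assumed to happen in the CORS middleware.
	CheckOrigin: func(r *http.Request) bool { return true },
}

// jobListRegistry mirrors Server.jobListConns and its mutex.
type jobListRegistry struct {
	conns map[int64]*websocket.Conn
	mu    sync.RWMutex
}

// register upgrades the request and tracks the connection by user ID,
// closing any previous connection held by the same user.
func (reg *jobListRegistry) register(w http.ResponseWriter, r *http.Request, userID int64) (*websocket.Conn, error) {
	conn, err := upgrader.Upgrade(w, r, nil)
	if err != nil {
		return nil, err
	}
	reg.mu.Lock()
	if old, ok := reg.conns[userID]; ok {
		old.Close()
	}
	reg.conns[userID] = conn
	reg.mu.Unlock()
	return conn, nil
}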
@@ -181,7 +262,8 @@ func (s *Server) setupRoutes() {
})
r.Post("/tasks/{id}/progress", s.handleUpdateTaskProgress)
r.Post("/tasks/{id}/steps", s.handleUpdateTaskStep)
r.Get("/jobs/{jobId}/context.tar.gz", s.handleDownloadJobContext)
r.Get("/jobs/{jobId}/context.tar", s.handleDownloadJobContext)
r.Get("/files/{jobId}/{fileName}", s.handleDownloadFileForRunner)
r.Post("/files/{jobId}/upload", s.handleUploadFileFromRunner)
r.Get("/jobs/{jobId}/status", s.handleGetJobStatusForRunner)
r.Get("/jobs/{jobId}/files", s.handleGetJobFilesForRunner)
@@ -311,12 +393,14 @@ func (s *Server) handleLogout(w http.ResponseWriter, r *http.Request) {
func (s *Server) handleGetMe(w http.ResponseWriter, r *http.Request) {
cookie, err := r.Cookie("session_id")
if err != nil {
log.Printf("Authentication failed: missing session cookie in /auth/me")
s.respondError(w, http.StatusUnauthorized, "Not authenticated")
return
}
session, ok := s.auth.GetSession(cookie.Value)
if !ok {
log.Printf("Authentication failed: invalid session cookie in /auth/me")
s.respondError(w, http.StatusUnauthorized, "Invalid session")
return
}
@@ -410,6 +494,7 @@ func (s *Server) handleLocalLogin(w http.ResponseWriter, r *http.Request) {
session, err := s.auth.LocalLogin(req.Username, req.Password)
if err != nil {
log.Printf("Authentication failed: invalid credentials for username '%s'", req.Username)
s.respondError(w, http.StatusUnauthorized, "Invalid credentials")
return
}
@@ -512,6 +597,7 @@ func parseID(r *http.Request, param string) (int64, error) {
func (s *Server) StartBackgroundTasks() {
go s.recoverStuckTasks()
go s.cleanupOldRenderJobs()
go s.cleanupOldTempDirectories()
}
// recoverStuckTasks periodically checks for dead runners and stuck tasks
@@ -621,6 +707,7 @@ func (s *Server) recoverTaskTimeouts() {
err := rows.Scan(&taskID, &runnerID, &retryCount, &maxRetries, &timeoutSeconds, &startedAt)
if err != nil {
log.Printf("Failed to scan task row in recoverTaskTimeouts: %v", err)
continue
}
@@ -659,3 +746,72 @@ func (s *Server) recoverTaskTimeouts() {
}
}
}
// cleanupOldTempDirectories periodically cleans up old temporary directories
func (s *Server) cleanupOldTempDirectories() {
// Run cleanup every hour
ticker := time.NewTicker(1 * time.Hour)
defer ticker.Stop()
// Run once immediately on startup
s.cleanupOldTempDirectoriesOnce()
for range ticker.C {
s.cleanupOldTempDirectoriesOnce()
}
}
// cleanupOldTempDirectoriesOnce removes temp directories older than 1 hour
func (s *Server) cleanupOldTempDirectoriesOnce() {
defer func() {
if r := recover(); r != nil {
log.Printf("Panic in cleanupOldTempDirectories: %v", r)
}
}()
tempPath := filepath.Join(s.storage.BasePath(), "temp")
// Check if temp directory exists
if _, err := os.Stat(tempPath); os.IsNotExist(err) {
return
}
// Read all entries in temp directory
entries, err := os.ReadDir(tempPath)
if err != nil {
log.Printf("Failed to read temp directory: %v", err)
return
}
now := time.Now()
cleanedCount := 0
for _, entry := range entries {
if !entry.IsDir() {
continue
}
entryPath := filepath.Join(tempPath, entry.Name())
// Get directory info to check modification time
info, err := entry.Info()
if err != nil {
continue
}
// Remove directories older than 1 hour
age := now.Sub(info.ModTime())
if age > 1*time.Hour {
if err := os.RemoveAll(entryPath); err != nil {
log.Printf("Warning: Failed to clean up old temp directory %s: %v", entryPath, err)
} else {
cleanedCount++
log.Printf("Cleaned up old temp directory: %s (age: %v)", entryPath, age)
}
}
}
if cleanedCount > 0 {
log.Printf("Cleaned up %d old temp directories", cleanedCount)
}
}