This commit is contained in:
2025-11-22 05:40:31 -06:00
parent 87cb54a17d
commit fb2e318eaa
12 changed files with 1891 additions and 353 deletions

View File

@@ -11,12 +11,17 @@ import (
"log"
"mime/multipart"
"net/http"
"net/url"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"github.com/gorilla/websocket"
"fuego/pkg/types"
)
// Client represents a runner client
@@ -29,6 +34,9 @@ type Client struct {
runnerID int64
runnerSecret string
managerSecret string
wsConn *websocket.Conn
wsConnMu sync.Mutex
stopChan chan struct{}
}
// NewClient creates a new runner client
@@ -39,6 +47,7 @@ func NewClient(managerURL, name, hostname, ipAddress string) *Client {
hostname: hostname,
ipAddress: ipAddress,
httpClient: &http.Client{Timeout: 30 * time.Second},
stopChan: make(chan struct{}),
}
}
@@ -121,81 +130,219 @@ func (c *Client) doSignedRequest(method, path string, body []byte) (*http.Respon
return c.httpClient.Do(req)
}
// HeartbeatLoop sends periodic heartbeats to the manager
// ConnectWebSocket dials the manager's runner WebSocket endpoint and stores
// the resulting connection on the client.
//
// The request is authenticated through query parameters: runner_id, a Unix
// timestamp, and a hex-encoded HMAC-SHA256 signature (keyed with the runner
// secret) over "GET\n<path>\n\n<timestamp>". The manager URL's http/https
// scheme is rewritten to ws/wss. Any connection already held by the client is
// closed before the new one replaces it.
//
// It returns an error when the runner has no ID or secret yet, when the
// resulting URL does not parse, or when the dial fails.
func (c *Client) ConnectWebSocket() error {
	if c.runnerID == 0 || c.runnerSecret == "" {
		return fmt.Errorf("runner not authenticated")
	}

	const wsPath = "/api/runner/ws"
	now := time.Now().Unix()

	// Sign "METHOD\nPATH\n\nTIMESTAMP" with the runner secret.
	mac := hmac.New(sha256.New, []byte(c.runnerSecret))
	fmt.Fprintf(mac, "GET\n%s\n\n%d", wsPath, now)
	sig := hex.EncodeToString(mac.Sum(nil))

	// Swap the HTTP scheme for its WebSocket counterpart.
	base := strings.Replace(c.managerURL, "http://", "ws://", 1)
	base = strings.Replace(base, "https://", "wss://", 1)
	rawURL := fmt.Sprintf("%s%s?runner_id=%d&signature=%s&timestamp=%d",
		base, wsPath, c.runnerID, sig, now)

	target, err := url.Parse(rawURL)
	if err != nil {
		return fmt.Errorf("invalid WebSocket URL: %w", err)
	}

	d := websocket.Dialer{HandshakeTimeout: 10 * time.Second}
	fresh, _, err := d.Dial(target.String(), nil)
	if err != nil {
		return fmt.Errorf("failed to connect WebSocket: %w", err)
	}

	// Replace whatever connection was held before, closing it first.
	c.wsConnMu.Lock()
	if previous := c.wsConn; previous != nil {
		previous.Close()
	}
	c.wsConn = fresh
	c.wsConnMu.Unlock()

	log.Printf("WebSocket connected to manager")
	return nil
}
// ConnectWebSocketWithReconnect keeps a WebSocket session to the manager
// running until a value can be received from c.stopChan (for example because
// the channel was closed).
//
// Each round dials via ConnectWebSocket. On success it resets the backoff and
// blocks in HandleWebSocketMessages until that session ends, then dials again.
// On failure it waits for the current backoff, which starts at one second and
// doubles up to a 60 second cap, before retrying.
//
// The stop signal is honored both before every dial and while waiting out the
// backoff, so shutdown neither opens a new connection nor has to sit through
// a pending retry delay.
func (c *Client) ConnectWebSocketWithReconnect() {
	const (
		initialBackoff = 1 * time.Second
		maxBackoff     = 60 * time.Second
	)
	backoff := initialBackoff

	for {
		// Never dial once a stop has been requested.
		select {
		case <-c.stopChan:
			return
		default:
		}

		err := c.ConnectWebSocket()
		if err == nil {
			backoff = initialBackoff // Reset on success
			c.HandleWebSocketMessages()
			continue
		}

		log.Printf("WebSocket connection failed: %v, retrying in %v", err, backoff)

		// Wait out the backoff, but wake up immediately on a stop request.
		timer := time.NewTimer(backoff)
		select {
		case <-c.stopChan:
			timer.Stop()
			return
		case <-timer.C:
		}

		backoff *= 2
		if backoff > maxBackoff {
			backoff = maxBackoff
		}
	}
}
// HandleWebSocketMessages reads JSON messages from the client's current
// WebSocket connection until a read fails, dispatching each message by its
// "type" field. It returns immediately when no connection is held.
//
// "task_assignment" messages are passed to handleTaskAssignment; "ping"
// messages and unknown types are ignored. Unexpected close errors (anything
// other than going-away or abnormal closure) are logged.
//
// When the loop ends the connection is closed so its socket is released, and
// c.wsConn is cleared only if it still refers to this connection, so a newer
// connection installed by a concurrent reconnect is left untouched.
func (c *Client) HandleWebSocketMessages() {
	c.wsConnMu.Lock()
	conn := c.wsConn
	c.wsConnMu.Unlock()

	if conn == nil {
		return
	}

	defer func() {
		c.wsConnMu.Lock()
		if c.wsConn == conn {
			c.wsConn = nil
		}
		c.wsConnMu.Unlock()
		// Closing an already-closed connection only yields an error, which
		// is irrelevant here; the point is to never leak the socket.
		conn.Close()
	}()

	// Accept pong control frames without further action.
	conn.SetPongHandler(func(string) error {
		return nil
	})

	for {
		var msg map[string]interface{}
		if err := conn.ReadJSON(&msg); err != nil {
			if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
				log.Printf("WebSocket error: %v", err)
			}
			return
		}

		msgType, _ := msg["type"].(string)
		switch msgType {
		case "task_assignment":
			c.handleTaskAssignment(msg)
		case "ping":
			// Application-level ping message: nothing is sent back from here.
		}
	}
}
// handleTaskAssignment validates a "task_assignment" message and starts
// processing the task in a new goroutine.
//
// msg["data"] must be a JSON object; otherwise the message is logged as
// invalid and dropped. The fields task_id, job_id, frame_start and frame_end
// are read as JSON numbers and job_name / output_format as strings; a missing
// or mistyped field falls back to its zero value.
//
// The task is reported back as failed (via sendTaskComplete) when input_files
// is empty, when any entry of input_files is not a string, or when processTask
// returns an error. Entries are checked here because processTask asserts each
// one to string without a check, and a panic inside the goroutine would take
// the whole runner down. If the failure report itself cannot be delivered,
// that is logged rather than silently discarded.
func (c *Client) handleTaskAssignment(msg map[string]interface{}) {
	data, ok := msg["data"].(map[string]interface{})
	if !ok {
		log.Printf("Invalid task assignment message")
		return
	}

	taskID, _ := data["task_id"].(float64)
	jobID, _ := data["job_id"].(float64)
	jobName, _ := data["job_name"].(string)
	outputFormat, _ := data["output_format"].(string)
	frameStart, _ := data["frame_start"].(float64)
	frameEnd, _ := data["frame_end"].(float64)
	inputFilesRaw, _ := data["input_files"].([]interface{})

	// reportFailure tells the manager the task failed and logs when even
	// that notification cannot be sent.
	reportFailure := func(reason string) {
		if err := c.sendTaskComplete(int64(taskID), "", false, reason); err != nil {
			log.Printf("Failed to report failure of task %v: %v", taskID, err)
		}
	}

	if len(inputFilesRaw) == 0 {
		log.Printf("No input files for task %v", taskID)
		reportFailure("No input files")
		return
	}

	for _, entry := range inputFilesRaw {
		if _, isString := entry.(string); !isString {
			log.Printf("Invalid input file entry for task %v: %v", taskID, entry)
			reportFailure("Invalid input files")
			return
		}
	}

	// Convert to task map format
	taskMap := map[string]interface{}{
		"id":          taskID,
		"job_id":      jobID,
		"frame_start": frameStart,
		"frame_end":   frameEnd,
	}

	// Process the task without blocking the WebSocket read loop.
	go func() {
		if err := c.processTask(taskMap, jobName, outputFormat, inputFilesRaw); err != nil {
			log.Printf("Failed to process task %v: %v", taskID, err)
			reportFailure(err.Error())
		}
	}()
}
// HeartbeatLoop sends periodic heartbeats via WebSocket. It wakes on a
// 30-second ticker; the WebSocket heartbeat is a JSON message of type
// "heartbeat" carrying the current Unix timestamp, and a failed write is only
// logged.
//
// NOTE(review): this span looks like a rendered diff without +/- markers.
// Statements from an HTTP heartbeat (doSignedRequest to /api/runner/heartbeat)
// and from the polling ProcessTasks loop appear interleaved with the WebSocket
// heartbeat, and the braces do not balance as shown — confirm the statement
// order against the real file before relying on it.
func (c *Client) HeartbeatLoop() {
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for range ticker.C {
req := map[string]interface{}{}
body, _ := json.Marshal(req)
// NOTE(review): RLock/RUnlock are methods of sync.RWMutex; the Client struct
// shown in this file declares wsConnMu as sync.Mutex — confirm the field type.
c.wsConnMu.RLock()
conn := c.wsConn
c.wsConnMu.RUnlock()
resp, err := c.doSignedRequest("POST", "/api/runner/heartbeat?runner_id="+fmt.Sprintf("%d", c.runnerID), body)
if err != nil {
log.Printf("Heartbeat failed: %v", err)
continue
}
resp.Body.Close()
}
}
// ProcessTasks polls for tasks and processes them. It calls getTasks on a
// 5-second ticker and logs (then skips the tick) when fetching fails.
// NOTE(review): presumably the polling path this commit replaces with
// WebSocket task assignments — verify whether it still exists in the file.
func (c *Client) ProcessTasks() {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for range ticker.C {
tasks, err := c.getTasks()
if err != nil {
log.Printf("Failed to get tasks: %v", err)
continue
}
for _, taskData := range tasks {
taskMap, ok := taskData["task"].(map[string]interface{})
if !ok {
continue
if conn != nil {
// Send heartbeat via WebSocket
msg := map[string]interface{}{
"type": "heartbeat",
"timestamp": time.Now().Unix(),
}
jobName, _ := taskData["job_name"].(string)
outputFormat, _ := taskData["output_format"].(string)
inputFilesRaw, _ := taskData["input_files"].([]interface{})
if len(inputFilesRaw) == 0 {
log.Printf("No input files for task %v", taskMap["id"])
continue
}
// Process the task
if err := c.processTask(taskMap, jobName, outputFormat, inputFilesRaw); err != nil {
taskID, _ := taskMap["id"].(float64)
log.Printf("Failed to process task %v: %v", taskID, err)
c.completeTask(int64(taskID), "", false, err.Error())
if err := conn.WriteJSON(msg); err != nil {
log.Printf("Failed to send heartbeat: %v", err)
}
}
}
}
// getTasks fetches tasks from the manager with a signed GET request to
// /api/runner/tasks for this runner.
// NOTE(review): only the opening of this function is visible here; the lines
// that follow belong to sendLog, with a few further getTasks statements mixed
// in (this span looks like a rendered diff without +/- markers) — confirm
// against the real file.
func (c *Client) getTasks() ([]map[string]interface{}, error) {
path := fmt.Sprintf("/api/runner/tasks?runner_id=%d", c.runnerID)
resp, err := c.doSignedRequest("GET", path, nil)
if err != nil {
return nil, err
}
defer resp.Body.Close()
// sendLog sends a log entry to the manager via WebSocket.
//
// The entry is a JSON message of type "log_entry" whose "data" object carries
// task_id, log_level (the string form of logLevel), message and step_name,
// plus a top-level Unix "timestamp". Nothing is returned: a failed write is
// logged locally, and when no connection is held the entry is dropped with a
// local log line.
func (c *Client) sendLog(taskID int64, logLevel types.LogLevel, message, stepName string) {
// NOTE(review): RLock/RUnlock are methods of sync.RWMutex; the Client struct
// shown in this file declares wsConnMu as sync.Mutex — confirm the field type.
c.wsConnMu.RLock()
conn := c.wsConn
c.wsConnMu.RUnlock()
// NOTE(review): the next three lines reference resp and return two values,
// which matches getTasks rather than sendLog — presumably stray diff lines.
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("failed to get tasks: %s", string(body))
if conn != nil {
msg := map[string]interface{}{
"type": "log_entry",
"data": map[string]interface{}{
"task_id": taskID,
"log_level": string(logLevel),
"message": message,
"step_name": stepName,
},
"timestamp": time.Now().Unix(),
}
if err := conn.WriteJSON(msg); err != nil {
log.Printf("Failed to send log: %v", err)
}
} else {
log.Printf("WebSocket not connected, cannot send log")
}
}
var tasks []map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&tasks); err != nil {
return nil, err
// sendStepUpdate sends a step start/complete event to the manager.
//
// The event is delivered as an ordinary log entry through sendLog: the text is
// "Step <stepName>: <status>", with " - <errorMsg>" appended when errorMsg is
// non-empty. The entry is logged at error level when status is
// types.StepStatusFailed and at info level otherwise, and is tagged with
// stepName.
func (c *Client) sendStepUpdate(taskID int64, stepName string, status types.StepStatus, errorMsg string) {
// This would ideally be a separate endpoint, but for now we'll use logs
msg := fmt.Sprintf("Step %s: %s", stepName, status)
if errorMsg != "" {
msg += " - " + errorMsg
}
// NOTE(review): the return below yields two values from a function with no
// results and references an undeclared "tasks" — presumably a stray line from
// the removed getTasks in this rendered diff; confirm against the real file.
return tasks, nil
logLevel := types.LogLevelInfo
if status == types.StepStatusFailed {
logLevel = types.LogLevelError
}
c.sendLog(taskID, logLevel, msg, stepName)
}
// processTask processes a single task
@@ -205,6 +352,7 @@ func (c *Client) processTask(task map[string]interface{}, jobName, outputFormat
frameStart := int(task["frame_start"].(float64))
frameEnd := int(task["frame_end"].(float64))
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s", jobID, frameStart, frameEnd, outputFormat), "")
log.Printf("Processing task %d: job %d, frames %d-%d, format: %s", taskID, jobID, frameStart, frameEnd, outputFormat)
// Create work directory
@@ -214,11 +362,14 @@ func (c *Client) processTask(task map[string]interface{}, jobName, outputFormat
}
defer os.RemoveAll(workDir)
// Download input files
// Step: download
c.sendStepUpdate(taskID, "download", types.StepStatusRunning, "")
c.sendLog(taskID, types.LogLevelInfo, "Downloading input files...", "download")
blendFile := ""
for _, filePath := range inputFiles {
filePathStr := filePath.(string)
if err := c.downloadFile(filePathStr, workDir); err != nil {
c.sendStepUpdate(taskID, "download", types.StepStatusFailed, err.Error())
return fmt.Errorf("failed to download file %s: %w", filePathStr, err)
}
if filepath.Ext(filePathStr) == ".blend" {
@@ -227,8 +378,12 @@ func (c *Client) processTask(task map[string]interface{}, jobName, outputFormat
}
if blendFile == "" {
return fmt.Errorf("no .blend file found in input files")
err := fmt.Errorf("no .blend file found in input files")
c.sendStepUpdate(taskID, "download", types.StepStatusFailed, err.Error())
return err
}
c.sendStepUpdate(taskID, "download", types.StepStatusCompleted, "")
c.sendLog(taskID, types.LogLevelInfo, "Input files downloaded successfully", "download")
// Render frames
outputDir := filepath.Join(workDir, "output")
@@ -244,30 +399,60 @@ func (c *Client) processTask(task map[string]interface{}, jobName, outputFormat
outputPattern := filepath.Join(outputDir, fmt.Sprintf("frame_%%04d.%s", strings.ToLower(renderFormat)))
// Step: render_blender
c.sendStepUpdate(taskID, "render_blender", types.StepStatusRunning, "")
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting Blender render for frame %d...", frameStart), "render_blender")
// Execute Blender
cmd := exec.Command("blender", "-b", blendFile, "-o", outputPattern, "-f", fmt.Sprintf("%d", frameStart))
cmd.Dir = workDir
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("blender failed: %w\nOutput: %s", err, string(output))
errMsg := fmt.Sprintf("blender failed: %w\nOutput: %s", err, string(output))
c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
c.sendStepUpdate(taskID, "render_blender", types.StepStatusFailed, errMsg)
return fmt.Errorf(errMsg)
}
// Find rendered output file
outputFile := filepath.Join(outputDir, fmt.Sprintf("frame_%04d.%s", frameStart, strings.ToLower(renderFormat)))
if _, err := os.Stat(outputFile); os.IsNotExist(err) {
return fmt.Errorf("output file not found: %s", outputFile)
errMsg := fmt.Sprintf("output file not found: %s", outputFile)
c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
c.sendStepUpdate(taskID, "render_blender", types.StepStatusFailed, errMsg)
return fmt.Errorf(errMsg)
}
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Blender render completed for frame %d", frameStart), "render_blender")
c.sendStepUpdate(taskID, "render_blender", types.StepStatusCompleted, "")
// Upload frame file
// Step: upload or upload_frames
uploadStepName := "upload"
if outputFormat == "MP4" {
uploadStepName = "upload_frames"
}
c.sendStepUpdate(taskID, uploadStepName, types.StepStatusRunning, "")
c.sendLog(taskID, types.LogLevelInfo, "Uploading output file...", uploadStepName)
outputPath, err := c.uploadFile(jobID, outputFile)
if err != nil {
return fmt.Errorf("failed to upload output: %w", err)
errMsg := fmt.Sprintf("failed to upload output: %w", err)
c.sendLog(taskID, types.LogLevelError, errMsg, uploadStepName)
c.sendStepUpdate(taskID, uploadStepName, types.StepStatusFailed, errMsg)
return fmt.Errorf(errMsg)
}
c.sendLog(taskID, types.LogLevelInfo, "Output file uploaded successfully", uploadStepName)
c.sendStepUpdate(taskID, uploadStepName, types.StepStatusCompleted, "")
// Step: complete
c.sendStepUpdate(taskID, "complete", types.StepStatusRunning, "")
c.sendLog(taskID, types.LogLevelInfo, "Task completed successfully", "complete")
// Mark task as complete
if err := c.completeTask(taskID, outputPath, true, ""); err != nil {
c.sendStepUpdate(taskID, "complete", types.StepStatusFailed, err.Error())
return err
}
c.sendStepUpdate(taskID, "complete", types.StepStatusCompleted, "")
// For MP4 format, check if all frames are done and generate video
if outputFormat == "MP4" {
@@ -599,29 +784,33 @@ func (c *Client) uploadFile(jobID int64, filePath string) (string, error) {
return result.FilePath, nil
}
// completeTask marks a task as complete via WebSocket (or HTTP fallback).
//
// The completion is first sent over the WebSocket connection through
// sendTaskComplete. Only when that fails (no connection, or the write
// returned an error) is the result posted to the signed HTTP endpoint
// /api/runner/tasks/<taskID>/complete instead. The HTTP payload carries
// output_path and success, plus error when success is false.
//
// It returns nil as soon as either channel accepts the completion. If the
// HTTP fallback also fails, the returned error describes the HTTP failure: the
// request error itself, or the response body when the status is not 200 OK.
func (c *Client) completeTask(taskID int64, outputPath string, success bool, errorMsg string) error {
	if wsErr := c.sendTaskComplete(taskID, outputPath, success, errorMsg); wsErr == nil {
		return nil
	}

	// WebSocket delivery failed: fall back to the signed HTTP endpoint.
	payload := map[string]interface{}{
		"output_path": outputPath,
		"success":     success,
	}
	if !success {
		payload["error"] = errorMsg
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("failed to encode task completion: %w", err)
	}

	path := fmt.Sprintf("/api/runner/tasks/%d/complete?runner_id=%d", taskID, c.runnerID)
	resp, err := c.doSignedRequest("POST", path, body)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error text.
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("failed to complete task: %s", string(respBody))
	}

	return nil
}
// sendTaskComplete sends task completion via WebSocket.
//
// The message has type "task_complete"; its "data" object carries task_id,
// output_path, success and error, and a Unix "timestamp" is set at the top
// level.
//
// It returns an error when no WebSocket connection is held or when writing
// the message fails (the write error is wrapped).
//
// wsConnMu is taken with Lock/Unlock, which exist on both sync.Mutex (the type
// the Client struct declares) and sync.RWMutex, and it is held for the whole
// write: a WebSocket connection supports only one concurrent writer, and task
// goroutines may send while other messages are being written. The trade-off
// is that a stalled write delays anyone else waiting for the mutex.
func (c *Client) sendTaskComplete(taskID int64, outputPath string, success bool, errorMsg string) error {
	c.wsConnMu.Lock()
	defer c.wsConnMu.Unlock()

	if c.wsConn == nil {
		return fmt.Errorf("WebSocket not connected, cannot complete task")
	}

	msg := map[string]interface{}{
		"type": "task_complete",
		"data": map[string]interface{}{
			"task_id":     taskID,
			"output_path": outputPath,
			"success":     success,
			"error":       errorMsg,
		},
		"timestamp": time.Now().Unix(),
	}
	if err := c.wsConn.WriteJSON(msg); err != nil {
		return fmt.Errorf("failed to send task completion: %w", err)
	}
	return nil
}