- Updated gpuErrorSubstrings to include case-insensitive matching for GPU backend errors, improving error detection reliability.
- Modified checkGPUErrorLine to convert log lines to lowercase before checking for error indicators, ensuring consistent matching across different log formats.
371 lines
11 KiB
Go
371 lines
11 KiB
Go
package tasks
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"jiggablend/internal/runner/blender"
|
|
"jiggablend/internal/runner/workspace"
|
|
"jiggablend/pkg/scripts"
|
|
"jiggablend/pkg/types"
|
|
)
|
|
|
|
// RenderProcessor runs render tasks. The type has no fields; each method
// receives the per-task Context it operates on.
type RenderProcessor struct{}

// NewRenderProcessor returns a new, ready-to-use RenderProcessor.
func NewRenderProcessor() *RenderProcessor {
	return new(RenderProcessor)
}
|
|
|
|
// gpuErrorSubstrings lists the log fragments treated as evidence of a GPU
// backend failure. Entries are written in lowercase because callers lowercase
// the log line before searching it, which makes the match case-insensitive.
// A single match is enough to trigger full GPU lockout.
var gpuErrorSubstrings = []string{
	// HIP (AMD), e.g. "Illegal address in HIP" or "Illegal address in hip".
	"illegal address in hip",
	// hipError* codes.
	"hiperror",
	"hip error",

	"cuda error",
	"cuerror",

	"optix error",

	"oneapi error",

	"opencl error",
}
|
|
|
|
// checkGPUErrorLine checks a log line for GPU error indicators and triggers runner GPU lockout if found.
|
|
func (p *RenderProcessor) checkGPUErrorLine(ctx *Context, line string) {
|
|
lower := strings.ToLower(line)
|
|
for _, sub := range gpuErrorSubstrings {
|
|
if strings.Contains(lower, sub) {
|
|
if ctx.OnGPUError != nil {
|
|
ctx.OnGPUError()
|
|
}
|
|
ctx.Warn(fmt.Sprintf("GPU error detected in log (%q); GPU disabled for subsequent jobs", sub))
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process executes a render task.
|
|
func (p *RenderProcessor) Process(ctx *Context) error {
|
|
if err := ctx.CheckCancelled(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s",
|
|
ctx.JobID, ctx.Frame, ctx.FrameEnd, ctx.GetOutputFormat()))
|
|
log.Printf("Processing task %d: job %d, frames %d-%d", ctx.TaskID, ctx.JobID, ctx.Frame, ctx.FrameEnd)
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Starting task: job %d, frame %d, format: %s",
|
|
ctx.JobID, ctx.Frame, ctx.GetOutputFormat()))
|
|
log.Printf("Processing task %d: job %d, frame %d", ctx.TaskID, ctx.JobID, ctx.Frame)
|
|
}
|
|
|
|
// Find .blend file
|
|
blendFile, err := workspace.FindFirstBlendFile(ctx.WorkDir)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to find blend file: %w", err)
|
|
}
|
|
|
|
// Get Blender binary
|
|
blenderBinary := "blender"
|
|
if version := ctx.GetBlenderVersion(); version != "" {
|
|
ctx.Info(fmt.Sprintf("Job requires Blender %s", version))
|
|
binaryPath, err := ctx.Blender.GetBinaryPath(version)
|
|
if err != nil {
|
|
ctx.Warn(fmt.Sprintf("Could not get Blender %s, using system blender: %v", version, err))
|
|
} else {
|
|
blenderBinary = binaryPath
|
|
ctx.Info(fmt.Sprintf("Using Blender binary: %s", blenderBinary))
|
|
}
|
|
} else {
|
|
ctx.Info("No Blender version specified, using system blender")
|
|
}
|
|
|
|
// Create output directory
|
|
outputDir := filepath.Join(ctx.WorkDir, "output")
|
|
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
|
return fmt.Errorf("failed to create output directory: %w", err)
|
|
}
|
|
|
|
// Create home directory for Blender inside workspace
|
|
blenderHome := filepath.Join(ctx.WorkDir, "home")
|
|
if err := os.MkdirAll(blenderHome, 0755); err != nil {
|
|
return fmt.Errorf("failed to create Blender home directory: %w", err)
|
|
}
|
|
|
|
// We always render EXR (linear) for VFX accuracy; job output_format is the deliverable (EXR sequence or video).
|
|
renderFormat := "EXR"
|
|
|
|
if ctx.ShouldForceCPU() {
|
|
ctx.Info("GPU lockout active: using CPU rendering only")
|
|
}
|
|
|
|
// Create render script
|
|
if err := p.createRenderScript(ctx, renderFormat); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Render
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Starting Blender render for frames %d-%d...", ctx.Frame, ctx.FrameEnd))
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Starting Blender render for frame %d...", ctx.Frame))
|
|
}
|
|
if err := p.runBlender(ctx, blenderBinary, blendFile, outputDir, renderFormat, blenderHome); err != nil {
|
|
if errors.Is(err, ErrJobCancelled) {
|
|
ctx.Warn("Render stopped because job was cancelled")
|
|
return err
|
|
}
|
|
ctx.Error(fmt.Sprintf("Blender render failed: %v", err))
|
|
return err
|
|
}
|
|
|
|
// Verify output (range or single frame)
|
|
if err := p.verifyOutputRange(ctx, outputDir, renderFormat); err != nil {
|
|
ctx.Error(fmt.Sprintf("Output verification failed: %v", err))
|
|
return err
|
|
}
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Blender render completed for frames %d-%d", ctx.Frame, ctx.FrameEnd))
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Blender render completed for frame %d", ctx.Frame))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *RenderProcessor) createRenderScript(ctx *Context, renderFormat string) error {
|
|
formatFilePath := filepath.Join(ctx.WorkDir, "output_format.txt")
|
|
renderSettingsFilePath := filepath.Join(ctx.WorkDir, "render_settings.json")
|
|
|
|
// Build unhide code conditionally
|
|
unhideCode := ""
|
|
if ctx.ShouldUnhideObjects() {
|
|
unhideCode = scripts.UnhideObjects
|
|
}
|
|
|
|
// Load template and replace placeholders
|
|
scriptContent := scripts.RenderBlenderTemplate
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{UNHIDE_CODE}}", unhideCode)
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{FORMAT_FILE_PATH}}", fmt.Sprintf("%q", formatFilePath))
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{RENDER_SETTINGS_FILE}}", fmt.Sprintf("%q", renderSettingsFilePath))
|
|
|
|
scriptPath := filepath.Join(ctx.WorkDir, "enable_gpu.py")
|
|
if err := os.WriteFile(scriptPath, []byte(scriptContent), 0644); err != nil {
|
|
errMsg := fmt.Sprintf("failed to create GPU enable script: %v", err)
|
|
ctx.Error(errMsg)
|
|
return errors.New(errMsg)
|
|
}
|
|
|
|
// Write EXR to format file so Blender script sets OPEN_EXR (job output_format is for downstream deliverable only).
|
|
ctx.Info("Writing output format 'EXR' to format file")
|
|
if err := os.WriteFile(formatFilePath, []byte("EXR"), 0644); err != nil {
|
|
errMsg := fmt.Sprintf("failed to create format file: %v", err)
|
|
ctx.Error(errMsg)
|
|
return errors.New(errMsg)
|
|
}
|
|
|
|
// Write render settings: merge job metadata with runner force_cpu (GPU lockout)
|
|
var settingsMap map[string]interface{}
|
|
if ctx.Metadata != nil && ctx.Metadata.RenderSettings.EngineSettings != nil {
|
|
raw, err := json.Marshal(ctx.Metadata.RenderSettings)
|
|
if err == nil {
|
|
_ = json.Unmarshal(raw, &settingsMap)
|
|
}
|
|
}
|
|
if settingsMap == nil {
|
|
settingsMap = make(map[string]interface{})
|
|
}
|
|
settingsMap["force_cpu"] = ctx.ShouldForceCPU()
|
|
settingsJSON, err := json.Marshal(settingsMap)
|
|
if err == nil {
|
|
if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
|
|
ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, outputDir, renderFormat, blenderHome string) error {
|
|
scriptPath := filepath.Join(ctx.WorkDir, "enable_gpu.py")
|
|
|
|
args := []string{"-b", blendFile, "--python", scriptPath}
|
|
if ctx.ShouldEnableExecution() {
|
|
args = append(args, "--enable-autoexec")
|
|
}
|
|
|
|
// Output pattern
|
|
outputPattern := filepath.Join(outputDir, fmt.Sprintf("frame_####.%s", strings.ToLower(renderFormat)))
|
|
outputAbsPattern, _ := filepath.Abs(outputPattern)
|
|
args = append(args, "-o", outputAbsPattern)
|
|
|
|
// Render single frame or range: -f N for one frame, -s start -e end -a for range
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
args = append(args, "-s", fmt.Sprintf("%d", ctx.Frame), "-e", fmt.Sprintf("%d", ctx.FrameEnd), "-a")
|
|
} else {
|
|
args = append(args, "-f", fmt.Sprintf("%d", ctx.Frame))
|
|
}
|
|
|
|
cmd := exec.Command(blenderBinary, args...)
|
|
cmd.Dir = ctx.WorkDir
|
|
|
|
// Set up environment with custom HOME directory
|
|
env := os.Environ()
|
|
// Remove existing HOME if present and add our custom one
|
|
newEnv := make([]string, 0, len(env)+1)
|
|
for _, e := range env {
|
|
if !strings.HasPrefix(e, "HOME=") {
|
|
newEnv = append(newEnv, e)
|
|
}
|
|
}
|
|
newEnv = append(newEnv, fmt.Sprintf("HOME=%s", blenderHome))
|
|
cmd.Env = newEnv
|
|
|
|
// Set up pipes
|
|
stdoutPipe, err := cmd.StdoutPipe()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create stdout pipe: %w", err)
|
|
}
|
|
|
|
stderrPipe, err := cmd.StderrPipe()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create stderr pipe: %w", err)
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
return fmt.Errorf("failed to start blender: %w", err)
|
|
}
|
|
stopMonitor := ctx.StartCancellationMonitor(cmd, "render")
|
|
defer stopMonitor()
|
|
|
|
// Track process
|
|
ctx.Processes.Track(ctx.TaskID, cmd)
|
|
defer ctx.Processes.Untrack(ctx.TaskID)
|
|
|
|
// Stream stdout and watch for GPU error lines (lock out all GPU on any backend error)
|
|
stdoutDone := make(chan bool)
|
|
go func() {
|
|
defer close(stdoutDone)
|
|
scanner := bufio.NewScanner(stdoutPipe)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if line != "" {
|
|
p.checkGPUErrorLine(ctx, line)
|
|
shouldFilter, logLevel := blender.FilterLog(line)
|
|
if !shouldFilter {
|
|
ctx.Log(logLevel, line)
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
// Stream stderr and watch for GPU error lines
|
|
stderrDone := make(chan bool)
|
|
go func() {
|
|
defer close(stderrDone)
|
|
scanner := bufio.NewScanner(stderrPipe)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if line != "" {
|
|
p.checkGPUErrorLine(ctx, line)
|
|
shouldFilter, logLevel := blender.FilterLog(line)
|
|
if !shouldFilter {
|
|
if logLevel == types.LogLevelInfo {
|
|
logLevel = types.LogLevelWarn
|
|
}
|
|
ctx.Log(logLevel, line)
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
// Wait for completion
|
|
err = cmd.Wait()
|
|
<-stdoutDone
|
|
<-stderrDone
|
|
|
|
if err != nil {
|
|
if cancelled, checkErr := ctx.IsJobCancelled(); checkErr == nil && cancelled {
|
|
return ErrJobCancelled
|
|
}
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
if exitErr.ExitCode() == 137 {
|
|
return errors.New("Blender was killed due to excessive memory usage (OOM)")
|
|
}
|
|
}
|
|
return fmt.Errorf("blender failed: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// verifyOutputRange checks that output files exist for the task's frame range (first and last at minimum).
|
|
func (p *RenderProcessor) verifyOutputRange(ctx *Context, outputDir, renderFormat string) error {
|
|
entries, err := os.ReadDir(outputDir)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read output directory: %w", err)
|
|
}
|
|
|
|
ctx.Info("Checking output directory for files...")
|
|
ext := strings.ToLower(renderFormat)
|
|
|
|
// Check first and last frame in range (minimum required for range; single frame = one check)
|
|
framesToCheck := []int{ctx.Frame}
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
framesToCheck = append(framesToCheck, ctx.FrameEnd)
|
|
}
|
|
for _, frame := range framesToCheck {
|
|
found := false
|
|
// Try frame_0001.ext, frame_1.ext, 0001.ext
|
|
for _, name := range []string{
|
|
fmt.Sprintf("frame_%04d.%s", frame, ext),
|
|
fmt.Sprintf("frame_%d.%s", frame, ext),
|
|
fmt.Sprintf("%04d.%s", frame, ext),
|
|
} {
|
|
if _, err := os.Stat(filepath.Join(outputDir, name)); err == nil {
|
|
found = true
|
|
ctx.Info(fmt.Sprintf("Found output file: %s", name))
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
// Search entries for this frame number
|
|
frameStr := fmt.Sprintf("%d", frame)
|
|
frameStrPadded := fmt.Sprintf("%04d", frame)
|
|
for _, entry := range entries {
|
|
if entry.IsDir() {
|
|
continue
|
|
}
|
|
fileName := entry.Name()
|
|
if strings.Contains(fileName, "%04d") || strings.Contains(fileName, "%d") {
|
|
continue
|
|
}
|
|
if (strings.Contains(fileName, frameStrPadded) ||
|
|
strings.Contains(fileName, frameStr)) && strings.HasSuffix(strings.ToLower(fileName), ext) {
|
|
found = true
|
|
ctx.Info(fmt.Sprintf("Found output file: %s", fileName))
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if !found {
|
|
fileList := []string{}
|
|
for _, e := range entries {
|
|
if !e.IsDir() {
|
|
fileList = append(fileList, e.Name())
|
|
}
|
|
}
|
|
return fmt.Errorf("output file for frame %d not found; files in output directory: %v", frame, fileList)
|
|
}
|
|
}
|
|
return nil
|
|
}
|