- Removed the `--disable-hiprt` flag from the runner command, simplifying the rendering options for users. - Updated the `jiggablend-runner` script and README to reflect the removal of the HIPRT control flag, enhancing clarity in usage instructions. - Enhanced the installation script to provide clearer examples for running the jiggablend manager and runner, improving user experience during setup. - Implemented a more robust GPU backend detection mechanism, allowing for better compatibility with various hardware configurations.
383 lines
12 KiB
Go
383 lines
12 KiB
Go
package tasks
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"jiggablend/internal/runner/blender"
|
|
"jiggablend/internal/runner/workspace"
|
|
"jiggablend/pkg/scripts"
|
|
"jiggablend/pkg/types"
|
|
)
|
|
|
|
// RenderProcessor executes render tasks. It is an empty struct: it holds no
// state of its own between tasks.
type RenderProcessor struct{}

// NewRenderProcessor returns a ready-to-use RenderProcessor.
func NewRenderProcessor() *RenderProcessor {
	return new(RenderProcessor)
}
|
|
|
|
// gpuErrorSubstrings holds the fragments that mark a log line as a GPU backend
// error. Entries are written in lower case because callers lower-case the log
// line before searching it. A single hit is enough to trigger a full GPU
// lockout. Order matters: the first entry that matches is the one reported.
var gpuErrorSubstrings = []string{
	// HIP (AMD)
	"illegal address in hip", // e.g. "Illegal address in HIP" or "Illegal address in hip"
	"hiperror",               // hipError* codes
	"hip error",

	// CUDA / OptiX
	"cuda error",
	"cuerror",
	"optix error",

	// oneAPI / OpenCL
	"oneapi error",
	"opencl error",
}
|
|
|
|
// checkGPUErrorLine checks a log line for GPU error indicators and triggers runner GPU lockout if found.
|
|
func (p *RenderProcessor) checkGPUErrorLine(ctx *Context, line string) {
|
|
lower := strings.ToLower(line)
|
|
for _, sub := range gpuErrorSubstrings {
|
|
if strings.Contains(lower, sub) {
|
|
if ctx.OnGPUError != nil {
|
|
ctx.OnGPUError()
|
|
}
|
|
ctx.Warn(fmt.Sprintf("GPU error detected in log (%q); GPU disabled for subsequent jobs", sub))
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process executes a render task.
|
|
func (p *RenderProcessor) Process(ctx *Context) error {
|
|
if err := ctx.CheckCancelled(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s",
|
|
ctx.JobID, ctx.Frame, ctx.FrameEnd, ctx.GetOutputFormat()))
|
|
log.Printf("Processing task %d: job %d, frames %d-%d", ctx.TaskID, ctx.JobID, ctx.Frame, ctx.FrameEnd)
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Starting task: job %d, frame %d, format: %s",
|
|
ctx.JobID, ctx.Frame, ctx.GetOutputFormat()))
|
|
log.Printf("Processing task %d: job %d, frame %d", ctx.TaskID, ctx.JobID, ctx.Frame)
|
|
}
|
|
|
|
// Find .blend file
|
|
blendFile, err := workspace.FindFirstBlendFile(ctx.WorkDir)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to find blend file: %w", err)
|
|
}
|
|
|
|
// Get Blender binary
|
|
blenderBinary := "blender"
|
|
if version := ctx.GetBlenderVersion(); version != "" {
|
|
ctx.Info(fmt.Sprintf("Job requires Blender %s", version))
|
|
binaryPath, err := ctx.Blender.GetBinaryPath(version)
|
|
if err != nil {
|
|
ctx.Warn(fmt.Sprintf("Could not get Blender %s, using system blender: %v", version, err))
|
|
} else {
|
|
blenderBinary = binaryPath
|
|
ctx.Info(fmt.Sprintf("Using Blender binary: %s", blenderBinary))
|
|
}
|
|
} else {
|
|
ctx.Info("No Blender version specified, using system blender")
|
|
}
|
|
|
|
// Create output directory
|
|
outputDir := filepath.Join(ctx.WorkDir, "output")
|
|
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
|
return fmt.Errorf("failed to create output directory: %w", err)
|
|
}
|
|
|
|
// Create home directory for Blender inside workspace
|
|
blenderHome := filepath.Join(ctx.WorkDir, "home")
|
|
if err := os.MkdirAll(blenderHome, 0755); err != nil {
|
|
return fmt.Errorf("failed to create Blender home directory: %w", err)
|
|
}
|
|
|
|
// We always render EXR (linear) for VFX accuracy; job output_format is the deliverable (EXR sequence or video).
|
|
renderFormat := "EXR"
|
|
|
|
if ctx.ShouldForceCPU() {
|
|
if ctx.ForceCPURendering {
|
|
ctx.Info("Runner compatibility flag is enabled: forcing CPU rendering for this job")
|
|
} else if ctx.GPUDetectionFailed {
|
|
ctx.Info("GPU backend detection failed at startup—we could not determine available GPU backends, so rendering will use CPU to avoid compatibility issues")
|
|
} else {
|
|
ctx.Info("GPU lockout active: using CPU rendering only")
|
|
}
|
|
}
|
|
|
|
// Create render script
|
|
if err := p.createRenderScript(ctx, renderFormat); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Render
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Starting Blender render for frames %d-%d...", ctx.Frame, ctx.FrameEnd))
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Starting Blender render for frame %d...", ctx.Frame))
|
|
}
|
|
if err := p.runBlender(ctx, blenderBinary, blendFile, outputDir, renderFormat, blenderHome); err != nil {
|
|
if errors.Is(err, ErrJobCancelled) {
|
|
ctx.Warn("Render stopped because job was cancelled")
|
|
return err
|
|
}
|
|
ctx.Error(fmt.Sprintf("Blender render failed: %v", err))
|
|
return err
|
|
}
|
|
|
|
// Verify output (range or single frame)
|
|
if err := p.verifyOutputRange(ctx, outputDir, renderFormat); err != nil {
|
|
ctx.Error(fmt.Sprintf("Output verification failed: %v", err))
|
|
return err
|
|
}
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Blender render completed for frames %d-%d", ctx.Frame, ctx.FrameEnd))
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Blender render completed for frame %d", ctx.Frame))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *RenderProcessor) createRenderScript(ctx *Context, renderFormat string) error {
|
|
formatFilePath := filepath.Join(ctx.WorkDir, "output_format.txt")
|
|
renderSettingsFilePath := filepath.Join(ctx.WorkDir, "render_settings.json")
|
|
|
|
// Build unhide code conditionally
|
|
unhideCode := ""
|
|
if ctx.ShouldUnhideObjects() {
|
|
unhideCode = scripts.UnhideObjects
|
|
}
|
|
|
|
// Load template and replace placeholders
|
|
scriptContent := scripts.RenderBlenderTemplate
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{UNHIDE_CODE}}", unhideCode)
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{FORMAT_FILE_PATH}}", fmt.Sprintf("%q", formatFilePath))
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{RENDER_SETTINGS_FILE}}", fmt.Sprintf("%q", renderSettingsFilePath))
|
|
|
|
scriptPath := filepath.Join(ctx.WorkDir, "enable_gpu.py")
|
|
if err := os.WriteFile(scriptPath, []byte(scriptContent), 0644); err != nil {
|
|
errMsg := fmt.Sprintf("failed to create GPU enable script: %v", err)
|
|
ctx.Error(errMsg)
|
|
return errors.New(errMsg)
|
|
}
|
|
|
|
// Write EXR to format file so Blender script sets OPEN_EXR (job output_format is for downstream deliverable only).
|
|
ctx.Info("Writing output format 'EXR' to format file")
|
|
if err := os.WriteFile(formatFilePath, []byte("EXR"), 0644); err != nil {
|
|
errMsg := fmt.Sprintf("failed to create format file: %v", err)
|
|
ctx.Error(errMsg)
|
|
return errors.New(errMsg)
|
|
}
|
|
|
|
// Write render settings: merge job metadata with runner force_cpu (GPU lockout)
|
|
var settingsMap map[string]interface{}
|
|
if ctx.Metadata != nil && ctx.Metadata.RenderSettings.EngineSettings != nil {
|
|
raw, err := json.Marshal(ctx.Metadata.RenderSettings)
|
|
if err == nil {
|
|
_ = json.Unmarshal(raw, &settingsMap)
|
|
}
|
|
}
|
|
if settingsMap == nil {
|
|
settingsMap = make(map[string]interface{})
|
|
}
|
|
settingsMap["force_cpu"] = ctx.ShouldForceCPU()
|
|
settingsJSON, err := json.Marshal(settingsMap)
|
|
if err == nil {
|
|
if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
|
|
ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, outputDir, renderFormat, blenderHome string) error {
|
|
scriptPath := filepath.Join(ctx.WorkDir, "enable_gpu.py")
|
|
|
|
args := []string{"-b", blendFile, "--python", scriptPath}
|
|
if ctx.ShouldEnableExecution() {
|
|
args = append(args, "--enable-autoexec")
|
|
}
|
|
|
|
// Output pattern
|
|
outputPattern := filepath.Join(outputDir, fmt.Sprintf("frame_####.%s", strings.ToLower(renderFormat)))
|
|
outputAbsPattern, _ := filepath.Abs(outputPattern)
|
|
args = append(args, "-o", outputAbsPattern)
|
|
|
|
// Render single frame or range: -f N for one frame, -s start -e end -a for range
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
args = append(args, "-s", fmt.Sprintf("%d", ctx.Frame), "-e", fmt.Sprintf("%d", ctx.FrameEnd), "-a")
|
|
} else {
|
|
args = append(args, "-f", fmt.Sprintf("%d", ctx.Frame))
|
|
}
|
|
|
|
cmd := exec.Command(blenderBinary, args...)
|
|
cmd.Dir = ctx.WorkDir
|
|
|
|
// Set up environment: LD_LIBRARY_PATH for tarball Blender, then custom HOME
|
|
env := os.Environ()
|
|
env = blender.TarballEnv(blenderBinary, env)
|
|
newEnv := make([]string, 0, len(env)+1)
|
|
for _, e := range env {
|
|
if !strings.HasPrefix(e, "HOME=") {
|
|
newEnv = append(newEnv, e)
|
|
}
|
|
}
|
|
newEnv = append(newEnv, fmt.Sprintf("HOME=%s", blenderHome))
|
|
cmd.Env = newEnv
|
|
|
|
// Set up pipes
|
|
stdoutPipe, err := cmd.StdoutPipe()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create stdout pipe: %w", err)
|
|
}
|
|
|
|
stderrPipe, err := cmd.StderrPipe()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create stderr pipe: %w", err)
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
return fmt.Errorf("failed to start blender: %w", err)
|
|
}
|
|
stopMonitor := ctx.StartCancellationMonitor(cmd, "render")
|
|
defer stopMonitor()
|
|
|
|
// Track process
|
|
ctx.Processes.Track(ctx.TaskID, cmd)
|
|
defer ctx.Processes.Untrack(ctx.TaskID)
|
|
|
|
// Stream stdout and watch for GPU error lines (lock out all GPU on any backend error)
|
|
stdoutDone := make(chan bool)
|
|
go func() {
|
|
defer close(stdoutDone)
|
|
scanner := bufio.NewScanner(stdoutPipe)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if line != "" {
|
|
p.checkGPUErrorLine(ctx, line)
|
|
shouldFilter, logLevel := blender.FilterLog(line)
|
|
if !shouldFilter {
|
|
ctx.Log(logLevel, line)
|
|
}
|
|
}
|
|
}
|
|
if err := scanner.Err(); err != nil {
|
|
log.Printf("Error reading stdout: %v", err)
|
|
}
|
|
}()
|
|
|
|
// Stream stderr and watch for GPU error lines
|
|
stderrDone := make(chan bool)
|
|
go func() {
|
|
defer close(stderrDone)
|
|
scanner := bufio.NewScanner(stderrPipe)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if line != "" {
|
|
p.checkGPUErrorLine(ctx, line)
|
|
shouldFilter, logLevel := blender.FilterLog(line)
|
|
if !shouldFilter {
|
|
if logLevel == types.LogLevelInfo {
|
|
logLevel = types.LogLevelWarn
|
|
}
|
|
ctx.Log(logLevel, line)
|
|
}
|
|
}
|
|
}
|
|
if err := scanner.Err(); err != nil {
|
|
log.Printf("Error reading stderr: %v", err)
|
|
}
|
|
}()
|
|
|
|
// Wait for completion
|
|
err = cmd.Wait()
|
|
<-stdoutDone
|
|
<-stderrDone
|
|
|
|
if err != nil {
|
|
if cancelled, checkErr := ctx.IsJobCancelled(); checkErr == nil && cancelled {
|
|
return ErrJobCancelled
|
|
}
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
if exitErr.ExitCode() == 137 {
|
|
return errors.New("Blender was killed due to excessive memory usage (OOM)")
|
|
}
|
|
}
|
|
return fmt.Errorf("blender failed: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// verifyOutputRange checks that output files exist for the task's frame range (first and last at minimum).
|
|
func (p *RenderProcessor) verifyOutputRange(ctx *Context, outputDir, renderFormat string) error {
|
|
entries, err := os.ReadDir(outputDir)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read output directory: %w", err)
|
|
}
|
|
|
|
ctx.Info("Checking output directory for files...")
|
|
ext := strings.ToLower(renderFormat)
|
|
|
|
// Check first and last frame in range (minimum required for range; single frame = one check)
|
|
framesToCheck := []int{ctx.Frame}
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
framesToCheck = append(framesToCheck, ctx.FrameEnd)
|
|
}
|
|
for _, frame := range framesToCheck {
|
|
found := false
|
|
// Try frame_0001.ext, frame_1.ext, 0001.ext
|
|
for _, name := range []string{
|
|
fmt.Sprintf("frame_%04d.%s", frame, ext),
|
|
fmt.Sprintf("frame_%d.%s", frame, ext),
|
|
fmt.Sprintf("%04d.%s", frame, ext),
|
|
} {
|
|
if _, err := os.Stat(filepath.Join(outputDir, name)); err == nil {
|
|
found = true
|
|
ctx.Info(fmt.Sprintf("Found output file: %s", name))
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
// Search entries for this frame number
|
|
frameStr := fmt.Sprintf("%d", frame)
|
|
frameStrPadded := fmt.Sprintf("%04d", frame)
|
|
for _, entry := range entries {
|
|
if entry.IsDir() {
|
|
continue
|
|
}
|
|
fileName := entry.Name()
|
|
if strings.Contains(fileName, "%04d") || strings.Contains(fileName, "%d") {
|
|
continue
|
|
}
|
|
if (strings.Contains(fileName, frameStrPadded) ||
|
|
strings.Contains(fileName, frameStr)) && strings.HasSuffix(strings.ToLower(fileName), ext) {
|
|
found = true
|
|
ctx.Info(fmt.Sprintf("Found output file: %s", fileName))
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if !found {
|
|
fileList := []string{}
|
|
for _, e := range entries {
|
|
if !e.IsDir() {
|
|
fileList = append(fileList, e.Name())
|
|
}
|
|
}
|
|
return fmt.Errorf("output file for frame %d not found; files in output directory: %v", frame, fileList)
|
|
}
|
|
}
|
|
return nil
|
|
}
|