- Added functionality to detect GPU backends (HIP and NVIDIA) during runner registration, enhancing compatibility for Blender versions below 4.x. - Introduced a new method, DetectAndStoreGPUBackends, to download the latest Blender and run a detection script, storing the results for future rendering decisions. - Updated rendering logic to force CPU rendering when HIP is detected on systems with Blender < 4.x, ensuring stability and compatibility. - Enhanced the Context structure to include flags for GPU detection status, improving error handling and rendering decisions based on GPU availability.
380 lines
12 KiB
Go
380 lines
12 KiB
Go
package tasks
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"jiggablend/internal/runner/blender"
|
|
"jiggablend/internal/runner/workspace"
|
|
"jiggablend/pkg/scripts"
|
|
"jiggablend/pkg/types"
|
|
)
|
|
|
|
// RenderProcessor handles render tasks.
//
// The type has no fields; every method receives the task state it needs
// through the *Context argument.
type RenderProcessor struct{}

// NewRenderProcessor creates a new render processor.
func NewRenderProcessor() *RenderProcessor {
	return new(RenderProcessor)
}
|
|
|
|
// gpuErrorSubstrings lists the log-line fragments that are treated as a GPU
// backend error. Any match triggers full GPU lockout.
//
// Entries are written in lowercase because checkGPUErrorLine lowercases each
// log line before searching, which makes the match case-insensitive. Order
// matters: the first entry found in a line is the one reported.
var gpuErrorSubstrings = []string{
	// HIP (AMD), e.g. "Illegal address in HIP" or "Illegal address in hip".
	"illegal address in hip",
	// hipError* codes.
	"hiperror",
	"hip error",

	// CUDA / OptiX.
	"cuda error",
	"cuerror",
	"optix error",

	// oneAPI / OpenCL.
	"oneapi error",
	"opencl error",
}
|
|
|
|
// checkGPUErrorLine checks a log line for GPU error indicators and triggers runner GPU lockout if found.
|
|
func (p *RenderProcessor) checkGPUErrorLine(ctx *Context, line string) {
|
|
lower := strings.ToLower(line)
|
|
for _, sub := range gpuErrorSubstrings {
|
|
if strings.Contains(lower, sub) {
|
|
if ctx.OnGPUError != nil {
|
|
ctx.OnGPUError()
|
|
}
|
|
ctx.Warn(fmt.Sprintf("GPU error detected in log (%q); GPU disabled for subsequent jobs", sub))
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process executes a render task.
|
|
func (p *RenderProcessor) Process(ctx *Context) error {
|
|
if err := ctx.CheckCancelled(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s",
|
|
ctx.JobID, ctx.Frame, ctx.FrameEnd, ctx.GetOutputFormat()))
|
|
log.Printf("Processing task %d: job %d, frames %d-%d", ctx.TaskID, ctx.JobID, ctx.Frame, ctx.FrameEnd)
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Starting task: job %d, frame %d, format: %s",
|
|
ctx.JobID, ctx.Frame, ctx.GetOutputFormat()))
|
|
log.Printf("Processing task %d: job %d, frame %d", ctx.TaskID, ctx.JobID, ctx.Frame)
|
|
}
|
|
|
|
// Find .blend file
|
|
blendFile, err := workspace.FindFirstBlendFile(ctx.WorkDir)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to find blend file: %w", err)
|
|
}
|
|
|
|
// Get Blender binary
|
|
blenderBinary := "blender"
|
|
if version := ctx.GetBlenderVersion(); version != "" {
|
|
ctx.Info(fmt.Sprintf("Job requires Blender %s", version))
|
|
binaryPath, err := ctx.Blender.GetBinaryPath(version)
|
|
if err != nil {
|
|
ctx.Warn(fmt.Sprintf("Could not get Blender %s, using system blender: %v", version, err))
|
|
} else {
|
|
blenderBinary = binaryPath
|
|
ctx.Info(fmt.Sprintf("Using Blender binary: %s", blenderBinary))
|
|
}
|
|
} else {
|
|
ctx.Info("No Blender version specified, using system blender")
|
|
}
|
|
|
|
// Create output directory
|
|
outputDir := filepath.Join(ctx.WorkDir, "output")
|
|
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
|
return fmt.Errorf("failed to create output directory: %w", err)
|
|
}
|
|
|
|
// Create home directory for Blender inside workspace
|
|
blenderHome := filepath.Join(ctx.WorkDir, "home")
|
|
if err := os.MkdirAll(blenderHome, 0755); err != nil {
|
|
return fmt.Errorf("failed to create Blender home directory: %w", err)
|
|
}
|
|
|
|
// We always render EXR (linear) for VFX accuracy; job output_format is the deliverable (EXR sequence or video).
|
|
renderFormat := "EXR"
|
|
|
|
if ctx.ShouldForceCPU() {
|
|
v := ctx.GetBlenderVersion()
|
|
major := parseBlenderMajor(v)
|
|
isPre4 := v != "" && major >= 0 && major < 4
|
|
if ctx.GPUDetectionFailed {
|
|
ctx.Info("GPU backend detection failed at startup—we could not determine whether this machine has HIP (AMD) or NVIDIA GPUs, so rendering will use CPU to avoid compatibility issues")
|
|
} else if isPre4 && ctx.HasHIP {
|
|
ctx.Info("Blender < 4.x has no official HIP support: using CPU rendering only")
|
|
} else {
|
|
ctx.Info("GPU lockout active: using CPU rendering only")
|
|
}
|
|
}
|
|
|
|
// Create render script
|
|
if err := p.createRenderScript(ctx, renderFormat); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Render
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Starting Blender render for frames %d-%d...", ctx.Frame, ctx.FrameEnd))
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Starting Blender render for frame %d...", ctx.Frame))
|
|
}
|
|
if err := p.runBlender(ctx, blenderBinary, blendFile, outputDir, renderFormat, blenderHome); err != nil {
|
|
if errors.Is(err, ErrJobCancelled) {
|
|
ctx.Warn("Render stopped because job was cancelled")
|
|
return err
|
|
}
|
|
ctx.Error(fmt.Sprintf("Blender render failed: %v", err))
|
|
return err
|
|
}
|
|
|
|
// Verify output (range or single frame)
|
|
if err := p.verifyOutputRange(ctx, outputDir, renderFormat); err != nil {
|
|
ctx.Error(fmt.Sprintf("Output verification failed: %v", err))
|
|
return err
|
|
}
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
ctx.Info(fmt.Sprintf("Blender render completed for frames %d-%d", ctx.Frame, ctx.FrameEnd))
|
|
} else {
|
|
ctx.Info(fmt.Sprintf("Blender render completed for frame %d", ctx.Frame))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *RenderProcessor) createRenderScript(ctx *Context, renderFormat string) error {
|
|
formatFilePath := filepath.Join(ctx.WorkDir, "output_format.txt")
|
|
renderSettingsFilePath := filepath.Join(ctx.WorkDir, "render_settings.json")
|
|
|
|
// Build unhide code conditionally
|
|
unhideCode := ""
|
|
if ctx.ShouldUnhideObjects() {
|
|
unhideCode = scripts.UnhideObjects
|
|
}
|
|
|
|
// Load template and replace placeholders
|
|
scriptContent := scripts.RenderBlenderTemplate
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{UNHIDE_CODE}}", unhideCode)
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{FORMAT_FILE_PATH}}", fmt.Sprintf("%q", formatFilePath))
|
|
scriptContent = strings.ReplaceAll(scriptContent, "{{RENDER_SETTINGS_FILE}}", fmt.Sprintf("%q", renderSettingsFilePath))
|
|
|
|
scriptPath := filepath.Join(ctx.WorkDir, "enable_gpu.py")
|
|
if err := os.WriteFile(scriptPath, []byte(scriptContent), 0644); err != nil {
|
|
errMsg := fmt.Sprintf("failed to create GPU enable script: %v", err)
|
|
ctx.Error(errMsg)
|
|
return errors.New(errMsg)
|
|
}
|
|
|
|
// Write EXR to format file so Blender script sets OPEN_EXR (job output_format is for downstream deliverable only).
|
|
ctx.Info("Writing output format 'EXR' to format file")
|
|
if err := os.WriteFile(formatFilePath, []byte("EXR"), 0644); err != nil {
|
|
errMsg := fmt.Sprintf("failed to create format file: %v", err)
|
|
ctx.Error(errMsg)
|
|
return errors.New(errMsg)
|
|
}
|
|
|
|
// Write render settings: merge job metadata with runner force_cpu (GPU lockout)
|
|
var settingsMap map[string]interface{}
|
|
if ctx.Metadata != nil && ctx.Metadata.RenderSettings.EngineSettings != nil {
|
|
raw, err := json.Marshal(ctx.Metadata.RenderSettings)
|
|
if err == nil {
|
|
_ = json.Unmarshal(raw, &settingsMap)
|
|
}
|
|
}
|
|
if settingsMap == nil {
|
|
settingsMap = make(map[string]interface{})
|
|
}
|
|
settingsMap["force_cpu"] = ctx.ShouldForceCPU()
|
|
settingsJSON, err := json.Marshal(settingsMap)
|
|
if err == nil {
|
|
if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
|
|
ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, outputDir, renderFormat, blenderHome string) error {
|
|
scriptPath := filepath.Join(ctx.WorkDir, "enable_gpu.py")
|
|
|
|
args := []string{"-b", blendFile, "--python", scriptPath}
|
|
if ctx.ShouldEnableExecution() {
|
|
args = append(args, "--enable-autoexec")
|
|
}
|
|
|
|
// Output pattern
|
|
outputPattern := filepath.Join(outputDir, fmt.Sprintf("frame_####.%s", strings.ToLower(renderFormat)))
|
|
outputAbsPattern, _ := filepath.Abs(outputPattern)
|
|
args = append(args, "-o", outputAbsPattern)
|
|
|
|
// Render single frame or range: -f N for one frame, -s start -e end -a for range
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
args = append(args, "-s", fmt.Sprintf("%d", ctx.Frame), "-e", fmt.Sprintf("%d", ctx.FrameEnd), "-a")
|
|
} else {
|
|
args = append(args, "-f", fmt.Sprintf("%d", ctx.Frame))
|
|
}
|
|
|
|
cmd := exec.Command(blenderBinary, args...)
|
|
cmd.Dir = ctx.WorkDir
|
|
|
|
// Set up environment: LD_LIBRARY_PATH for tarball Blender, then custom HOME
|
|
env := os.Environ()
|
|
env = blender.TarballEnv(blenderBinary, env)
|
|
newEnv := make([]string, 0, len(env)+1)
|
|
for _, e := range env {
|
|
if !strings.HasPrefix(e, "HOME=") {
|
|
newEnv = append(newEnv, e)
|
|
}
|
|
}
|
|
newEnv = append(newEnv, fmt.Sprintf("HOME=%s", blenderHome))
|
|
cmd.Env = newEnv
|
|
|
|
// Set up pipes
|
|
stdoutPipe, err := cmd.StdoutPipe()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create stdout pipe: %w", err)
|
|
}
|
|
|
|
stderrPipe, err := cmd.StderrPipe()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create stderr pipe: %w", err)
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
return fmt.Errorf("failed to start blender: %w", err)
|
|
}
|
|
stopMonitor := ctx.StartCancellationMonitor(cmd, "render")
|
|
defer stopMonitor()
|
|
|
|
// Track process
|
|
ctx.Processes.Track(ctx.TaskID, cmd)
|
|
defer ctx.Processes.Untrack(ctx.TaskID)
|
|
|
|
// Stream stdout and watch for GPU error lines (lock out all GPU on any backend error)
|
|
stdoutDone := make(chan bool)
|
|
go func() {
|
|
defer close(stdoutDone)
|
|
scanner := bufio.NewScanner(stdoutPipe)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if line != "" {
|
|
p.checkGPUErrorLine(ctx, line)
|
|
shouldFilter, logLevel := blender.FilterLog(line)
|
|
if !shouldFilter {
|
|
ctx.Log(logLevel, line)
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
// Stream stderr and watch for GPU error lines
|
|
stderrDone := make(chan bool)
|
|
go func() {
|
|
defer close(stderrDone)
|
|
scanner := bufio.NewScanner(stderrPipe)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if line != "" {
|
|
p.checkGPUErrorLine(ctx, line)
|
|
shouldFilter, logLevel := blender.FilterLog(line)
|
|
if !shouldFilter {
|
|
if logLevel == types.LogLevelInfo {
|
|
logLevel = types.LogLevelWarn
|
|
}
|
|
ctx.Log(logLevel, line)
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
// Wait for completion
|
|
err = cmd.Wait()
|
|
<-stdoutDone
|
|
<-stderrDone
|
|
|
|
if err != nil {
|
|
if cancelled, checkErr := ctx.IsJobCancelled(); checkErr == nil && cancelled {
|
|
return ErrJobCancelled
|
|
}
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
if exitErr.ExitCode() == 137 {
|
|
return errors.New("Blender was killed due to excessive memory usage (OOM)")
|
|
}
|
|
}
|
|
return fmt.Errorf("blender failed: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// verifyOutputRange checks that output files exist for the task's frame range (first and last at minimum).
|
|
func (p *RenderProcessor) verifyOutputRange(ctx *Context, outputDir, renderFormat string) error {
|
|
entries, err := os.ReadDir(outputDir)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read output directory: %w", err)
|
|
}
|
|
|
|
ctx.Info("Checking output directory for files...")
|
|
ext := strings.ToLower(renderFormat)
|
|
|
|
// Check first and last frame in range (minimum required for range; single frame = one check)
|
|
framesToCheck := []int{ctx.Frame}
|
|
if ctx.FrameEnd > ctx.Frame {
|
|
framesToCheck = append(framesToCheck, ctx.FrameEnd)
|
|
}
|
|
for _, frame := range framesToCheck {
|
|
found := false
|
|
// Try frame_0001.ext, frame_1.ext, 0001.ext
|
|
for _, name := range []string{
|
|
fmt.Sprintf("frame_%04d.%s", frame, ext),
|
|
fmt.Sprintf("frame_%d.%s", frame, ext),
|
|
fmt.Sprintf("%04d.%s", frame, ext),
|
|
} {
|
|
if _, err := os.Stat(filepath.Join(outputDir, name)); err == nil {
|
|
found = true
|
|
ctx.Info(fmt.Sprintf("Found output file: %s", name))
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
// Search entries for this frame number
|
|
frameStr := fmt.Sprintf("%d", frame)
|
|
frameStrPadded := fmt.Sprintf("%04d", frame)
|
|
for _, entry := range entries {
|
|
if entry.IsDir() {
|
|
continue
|
|
}
|
|
fileName := entry.Name()
|
|
if strings.Contains(fileName, "%04d") || strings.Contains(fileName, "%d") {
|
|
continue
|
|
}
|
|
if (strings.Contains(fileName, frameStrPadded) ||
|
|
strings.Contains(fileName, frameStr)) && strings.HasSuffix(strings.ToLower(fileName), ext) {
|
|
found = true
|
|
ctx.Info(fmt.Sprintf("Found output file: %s", fileName))
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if !found {
|
|
fileList := []string{}
|
|
for _, e := range entries {
|
|
if !e.IsDir() {
|
|
fileList = append(fileList, e.Name())
|
|
}
|
|
}
|
|
return fmt.Errorf("output file for frame %d not found; files in output directory: %v", frame, fileList)
|
|
}
|
|
}
|
|
return nil
|
|
}
|