- Added functionality to detect GPU backends (HIP and NVIDIA) during runner registration, enhancing compatibility for Blender versions below 4.x.
- Introduced a new method, DetectAndStoreGPUBackends, to download the latest Blender and run a detection script, storing the results for future rendering decisions.
- Updated rendering logic to force CPU rendering when HIP is detected on systems with Blender < 4.x, ensuring stability and compatibility.
- Enhanced the Context structure to include flags for GPU detection status, improving error handling and rendering decisions based on GPU availability.
286 lines
8.0 KiB
Go
286 lines
8.0 KiB
Go
// Package tasks provides task processing implementations.
|
|
package tasks
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"jiggablend/internal/runner/api"
|
|
"jiggablend/internal/runner/blender"
|
|
"jiggablend/internal/runner/encoding"
|
|
"jiggablend/internal/runner/workspace"
|
|
"jiggablend/pkg/executils"
|
|
"jiggablend/pkg/types"
|
|
"os/exec"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// Processor handles a specific task type.
|
|
type Processor interface {
|
|
Process(ctx *Context) error
|
|
}
|
|
|
|
// Context provides task execution context.
|
|
type Context struct {
|
|
TaskID int64
|
|
JobID int64
|
|
JobName string
|
|
Frame int // frame start (inclusive); kept for backward compat
|
|
FrameEnd int // frame end (inclusive); same as Frame for single-frame
|
|
TaskType string
|
|
WorkDir string
|
|
JobToken string
|
|
Metadata *types.BlendMetadata
|
|
|
|
Manager *api.ManagerClient
|
|
JobConn *api.JobConnection
|
|
Workspace *workspace.Manager
|
|
Blender *blender.Manager
|
|
Encoder *encoding.Selector
|
|
Processes *executils.ProcessTracker
|
|
|
|
// GPULockedOut is set when the runner has detected a GPU error (e.g. HIP) and disables GPU for all jobs.
|
|
GPULockedOut bool
|
|
// HasHIP is true when the runner detected HIP (AMD) devices at startup. Used to force CPU for Blender < 4.x only when HIP is present.
|
|
HasHIP bool
|
|
// GPUDetectionFailed is true when startup GPU backend detection could not run; we force CPU for all versions (could not determine HIP vs NVIDIA).
|
|
GPUDetectionFailed bool
|
|
// OnGPUError is called when a GPU error line is seen in render logs; typically sets runner GPU lockout.
|
|
OnGPUError func()
|
|
}
|
|
|
|
// ErrJobCancelled is the sentinel error reported when the manager-side job
// was cancelled while a task was executing. Callers should compare against it
// with errors.Is.
var ErrJobCancelled = errors.New("job cancelled")
|
|
|
|
// NewContext creates a new task context. frameEnd should be >= frame; if 0 or less than frame, it is treated as single-frame (frameEnd = frame).
|
|
// gpuLockedOut is the runner's current GPU lockout state; hasHIP means the runner has HIP (AMD) devices (force CPU for Blender < 4.x only when true); gpuDetectionFailed means detection failed at startup (force CPU for all versions—could not determine HIP vs NVIDIA); onGPUError is called when a GPU error is detected in logs (may be nil).
|
|
func NewContext(
|
|
taskID, jobID int64,
|
|
jobName string,
|
|
frameStart, frameEnd int,
|
|
taskType string,
|
|
workDir string,
|
|
jobToken string,
|
|
metadata *types.BlendMetadata,
|
|
manager *api.ManagerClient,
|
|
jobConn *api.JobConnection,
|
|
ws *workspace.Manager,
|
|
blenderMgr *blender.Manager,
|
|
encoder *encoding.Selector,
|
|
processes *executils.ProcessTracker,
|
|
gpuLockedOut bool,
|
|
hasHIP bool,
|
|
gpuDetectionFailed bool,
|
|
onGPUError func(),
|
|
) *Context {
|
|
if frameEnd < frameStart {
|
|
frameEnd = frameStart
|
|
}
|
|
return &Context{
|
|
TaskID: taskID,
|
|
JobID: jobID,
|
|
JobName: jobName,
|
|
Frame: frameStart,
|
|
FrameEnd: frameEnd,
|
|
TaskType: taskType,
|
|
WorkDir: workDir,
|
|
JobToken: jobToken,
|
|
Metadata: metadata,
|
|
Manager: manager,
|
|
JobConn: jobConn,
|
|
Workspace: ws,
|
|
Blender: blenderMgr,
|
|
Encoder: encoder,
|
|
Processes: processes,
|
|
GPULockedOut: gpuLockedOut,
|
|
HasHIP: hasHIP,
|
|
GPUDetectionFailed: gpuDetectionFailed,
|
|
OnGPUError: onGPUError,
|
|
}
|
|
}
|
|
|
|
// Log sends a log entry to the manager.
|
|
func (c *Context) Log(level types.LogLevel, message string) {
|
|
if c.JobConn != nil {
|
|
c.JobConn.Log(c.TaskID, level, message)
|
|
}
|
|
}
|
|
|
|
// Info logs an info message.
|
|
func (c *Context) Info(message string) {
|
|
c.Log(types.LogLevelInfo, message)
|
|
}
|
|
|
|
// Warn logs a warning message.
|
|
func (c *Context) Warn(message string) {
|
|
c.Log(types.LogLevelWarn, message)
|
|
}
|
|
|
|
// Error logs an error message.
|
|
func (c *Context) Error(message string) {
|
|
c.Log(types.LogLevelError, message)
|
|
}
|
|
|
|
// Progress sends a progress update.
|
|
func (c *Context) Progress(progress float64) {
|
|
if c.JobConn != nil {
|
|
c.JobConn.Progress(c.TaskID, progress)
|
|
}
|
|
}
|
|
|
|
// OutputUploaded notifies that an output file was uploaded.
|
|
func (c *Context) OutputUploaded(fileName string) {
|
|
if c.JobConn != nil {
|
|
c.JobConn.OutputUploaded(c.TaskID, fileName)
|
|
}
|
|
}
|
|
|
|
// Complete sends task completion.
|
|
func (c *Context) Complete(success bool, errorMsg error) {
|
|
if c.JobConn != nil {
|
|
c.JobConn.Complete(c.TaskID, success, errorMsg)
|
|
}
|
|
}
|
|
|
|
// GetOutputFormat returns the output format from metadata or default.
|
|
func (c *Context) GetOutputFormat() string {
|
|
if c.Metadata != nil && c.Metadata.RenderSettings.OutputFormat != "" {
|
|
return c.Metadata.RenderSettings.OutputFormat
|
|
}
|
|
return "PNG"
|
|
}
|
|
|
|
// GetFrameRate returns the frame rate from metadata or default.
|
|
func (c *Context) GetFrameRate() float64 {
|
|
if c.Metadata != nil && c.Metadata.RenderSettings.FrameRate > 0 {
|
|
return c.Metadata.RenderSettings.FrameRate
|
|
}
|
|
return 24.0
|
|
}
|
|
|
|
// GetBlenderVersion returns the Blender version from metadata.
|
|
func (c *Context) GetBlenderVersion() string {
|
|
if c.Metadata != nil {
|
|
return c.Metadata.BlenderVersion
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// ShouldUnhideObjects returns whether to unhide objects.
|
|
func (c *Context) ShouldUnhideObjects() bool {
|
|
return c.Metadata != nil && c.Metadata.UnhideObjects != nil && *c.Metadata.UnhideObjects
|
|
}
|
|
|
|
// ShouldEnableExecution returns whether to enable auto-execution.
|
|
func (c *Context) ShouldEnableExecution() bool {
|
|
return c.Metadata != nil && c.Metadata.EnableExecution != nil && *c.Metadata.EnableExecution
|
|
}
|
|
|
|
// ShouldForceCPU returns true if GPU should be disabled and CPU rendering forced
|
|
// (runner GPU lockout, GPU detection failed at startup for any version, metadata force_cpu,
|
|
// or Blender < 4.x when the runner has HIP).
|
|
func (c *Context) ShouldForceCPU() bool {
|
|
if c.GPULockedOut {
|
|
return true
|
|
}
|
|
// Detection failed at startup: we could not determine HIP vs NVIDIA, so force CPU for all versions.
|
|
if c.GPUDetectionFailed {
|
|
return true
|
|
}
|
|
v := c.GetBlenderVersion()
|
|
major := parseBlenderMajor(v)
|
|
isPre4 := v != "" && major >= 0 && major < 4
|
|
// Blender < 4.x: force CPU when runner has HIP (no official HIP support).
|
|
if isPre4 && c.HasHIP {
|
|
return true
|
|
}
|
|
if c.Metadata != nil && c.Metadata.RenderSettings.EngineSettings != nil {
|
|
if v, ok := c.Metadata.RenderSettings.EngineSettings["force_cpu"]; ok {
|
|
if b, ok := v.(bool); ok && b {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// parseBlenderMajor returns the major version number from a string like
// "4.2.3" or "3.6". Surrounding whitespace is ignored and a version without
// any dot (e.g. "4") is accepted.
//
// It returns -1 if the version cannot be parsed. The major component must
// consist solely of decimal digits, so signed values such as "-3.6" are
// rejected instead of being returned as a negative major.
func parseBlenderMajor(version string) int {
	majorPart := strings.TrimSpace(version)
	if dot := strings.IndexByte(majorPart, '.'); dot >= 0 {
		majorPart = majorPart[:dot]
	}
	if majorPart == "" {
		return -1
	}

	// strconv.Atoi accepts a leading sign; a version number never has one,
	// so require plain digits before converting.
	for i := 0; i < len(majorPart); i++ {
		if majorPart[i] < '0' || majorPart[i] > '9' {
			return -1
		}
	}

	major, err := strconv.Atoi(majorPart)
	if err != nil {
		// Only reachable on overflow once the digit check has passed.
		return -1
	}
	return major
}
|
|
|
|
// IsJobCancelled checks whether the manager marked this job as cancelled.
|
|
func (c *Context) IsJobCancelled() (bool, error) {
|
|
if c.Manager == nil {
|
|
return false, nil
|
|
}
|
|
status, err := c.Manager.GetJobStatus(c.JobID)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
return status == types.JobStatusCancelled, nil
|
|
}
|
|
|
|
// CheckCancelled returns ErrJobCancelled if the job was cancelled.
|
|
func (c *Context) CheckCancelled() error {
|
|
cancelled, err := c.IsJobCancelled()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to check job status: %w", err)
|
|
}
|
|
if cancelled {
|
|
return ErrJobCancelled
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// StartCancellationMonitor polls manager status and kills cmd if job is cancelled.
|
|
// Caller must invoke returned stop function when cmd exits.
|
|
func (c *Context) StartCancellationMonitor(cmd *exec.Cmd, taskLabel string) func() {
|
|
stop := make(chan struct{})
|
|
var once sync.Once
|
|
|
|
go func() {
|
|
ticker := time.NewTicker(2 * time.Second)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-stop:
|
|
return
|
|
case <-ticker.C:
|
|
cancelled, err := c.IsJobCancelled()
|
|
if err != nil {
|
|
c.Warn(fmt.Sprintf("Could not check cancellation for %s task: %v", taskLabel, err))
|
|
continue
|
|
}
|
|
if !cancelled {
|
|
continue
|
|
}
|
|
c.Warn(fmt.Sprintf("Job %d was cancelled, stopping %s task early", c.JobID, taskLabel))
|
|
if cmd != nil && cmd.Process != nil {
|
|
_ = cmd.Process.Kill()
|
|
}
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
return func() {
|
|
once.Do(func() {
|
|
close(stop)
|
|
})
|
|
}
|
|
}
|