Compare commits
6 Commits
0.0.5
...
28cb50492c
| Author | SHA1 | Date | |
|---|---|---|---|
| 28cb50492c | |||
| dc525fbaa4 | |||
| 5303f01f7c | |||
| bc39fd438b | |||
| 4c7f168bce | |||
| 6833bb4013 |
@@ -154,6 +154,9 @@ bin/jiggablend runner --api-key <your-api-key>
|
||||
# With custom options
|
||||
bin/jiggablend runner --manager http://localhost:8080 --name my-runner --api-key <key> --log-file runner.log
|
||||
|
||||
# Hardware compatibility flags (force CPU + disable HIPRT)
|
||||
bin/jiggablend runner --api-key <key> --force-cpu-rendering --disable-hiprt
|
||||
|
||||
# Using environment variables
|
||||
JIGGABLEND_MANAGER=http://localhost:8080 JIGGABLEND_API_KEY=<key> bin/jiggablend runner
|
||||
```
|
||||
|
||||
@@ -37,6 +37,8 @@ func init() {
|
||||
runnerCmd.Flags().String("log-level", "info", "Log level (debug, info, warn, error)")
|
||||
runnerCmd.Flags().BoolP("verbose", "v", false, "Enable verbose logging (same as --log-level=debug)")
|
||||
runnerCmd.Flags().Duration("poll-interval", 5*time.Second, "Job polling interval")
|
||||
runnerCmd.Flags().Bool("force-cpu-rendering", false, "Force CPU rendering for all jobs (disables GPU rendering)")
|
||||
runnerCmd.Flags().Bool("disable-hiprt", false, "Disable HIPRT acceleration in Blender Cycles")
|
||||
|
||||
// Bind flags to viper with JIGGABLEND_ prefix
|
||||
runnerViper.SetEnvPrefix("JIGGABLEND")
|
||||
@@ -51,6 +53,8 @@ func init() {
|
||||
runnerViper.BindPFlag("log_level", runnerCmd.Flags().Lookup("log-level"))
|
||||
runnerViper.BindPFlag("verbose", runnerCmd.Flags().Lookup("verbose"))
|
||||
runnerViper.BindPFlag("poll_interval", runnerCmd.Flags().Lookup("poll-interval"))
|
||||
runnerViper.BindPFlag("force_cpu_rendering", runnerCmd.Flags().Lookup("force-cpu-rendering"))
|
||||
runnerViper.BindPFlag("disable_hiprt", runnerCmd.Flags().Lookup("disable-hiprt"))
|
||||
}
|
||||
|
||||
func runRunner(cmd *cobra.Command, args []string) {
|
||||
@@ -63,6 +67,8 @@ func runRunner(cmd *cobra.Command, args []string) {
|
||||
logLevel := runnerViper.GetString("log_level")
|
||||
verbose := runnerViper.GetBool("verbose")
|
||||
pollInterval := runnerViper.GetDuration("poll_interval")
|
||||
forceCPURendering := runnerViper.GetBool("force_cpu_rendering")
|
||||
disableHIPRT := runnerViper.GetBool("disable_hiprt")
|
||||
|
||||
var r *runner.Runner
|
||||
|
||||
@@ -118,7 +124,7 @@ func runRunner(cmd *cobra.Command, args []string) {
|
||||
}
|
||||
|
||||
// Create runner
|
||||
r = runner.New(managerURL, name, hostname)
|
||||
r = runner.New(managerURL, name, hostname, forceCPURendering, disableHIPRT)
|
||||
|
||||
// Check for required tools early to fail fast
|
||||
if err := r.CheckRequiredTools(); err != nil {
|
||||
@@ -161,6 +167,9 @@ func runRunner(cmd *cobra.Command, args []string) {
|
||||
runnerID, err = r.Register(apiKey)
|
||||
if err == nil {
|
||||
logger.Infof("Registered runner with ID: %d", runnerID)
|
||||
// Download latest Blender and detect HIP vs NVIDIA so we only force CPU for Blender < 4.x when using HIP
|
||||
logger.Info("Detecting GPU backends (HIP/NVIDIA) for Blender < 4.x policy...")
|
||||
r.DetectAndStoreGPUBackends()
|
||||
break
|
||||
}
|
||||
|
||||
|
||||
113
installer.sh
Normal file
113
installer.sh
Normal file
@@ -0,0 +1,113 @@
|
||||
#!/bin/bash

set -euo pipefail

# Installs the latest jiggablend binary for Linux AMD64 into /usr/local/bin
# and creates wrapper scripts (jiggablend-manager, jiggablend-runner) that run
# the manager and runner with the test setup.
#
# Dependencies: curl, jq, tar, sha256sum, sudo (for installation to /usr/local/bin)

REPO="s1d3sw1ped/jiggablend"
API_URL="https://git.s1d3sw1ped.com/api/v1/repos/${REPO}/releases/latest"
ASSET_NAME="jiggablend-linux-amd64.tar.gz"

# Fail early with a clear message instead of a cryptic "command not found"
# halfway through the installation.
for tool in curl jq tar sha256sum sudo; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        echo "Error: required tool '$tool' not found in PATH." >&2
        exit 1
    fi
done

# Do all downloads/extraction in a private scratch directory so that nothing in
# the caller's directory is overwritten or deleted, and so that cleanup also
# happens when a step fails (set -e exits before any trailing cleanup code).
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT
cd "$WORK_DIR"

echo "Fetching latest release information..."
# -f: treat HTTP errors as failures instead of parsing an error page as JSON.
RELEASE_JSON=$(curl -fsSL "$API_URL")

# "// empty" turns a missing/null tag into an empty string instead of "null".
TAG=$(jq -r '.tag_name // empty' <<<"$RELEASE_JSON")
if [ -z "$TAG" ]; then
    echo "Error: could not determine latest release tag." >&2
    exit 1
fi
echo "Latest version: $TAG"

# ".assets[]?" tolerates a release without an assets array; the empty result is
# reported by the checks below.
ASSET_URL=$(jq -r --arg name "$ASSET_NAME" '.assets[]? | select(.name == $name) | .browser_download_url' <<<"$RELEASE_JSON")
if [ -z "$ASSET_URL" ]; then
    echo "Error: Asset $ASSET_NAME not found in latest release." >&2
    exit 1
fi

CHECKSUM_URL=$(jq -r '.assets[]? | select(.name == "checksums.txt") | .browser_download_url' <<<"$RELEASE_JSON")
if [ -z "$CHECKSUM_URL" ]; then
    echo "Error: checksums.txt not found in latest release." >&2
    exit 1
fi

echo "Downloading $ASSET_NAME..."
curl -fL -o "$ASSET_NAME" "$ASSET_URL"

echo "Downloading checksums.txt..."
curl -fL -o "checksums.txt" "$CHECKSUM_URL"

echo "Verifying checksum..."
# --ignore-missing: checksums.txt may list assets we did not download.
if ! sha256sum --ignore-missing --quiet -c checksums.txt; then
    echo "Error: Checksum verification failed." >&2
    exit 1
fi

echo "Extracting..."
tar -xzf "$ASSET_NAME"

echo "Installing binary to /usr/local/bin (requires sudo)..."
sudo install -m 0755 jiggablend /usr/local/bin/

echo "Creating manager wrapper script..."
# Quoted 'EOF': the wrapper body is written literally, with no expansion here.
cat << 'EOF' > jiggablend-manager.sh
#!/bin/bash

set -euo pipefail

# Wrapper to run jiggablend manager with test setup
# Run this in a directory where you want the db, storage, and logs

mkdir -p logs
rm -f logs/manager.log

# Initialize test configuration
jiggablend manager config enable localauth
jiggablend manager config set fixed-apikey jk_r0_test_key_123456789012345678901234567890 -f -y
jiggablend manager config add user test@example.com testpassword --admin -f -y

# Run manager
jiggablend manager -l logs/manager.log
EOF
sudo install -m 0755 jiggablend-manager.sh /usr/local/bin/jiggablend-manager

echo "Creating runner wrapper script..."
cat << 'EOF' > jiggablend-runner.sh
#!/bin/bash

set -euo pipefail

# Wrapper to run jiggablend runner with test setup
# Usage: jiggablend-runner [MANAGER_URL] [RUNNER_FLAGS...]
# Default MANAGER_URL: http://localhost:8080
# Run this in a directory where you want the logs

MANAGER_URL="http://localhost:8080"
if [[ $# -gt 0 && "$1" != -* ]]; then
    MANAGER_URL="$1"
    shift
fi

mkdir -p logs
rm -f logs/runner.log

# Run runner. The remaining arguments are forwarded with "$@" rather than via
# an intermediate array: expanding an empty array under `set -u` aborts with
# "unbound variable" on bash versions before 4.4.
jiggablend runner -l logs/runner.log --api-key=jk_r0_test_key_123456789012345678901234567890 --manager "$MANAGER_URL" "$@"
EOF
sudo install -m 0755 jiggablend-runner.sh /usr/local/bin/jiggablend-runner

# The scratch directory (archive, checksums, extracted binary, wrapper sources)
# is removed by the EXIT trap.
echo "Cleaning up..."

echo "Installation complete!"
echo "Binary: jiggablend"
echo "Wrappers: jiggablend-manager, jiggablend-runner"
echo "Run 'jiggablend-manager' to start the manager with test config."
echo "Run 'jiggablend-runner [url] [runner flags...]' to start the runner."
echo "Example: jiggablend-runner http://your-manager:8080 --force-cpu-rendering --disable-hiprt"
echo "Note: Depending on whether you're running the manager or runner, additional dependencies like Blender, ImageMagick, or FFmpeg may be required. See the project README for details."
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"time"
|
||||
|
||||
authpkg "jiggablend/internal/auth"
|
||||
"jiggablend/internal/runner/blender"
|
||||
"jiggablend/pkg/executils"
|
||||
"jiggablend/pkg/scripts"
|
||||
"jiggablend/pkg/types"
|
||||
@@ -2036,12 +2037,13 @@ func (s *Manager) runBlenderMetadataExtraction(blendFile, workDir, blenderVersio
|
||||
}
|
||||
}
|
||||
|
||||
// Execute Blender using executils
|
||||
// Execute Blender using executils (set LD_LIBRARY_PATH for tarball installs)
|
||||
runEnv := blender.TarballEnv(blenderBinary, os.Environ())
|
||||
result, err := executils.RunCommand(
|
||||
blenderBinary,
|
||||
[]string{"-b", blendFileRel, "--python", "extract_metadata.py"},
|
||||
workDir,
|
||||
nil, // inherit environment
|
||||
runEnv,
|
||||
0, // no task ID for metadata extraction
|
||||
nil, // no process tracker needed
|
||||
)
|
||||
|
||||
@@ -442,3 +442,32 @@ func (m *ManagerClient) DownloadBlender(version string) (io.ReadCloser, error) {
|
||||
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
// blenderVersionsResponse is the decoding target for the JSON body returned by
// GET /api/blender/versions. Only the "full" version string of each entry in
// the "versions" array is captured; other fields are ignored by the decoder.
type blenderVersionsResponse struct {
	Versions []struct {
		Full string `json:"full"`
	} `json:"versions"`
}
|
||||
|
||||
// GetLatestBlenderVersion returns the latest Blender version string (e.g. "4.2.3") from the manager.
|
||||
// Uses the flat versions list which is newest-first.
|
||||
func (m *ManagerClient) GetLatestBlenderVersion() (string, error) {
|
||||
resp, err := m.Request("GET", "/api/blender/versions", nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to fetch blender versions: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return "", fmt.Errorf("blender versions returned status %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
var out blenderVersionsResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
||||
return "", fmt.Errorf("failed to decode blender versions: %w", err)
|
||||
}
|
||||
if len(out.Versions) == 0 {
|
||||
return "", fmt.Errorf("no blender versions available")
|
||||
}
|
||||
return out.Versions[0].Full, nil
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"jiggablend/internal/runner/api"
|
||||
"jiggablend/internal/runner/workspace"
|
||||
@@ -85,3 +86,42 @@ func (m *Manager) GetBinaryForJob(version string) (string, error) {
|
||||
return m.GetBinaryPath(version)
|
||||
}
|
||||
|
||||
// TarballEnv prepares an environment for launching a Blender binary that was
// unpacked from a tarball, so that the dynamic loader can find the libraries
// shipped in the "lib" directory next to the binary.
//
// baseEnv is returned as-is (same slice, not a copy) when blenderBinary is
// empty, is exactly "blender", or contains no path separator. Otherwise a new
// slice is returned in which every LD_LIBRARY_PATH entry is replaced by
// "<dir of blenderBinary>/lib", followed by ":" and the value of the first
// existing LD_LIBRARY_PATH entry when that value is non-empty. If baseEnv has
// no LD_LIBRARY_PATH entry, one is appended. baseEnv itself is not modified.
func TarballEnv(blenderBinary string, baseEnv []string) []string {
	const prefix = "LD_LIBRARY_PATH="

	switch {
	case blenderBinary == "", blenderBinary == "blender":
		return baseEnv
	case !strings.ContainsRune(blenderBinary, os.PathSeparator):
		return baseEnv
	}

	// Start with the bundled lib dir; keep whatever the first existing entry
	// already contained behind it so system paths still resolve.
	entry := prefix + filepath.Join(filepath.Dir(blenderBinary), "lib")
	for _, kv := range baseEnv {
		if !strings.HasPrefix(kv, prefix) {
			continue
		}
		if previous := kv[len(prefix):]; previous != "" {
			entry += ":" + previous
		}
		break
	}

	result := make([]string, 0, len(baseEnv)+1)
	replaced := false
	for _, kv := range baseEnv {
		if strings.HasPrefix(kv, prefix) {
			kv = entry
			replaced = true
		}
		result = append(result, kv)
	}
	if !replaced {
		result = append(result, entry)
	}
	return result
}
|
||||
|
||||
|
||||
45
internal/runner/blender/detect.go
Normal file
45
internal/runner/blender/detect.go
Normal file
@@ -0,0 +1,45 @@
|
||||
// Package blender: GPU backend detection for HIP vs NVIDIA.
|
||||
package blender
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"jiggablend/pkg/scripts"
|
||||
)
|
||||
|
||||
// DetectGPUBackends runs a minimal Blender script to detect whether HIP (AMD) and/or
|
||||
// NVIDIA (CUDA/OptiX) devices are available. Use this to decide whether to force CPU
|
||||
// for Blender < 4.x (only force when HIP is present, since HIP has no official support pre-4).
|
||||
func DetectGPUBackends(blenderBinary, scriptDir string) (hasHIP, hasNVIDIA bool, err error) {
|
||||
scriptPath := filepath.Join(scriptDir, "detect_gpu_backends.py")
|
||||
if err := os.WriteFile(scriptPath, []byte(scripts.DetectGPUBackends), 0644); err != nil {
|
||||
return false, false, fmt.Errorf("write detection script: %w", err)
|
||||
}
|
||||
defer os.Remove(scriptPath)
|
||||
|
||||
env := TarballEnv(blenderBinary, os.Environ())
|
||||
cmd := exec.Command(blenderBinary, "-b", "--python", scriptPath)
|
||||
cmd.Env = env
|
||||
cmd.Dir = scriptDir
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return false, false, fmt.Errorf("run blender detection: %w (output: %s)", err, string(out))
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(out)))
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
switch line {
|
||||
case "HAS_HIP":
|
||||
hasHIP = true
|
||||
case "HAS_NVIDIA":
|
||||
hasNVIDIA = true
|
||||
}
|
||||
}
|
||||
return hasHIP, hasNVIDIA, scanner.Err()
|
||||
}
|
||||
@@ -40,10 +40,29 @@ type Runner struct {
|
||||
|
||||
fingerprint string
|
||||
fingerprintMu sync.RWMutex
|
||||
|
||||
// gpuLockedOut is set when logs indicate a GPU error (e.g. HIP "Illegal address");
|
||||
// when true, the runner forces CPU rendering for all subsequent jobs.
|
||||
gpuLockedOut bool
|
||||
gpuLockedOutMu sync.RWMutex
|
||||
|
||||
// hasHIP/hasNVIDIA are set at startup by running latest Blender to detect GPU backends.
|
||||
// Used to force CPU only for Blender < 4.x when HIP is present (no official HIP support pre-4).
|
||||
// gpuDetectionFailed is true when detection could not run; we then force CPU for all versions (we could not determine HIP vs NVIDIA).
|
||||
gpuBackendMu sync.RWMutex
|
||||
hasHIP bool
|
||||
hasNVIDIA bool
|
||||
gpuBackendProbed bool
|
||||
gpuDetectionFailed bool
|
||||
|
||||
// forceCPURendering forces CPU rendering for all jobs regardless of metadata/backend detection.
|
||||
forceCPURendering bool
|
||||
// disableHIPRT disables HIPRT acceleration when configuring Cycles HIP devices.
|
||||
disableHIPRT bool
|
||||
}
|
||||
|
||||
// New creates a new runner.
|
||||
func New(managerURL, name, hostname string) *Runner {
|
||||
func New(managerURL, name, hostname string, forceCPURendering, disableHIPRT bool) *Runner {
|
||||
manager := api.NewManagerClient(managerURL)
|
||||
|
||||
r := &Runner{
|
||||
@@ -53,6 +72,9 @@ func New(managerURL, name, hostname string) *Runner {
|
||||
processes: executils.NewProcessTracker(),
|
||||
stopChan: make(chan struct{}),
|
||||
processors: make(map[string]tasks.Processor),
|
||||
|
||||
forceCPURendering: forceCPURendering,
|
||||
disableHIPRT: disableHIPRT,
|
||||
}
|
||||
|
||||
// Generate fingerprint
|
||||
@@ -119,6 +141,58 @@ func (r *Runner) Register(apiKey string) (int64, error) {
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// DetectAndStoreGPUBackends downloads the latest Blender from the manager (if needed),
|
||||
// runs a detection script to see if HIP (AMD) and/or NVIDIA devices are available,
|
||||
// and stores the result. Call after Register. Used so we only force CPU for Blender < 4.x
|
||||
// when the runner has HIP (no official HIP support pre-4); NVIDIA is allowed.
|
||||
func (r *Runner) DetectAndStoreGPUBackends() {
|
||||
r.gpuBackendMu.Lock()
|
||||
defer r.gpuBackendMu.Unlock()
|
||||
if r.gpuBackendProbed {
|
||||
return
|
||||
}
|
||||
latestVer, err := r.manager.GetLatestBlenderVersion()
|
||||
if err != nil {
|
||||
log.Printf("GPU backend detection failed (could not get latest Blender version: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err)
|
||||
r.gpuBackendProbed = true
|
||||
r.gpuDetectionFailed = true
|
||||
return
|
||||
}
|
||||
binaryPath, err := r.blender.GetBinaryPath(latestVer)
|
||||
if err != nil {
|
||||
log.Printf("GPU backend detection failed (could not get Blender binary: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err)
|
||||
r.gpuBackendProbed = true
|
||||
r.gpuDetectionFailed = true
|
||||
return
|
||||
}
|
||||
hasHIP, hasNVIDIA, err := blender.DetectGPUBackends(binaryPath, r.workspace.BaseDir())
|
||||
if err != nil {
|
||||
log.Printf("GPU backend detection failed (script error: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err)
|
||||
r.gpuBackendProbed = true
|
||||
r.gpuDetectionFailed = true
|
||||
return
|
||||
}
|
||||
r.hasHIP = hasHIP
|
||||
r.hasNVIDIA = hasNVIDIA
|
||||
r.gpuBackendProbed = true
|
||||
r.gpuDetectionFailed = false
|
||||
log.Printf("GPU backend detection: HIP=%v NVIDIA=%v (Blender < 4.x will force CPU only when HIP is present)", hasHIP, hasNVIDIA)
|
||||
}
|
||||
|
||||
// HasHIP returns whether the runner detected HIP (AMD) devices. Used to force CPU for Blender < 4.x only when HIP is present.
|
||||
func (r *Runner) HasHIP() bool {
|
||||
r.gpuBackendMu.RLock()
|
||||
defer r.gpuBackendMu.RUnlock()
|
||||
return r.hasHIP
|
||||
}
|
||||
|
||||
// GPUDetectionFailed returns true when startup GPU backend detection could not run or failed. When true, all jobs use CPU because we could not determine HIP vs NVIDIA.
|
||||
func (r *Runner) GPUDetectionFailed() bool {
|
||||
r.gpuBackendMu.RLock()
|
||||
defer r.gpuBackendMu.RUnlock()
|
||||
return r.gpuDetectionFailed
|
||||
}
|
||||
|
||||
// Start starts the job polling loop.
|
||||
func (r *Runner) Start(pollInterval time.Duration) {
|
||||
log.Printf("Starting job polling loop (interval: %v)", pollInterval)
|
||||
@@ -238,6 +312,12 @@ func (r *Runner) executeJob(job *api.NextJobResponse) (err error) {
|
||||
r.blender,
|
||||
r.encoder,
|
||||
r.processes,
|
||||
r.IsGPULockedOut(),
|
||||
r.HasHIP(),
|
||||
r.GPUDetectionFailed(),
|
||||
r.forceCPURendering,
|
||||
r.disableHIPRT,
|
||||
func() { r.SetGPULockedOut(true) },
|
||||
)
|
||||
|
||||
ctx.Info(fmt.Sprintf("Task assignment received (job: %d, type: %s)",
|
||||
@@ -388,3 +468,21 @@ func (r *Runner) GetFingerprint() string {
|
||||
func (r *Runner) GetID() int64 {
|
||||
return r.id
|
||||
}
|
||||
|
||||
// SetGPULockedOut sets whether GPU use is locked out due to a detected GPU error.
|
||||
// When true, the runner will force CPU rendering for all jobs.
|
||||
func (r *Runner) SetGPULockedOut(locked bool) {
|
||||
r.gpuLockedOutMu.Lock()
|
||||
defer r.gpuLockedOutMu.Unlock()
|
||||
r.gpuLockedOut = locked
|
||||
if locked {
|
||||
log.Printf("GPU lockout enabled: GPU rendering disabled for subsequent jobs (CPU only)")
|
||||
}
|
||||
}
|
||||
|
||||
// IsGPULockedOut returns whether GPU use is currently locked out.
|
||||
func (r *Runner) IsGPULockedOut() bool {
|
||||
r.gpuLockedOutMu.RLock()
|
||||
defer r.gpuLockedOutMu.RUnlock()
|
||||
return r.gpuLockedOut
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"jiggablend/pkg/executils"
|
||||
"jiggablend/pkg/types"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
@@ -38,12 +40,26 @@ type Context struct {
|
||||
Blender *blender.Manager
|
||||
Encoder *encoding.Selector
|
||||
Processes *executils.ProcessTracker
|
||||
|
||||
// GPULockedOut is set when the runner has detected a GPU error (e.g. HIP) and disables GPU for all jobs.
|
||||
GPULockedOut bool
|
||||
// HasHIP is true when the runner detected HIP (AMD) devices at startup. Used to force CPU for Blender < 4.x only when HIP is present.
|
||||
HasHIP bool
|
||||
// GPUDetectionFailed is true when startup GPU backend detection could not run; we force CPU for all versions (could not determine HIP vs NVIDIA).
|
||||
GPUDetectionFailed bool
|
||||
// OnGPUError is called when a GPU error line is seen in render logs; typically sets runner GPU lockout.
|
||||
OnGPUError func()
|
||||
// ForceCPURendering is a runner-level override that forces CPU rendering for all jobs.
|
||||
ForceCPURendering bool
|
||||
// DisableHIPRT is a runner-level override that disables HIPRT acceleration in Blender.
|
||||
DisableHIPRT bool
|
||||
}
|
||||
|
||||
// ErrJobCancelled indicates the manager-side job was cancelled during execution.
|
||||
var ErrJobCancelled = errors.New("job cancelled")
|
||||
|
||||
// NewContext creates a new task context. frameEnd should be >= frame; if 0 or less than frame, it is treated as single-frame (frameEnd = frame).
|
||||
// gpuLockedOut is the runner's current GPU lockout state; hasHIP means the runner has HIP (AMD) devices (force CPU for Blender < 4.x only when true); gpuDetectionFailed means detection failed at startup (force CPU for all versions—could not determine HIP vs NVIDIA); onGPUError is called when a GPU error is detected in logs (may be nil).
|
||||
func NewContext(
|
||||
taskID, jobID int64,
|
||||
jobName string,
|
||||
@@ -58,26 +74,38 @@ func NewContext(
|
||||
blenderMgr *blender.Manager,
|
||||
encoder *encoding.Selector,
|
||||
processes *executils.ProcessTracker,
|
||||
gpuLockedOut bool,
|
||||
hasHIP bool,
|
||||
gpuDetectionFailed bool,
|
||||
forceCPURendering bool,
|
||||
disableHIPRT bool,
|
||||
onGPUError func(),
|
||||
) *Context {
|
||||
if frameEnd < frameStart {
|
||||
frameEnd = frameStart
|
||||
}
|
||||
return &Context{
|
||||
TaskID: taskID,
|
||||
JobID: jobID,
|
||||
JobName: jobName,
|
||||
Frame: frameStart,
|
||||
FrameEnd: frameEnd,
|
||||
TaskType: taskType,
|
||||
WorkDir: workDir,
|
||||
JobToken: jobToken,
|
||||
Metadata: metadata,
|
||||
Manager: manager,
|
||||
JobConn: jobConn,
|
||||
Workspace: ws,
|
||||
Blender: blenderMgr,
|
||||
Encoder: encoder,
|
||||
Processes: processes,
|
||||
TaskID: taskID,
|
||||
JobID: jobID,
|
||||
JobName: jobName,
|
||||
Frame: frameStart,
|
||||
FrameEnd: frameEnd,
|
||||
TaskType: taskType,
|
||||
WorkDir: workDir,
|
||||
JobToken: jobToken,
|
||||
Metadata: metadata,
|
||||
Manager: manager,
|
||||
JobConn: jobConn,
|
||||
Workspace: ws,
|
||||
Blender: blenderMgr,
|
||||
Encoder: encoder,
|
||||
Processes: processes,
|
||||
GPULockedOut: gpuLockedOut,
|
||||
HasHIP: hasHIP,
|
||||
GPUDetectionFailed: gpuDetectionFailed,
|
||||
ForceCPURendering: forceCPURendering,
|
||||
DisableHIPRT: disableHIPRT,
|
||||
OnGPUError: onGPUError,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -158,6 +186,52 @@ func (c *Context) ShouldEnableExecution() bool {
|
||||
return c.Metadata != nil && c.Metadata.EnableExecution != nil && *c.Metadata.EnableExecution
|
||||
}
|
||||
|
||||
// ShouldForceCPU returns true if GPU should be disabled and CPU rendering forced
|
||||
// (runner GPU lockout, GPU detection failed at startup for any version, metadata force_cpu,
|
||||
// or Blender < 4.x when the runner has HIP).
|
||||
func (c *Context) ShouldForceCPU() bool {
|
||||
if c.ForceCPURendering {
|
||||
return true
|
||||
}
|
||||
if c.GPULockedOut {
|
||||
return true
|
||||
}
|
||||
// Detection failed at startup: we could not determine HIP vs NVIDIA, so force CPU for all versions.
|
||||
if c.GPUDetectionFailed {
|
||||
return true
|
||||
}
|
||||
v := c.GetBlenderVersion()
|
||||
major := parseBlenderMajor(v)
|
||||
isPre4 := v != "" && major >= 0 && major < 4
|
||||
// Blender < 4.x: force CPU when runner has HIP (no official HIP support).
|
||||
if isPre4 && c.HasHIP {
|
||||
return true
|
||||
}
|
||||
if c.Metadata != nil && c.Metadata.RenderSettings.EngineSettings != nil {
|
||||
if v, ok := c.Metadata.RenderSettings.EngineSettings["force_cpu"]; ok {
|
||||
if b, ok := v.(bool); ok && b {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// parseBlenderMajor extracts the leading integer of a dotted version string
// such as "4.2.3" or "3.6". Surrounding whitespace is ignored. It returns -1
// when the input is empty or the part before the first "." is not an integer.
func parseBlenderMajor(version string) int {
	head := strings.TrimSpace(version)
	if dot := strings.IndexByte(head, '.'); dot >= 0 {
		head = head[:dot]
	}
	// Atoi also rejects the empty string, which covers "" and ".5".
	major, err := strconv.Atoi(head)
	if err != nil {
		return -1
	}
	return major
}
|
||||
|
||||
// IsJobCancelled checks whether the manager marked this job as cancelled.
|
||||
func (c *Context) IsJobCancelled() (bool, error) {
|
||||
if c.Manager == nil {
|
||||
|
||||
@@ -25,6 +25,32 @@ func NewRenderProcessor() *RenderProcessor {
|
||||
return &RenderProcessor{}
|
||||
}
|
||||
|
||||
// gpuErrorSubstrings lists the markers that identify a GPU backend error in a
// render log line. Entries are lowercase because lines are lowercased before
// matching; a hit on any entry triggers the runner-wide GPU lockout.
var gpuErrorSubstrings = []string{
	// HIP (AMD)
	"illegal address in hip", // e.g. "Illegal address in HIP"
	"hiperror",               // hipError* codes
	"hip error",
	// Other GPU backends
	"cuda error",
	"cuerror",
	"optix error",
	"oneapi error",
	"opencl error",
}
|
||||
|
||||
// checkGPUErrorLine checks a log line for GPU error indicators and triggers runner GPU lockout if found.
|
||||
func (p *RenderProcessor) checkGPUErrorLine(ctx *Context, line string) {
|
||||
lower := strings.ToLower(line)
|
||||
for _, sub := range gpuErrorSubstrings {
|
||||
if strings.Contains(lower, sub) {
|
||||
if ctx.OnGPUError != nil {
|
||||
ctx.OnGPUError()
|
||||
}
|
||||
ctx.Warn(fmt.Sprintf("GPU error detected in log (%q); GPU disabled for subsequent jobs", sub))
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process executes a render task.
|
||||
func (p *RenderProcessor) Process(ctx *Context) error {
|
||||
if err := ctx.CheckCancelled(); err != nil {
|
||||
@@ -77,6 +103,21 @@ func (p *RenderProcessor) Process(ctx *Context) error {
|
||||
// We always render EXR (linear) for VFX accuracy; job output_format is the deliverable (EXR sequence or video).
|
||||
renderFormat := "EXR"
|
||||
|
||||
if ctx.ShouldForceCPU() {
|
||||
v := ctx.GetBlenderVersion()
|
||||
major := parseBlenderMajor(v)
|
||||
isPre4 := v != "" && major >= 0 && major < 4
|
||||
if ctx.ForceCPURendering {
|
||||
ctx.Info("Runner compatibility flag is enabled: forcing CPU rendering for this job")
|
||||
} else if ctx.GPUDetectionFailed {
|
||||
ctx.Info("GPU backend detection failed at startup—we could not determine whether this machine has HIP (AMD) or NVIDIA GPUs, so rendering will use CPU to avoid compatibility issues")
|
||||
} else if isPre4 && ctx.HasHIP {
|
||||
ctx.Info("Blender < 4.x has no official HIP support: using CPU rendering only")
|
||||
} else {
|
||||
ctx.Info("GPU lockout active: using CPU rendering only")
|
||||
}
|
||||
}
|
||||
|
||||
// Create render script
|
||||
if err := p.createRenderScript(ctx, renderFormat); err != nil {
|
||||
return err
|
||||
@@ -142,13 +183,23 @@ func (p *RenderProcessor) createRenderScript(ctx *Context, renderFormat string)
|
||||
return errors.New(errMsg)
|
||||
}
|
||||
|
||||
// Write render settings if available
|
||||
// Write render settings: merge job metadata with runner force_cpu (GPU lockout)
|
||||
var settingsMap map[string]interface{}
|
||||
if ctx.Metadata != nil && ctx.Metadata.RenderSettings.EngineSettings != nil {
|
||||
settingsJSON, err := json.Marshal(ctx.Metadata.RenderSettings)
|
||||
raw, err := json.Marshal(ctx.Metadata.RenderSettings)
|
||||
if err == nil {
|
||||
if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
|
||||
ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
|
||||
}
|
||||
_ = json.Unmarshal(raw, &settingsMap)
|
||||
}
|
||||
}
|
||||
if settingsMap == nil {
|
||||
settingsMap = make(map[string]interface{})
|
||||
}
|
||||
settingsMap["force_cpu"] = ctx.ShouldForceCPU()
|
||||
settingsMap["disable_hiprt"] = ctx.DisableHIPRT
|
||||
settingsJSON, err := json.Marshal(settingsMap)
|
||||
if err == nil {
|
||||
if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
|
||||
ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,9 +229,9 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
cmd := exec.Command(blenderBinary, args...)
|
||||
cmd.Dir = ctx.WorkDir
|
||||
|
||||
// Set up environment with custom HOME directory
|
||||
// Set up environment: LD_LIBRARY_PATH for tarball Blender, then custom HOME
|
||||
env := os.Environ()
|
||||
// Remove existing HOME if present and add our custom one
|
||||
env = blender.TarballEnv(blenderBinary, env)
|
||||
newEnv := make([]string, 0, len(env)+1)
|
||||
for _, e := range env {
|
||||
if !strings.HasPrefix(e, "HOME=") {
|
||||
@@ -211,7 +262,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
ctx.Processes.Track(ctx.TaskID, cmd)
|
||||
defer ctx.Processes.Untrack(ctx.TaskID)
|
||||
|
||||
// Stream stdout
|
||||
// Stream stdout and watch for GPU error lines (lock out all GPU on any backend error)
|
||||
stdoutDone := make(chan bool)
|
||||
go func() {
|
||||
defer close(stdoutDone)
|
||||
@@ -219,6 +270,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if line != "" {
|
||||
p.checkGPUErrorLine(ctx, line)
|
||||
shouldFilter, logLevel := blender.FilterLog(line)
|
||||
if !shouldFilter {
|
||||
ctx.Log(logLevel, line)
|
||||
@@ -227,7 +279,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
}
|
||||
}()
|
||||
|
||||
// Stream stderr
|
||||
// Stream stderr and watch for GPU error lines
|
||||
stderrDone := make(chan bool)
|
||||
go func() {
|
||||
defer close(stderrDone)
|
||||
@@ -235,6 +287,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if line != "" {
|
||||
p.checkGPUErrorLine(ctx, line)
|
||||
shouldFilter, logLevel := blender.FilterLog(line)
|
||||
if !shouldFilter {
|
||||
if logLevel == types.LogLevelInfo {
|
||||
|
||||
@@ -11,3 +11,6 @@ var UnhideObjects string
|
||||
//go:embed scripts/render_blender.py.template
|
||||
var RenderBlenderTemplate string
|
||||
|
||||
//go:embed scripts/detect_gpu_backends.py
|
||||
var DetectGPUBackends string
|
||||
|
||||
|
||||
39
pkg/scripts/scripts/detect_gpu_backends.py
Normal file
39
pkg/scripts/scripts/detect_gpu_backends.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# Minimal script to detect HIP (AMD) and NVIDIA (CUDA/OptiX) backends for Cycles.
|
||||
# Run with: blender -b --python detect_gpu_backends.py
|
||||
# Prints HAS_HIP and/or HAS_NVIDIA to stdout, one per line.
|
||||
import sys
|
||||
|
||||
def main():
    """Print HAS_HIP and/or HAS_NVIDIA for the Cycles backends that expose devices.

    For each of the HIP, CUDA and OPTIX backends the Cycles preferences are
    switched to that backend, the device list is refreshed, and the device
    entries whose ``type`` equals the backend are counted. HIP devices yield
    ``HAS_HIP``; CUDA or OPTIX devices yield ``HAS_NVIDIA``.

    Nothing is printed when the Cycles addon is not available. A backend that
    raises while being probed (for example because this Blender build does not
    know it) is treated as "no devices". Any other failure prints ``ERROR ...``
    to stderr and exits with status 1.
    """
    try:
        prefs = bpy.context.preferences
        if not hasattr(prefs, 'addons') or 'cycles' not in prefs.addons:
            return
        cprefs = prefs.addons['cycles'].preferences
        has_hip = False
        has_nvidia = False
        for device_type in ('HIP', 'CUDA', 'OPTIX'):
            try:
                cprefs.compute_device_type = device_type
                # Refresh the device entries. Older builds only offer
                # get_devices() for this.
                if hasattr(cprefs, 'refresh_devices'):
                    cprefs.refresh_devices()
                elif hasattr(cprefs, 'get_devices'):
                    cprefs.get_devices()
                # Do not rely on the return value of get_devices().
                # NOTE(review): from memory of the Cycles addon, it is a
                # deprecated shim returning None on recent Blender releases
                # (so nothing would ever be detected) and an always-truthy
                # tuple of lists on 2.8x/2.9x - confirm against the Blender
                # versions the manager serves.
                if hasattr(cprefs, 'get_devices_for_type'):
                    candidates = cprefs.get_devices_for_type(device_type)
                else:
                    candidates = getattr(cprefs, 'devices', None)
                # Filter by type so CPU entries (or entries of another
                # backend) in the list do not count as a hit.
                devs = [d for d in (candidates or [])
                        if getattr(d, 'type', None) == device_type]
                if devs:
                    if device_type == 'HIP':
                        has_hip = True
                    if device_type in ('CUDA', 'OPTIX'):
                        has_nvidia = True
            except Exception:
                # Deliberate best effort: an unsupported backend must not
                # abort probing of the remaining ones.
                pass
        if has_hip:
            print('HAS_HIP', flush=True)
        if has_nvidia:
            print('HAS_NVIDIA', flush=True)
    except Exception as e:
        print('ERROR', str(e), file=sys.stderr, flush=True)
        sys.exit(1)
|
||||
|
||||
import bpy
|
||||
main()
|
||||
@@ -175,9 +175,13 @@ if render_settings_override:
|
||||
if current_engine == 'CYCLES':
|
||||
# Check if CPU rendering is forced
|
||||
force_cpu = False
|
||||
disable_hiprt = False
|
||||
if render_settings_override and render_settings_override.get('force_cpu'):
|
||||
force_cpu = render_settings_override.get('force_cpu', False)
|
||||
print("Force CPU rendering is enabled - skipping GPU detection")
|
||||
if render_settings_override and render_settings_override.get('disable_hiprt'):
|
||||
disable_hiprt = render_settings_override.get('disable_hiprt', False)
|
||||
print("Disable HIPRT flag is enabled")
|
||||
|
||||
# Ensure Cycles addon is enabled
|
||||
try:
|
||||
@@ -321,7 +325,16 @@ if current_engine == 'CYCLES':
|
||||
try:
|
||||
if best_device_type == 'HIP':
|
||||
# HIPRT (HIP Ray Tracing) for AMD GPUs
|
||||
if hasattr(cycles_prefs, 'use_hiprt'):
|
||||
if disable_hiprt:
|
||||
if hasattr(cycles_prefs, 'use_hiprt'):
|
||||
cycles_prefs.use_hiprt = False
|
||||
print(f" Disabled HIPRT (HIP Ray Tracing) via runner compatibility flag")
|
||||
elif hasattr(scene.cycles, 'use_hiprt'):
|
||||
scene.cycles.use_hiprt = False
|
||||
print(f" Disabled HIPRT (HIP Ray Tracing) via runner compatibility flag")
|
||||
else:
|
||||
print(f" HIPRT toggle not available on this Blender version")
|
||||
elif hasattr(cycles_prefs, 'use_hiprt'):
|
||||
cycles_prefs.use_hiprt = True
|
||||
print(f" Enabled HIPRT (HIP Ray Tracing) for faster rendering")
|
||||
elif hasattr(scene.cycles, 'use_hiprt'):
|
||||
|
||||
Reference in New Issue
Block a user