Compare commits
3 Commits
0.0.5
...
bc39fd438b
| Author | SHA1 | Date | |
|---|---|---|---|
| bc39fd438b | |||
| 4c7f168bce | |||
| 6833bb4013 |
106
installer.sh
Normal file
106
installer.sh
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/bin/bash

set -euo pipefail

# Installs the latest jiggablend release binary for Linux AMD64 into
# /usr/local/bin and creates two wrapper scripts (jiggablend-manager and
# jiggablend-runner) that start the manager and runner using the test setup.
#
# All downloads and intermediate files live in a private temporary directory
# that is removed on exit (success or failure), so the caller's current
# directory is never touched.
#
# Dependencies: curl, jq, tar, sha256sum, sudo (for installation to /usr/local/bin)

REPO="s1d3sw1ped/jiggablend"
API_URL="https://git.s1d3sw1ped.com/api/v1/repos/${REPO}/releases/latest"
ASSET_NAME="jiggablend-linux-amd64.tar.gz"
INSTALL_DIR="/usr/local/bin"

# Print an error message to stderr and abort.
die() {
    echo "Error: $*" >&2
    exit 1
}

# Print the download URL of the release asset named $1 (nothing if absent).
# The name is passed with --arg so it is never interpreted as jq code, and
# "// empty" prevents a JSON null from being printed as the string "null".
asset_url() {
    jq -r --arg name "$1" \
        '.assets[]? | select(.name == $name) | .browser_download_url // empty' \
        <<<"$RELEASE_JSON"
}

# Fail early with a clear message instead of half-way through the install.
for tool in curl jq tar sha256sum sudo install mktemp; do
    command -v "$tool" >/dev/null 2>&1 || die "Required tool '$tool' not found in PATH."
done

WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT
cd "$WORK_DIR"

echo "Fetching latest release information..."
# -f makes curl fail on HTTP errors instead of handing an error page to jq.
RELEASE_JSON=$(curl -fsSL "$API_URL") || die "Could not fetch release information from $API_URL"

TAG=$(jq -r '.tag_name // empty' <<<"$RELEASE_JSON")
if [ -z "$TAG" ]; then
    die "Could not determine the latest release tag."
fi
echo "Latest version: $TAG"

ASSET_URL=$(asset_url "$ASSET_NAME")
if [ -z "$ASSET_URL" ]; then
    die "Asset $ASSET_NAME not found in latest release."
fi

CHECKSUM_URL=$(asset_url "checksums.txt")
if [ -z "$CHECKSUM_URL" ]; then
    die "checksums.txt not found in latest release."
fi

echo "Downloading $ASSET_NAME..."
curl -fL -o "$ASSET_NAME" "$ASSET_URL"

echo "Downloading checksums.txt..."
curl -fL -o "checksums.txt" "$CHECKSUM_URL"

echo "Verifying checksum..."
# --ignore-missing skips entries for assets we did not download; sha256sum
# still fails if no listed file could be verified at all.
if ! sha256sum --ignore-missing --quiet -c checksums.txt; then
    die "Checksum verification failed."
fi

echo "Extracting..."
tar -xzf "$ASSET_NAME"
if [ ! -f jiggablend ]; then
    die "Archive $ASSET_NAME does not contain the jiggablend binary."
fi

echo "Installing binary to /usr/local/bin (requires sudo)..."
sudo install -m 0755 jiggablend "$INSTALL_DIR/"

echo "Creating manager wrapper script..."
cat << 'EOF' > jiggablend-manager.sh
#!/bin/bash

set -euo pipefail

# Wrapper to run jiggablend manager with test setup
# Run this in a directory where you want the db, storage, and logs

mkdir -p logs
rm -f logs/manager.log

# Initialize test configuration
jiggablend manager config enable localauth
jiggablend manager config set fixed-apikey jk_r0_test_key_123456789012345678901234567890 -f -y
jiggablend manager config add user test@example.com testpassword --admin -f -y

# Run manager
jiggablend manager -l logs/manager.log
EOF
# install -m 0755 sets the executable bit on the installed copy.
sudo install -m 0755 jiggablend-manager.sh "$INSTALL_DIR/jiggablend-manager"

echo "Creating runner wrapper script..."
cat << 'EOF' > jiggablend-runner.sh
#!/bin/bash

set -euo pipefail

# Wrapper to run jiggablend runner with test setup
# Usage: jiggablend-runner [MANAGER_URL]
# Default MANAGER_URL: http://localhost:8080
# Run this in a directory where you want the logs

MANAGER_URL="${1:-http://localhost:8080}"

mkdir -p logs
rm -f logs/runner.log

# Run runner
jiggablend runner -l logs/runner.log --api-key=jk_r0_test_key_123456789012345678901234567890 --manager "$MANAGER_URL"
EOF
sudo install -m 0755 jiggablend-runner.sh "$INSTALL_DIR/jiggablend-runner"

# Temporary files are removed by the EXIT trap.
echo "Cleaning up..."

echo "Installation complete!"
echo "Binary: jiggablend"
echo "Wrappers: jiggablend-manager, jiggablend-runner"
echo "Run 'jiggablend-manager' to start the manager with test config."
echo "Run 'jiggablend-runner [url]' to start the runner, e.g., jiggablend-runner http://your-manager:8080"
echo "Note: Depending on whether you're running the manager or runner, additional dependencies like Blender, ImageMagick, or FFmpeg may be required. See the project README for details."
|
||||
@@ -40,6 +40,11 @@ type Runner struct {
|
||||
|
||||
fingerprint string
|
||||
fingerprintMu sync.RWMutex
|
||||
|
||||
// gpuLockedOut is set when logs indicate a GPU error (e.g. HIP "Illegal address");
|
||||
// when true, the runner forces CPU rendering for all subsequent jobs.
|
||||
gpuLockedOut bool
|
||||
gpuLockedOutMu sync.RWMutex
|
||||
}
|
||||
|
||||
// New creates a new runner.
|
||||
@@ -238,6 +243,8 @@ func (r *Runner) executeJob(job *api.NextJobResponse) (err error) {
|
||||
r.blender,
|
||||
r.encoder,
|
||||
r.processes,
|
||||
r.IsGPULockedOut(),
|
||||
func() { r.SetGPULockedOut(true) },
|
||||
)
|
||||
|
||||
ctx.Info(fmt.Sprintf("Task assignment received (job: %d, type: %s)",
|
||||
@@ -388,3 +395,21 @@ func (r *Runner) GetFingerprint() string {
|
||||
func (r *Runner) GetID() int64 {
|
||||
return r.id
|
||||
}
|
||||
|
||||
// SetGPULockedOut sets whether GPU use is locked out due to a detected GPU error.
|
||||
// When true, the runner will force CPU rendering for all jobs.
|
||||
func (r *Runner) SetGPULockedOut(locked bool) {
|
||||
r.gpuLockedOutMu.Lock()
|
||||
defer r.gpuLockedOutMu.Unlock()
|
||||
r.gpuLockedOut = locked
|
||||
if locked {
|
||||
log.Printf("GPU lockout enabled: GPU rendering disabled for subsequent jobs (CPU only)")
|
||||
}
|
||||
}
|
||||
|
||||
// IsGPULockedOut returns whether GPU use is currently locked out.
|
||||
func (r *Runner) IsGPULockedOut() bool {
|
||||
r.gpuLockedOutMu.RLock()
|
||||
defer r.gpuLockedOutMu.RUnlock()
|
||||
return r.gpuLockedOut
|
||||
}
|
||||
|
||||
@@ -38,12 +38,18 @@ type Context struct {
|
||||
Blender *blender.Manager
|
||||
Encoder *encoding.Selector
|
||||
Processes *executils.ProcessTracker
|
||||
|
||||
// GPULockedOut is set when the runner has detected a GPU error (e.g. HIP) and disables GPU for all jobs.
|
||||
GPULockedOut bool
|
||||
// OnGPUError is called when a GPU error line is seen in render logs; typically sets runner GPU lockout.
|
||||
OnGPUError func()
|
||||
}
|
||||
|
||||
// ErrJobCancelled indicates the manager-side job was cancelled during execution.
|
||||
var ErrJobCancelled = errors.New("job cancelled")
|
||||
|
||||
// NewContext creates a new task context. frameEnd should be >= frame; if 0 or less than frame, it is treated as single-frame (frameEnd = frame).
|
||||
// gpuLockedOut is the runner's current GPU lockout state; onGPUError is called when a GPU error is detected in logs (may be nil).
|
||||
func NewContext(
|
||||
taskID, jobID int64,
|
||||
jobName string,
|
||||
@@ -58,26 +64,30 @@ func NewContext(
|
||||
blenderMgr *blender.Manager,
|
||||
encoder *encoding.Selector,
|
||||
processes *executils.ProcessTracker,
|
||||
gpuLockedOut bool,
|
||||
onGPUError func(),
|
||||
) *Context {
|
||||
if frameEnd < frameStart {
|
||||
frameEnd = frameStart
|
||||
}
|
||||
return &Context{
|
||||
TaskID: taskID,
|
||||
JobID: jobID,
|
||||
JobName: jobName,
|
||||
Frame: frameStart,
|
||||
FrameEnd: frameEnd,
|
||||
TaskType: taskType,
|
||||
WorkDir: workDir,
|
||||
JobToken: jobToken,
|
||||
Metadata: metadata,
|
||||
Manager: manager,
|
||||
JobConn: jobConn,
|
||||
Workspace: ws,
|
||||
Blender: blenderMgr,
|
||||
Encoder: encoder,
|
||||
Processes: processes,
|
||||
TaskID: taskID,
|
||||
JobID: jobID,
|
||||
JobName: jobName,
|
||||
Frame: frameStart,
|
||||
FrameEnd: frameEnd,
|
||||
TaskType: taskType,
|
||||
WorkDir: workDir,
|
||||
JobToken: jobToken,
|
||||
Metadata: metadata,
|
||||
Manager: manager,
|
||||
JobConn: jobConn,
|
||||
Workspace: ws,
|
||||
Blender: blenderMgr,
|
||||
Encoder: encoder,
|
||||
Processes: processes,
|
||||
GPULockedOut: gpuLockedOut,
|
||||
OnGPUError: onGPUError,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -158,6 +168,22 @@ func (c *Context) ShouldEnableExecution() bool {
|
||||
return c.Metadata != nil && c.Metadata.EnableExecution != nil && *c.Metadata.EnableExecution
|
||||
}
|
||||
|
||||
// ShouldForceCPU returns true if GPU should be disabled and CPU rendering forced
|
||||
// (runner GPU lockout or metadata force_cpu in engine_settings).
|
||||
func (c *Context) ShouldForceCPU() bool {
|
||||
if c.GPULockedOut {
|
||||
return true
|
||||
}
|
||||
if c.Metadata != nil && c.Metadata.RenderSettings.EngineSettings != nil {
|
||||
if v, ok := c.Metadata.RenderSettings.EngineSettings["force_cpu"]; ok {
|
||||
if b, ok := v.(bool); ok && b {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsJobCancelled checks whether the manager marked this job as cancelled.
|
||||
func (c *Context) IsJobCancelled() (bool, error) {
|
||||
if c.Manager == nil {
|
||||
|
||||
@@ -25,6 +25,32 @@ func NewRenderProcessor() *RenderProcessor {
|
||||
return &RenderProcessor{}
|
||||
}
|
||||
|
||||
// gpuErrorSubstrings are log line substrings that indicate a GPU backend error (matched case-insensitively); any match triggers full GPU lockout.
|
||||
var gpuErrorSubstrings = []string{
|
||||
"illegal address in hip", // HIP (AMD) e.g. "Illegal address in HIP" or "Illegal address in hip"
|
||||
"hiperror", // hipError* codes
|
||||
"hip error",
|
||||
"cuda error",
|
||||
"cuerror",
|
||||
"optix error",
|
||||
"oneapi error",
|
||||
"opencl error",
|
||||
}
|
||||
|
||||
// checkGPUErrorLine checks a log line for GPU error indicators and triggers runner GPU lockout if found.
|
||||
func (p *RenderProcessor) checkGPUErrorLine(ctx *Context, line string) {
|
||||
lower := strings.ToLower(line)
|
||||
for _, sub := range gpuErrorSubstrings {
|
||||
if strings.Contains(lower, sub) {
|
||||
if ctx.OnGPUError != nil {
|
||||
ctx.OnGPUError()
|
||||
}
|
||||
ctx.Warn(fmt.Sprintf("GPU error detected in log (%q); GPU disabled for subsequent jobs", sub))
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process executes a render task.
|
||||
func (p *RenderProcessor) Process(ctx *Context) error {
|
||||
if err := ctx.CheckCancelled(); err != nil {
|
||||
@@ -77,6 +103,10 @@ func (p *RenderProcessor) Process(ctx *Context) error {
|
||||
// We always render EXR (linear) for VFX accuracy; job output_format is the deliverable (EXR sequence or video).
|
||||
renderFormat := "EXR"
|
||||
|
||||
if ctx.ShouldForceCPU() {
|
||||
ctx.Info("GPU lockout active: using CPU rendering only")
|
||||
}
|
||||
|
||||
// Create render script
|
||||
if err := p.createRenderScript(ctx, renderFormat); err != nil {
|
||||
return err
|
||||
@@ -142,13 +172,22 @@ func (p *RenderProcessor) createRenderScript(ctx *Context, renderFormat string)
|
||||
return errors.New(errMsg)
|
||||
}
|
||||
|
||||
// Write render settings if available
|
||||
// Write render settings: merge job metadata with runner force_cpu (GPU lockout)
|
||||
var settingsMap map[string]interface{}
|
||||
if ctx.Metadata != nil && ctx.Metadata.RenderSettings.EngineSettings != nil {
|
||||
settingsJSON, err := json.Marshal(ctx.Metadata.RenderSettings)
|
||||
raw, err := json.Marshal(ctx.Metadata.RenderSettings)
|
||||
if err == nil {
|
||||
if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
|
||||
ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
|
||||
}
|
||||
_ = json.Unmarshal(raw, &settingsMap)
|
||||
}
|
||||
}
|
||||
if settingsMap == nil {
|
||||
settingsMap = make(map[string]interface{})
|
||||
}
|
||||
settingsMap["force_cpu"] = ctx.ShouldForceCPU()
|
||||
settingsJSON, err := json.Marshal(settingsMap)
|
||||
if err == nil {
|
||||
if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
|
||||
ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,7 +250,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
ctx.Processes.Track(ctx.TaskID, cmd)
|
||||
defer ctx.Processes.Untrack(ctx.TaskID)
|
||||
|
||||
// Stream stdout
|
||||
// Stream stdout and watch for GPU error lines (lock out all GPU on any backend error)
|
||||
stdoutDone := make(chan bool)
|
||||
go func() {
|
||||
defer close(stdoutDone)
|
||||
@@ -219,6 +258,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if line != "" {
|
||||
p.checkGPUErrorLine(ctx, line)
|
||||
shouldFilter, logLevel := blender.FilterLog(line)
|
||||
if !shouldFilter {
|
||||
ctx.Log(logLevel, line)
|
||||
@@ -227,7 +267,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
}
|
||||
}()
|
||||
|
||||
// Stream stderr
|
||||
// Stream stderr and watch for GPU error lines
|
||||
stderrDone := make(chan bool)
|
||||
go func() {
|
||||
defer close(stderrDone)
|
||||
@@ -235,6 +275,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if line != "" {
|
||||
p.checkGPUErrorLine(ctx, line)
|
||||
shouldFilter, logLevel := blender.FilterLog(line)
|
||||
if !shouldFilter {
|
||||
if logLevel == types.LogLevelInfo {
|
||||
|
||||
Reference in New Issue
Block a user