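Massive changes, and it works (runner changes include unique runner ID suffixes, registration retry with exponential backoff, capability probing at startup, and signal-driven shutdown)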

This commit is contained in:
2025-11-23 10:58:24 -06:00
parent 30aa969433
commit 2a0ff98834
3499 changed files with 7770 additions and 634687 deletions

View File

@@ -1,13 +1,19 @@
package main
import (
"crypto/rand"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"log"
"os"
"os/signal"
"strings"
"syscall"
"time"
"fuego/internal/runner"
"jiggablend/internal/runner"
)
type SecretsFile struct {
@@ -18,19 +24,16 @@ type SecretsFile struct {
func main() {
var (
managerURL = flag.String("manager", getEnv("MANAGER_URL", "http://localhost:8080"), "Manager URL")
name = flag.String("name", getEnv("RUNNER_NAME", ""), "Runner name")
hostname = flag.String("hostname", getEnv("RUNNER_HOSTNAME", ""), "Runner hostname")
ipAddress = flag.String("ip", getEnv("RUNNER_IP", ""), "Runner IP address")
token = flag.String("token", getEnv("REGISTRATION_TOKEN", ""), "Registration token")
secretsFile = flag.String("secrets-file", getEnv("SECRETS_FILE", ""), "Path to secrets file for persistent storage")
managerURL = flag.String("manager", getEnv("MANAGER_URL", "http://localhost:8080"), "Manager URL")
name = flag.String("name", getEnv("RUNNER_NAME", ""), "Runner name")
hostname = flag.String("hostname", getEnv("RUNNER_HOSTNAME", ""), "Runner hostname")
ipAddress = flag.String("ip", getEnv("RUNNER_IP", ""), "Runner IP address")
token = flag.String("token", getEnv("REGISTRATION_TOKEN", ""), "Registration token")
secretsFile = flag.String("secrets-file", getEnv("SECRETS_FILE", ""), "Path to secrets file for persistent storage (default: ./runner-secrets.json, or ./runner-secrets-{id}.json if multiple runners)")
runnerIDSuffix = flag.String("runner-id", getEnv("RUNNER_ID", ""), "Unique runner ID suffix (auto-generated if not provided)")
)
flag.Parse()
if *name == "" {
hostname, _ := os.Hostname()
*name = fmt.Sprintf("runner-%s", hostname)
}
if *hostname == "" {
*hostname, _ = os.Hostname()
}
@@ -38,8 +41,54 @@ func main() {
*ipAddress = "127.0.0.1"
}
// Generate or use provided runner ID suffix
runnerIDStr := *runnerIDSuffix
if runnerIDStr == "" {
runnerIDStr = generateShortID()
}
// Generate runner name with ID if not provided
if *name == "" {
*name = fmt.Sprintf("runner-%s-%s", *hostname, runnerIDStr)
} else {
// Append ID to provided name to ensure uniqueness
*name = fmt.Sprintf("%s-%s", *name, runnerIDStr)
}
// Set default secrets file if not provided - always use current directory
if *secretsFile == "" {
if *runnerIDSuffix != "" || getEnv("RUNNER_ID", "") != "" {
// Multiple runners - use local file with ID
*secretsFile = fmt.Sprintf("./runner-secrets-%s.json", runnerIDStr)
} else {
// Single runner - use local file
*secretsFile = "./runner-secrets.json"
}
}
client := runner.NewClient(*managerURL, *name, *hostname, *ipAddress)
// Probe capabilities once at startup (before any registration attempts)
log.Printf("Probing runner capabilities...")
client.ProbeCapabilities()
capabilities := client.GetCapabilities()
capList := []string{}
for cap, value := range capabilities {
// Only show boolean true capabilities and numeric GPU counts
if enabled, ok := value.(bool); ok && enabled {
capList = append(capList, cap)
} else if count, ok := value.(int); ok && count > 0 {
capList = append(capList, fmt.Sprintf("%s=%d", cap, count))
} else if count, ok := value.(float64); ok && count > 0 {
capList = append(capList, fmt.Sprintf("%s=%.0f", cap, count))
}
}
if len(capList) > 0 {
log.Printf("Detected capabilities: %s", strings.Join(capList, ", "))
} else {
log.Printf("Warning: No capabilities detected")
}
// Try to load secrets from file
var runnerID int64
var runnerSecret, managerSecret string
@@ -53,30 +102,55 @@ func main() {
}
}
// If no secrets loaded, register with token
// If no secrets loaded, register with token (with retry logic)
if runnerID == 0 {
if *token == "" {
log.Fatalf("Registration token required (use --token or set REGISTRATION_TOKEN env var)")
}
var err error
runnerID, runnerSecret, managerSecret, err = client.Register(*token)
if err != nil {
log.Fatalf("Failed to register runner: %v", err)
}
log.Printf("Registered runner with ID: %d", runnerID)
// Retry registration with exponential backoff
backoff := 1 * time.Second
maxBackoff := 30 * time.Second
maxRetries := 10
retryCount := 0
// Save secrets to file if specified
if *secretsFile != "" {
secrets := SecretsFile{
RunnerID: runnerID,
RunnerSecret: runnerSecret,
ManagerSecret: managerSecret,
for {
var err error
runnerID, runnerSecret, managerSecret, err = client.Register(*token)
if err == nil {
log.Printf("Registered runner with ID: %d", runnerID)
// Always save secrets to file (secretsFile is now always set to a default if not provided)
secrets := SecretsFile{
RunnerID: runnerID,
RunnerSecret: runnerSecret,
ManagerSecret: managerSecret,
}
if err := saveSecrets(*secretsFile, secrets); err != nil {
log.Printf("Warning: Failed to save secrets to %s: %v", *secretsFile, err)
} else {
log.Printf("Saved secrets to %s", *secretsFile)
}
break
}
if err := saveSecrets(*secretsFile, secrets); err != nil {
log.Printf("Warning: Failed to save secrets: %v", err)
} else {
log.Printf("Saved secrets to %s", *secretsFile)
// Check if it's a token error (invalid/expired/used token) - shutdown immediately
errMsg := err.Error()
if strings.Contains(errMsg, "token error:") {
log.Fatalf("Registration failed (token error): %v", err)
}
// Only retry on connection errors or other retryable errors
retryCount++
if retryCount >= maxRetries {
log.Fatalf("Failed to register runner after %d attempts: %v", maxRetries, err)
}
log.Printf("Registration failed (attempt %d/%d): %v, retrying in %v", retryCount, maxRetries, err, backoff)
time.Sleep(backoff)
backoff *= 2
if backoff > maxBackoff {
backoff = maxBackoff
}
}
}
@@ -90,7 +164,18 @@ func main() {
// ProcessTasks is now handled via WebSocket, but kept for HTTP fallback
// WebSocket will handle task assignment automatically
log.Printf("Runner started, connecting to manager via WebSocket...")
// Set up signal handlers to kill processes on shutdown
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
go func() {
sig := <-sigChan
log.Printf("Received signal: %v, killing all processes and shutting down...", sig)
client.KillAllProcesses()
os.Exit(0)
}()
// Block forever
select {}
}
@@ -125,3 +210,12 @@ func getEnv(key, defaultValue string) string {
return defaultValue
}
// generateShortID returns a short identifier for distinguishing runners.
//
// The normal path reads four bytes from crypto/rand and renders them as
// eight lowercase hex characters. If the random source reports an error,
// the ID is instead derived from the process ID XOR-ed with the current
// Unix time and formatted as hex.
func generateShortID() string {
	var raw [4]byte
	_, err := rand.Read(raw[:])
	if err == nil {
		return hex.EncodeToString(raw[:])
	}

	// Random source unavailable: build a best-effort ID from PID and wall clock.
	seed := os.Getpid() ^ int(time.Now().Unix())
	return fmt.Sprintf("%x", seed)
}