launch error display
This commit is contained in:
@@ -275,10 +275,28 @@ namespace UnifiedFarmLauncher.Services
|
||||
|
||||
if (process.ExitCode != 0 && !interactive)
|
||||
{
|
||||
throw new InvalidOperationException($"SSH script execution failed with exit code {process.ExitCode}: {error}");
|
||||
var errorText = error.ToString();
|
||||
var combinedOutput = output.ToString();
|
||||
if (!string.IsNullOrWhiteSpace(errorText))
|
||||
{
|
||||
combinedOutput = string.IsNullOrWhiteSpace(combinedOutput)
|
||||
? errorText
|
||||
: $"{combinedOutput}\n\n=== STDERR ===\n{errorText}";
|
||||
}
|
||||
throw new InvalidOperationException($"SSH script execution failed with exit code {process.ExitCode}: {combinedOutput}");
|
||||
}
|
||||
|
||||
return output.ToString();
|
||||
// Include stderr in output if present (for diagnostics)
|
||||
var result = output.ToString();
|
||||
var stderrText = error.ToString();
|
||||
if (!string.IsNullOrWhiteSpace(stderrText) && !interactive)
|
||||
{
|
||||
result = string.IsNullOrWhiteSpace(result)
|
||||
? stderrText
|
||||
: $"{result}\n\n=== STDERR ===\n{stderrText}";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public async Task<string> GetWorkerBasePathAsync(WorkerConfig worker)
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using UnifiedFarmLauncher.Models;
|
||||
|
||||
@@ -325,13 +328,356 @@ if ($shouldStart) {{
|
||||
'-PayloadBase64Path',""$payloadBase64Path""
|
||||
)
|
||||
|
||||
Start-Process -FilePath $psExe -ArgumentList $controllerArgs -WindowStyle Hidden | Out-Null
|
||||
Write-Host ""Worker $workerName started under controller."" -ForegroundColor Green
|
||||
Write-Host ""Attempting to start controller with: $psExe"" -ForegroundColor Cyan
|
||||
Write-Host ""Controller path: $controllerPath"" -ForegroundColor Cyan
|
||||
Write-Host ""Arguments: $($controllerArgs -join ' ')"" -ForegroundColor Cyan
|
||||
|
||||
try {{
|
||||
$controllerProcess = Start-Process -FilePath $psExe -ArgumentList $controllerArgs -WindowStyle Hidden -PassThru -ErrorAction Stop
|
||||
if ($controllerProcess) {{
|
||||
$controllerPid = $controllerProcess.Id
|
||||
Write-Host ""Controller process started with PID: $controllerPid"" -ForegroundColor Green
|
||||
|
||||
# Give it a moment to initialize
|
||||
Start-Sleep -Milliseconds 500
|
||||
|
||||
# Verify process is still running
|
||||
$stillRunning = Get-Process -Id $controllerPid -ErrorAction SilentlyContinue
|
||||
if (-not $stillRunning) {{
|
||||
throw ""Controller process (PID: $controllerPid) exited immediately after start""
|
||||
}}
|
||||
|
||||
# Update metadata with controller PID
|
||||
$updatedMeta = [pscustomobject]@{{
|
||||
WorkerName = $workerName
|
||||
WorkerType = $workerType
|
||||
Status = 'launching'
|
||||
ControllerPid = $controllerPid
|
||||
WorkerPid = $null
|
||||
Restarts = 0
|
||||
LastExitCode = $null
|
||||
LogPath = $logPath
|
||||
CommandPath = $commandPath
|
||||
PayloadPath = $payloadPath
|
||||
UpdatedAtUtc = (Get-Date).ToUniversalTime()
|
||||
}} | ConvertTo-Json -Depth 5
|
||||
$updatedMeta | Set-Content -Path $metaPath -Encoding UTF8
|
||||
|
||||
# Ensure file is flushed to disk before controller reads it
|
||||
Start-Sleep -Milliseconds 200
|
||||
|
||||
Write-Host ""Worker $workerName started under controller (PID: $controllerPid)."" -ForegroundColor Green
|
||||
}} else {{
|
||||
throw ""Start-Process returned null - controller failed to start""
|
||||
}}
|
||||
}} catch {{
|
||||
$errorMsg = $_.Exception.Message
|
||||
$errorDetails = $_.Exception.ToString()
|
||||
Write-Host ""Failed to start controller: $errorMsg"" -ForegroundColor Red
|
||||
Write-Host ""Error details: $errorDetails"" -ForegroundColor Red
|
||||
[Console]::Error.WriteLine(""Controller startup error: $errorMsg"")
|
||||
[Console]::Error.WriteLine(""Full error: $errorDetails"")
|
||||
|
||||
# Update metadata with error
|
||||
$errorMeta = [pscustomobject]@{{
|
||||
WorkerName = $workerName
|
||||
WorkerType = $workerType
|
||||
Status = 'error'
|
||||
ControllerPid = $null
|
||||
WorkerPid = $null
|
||||
Restarts = 0
|
||||
LastExitCode = 1
|
||||
LogPath = $logPath
|
||||
CommandPath = $commandPath
|
||||
PayloadPath = $payloadPath
|
||||
UpdatedAtUtc = (Get-Date).ToUniversalTime()
|
||||
ErrorMessage = ""Controller startup failed: $errorMsg""
|
||||
}} | ConvertTo-Json -Depth 5
|
||||
$errorMeta | Set-Content -Path $metaPath -Encoding UTF8
|
||||
throw
|
||||
}}
|
||||
}}
|
||||
";
|
||||
|
||||
// Pipe script through stdin to avoid command line length limits
|
||||
await _sshService.ExecuteRemoteScriptAsync(worker, ensureScript);
|
||||
string? ensureScriptOutput = null;
|
||||
try
|
||||
{
|
||||
ensureScriptOutput = await _sshService.ExecuteRemoteScriptAsync(worker, ensureScript);
|
||||
// Log any output from ensureScript for debugging
|
||||
if (!string.IsNullOrWhiteSpace(ensureScriptOutput))
|
||||
{
|
||||
System.Diagnostics.Debug.WriteLine($"EnsureScript output: {ensureScriptOutput}");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// If ensureScript fails, include that in diagnostics
|
||||
System.Diagnostics.Debug.WriteLine($"EnsureScript execution error: {ex.Message}");
|
||||
// Don't throw here - let the status check below handle it with full diagnostics
|
||||
}
|
||||
|
||||
// Wait a moment for the controller to start and update metadata
|
||||
// Give extra time for the metadata write to complete
|
||||
await Task.Delay(3000);
|
||||
|
||||
// Verify the worker actually started by checking metadata
|
||||
// Try up to 3 times with delays to account for slower startup
|
||||
WorkerStatus? status = null;
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
status = await GetWorkerStatusAsync(worker, workerType);
|
||||
if (status != null && (status.Status == "running" || status.Status == "launching"))
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (i < 2) // Don't delay on last attempt
|
||||
{
|
||||
await Task.Delay(1000);
|
||||
}
|
||||
}
|
||||
|
||||
if (status == null || (status.Status != "running" && status.Status != "launching"))
|
||||
{
|
||||
// Gather comprehensive diagnostic information
|
||||
var diagnostics = await GatherDiagnosticsAsync(worker, workerType);
|
||||
var errorMessage = status?.ErrorMessage ?? "Worker failed to start. Check logs for details.";
|
||||
var statusText = status?.Status ?? "unknown";
|
||||
|
||||
// Include ensureScript output if available
|
||||
var ensureOutputInfo = string.IsNullOrWhiteSpace(ensureScriptOutput)
|
||||
? ""
|
||||
: $"\n\n=== EnsureScript Output ===\n{ensureScriptOutput}";
|
||||
|
||||
throw new InvalidOperationException($"Worker did not start successfully. Status: {statusText}. {errorMessage}\n\n{diagnostics}{ensureOutputInfo}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the worker's metadata file (<c>state\worker-info.json</c>) on the remote host and maps it
/// to a <see cref="WorkerStatus"/>.
/// </summary>
/// <param name="worker">Worker whose host runs the script; its <c>Name</c> selects the instance folder.</param>
/// <param name="workerType">Worker type; used as the folder name under the <c>UnifiedWorkers</c> data root.</param>
/// <returns>
/// The deserialized status, or <c>null</c> when the script produced no output, no JSON object could be
/// located in the output, or executing/parsing failed (the failure is written to the debug log).
/// </returns>
private async Task<WorkerStatus?> GetWorkerStatusAsync(WorkerConfig worker, string workerType)
{
    // Both values are embedded in single-quoted PowerShell literals below; doubling any
    // embedded quote keeps a name such as "bob's-pc" from terminating the literal early.
    // The remote script resolves the metadata path itself, so no local path is computed here.
    var workerTypeLiteral = (workerType ?? string.Empty).Replace("'", "''");
    var workerNameLiteral = (worker.Name ?? string.Empty).Replace("'", "''");

    var script = $@"
$ProgressPreference = 'SilentlyContinue'
$dataRoot = Join-Path ([Environment]::GetFolderPath('LocalApplicationData')) 'UnifiedWorkers'
$instanceRoot = Join-Path (Join-Path $dataRoot '{workerTypeLiteral}') '{workerNameLiteral}'
$metaPath = Join-Path $instanceRoot 'state\worker-info.json'
if (Test-Path $metaPath) {{
    try {{
        $meta = Get-Content $metaPath -Raw | ConvertFrom-Json
        $meta | ConvertTo-Json -Depth 5 -Compress
    }} catch {{
        Write-Error ""Error reading metadata: $($_.Exception.Message)""
    }}
}} else {{
    Write-Error ""Metadata file not found at $metaPath""
}}
";

    try
    {
        var output = await _sshService.ExecuteRemoteScriptAsync(worker, script);
        if (string.IsNullOrWhiteSpace(output))
        {
            return null;
        }

        var lines = output.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

        // The script emits compressed JSON, so normally the object sits on a single line.
        // Ordinal comparison: '{' is a structural character, not linguistic text.
        string? jsonText = lines
            .Select(line => line.Trim())
            .FirstOrDefault(line => line.StartsWith("{", StringComparison.Ordinal));

        if (jsonText is null)
        {
            // Fallback: no line begins with '{', so take everything between the first '{'
            // and the last '}' of the output with the line breaks removed.
            var fullText = string.Join("", lines);
            var startIdx = fullText.IndexOf('{');
            var endIdx = fullText.LastIndexOf('}');
            if (startIdx >= 0 && endIdx > startIdx)
            {
                jsonText = fullText.Substring(startIdx, endIdx - startIdx + 1);
            }
        }

        if (string.IsNullOrEmpty(jsonText))
        {
            return null;
        }

        var options = new JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true
        };

        return JsonSerializer.Deserialize<WorkerStatus>(jsonText, options);
    }
    catch (Exception ex)
    {
        // Best effort by design: callers treat null as "status unknown" and gather diagnostics.
        System.Diagnostics.Debug.WriteLine($"Error parsing worker status: {ex.Message}");
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Runs a diagnostic PowerShell script on the worker's host and returns its trimmed output.
/// The script reports whether the metadata file, log file and controller script exist, probes the
/// controller script, prints the metadata status and PIDs, checks whether the recorded controller
/// process is alive, and prints the last 30 lines of the worker log.
/// </summary>
/// <param name="worker">Worker whose host runs the script; its <c>Name</c> selects the instance folder.</param>
/// <param name="workerType">Worker type; used as the folder name under the <c>UnifiedWorkers</c> data root.</param>
/// <returns>
/// The script output, or a string starting with "Unable to gather diagnostics:" when running the
/// script throws. This method does not propagate the failure.
/// </returns>
private async Task<string> GatherDiagnosticsAsync(WorkerConfig worker, string workerType)
{
    // Both values are embedded in single-quoted PowerShell literals below; doubling any
    // embedded quote keeps the literal intact when a name contains an apostrophe.
    var workerTypeLiteral = (workerType ?? string.Empty).Replace("'", "''");
    var workerNameLiteral = (worker.Name ?? string.Empty).Replace("'", "''");

    var script = $@"
$ProgressPreference = 'SilentlyContinue'
$ErrorActionPreference = 'Continue'
$dataRoot = Join-Path ([Environment]::GetFolderPath('LocalApplicationData')) 'UnifiedWorkers'
$instanceRoot = Join-Path (Join-Path $dataRoot '{workerTypeLiteral}') '{workerNameLiteral}'
$metaPath = Join-Path $instanceRoot 'state\worker-info.json'
$logPath = Join-Path $instanceRoot 'logs\worker.log'
$controllerPath = Join-Path $dataRoot 'controller.ps1'

Write-Host ""=== Diagnostics ==="" -ForegroundColor Cyan
Write-Host ""Metadata file exists: $((Test-Path $metaPath))""
Write-Host ""Log file exists: $((Test-Path $logPath))""
Write-Host ""Controller script exists: $((Test-Path $controllerPath))""

# Check controller script content
if (Test-Path $controllerPath) {{
    $controllerSize = (Get-Item $controllerPath).Length
    Write-Host ""Controller script size: $controllerSize bytes""
    $firstLine = Get-Content $controllerPath -First 1 -ErrorAction SilentlyContinue
    if ($firstLine) {{
        Write-Host ""Controller script first line: $firstLine""
    }}

    # Try to test if controller script can be resolved by a fresh PowerShell process
    Write-Host ""`n=== Testing Controller Script ==="" -ForegroundColor Cyan
    try {{
        $testResult = powershell -NoLogo -NoProfile -ExecutionPolicy Bypass -Command ""& {{ Get-Command -Name '$controllerPath' -ErrorAction Stop }}"" 2>&1
        # A non-zero exit from a native command does not raise an exception,
        # so the exit code has to be inspected explicitly.
        if ($LASTEXITCODE -eq 0) {{
            Write-Host ""Controller script syntax check: Passed""
        }} else {{
            Write-Host ""Controller script syntax check failed (exit code $LASTEXITCODE): $($testResult | Out-String)"" -ForegroundColor Red
        }}
    }} catch {{
        Write-Host ""Controller script syntax check failed: $($_.Exception.Message)"" -ForegroundColor Red
    }}

    # Check if we can manually invoke the controller with a test worker name
    Write-Host ""Attempting to test controller invocation...""
    try {{
        $testProcess = Start-Process -FilePath powershell -ArgumentList @('-NoLogo', '-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', ""$controllerPath"", '-WorkerName', 'TEST') -PassThru -WindowStyle Hidden -ErrorAction Stop
        Start-Sleep -Milliseconds 200
        if ($testProcess.HasExited) {{
            Write-Host ""Test controller process exited with code: $($testProcess.ExitCode)"" -ForegroundColor Yellow
        }} else {{
            Write-Host ""Test controller process started successfully (PID: $($testProcess.Id))"" -ForegroundColor Green
            Stop-Process -Id $testProcess.Id -Force -ErrorAction SilentlyContinue
        }}
    }} catch {{
        Write-Host ""Test controller invocation failed: $($_.Exception.Message)"" -ForegroundColor Red
        Write-Host ""Full error: $($_.Exception.ToString())"" -ForegroundColor Red
    }}
}}

if (Test-Path $metaPath) {{
    try {{
        $meta = Get-Content $metaPath -Raw | ConvertFrom-Json
        Write-Host ""Metadata Status: $($meta.Status)""
        Write-Host ""Controller PID: $($meta.ControllerPid)""
        Write-Host ""Worker PID: $($meta.WorkerPid)""
        if ($meta.ErrorMessage) {{
            Write-Host ""Error Message: $($meta.ErrorMessage)"" -ForegroundColor Red
        }}
    }} catch {{
        Write-Host ""Error reading metadata: $($_.Exception.Message)"" -ForegroundColor Red
        Write-Host ""Metadata file content (first 500 chars):"" -ForegroundColor Yellow
        $metaContent = Get-Content $metaPath -Raw -ErrorAction SilentlyContinue
        if ($metaContent) {{
            Write-Host $metaContent.Substring(0, [Math]::Min(500, $metaContent.Length))
        }}
    }}
}} else {{
    Write-Host ""Metadata file not found - controller may not have started"" -ForegroundColor Yellow
}}

# Check if controller process is running
$controllerRunning = $false
$controllerProcessInfo = ""N/A""
if (Test-Path $metaPath) {{
    try {{
        $meta = Get-Content $metaPath -Raw | ConvertFrom-Json
        if ($meta.ControllerPid) {{
            $proc = Get-Process -Id $meta.ControllerPid -ErrorAction SilentlyContinue
            $controllerRunning = ($null -ne $proc)
            if ($controllerRunning) {{
                $controllerProcessInfo = ""Running (PID: $($meta.ControllerPid))""
            }} else {{
                $controllerProcessInfo = ""Not running (PID: $($meta.ControllerPid) - process not found)""
            }}
        }} else {{
            $controllerProcessInfo = ""No controller PID in metadata""
        }}
    }} catch {{
        $controllerProcessInfo = ""Error checking: $($_.Exception.Message)""
    }}
}} else {{
    $controllerProcessInfo = ""Metadata file not found""
}}
Write-Host ""Controller process: $controllerProcessInfo""

Write-Host ""`n=== Recent Log Entries (last 30 lines) ==="" -ForegroundColor Cyan
if (Test-Path $logPath) {{
    try {{
        $logFileInfo = Get-Item $logPath -ErrorAction SilentlyContinue
        if ($logFileInfo) {{
            Write-Host ""Log file size: $($logFileInfo.Length) bytes"" -ForegroundColor Gray
        }}

        $logContent = Get-Content $logPath -ErrorAction SilentlyContinue
        if ($logContent) {{
            $logLines = if ($logContent.Count -gt 30) {{ $logContent[-30..-1] }} else {{ $logContent }}
            Write-Host ""Found $($logLines.Count) log lines (showing last $([Math]::Min(30, $logLines.Count)))""
            $logLines | ForEach-Object {{ Write-Host $_ }}
        }} else {{
            Write-Host ""Log file exists but appears to be empty or unreadable"" -ForegroundColor Yellow
            Write-Host ""Attempting to read raw bytes..."" -ForegroundColor Gray
            try {{
                $rawBytes = [IO.File]::ReadAllBytes($logPath)
                Write-Host ""Log file contains $($rawBytes.Length) bytes""
                if ($rawBytes.Length -gt 0) {{
                    $text = [System.Text.Encoding]::UTF8.GetString($rawBytes)
                    Write-Host ""Log content (first 500 chars):"" -ForegroundColor Cyan
                    Write-Host $text.Substring(0, [Math]::Min(500, $text.Length))
                }}
            }} catch {{
                Write-Host ""Could not read log file bytes: $($_.Exception.Message)"" -ForegroundColor Red
            }}
        }}
    }} catch {{
        Write-Host ""Error reading log: $($_.Exception.Message)"" -ForegroundColor Red
        Write-Host ""Stack trace: $($_.ScriptStackTrace)"" -ForegroundColor DarkRed
    }}
}} else {{
    Write-Host ""Log file does not exist yet at: $logPath"" -ForegroundColor Yellow
}}
";

    try
    {
        var output = await _sshService.ExecuteRemoteScriptAsync(worker, script);
        return output.Trim();
    }
    catch (Exception ex)
    {
        // Diagnostics are only ever appended to another error message, so a failure here
        // is reported as text instead of masking the original problem.
        return $"Unable to gather diagnostics: {ex.Message}";
    }
}
|
||||
|
||||
/// <summary>
/// Subset of the remote <c>worker-info.json</c> metadata that this service reads back.
/// Populated via <c>JsonSerializer.Deserialize</c> with case-insensitive property matching.
/// </summary>
private class WorkerStatus
{
    /// <summary>
    /// Value of the metadata <c>Status</c> field. Start-up is treated as successful only when
    /// this is <c>"running"</c> or <c>"launching"</c>.
    /// </summary>
    public string? Status { get; set; }

    /// <summary>Value of the metadata <c>ErrorMessage</c> field, when one was written.</summary>
    public string? ErrorMessage { get; set; }

    /// <summary>Value of the metadata <c>WorkerPid</c> field; <c>null</c> when absent or JSON null.</summary>
    public int? WorkerPid { get; set; }
}
|
||||
|
||||
public async Task StopWorkerAsync(WorkerConfig worker, string workerType)
|
||||
|
||||
Reference in New Issue
Block a user