Merge branch 'LogsFSHealthCheck' into 'master'

Introduce FsHealthCheck to run after a possibly filesystem-related error.

See merge request sheepitrenderfarm/client!162
This commit is contained in:
Sheepit Renderfarm
2023-01-06 14:43:15 +00:00
3 changed files with 101 additions and 1 deletions

View File

@@ -168,11 +168,14 @@ import okhttp3.HttpUrl;
}, this.configuration.getShutdownTime());
}
//send "error" log containing config
//send "error" log containing config and fs health check
step = log.newCheckPoint();
this.log.info("HWID: " + new HWIdentifier(log).getHardwareHash());
this.log.info("OS: " + OS.getOS().getVersion() + " " + System.getProperty("os.arch"));
this.log.info(configuration.toString());
for (String logline : configuration.filesystemHealthCheck()) {
this.log.info(logline);
}
sendError(step, null, Type.OK);
// Check integrity of all files in the working directories
@@ -756,6 +759,9 @@ import okhttp3.HttpUrl;
downloadRet = this.downloadExecutable(ajob);
if (downloadRet != Error.Type.OK) {
gui.setRenderingProjectName("");
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
this.log.error("Client::work problem with downloadExecutable (ret " + downloadRet + ")");
return downloadRet;
}
@@ -763,6 +769,9 @@ import okhttp3.HttpUrl;
downloadRet = this.downloadSceneFile(ajob);
if (downloadRet != Error.Type.OK) {
gui.setRenderingProjectName("");
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
this.log.error("Client::work problem with downloadSceneFile (ret " + downloadRet + ")");
return downloadRet;
}
@@ -770,12 +779,18 @@ import okhttp3.HttpUrl;
int ret = this.prepareWorkingDirectory(ajob); // decompress renderer and scene archives
if (ret != 0) {
gui.setRenderingProjectName("");
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
this.log.error("Client::work problem with this.prepareWorkingDirectory (ret " + ret + ")");
return Error.Type.CAN_NOT_CREATE_DIRECTORY;
}
}
catch (FermeException e) {
gui.setRenderingProjectName("");
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
if (e instanceof FermeExceptionNoSpaceLeftOnDevice) {
return Error.Type.NO_SPACE_LEFT_ON_DEVICE;
}
@@ -795,6 +810,9 @@ import okhttp3.HttpUrl;
if (scene_file.exists() == false) {
gui.setRenderingProjectName("");
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
this.log.error("Client::work job preparation failed (scene file '" + scene_file.getAbsolutePath()
+ "' does not exist), cleaning directory in hope to recover");
this.configuration.cleanWorkingDirectory();
@@ -803,6 +821,9 @@ import okhttp3.HttpUrl;
if (renderer_file.exists() == false) {
gui.setRenderingProjectName("");
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
this.log.error("Client::work job preparation failed (renderer file '" + renderer_file.getAbsolutePath()
+ "' does not exist), cleaning directory in hope to recover");
this.configuration.cleanWorkingDirectory();

View File

@@ -26,10 +26,12 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import com.sheepit.client.hardware.cpu.CPU;
import com.sheepit.client.hardware.gpu.GPUDevice;
@@ -330,6 +332,65 @@ import lombok.Data;
return files_local;
}
/**
 * Runs through all SheepIt related directories (working, shared downloads and storage directory) including
 * every sub-directory below them and checks if files and folders are all readable and writeable.
 * For readable and writeable sub-directories it also checks if the usable space is enough, and for every
 * scanned directory it checks if the contents can be listed.
 * Only logs instances where something was detected, otherwise it will only contain "FilesystemHealthCheck started".
 *
 * @return an ArrayList of Strings containing all of the logs of the FSHealth check
 */
public ArrayList<String> filesystemHealthCheck() {
    ArrayList<String> logs = new ArrayList<>();
    String f = "FSHealth: ";
    logs.add(f + "FilesystemHealthCheck started");

    // Work list of directories to scan. It grows at its end while sub-directories are discovered.
    ArrayList<File> dirsToCheck = new ArrayList<>();
    // Canonical form of every directory that has been put on the work list (prevents scanning one twice).
    java.util.Set<File> dirsQueued = new java.util.HashSet<>();
    // Canonical form of every directory whose content has already been listed.
    java.util.Set<File> dirsChecked = new java.util.HashSet<>();

    File workingDir = workingDirectory.getAbsoluteFile();
    dirsToCheck.add(workingDir);
    dirsQueued.add(canonicalOrAbsolute(workingDir));
    if (sharedDownloadsDirectory != null && dirsQueued.add(canonicalOrAbsolute(sharedDownloadsDirectory))) {
        dirsToCheck.add(sharedDownloadsDirectory.getAbsoluteFile());
    }
    if (storageDirectory != null && dirsQueued.add(canonicalOrAbsolute(storageDirectory))) {
        dirsToCheck.add(storageDirectory.getAbsoluteFile());
    }

    // Index based loop on purpose: ListIterator.add() inserts *before* the cursor, so directories queued
    // through the iterator would never be returned by next() and sub-directories would silently be skipped.
    for (int i = 0; i < dirsToCheck.size(); i++) {
        File dir = dirsToCheck.get(i);
        dirsChecked.add(canonicalOrAbsolute(dir));

        File[] fileList = dir.listFiles();
        if (fileList == null) {
            logs.add(f + "File list of " + dir + " is null");
            continue;
        }

        for (File file : fileList) {
            file = file.getAbsoluteFile();
            boolean canRead = file.canRead();
            boolean canWrite = file.canWrite();

            if (canRead == false) {
                logs.add(f + "Can't read from " + file);
            }
            if (canWrite == false) {
                logs.add(f + "Can't write to " + file);
            }

            if (canRead && canWrite && file.isDirectory()) {
                // Compare by canonical path so a symlink pointing back to an already scanned
                // directory is reported instead of being followed over and over again.
                File canonical = canonicalOrAbsolute(file);
                if (dirsChecked.contains(canonical)) {
                    logs.add(f + "Dir " + file + " already checked. Loop detected");
                }
                else if (dirsQueued.add(canonical)) {
                    dirsToCheck.add(file);
                }

                long usableSpace = file.getUsableSpace();
                if (usableSpace < 512 * 1024) {
                    logs.add(f + "Usable space is " + usableSpace + " for " + file);
                }
            }
        }
    }
    return logs;
}

/**
 * Resolves the given file to its canonical form (symlinks and relative parts resolved).
 * Falls back to the absolute form when the canonical path can not be determined.
 */
private static File canonicalOrAbsolute(File file) {
    try {
        return file.getCanonicalFile();
    }
    catch (java.io.IOException e) {
        return file.getAbsoluteFile();
    }
}
private static String getJarVersion() {
String versionPath = "/VERSION";
String version = "6.0.0";

View File

@@ -247,6 +247,9 @@ import java.util.regex.Pattern;
catch (IOException e) {
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
log.error("Job::render exception on script generation, will return UNKNOWN " + e + " stacktrace " + sw.toString());
return Error.Type.UNKNOWN;
}
@@ -409,6 +412,9 @@ import java.util.regex.Pattern;
// Put back base icon
gui.updateTrayIcon(Job.SHOW_BASE_ICON);
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
return error;
}
@@ -458,6 +464,9 @@ import java.util.regex.Pattern;
}
StringWriter sw = new StringWriter();
err.printStackTrace(new PrintWriter(sw));
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
log.error("Job::render exception(A) " + err + " stacktrace " + sw.toString());
return Error.Type.FAILED_TO_EXECUTE;
}
@@ -505,6 +514,9 @@ import java.util.regex.Pattern;
}
if (files.length == 0) {
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
log.error("Job::render no picture file found (after finished render (filename_without_extension " + filename_without_extension + ")");
String basename = "";
@@ -516,12 +528,18 @@ import java.util.regex.Pattern;
}
File crash_file = new File(configuration.getWorkingDirectory() + File.separator + basename + ".crash.txt");
if (crash_file.exists()) {
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
log.error("Job::render crash file found => the renderer crashed");
crash_file.delete();
return Error.Type.RENDERER_CRASHED;
}
if (exit_value == 127 && process.getDuration() < 10) {
for (String logline : configuration.filesystemHealthCheck()) {
log.debug(logline);
}
log.error("Job::render renderer returned 127 and took " + process.getDuration() + "s, some libraries may be missing");
return Error.Type.RENDERER_MISSING_LIBRARIES;
}