Introduce FSHealthCheck to run after a possibly filesystem-related error.
This commit is contained in:
committed by
Sheepit Renderfarm
parent
68ad00334e
commit
9eed45ef6e
@@ -168,11 +168,14 @@ import okhttp3.HttpUrl;
|
||||
}, this.configuration.getShutdownTime());
|
||||
}
|
||||
|
||||
//send "error" log containing config
|
||||
//send "error" log containing config and fs health check
|
||||
step = log.newCheckPoint();
|
||||
this.log.info("HWID: " + new HWIdentifier(log).getHardwareHash());
|
||||
this.log.info("OS: " + OS.getOS().getVersion() + " " + System.getProperty("os.arch"));
|
||||
this.log.info(configuration.toString());
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
this.log.info(logline);
|
||||
}
|
||||
sendError(step, null, Type.OK);
|
||||
|
||||
// Check integrity of all files in the working directories
|
||||
@@ -756,6 +759,9 @@ import okhttp3.HttpUrl;
|
||||
downloadRet = this.downloadExecutable(ajob);
|
||||
if (downloadRet != Error.Type.OK) {
|
||||
gui.setRenderingProjectName("");
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
this.log.error("Client::work problem with downloadExecutable (ret " + downloadRet + ")");
|
||||
return downloadRet;
|
||||
}
|
||||
@@ -763,6 +769,9 @@ import okhttp3.HttpUrl;
|
||||
downloadRet = this.downloadSceneFile(ajob);
|
||||
if (downloadRet != Error.Type.OK) {
|
||||
gui.setRenderingProjectName("");
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
this.log.error("Client::work problem with downloadSceneFile (ret " + downloadRet + ")");
|
||||
return downloadRet;
|
||||
}
|
||||
@@ -770,12 +779,18 @@ import okhttp3.HttpUrl;
|
||||
int ret = this.prepareWorkingDirectory(ajob); // decompress renderer and scene archives
|
||||
if (ret != 0) {
|
||||
gui.setRenderingProjectName("");
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
this.log.error("Client::work problem with this.prepareWorkingDirectory (ret " + ret + ")");
|
||||
return Error.Type.CAN_NOT_CREATE_DIRECTORY;
|
||||
}
|
||||
}
|
||||
catch (FermeException e) {
|
||||
gui.setRenderingProjectName("");
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
if (e instanceof FermeExceptionNoSpaceLeftOnDevice) {
|
||||
return Error.Type.NO_SPACE_LEFT_ON_DEVICE;
|
||||
}
|
||||
@@ -795,6 +810,9 @@ import okhttp3.HttpUrl;
|
||||
|
||||
if (scene_file.exists() == false) {
|
||||
gui.setRenderingProjectName("");
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
this.log.error("Client::work job preparation failed (scene file '" + scene_file.getAbsolutePath()
|
||||
+ "' does not exist), cleaning directory in hope to recover");
|
||||
this.configuration.cleanWorkingDirectory();
|
||||
@@ -803,6 +821,9 @@ import okhttp3.HttpUrl;
|
||||
|
||||
if (renderer_file.exists() == false) {
|
||||
gui.setRenderingProjectName("");
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
this.log.error("Client::work job preparation failed (renderer file '" + renderer_file.getAbsolutePath()
|
||||
+ "' does not exist), cleaning directory in hope to recover");
|
||||
this.configuration.cleanWorkingDirectory();
|
||||
|
||||
@@ -26,10 +26,12 @@ import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.InetAddress;
|
||||
import java.net.UnknownHostException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Calendar;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
|
||||
import com.sheepit.client.hardware.cpu.CPU;
|
||||
import com.sheepit.client.hardware.gpu.GPUDevice;
|
||||
@@ -330,6 +332,65 @@ import lombok.Data;
|
||||
return files_local;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs through all SheepIt related directories and checks if files and folders are all readable, writeable
|
||||
* and in case of directories, checks if the contents can be listed and if usable space is enough.
|
||||
* Only logs instances where something was detected, otherwise is it will only print "FilesystemHealthCheck started"
|
||||
* @return an ArrayList of Strings containing all of the logs of the FSHealth check
|
||||
*/
|
||||
public ArrayList<String> filesystemHealthCheck() {
|
||||
ArrayList<String> logs = new ArrayList<>();
|
||||
String f = "FSHealth: ";
|
||||
logs.add(f + "FilesystemHealthCheck started");
|
||||
ArrayList<File> dirsToCheck = new ArrayList<>();
|
||||
ArrayList<File> dirsChecked = new ArrayList<>();
|
||||
dirsToCheck.add(workingDirectory.getAbsoluteFile());
|
||||
if (sharedDownloadsDirectory != null && dirsToCheck.contains(sharedDownloadsDirectory.getAbsoluteFile()) == false) {
|
||||
dirsToCheck.add(sharedDownloadsDirectory.getAbsoluteFile());
|
||||
}
|
||||
if (storageDirectory != null && dirsToCheck.contains(storageDirectory.getAbsoluteFile()) == false) {
|
||||
dirsToCheck.add(storageDirectory.getAbsoluteFile());
|
||||
}
|
||||
ListIterator<File> dirs = dirsToCheck.listIterator();
|
||||
while (dirs.hasNext()) {
|
||||
File dir = dirs.next();
|
||||
dirs.remove();
|
||||
dirsChecked.add(dir);
|
||||
File[] fileList = dir.listFiles();
|
||||
if (fileList == null) {
|
||||
logs.add(f + "File list of " + dir + " is null");
|
||||
}
|
||||
else {
|
||||
for (File file : fileList) {
|
||||
file = file.getAbsoluteFile();
|
||||
//logs.add(f + file);
|
||||
boolean canRead = file.canRead();
|
||||
boolean canWrite = file.canWrite();
|
||||
boolean isDir = file.isDirectory();
|
||||
if (canRead == false) {
|
||||
logs.add(f + "Can't read from " + file);
|
||||
}
|
||||
if (canWrite == false) {
|
||||
logs.add(f + "Can't write to " + file);
|
||||
}
|
||||
if (canRead && canWrite && isDir) {
|
||||
if (dirsChecked.contains(file)) {
|
||||
logs.add(f + "Dir " + file + " already checked. Loop detected");
|
||||
}
|
||||
else {
|
||||
dirs.add(file);
|
||||
}
|
||||
long usableSpace = file.getUsableSpace();
|
||||
if (usableSpace < 512 * 1024) {
|
||||
logs.add(f + "Usable space is " + usableSpace + " for " + file);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return logs;
|
||||
}
|
||||
|
||||
private static String getJarVersion() {
|
||||
String versionPath = "/VERSION";
|
||||
String version = "6.0.0";
|
||||
|
||||
@@ -247,6 +247,9 @@ import java.util.regex.Pattern;
|
||||
catch (IOException e) {
|
||||
StringWriter sw = new StringWriter();
|
||||
e.printStackTrace(new PrintWriter(sw));
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
log.error("Job::render exception on script generation, will return UNKNOWN " + e + " stacktrace " + sw.toString());
|
||||
return Error.Type.UNKNOWN;
|
||||
}
|
||||
@@ -409,6 +412,9 @@ import java.util.regex.Pattern;
|
||||
|
||||
// Put back base icon
|
||||
gui.updateTrayIcon(Job.SHOW_BASE_ICON);
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -458,6 +464,9 @@ import java.util.regex.Pattern;
|
||||
}
|
||||
StringWriter sw = new StringWriter();
|
||||
err.printStackTrace(new PrintWriter(sw));
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
log.error("Job::render exception(A) " + err + " stacktrace " + sw.toString());
|
||||
return Error.Type.FAILED_TO_EXECUTE;
|
||||
}
|
||||
@@ -505,6 +514,9 @@ import java.util.regex.Pattern;
|
||||
}
|
||||
|
||||
if (files.length == 0) {
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
log.error("Job::render no picture file found (after finished render (filename_without_extension " + filename_without_extension + ")");
|
||||
|
||||
String basename = "";
|
||||
@@ -516,12 +528,18 @@ import java.util.regex.Pattern;
|
||||
}
|
||||
File crash_file = new File(configuration.getWorkingDirectory() + File.separator + basename + ".crash.txt");
|
||||
if (crash_file.exists()) {
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
log.error("Job::render crash file found => the renderer crashed");
|
||||
crash_file.delete();
|
||||
return Error.Type.RENDERER_CRASHED;
|
||||
}
|
||||
|
||||
if (exit_value == 127 && process.getDuration() < 10) {
|
||||
for (String logline : configuration.filesystemHealthCheck()) {
|
||||
log.debug(logline);
|
||||
}
|
||||
log.error("Job::render renderer returned 127 and took " + process.getDuration() + "s, some libraries may be missing");
|
||||
return Error.Type.RENDERER_MISSING_LIBRARIES;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user