Merge branch 'LogsFSHealthCheck' into 'master'
Introduce FsHealthCheck to run after a possibly-filesystem related error. See merge request sheepitrenderfarm/client!162
This commit is contained in:
@@ -168,11 +168,14 @@ import okhttp3.HttpUrl;
|
|||||||
}, this.configuration.getShutdownTime());
|
}, this.configuration.getShutdownTime());
|
||||||
}
|
}
|
||||||
|
|
||||||
//send "error" log containing config
|
//send "error" log containing config and fs health check
|
||||||
step = log.newCheckPoint();
|
step = log.newCheckPoint();
|
||||||
this.log.info("HWID: " + new HWIdentifier(log).getHardwareHash());
|
this.log.info("HWID: " + new HWIdentifier(log).getHardwareHash());
|
||||||
this.log.info("OS: " + OS.getOS().getVersion() + " " + System.getProperty("os.arch"));
|
this.log.info("OS: " + OS.getOS().getVersion() + " " + System.getProperty("os.arch"));
|
||||||
this.log.info(configuration.toString());
|
this.log.info(configuration.toString());
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
this.log.info(logline);
|
||||||
|
}
|
||||||
sendError(step, null, Type.OK);
|
sendError(step, null, Type.OK);
|
||||||
|
|
||||||
// Check integrity of all files in the working directories
|
// Check integrity of all files in the working directories
|
||||||
@@ -756,6 +759,9 @@ import okhttp3.HttpUrl;
|
|||||||
downloadRet = this.downloadExecutable(ajob);
|
downloadRet = this.downloadExecutable(ajob);
|
||||||
if (downloadRet != Error.Type.OK) {
|
if (downloadRet != Error.Type.OK) {
|
||||||
gui.setRenderingProjectName("");
|
gui.setRenderingProjectName("");
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
this.log.error("Client::work problem with downloadExecutable (ret " + downloadRet + ")");
|
this.log.error("Client::work problem with downloadExecutable (ret " + downloadRet + ")");
|
||||||
return downloadRet;
|
return downloadRet;
|
||||||
}
|
}
|
||||||
@@ -763,6 +769,9 @@ import okhttp3.HttpUrl;
|
|||||||
downloadRet = this.downloadSceneFile(ajob);
|
downloadRet = this.downloadSceneFile(ajob);
|
||||||
if (downloadRet != Error.Type.OK) {
|
if (downloadRet != Error.Type.OK) {
|
||||||
gui.setRenderingProjectName("");
|
gui.setRenderingProjectName("");
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
this.log.error("Client::work problem with downloadSceneFile (ret " + downloadRet + ")");
|
this.log.error("Client::work problem with downloadSceneFile (ret " + downloadRet + ")");
|
||||||
return downloadRet;
|
return downloadRet;
|
||||||
}
|
}
|
||||||
@@ -770,12 +779,18 @@ import okhttp3.HttpUrl;
|
|||||||
int ret = this.prepareWorkingDirectory(ajob); // decompress renderer and scene archives
|
int ret = this.prepareWorkingDirectory(ajob); // decompress renderer and scene archives
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
gui.setRenderingProjectName("");
|
gui.setRenderingProjectName("");
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
this.log.error("Client::work problem with this.prepareWorkingDirectory (ret " + ret + ")");
|
this.log.error("Client::work problem with this.prepareWorkingDirectory (ret " + ret + ")");
|
||||||
return Error.Type.CAN_NOT_CREATE_DIRECTORY;
|
return Error.Type.CAN_NOT_CREATE_DIRECTORY;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (FermeException e) {
|
catch (FermeException e) {
|
||||||
gui.setRenderingProjectName("");
|
gui.setRenderingProjectName("");
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
if (e instanceof FermeExceptionNoSpaceLeftOnDevice) {
|
if (e instanceof FermeExceptionNoSpaceLeftOnDevice) {
|
||||||
return Error.Type.NO_SPACE_LEFT_ON_DEVICE;
|
return Error.Type.NO_SPACE_LEFT_ON_DEVICE;
|
||||||
}
|
}
|
||||||
@@ -795,6 +810,9 @@ import okhttp3.HttpUrl;
|
|||||||
|
|
||||||
if (scene_file.exists() == false) {
|
if (scene_file.exists() == false) {
|
||||||
gui.setRenderingProjectName("");
|
gui.setRenderingProjectName("");
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
this.log.error("Client::work job preparation failed (scene file '" + scene_file.getAbsolutePath()
|
this.log.error("Client::work job preparation failed (scene file '" + scene_file.getAbsolutePath()
|
||||||
+ "' does not exist), cleaning directory in hope to recover");
|
+ "' does not exist), cleaning directory in hope to recover");
|
||||||
this.configuration.cleanWorkingDirectory();
|
this.configuration.cleanWorkingDirectory();
|
||||||
@@ -803,6 +821,9 @@ import okhttp3.HttpUrl;
|
|||||||
|
|
||||||
if (renderer_file.exists() == false) {
|
if (renderer_file.exists() == false) {
|
||||||
gui.setRenderingProjectName("");
|
gui.setRenderingProjectName("");
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
this.log.error("Client::work job preparation failed (renderer file '" + renderer_file.getAbsolutePath()
|
this.log.error("Client::work job preparation failed (renderer file '" + renderer_file.getAbsolutePath()
|
||||||
+ "' does not exist), cleaning directory in hope to recover");
|
+ "' does not exist), cleaning directory in hope to recover");
|
||||||
this.configuration.cleanWorkingDirectory();
|
this.configuration.cleanWorkingDirectory();
|
||||||
|
|||||||
@@ -26,10 +26,12 @@ import java.io.InputStream;
|
|||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.net.InetAddress;
|
import java.net.InetAddress;
|
||||||
import java.net.UnknownHostException;
|
import java.net.UnknownHostException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.ListIterator;
|
||||||
|
|
||||||
import com.sheepit.client.hardware.cpu.CPU;
|
import com.sheepit.client.hardware.cpu.CPU;
|
||||||
import com.sheepit.client.hardware.gpu.GPUDevice;
|
import com.sheepit.client.hardware.gpu.GPUDevice;
|
||||||
@@ -330,6 +332,65 @@ import lombok.Data;
|
|||||||
return files_local;
|
return files_local;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs through all SheepIt related directories and checks if files and folders are all readable, writeable
|
||||||
|
* and in case of directories, checks if the contents can be listed and if usable space is enough.
|
||||||
|
* Only logs instances where something was detected, otherwise is it will only print "FilesystemHealthCheck started"
|
||||||
|
* @return an ArrayList of Strings containing all of the logs of the FSHealth check
|
||||||
|
*/
|
||||||
|
public ArrayList<String> filesystemHealthCheck() {
|
||||||
|
ArrayList<String> logs = new ArrayList<>();
|
||||||
|
String f = "FSHealth: ";
|
||||||
|
logs.add(f + "FilesystemHealthCheck started");
|
||||||
|
ArrayList<File> dirsToCheck = new ArrayList<>();
|
||||||
|
ArrayList<File> dirsChecked = new ArrayList<>();
|
||||||
|
dirsToCheck.add(workingDirectory.getAbsoluteFile());
|
||||||
|
if (sharedDownloadsDirectory != null && dirsToCheck.contains(sharedDownloadsDirectory.getAbsoluteFile()) == false) {
|
||||||
|
dirsToCheck.add(sharedDownloadsDirectory.getAbsoluteFile());
|
||||||
|
}
|
||||||
|
if (storageDirectory != null && dirsToCheck.contains(storageDirectory.getAbsoluteFile()) == false) {
|
||||||
|
dirsToCheck.add(storageDirectory.getAbsoluteFile());
|
||||||
|
}
|
||||||
|
ListIterator<File> dirs = dirsToCheck.listIterator();
|
||||||
|
while (dirs.hasNext()) {
|
||||||
|
File dir = dirs.next();
|
||||||
|
dirs.remove();
|
||||||
|
dirsChecked.add(dir);
|
||||||
|
File[] fileList = dir.listFiles();
|
||||||
|
if (fileList == null) {
|
||||||
|
logs.add(f + "File list of " + dir + " is null");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for (File file : fileList) {
|
||||||
|
file = file.getAbsoluteFile();
|
||||||
|
//logs.add(f + file);
|
||||||
|
boolean canRead = file.canRead();
|
||||||
|
boolean canWrite = file.canWrite();
|
||||||
|
boolean isDir = file.isDirectory();
|
||||||
|
if (canRead == false) {
|
||||||
|
logs.add(f + "Can't read from " + file);
|
||||||
|
}
|
||||||
|
if (canWrite == false) {
|
||||||
|
logs.add(f + "Can't write to " + file);
|
||||||
|
}
|
||||||
|
if (canRead && canWrite && isDir) {
|
||||||
|
if (dirsChecked.contains(file)) {
|
||||||
|
logs.add(f + "Dir " + file + " already checked. Loop detected");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
dirs.add(file);
|
||||||
|
}
|
||||||
|
long usableSpace = file.getUsableSpace();
|
||||||
|
if (usableSpace < 512 * 1024) {
|
||||||
|
logs.add(f + "Usable space is " + usableSpace + " for " + file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return logs;
|
||||||
|
}
|
||||||
|
|
||||||
private static String getJarVersion() {
|
private static String getJarVersion() {
|
||||||
String versionPath = "/VERSION";
|
String versionPath = "/VERSION";
|
||||||
String version = "6.0.0";
|
String version = "6.0.0";
|
||||||
|
|||||||
@@ -247,6 +247,9 @@ import java.util.regex.Pattern;
|
|||||||
catch (IOException e) {
|
catch (IOException e) {
|
||||||
StringWriter sw = new StringWriter();
|
StringWriter sw = new StringWriter();
|
||||||
e.printStackTrace(new PrintWriter(sw));
|
e.printStackTrace(new PrintWriter(sw));
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
log.error("Job::render exception on script generation, will return UNKNOWN " + e + " stacktrace " + sw.toString());
|
log.error("Job::render exception on script generation, will return UNKNOWN " + e + " stacktrace " + sw.toString());
|
||||||
return Error.Type.UNKNOWN;
|
return Error.Type.UNKNOWN;
|
||||||
}
|
}
|
||||||
@@ -409,6 +412,9 @@ import java.util.regex.Pattern;
|
|||||||
|
|
||||||
// Put back base icon
|
// Put back base icon
|
||||||
gui.updateTrayIcon(Job.SHOW_BASE_ICON);
|
gui.updateTrayIcon(Job.SHOW_BASE_ICON);
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
@@ -458,6 +464,9 @@ import java.util.regex.Pattern;
|
|||||||
}
|
}
|
||||||
StringWriter sw = new StringWriter();
|
StringWriter sw = new StringWriter();
|
||||||
err.printStackTrace(new PrintWriter(sw));
|
err.printStackTrace(new PrintWriter(sw));
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
log.error("Job::render exception(A) " + err + " stacktrace " + sw.toString());
|
log.error("Job::render exception(A) " + err + " stacktrace " + sw.toString());
|
||||||
return Error.Type.FAILED_TO_EXECUTE;
|
return Error.Type.FAILED_TO_EXECUTE;
|
||||||
}
|
}
|
||||||
@@ -505,6 +514,9 @@ import java.util.regex.Pattern;
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (files.length == 0) {
|
if (files.length == 0) {
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
log.error("Job::render no picture file found (after finished render (filename_without_extension " + filename_without_extension + ")");
|
log.error("Job::render no picture file found (after finished render (filename_without_extension " + filename_without_extension + ")");
|
||||||
|
|
||||||
String basename = "";
|
String basename = "";
|
||||||
@@ -516,12 +528,18 @@ import java.util.regex.Pattern;
|
|||||||
}
|
}
|
||||||
File crash_file = new File(configuration.getWorkingDirectory() + File.separator + basename + ".crash.txt");
|
File crash_file = new File(configuration.getWorkingDirectory() + File.separator + basename + ".crash.txt");
|
||||||
if (crash_file.exists()) {
|
if (crash_file.exists()) {
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
log.error("Job::render crash file found => the renderer crashed");
|
log.error("Job::render crash file found => the renderer crashed");
|
||||||
crash_file.delete();
|
crash_file.delete();
|
||||||
return Error.Type.RENDERER_CRASHED;
|
return Error.Type.RENDERER_CRASHED;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (exit_value == 127 && process.getDuration() < 10) {
|
if (exit_value == 127 && process.getDuration() < 10) {
|
||||||
|
for (String logline : configuration.filesystemHealthCheck()) {
|
||||||
|
log.debug(logline);
|
||||||
|
}
|
||||||
log.error("Job::render renderer returned 127 and took " + process.getDuration() + "s, some libraries may be missing");
|
log.error("Job::render renderer returned 127 and took " + process.getDuration() + "s, some libraries may be missing");
|
||||||
return Error.Type.RENDERER_MISSING_LIBRARIES;
|
return Error.Type.RENDERER_MISSING_LIBRARIES;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user