From 9f825b8d0f660b09bbbd1d5efd6780a7dd10c997 Mon Sep 17 00:00:00 2001
From: M*C*O
Date: Sat, 11 Nov 2023 08:50:11 +0000
Subject: [PATCH] Extract zips while reading the chunks, instead of reading them to memory first

---
 .../com/sheepit/client/ChunkInputStream.java  | 109 ++++++++++++++++++
 src/main/java/com/sheepit/client/Utils.java   |  14 +--
 2 files changed, 113 insertions(+), 10 deletions(-)
 create mode 100644 src/main/java/com/sheepit/client/ChunkInputStream.java

diff --git a/src/main/java/com/sheepit/client/ChunkInputStream.java b/src/main/java/com/sheepit/client/ChunkInputStream.java
new file mode 100644
index 0000000..33105c7
--- /dev/null
+++ b/src/main/java/com/sheepit/client/ChunkInputStream.java
@@ -0,0 +1,109 @@
+package com.sheepit.client;
+
+import lombok.NonNull;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.ArrayDeque;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Presents several chunk files as one continuous stream of bytes.
+ * Chunks are opened one at a time, in the order they were provided in, and each chunk
+ * is closed as soon as the following one is opened.
+ */
+public class ChunkInputStream extends InputStream {
+
+	// Chunks that have not been opened yet, in reading order
+	@NonNull private final ArrayDeque<Path> chunkPaths;
+	// Stream over the chunk that is currently being read
+	@NonNull private BufferedInputStream currentStream;
+
+	/**
+	 * Given a list of chunk paths, provides an InputStream that reads the contents of these files in the order they were provided in.
+	 * @param chunkPaths Non-empty, ordered list of paths
+	 * @throws IOException If the first chunk could not be opened. Errors with other chunks will be thrown during calls to read()
+	 * @throws NoSuchElementException If chunkPaths is empty
+	 */
+	public ChunkInputStream(List<Path> chunkPaths) throws IOException {
+		this.chunkPaths = new ArrayDeque<>(chunkPaths);
+
+		// Setup the first chunk for reading
+		prepareNextChunk();
+	}
+
+	/**
+	 * Closes the chunk that was being read (if any) and opens the next pending chunk.
+	 * @throws IOException If the finished chunk could not be closed or the next chunk could not be opened
+	 * @throws NoSuchElementException If there is no pending chunk left
+	 */
+	private void prepareNextChunk() throws IOException, NoSuchElementException {
+		Path nextChunk = chunkPaths.removeFirst();
+
+		// Release the file handle of the finished chunk before opening the next one
+		if (currentStream != null) {
+			currentStream.close();
+		}
+		currentStream = new BufferedInputStream(new FileInputStream(nextChunk.toFile()));
+	}
+
+	/**
+	 * Reads a single byte, continuing with the following chunks whenever the current one is exhausted.
+	 * @return The next byte as a value from 0 to 255, or -1 once every chunk has been read completely
+	 * @throws IOException If reading fails or a following chunk could not be opened
+	 */
+	@Override public int read() throws IOException {
+		int result = currentStream.read();
+
+		// Loop rather than retry once, so that an empty chunk is skipped instead of being mistaken for the end of the data
+		while (result == -1 && !chunkPaths.isEmpty()) {
+			prepareNextChunk();
+			result = currentStream.read();
+		}
+
+		return result;
+	}
+
+	/**
+	 * Reads bytes in bulk instead of going through read() once per byte.
+	 * The request is filled across chunk boundaries, so fewer bytes than requested are only returned at the end of the last chunk.
+	 * @return The number of bytes read, or -1 if length is positive and every chunk has been read completely
+	 * @throws IOException If reading fails or a following chunk could not be opened
+	 */
+	@Override public int read(byte[] buffer, int offset, int length) throws IOException {
+		if (offset < 0 || length < 0 || length > buffer.length - offset) {
+			throw new IndexOutOfBoundsException();
+		}
+
+		int total = 0;
+		while (total < length) {
+			int count = currentStream.read(buffer, offset + total, length - total);
+
+			if (count != -1) {
+				total += count;
+			}
+			else if (chunkPaths.isEmpty()) {
+				// This was the last chunk
+				break;
+			}
+			else {
+				prepareNextChunk();
+			}
+		}
+
+		return (total == 0 && length > 0) ? -1 : total;
+	}
+
+	/**
+	 * Closes the chunk that is currently open and drops the chunks that were not opened yet.
+	 * @throws IOException If the current chunk could not be closed
+	 */
+	@Override public void close() throws IOException {
+		chunkPaths.clear();
+		currentStream.close();
+	}
+}
diff --git a/src/main/java/com/sheepit/client/Utils.java b/src/main/java/com/sheepit/client/Utils.java
index f1d63a6..8919cbd 100644
--- a/src/main/java/com/sheepit/client/Utils.java
+++ b/src/main/java/com/sheepit/client/Utils.java
@@ -45,6 +45,7 @@ import java.util.Map;
 import java.util.TimeZone;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 /**
  * Provides various general utility methods for the SheepIt client codebase
@@ -105,18 +106,11 @@ public class Utils {
 	 */
 	public static int unzipChunksIntoDirectory(List<String> full_path_chunks, String destinationDirectory, char[] password, Log log) {
 		try {
-			// STEP 1: assemble the chunks into an actual zip (in RAM)
-			ByteArrayOutputStream unzippedData = new ByteArrayOutputStream();
-
-			for (String full_path_chunk: full_path_chunks) {
-				byte[] data = Files.readAllBytes(Paths.get(full_path_chunk));
-
-				unzippedData.write(data);
-			}
-			byte[] full_data = unzippedData.toByteArray();
+			// STEP 1: Create a ChunkInputStream, which will read the chunks' contents in order
+			ChunkInputStream chunkInputStream = new ChunkInputStream(full_path_chunks.stream().map(Paths::get).collect(Collectors.toList()));
 
 			// STEP 2: unzip the zip like before
-			ZipInputStream zipInputStream = new ZipInputStream(new ByteArrayInputStream(full_data));
+			ZipInputStream zipInputStream = new ZipInputStream(chunkInputStream);
 			if (password != null) {
 				zipInputStream.setPassword(password);
 			}