ZipTreeParsingTaskGenerator.scala 1.86 KB
Newer Older
1
package eu.nomad_lab.integrated_pipeline.io_integrations
2
3
4
5

import java.nio.file.Paths

import eu.nomad_lab.TreeType
6
7
import eu.nomad_lab.integrated_pipeline.{ FileTree, ParsingTaskGenerator }
import eu.nomad_lab.integrated_pipeline.messages.{ FileParsingTask, TreeParserEventScanError }
8
9
10
11
import eu.nomad_lab.parsers.{ CandidateParser, ParserCollection }
import org.apache.commons.compress.archivers.zip.{ ZipArchiveEntry, ZipFile }

import scala.annotation.tailrec
12
import scala.util.control.NonFatal
13

14
/**
 * Produces parsing tasks for the entries of a Zip archive.
 *
 * The archive at `request.treeBasePath` is opened when the generator is constructed. Each call
 * to `findNextParsingCandidate` advances through the entries delivered by `zipFile.getEntries`
 * until one of them yields a result or the archive is exhausted, at which point the archive
 * is closed.
 *
 * @param request the file tree to scan; its `treeType` must be `TreeType.Zip`
 * @param parserCollection passed on to `scanInputStream` for every scanned entry
 * @throws IllegalArgumentException if `request.treeType` is not `TreeType.Zip`
 */
class ZipTreeParsingTaskGenerator(request: FileTree, parserCollection: ParserCollection)
    extends ParsingTaskGenerator {

  require(request.treeType == TreeType.Zip, "file tree to process must be a Zip archive")

  // Handle on the archive; kept open until the entries are exhausted (or finalize runs).
  private val zipFile = new ZipFile(request.treeBasePath.toFile)

  // Cursor over the archive entries; advanced by findNextParsingCandidate.
  private val zipEntries: java.util.Enumeration[ZipArchiveEntry] = zipFile.getEntries

  /**
   * Closes the archive if the generator is discarded before all entries were consumed.
   *
   * This is only a safety net: the JVM gives no guarantee about when (or whether) finalizers
   * run. The regular close happens in `findNextParsingCandidate` once the entries run out.
   */
  override def finalize(): Unit = {
    zipFile.close()
  }

  /**
   * Advances to the next archive entry that produces a result.
   *
   * Directories and Unix symlinks are not opened; they are passed to `generateRequest` with an
   * empty candidate list. Every other entry is opened and handed to `scanInputStream`. Entries
   * for which `generateRequest` returns `None` are skipped.
   *
   * @return `Some(Right(task))` for the next entry `generateRequest` accepts,
   *         `Some(Left(error))` if opening, scanning or closing an entry's stream failed with a
   *         non-fatal exception, or `None` once all entries have been visited (the archive is
   *         closed at that point)
   */
  @tailrec protected[this] final override def findNextParsingCandidate(): Option[Either[TreeParserEventScanError, FileParsingTask]] = {
    if (zipEntries.hasMoreElements) {
      val zipEntry: ZipArchiveEntry = zipEntries.nextElement()
      val internalFilePath = Paths.get(zipEntry.getName)
      val scanOutcome = if (!zipEntry.isDirectory && !zipEntry.isUnixSymlink) {
        try {
          // Opening the stream is part of the guarded region: a failure to open a single entry
          // is reported as a scan error for that entry instead of escaping as an exception.
          val zIn = zipFile.getInputStream(zipEntry)
          try {
            Right(scanInputStream(parserCollection, zIn, internalFilePath.toString))
          } finally {
            // Always release the entry stream; a failure here is reported by the outer handler.
            zIn.close()
          }
        } catch {
          // Fatal errors (e.g. OutOfMemoryError) are deliberately not intercepted.
          case NonFatal(e) => Left(TreeParserEventScanError(request, internalFilePath, e))
        }
      } else {
        Right(Seq[CandidateParser]())
      }
      // The recursive call stays outside of any try block so that it remains in tail position.
      scanOutcome match {
        case Left(scanError) => Some(Left(scanError))
        case Right(candidateParsers) =>
          generateRequest(request, internalFilePath, candidateParsers) match {
            case Some(x) => Some(Right(x))
            case None => findNextParsingCandidate()
          }
      }
    } else {
      // No entries left: release the archive handle before signalling the end.
      zipFile.close()
      None
    }
  }
}