From 2b3dee09f498bc4e60f50f4083678ba277d76f9b Mon Sep 17 00:00:00 2001
From: Arvid Ihrig <ihrig@fhi-berlin.mpg.de>
Date: Wed, 11 Jul 2018 14:04:24 +0200
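Subject: [PATCH] Integrated Pipeline: single-calculation HDF5 writer can be
 queried for the result output location for a file tree

WriteToHDF5ResultsProcessor.outputLocation(fileTree) replaces the former
`???` placeholder and returns
<outputLocation>/<fileTree.prefixFolder>/<fileTree.archiveId>.

processFileParsingResult now writes each <calculationGid>.h5 into that
per-file-tree folder (creating it on demand) instead of directly into
the configured output location. The unit spec and the end-to-end test
helper are adjusted to the new layout, and spec cases for directory and
zip file trees are added.
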
---
 .../WriteToHDF5ResultsProcessor.scala         |  9 ++++--
 .../package.scala                             |  5 ++--
 .../WriteToHDF5ResultsProcessorSpec.scala     | 28 ++++++++++++++-----
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala b/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala
index b7ab0feb..8949117b 100644
--- a/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala
+++ b/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala
@@ -1,6 +1,6 @@
 package eu.nomad_lab.integrated_pipeline.io_integrations
 
-import java.nio.file.{ Path, Paths }
+import java.nio.file.{ Files, Path, Paths }
 
 import eu.nomad_lab.integrated_pipeline.OutputType.OutputType
 import eu.nomad_lab.integrated_pipeline.messages.{ FileParsingResult, FileTreeScanTask, InMemoryResult }
@@ -13,7 +13,8 @@ class WriteToHDF5ResultsProcessor(outputLocation: Path, metaInfo: MetaInfoEnv) e
   override def processFileParsingResult(result: FileParsingResult): Unit = {
     val id = result.task.calculationGid
     val fileName = Paths.get(id + ".h5")
-    val targetPath = outputLocation.resolve(fileName)
+    val targetPath = outputLocation(result.treeTask).resolve(fileName)
+    Files.createDirectories(outputLocation(result.treeTask))
     //VERIFY: Is this path the appropriate one for single calculation HDFs? (Is there a standard?)
     val h5file = H5File.create(targetPath, Paths.get("/", id, id))
     val h5Backend = H5Backend(metaEnv = metaInfo, h5File = h5file, closeFileOnFinishedParsing = false)
@@ -37,5 +38,7 @@ class WriteToHDF5ResultsProcessor(outputLocation: Path, metaInfo: MetaInfoEnv) e
 
   override def finishProcessingTreeResults(treeTask: FileTreeScanTask): Unit = ()
 
-  override def outputLocation(fileTree: FileTreeScanTask): Path = ??? // outputLocation FIXME
+  override def outputLocation(fileTree: FileTreeScanTask): Path = {
+    outputLocation.resolve(fileTree.prefixFolder).resolve(fileTree.archiveId)
+  }
 }
diff --git a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala
index 210d3f53..46981987 100644
--- a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala
+++ b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala
@@ -94,12 +94,13 @@ package object integrated_pipeline_end_to_end_tests extends TestDataBuilders {
       val task = aFileParsingTask().withTreeTask(treeTask).withRelativePath(entry._1).build()
       val id = task.calculationGid
       val fileName = s"$id.h5"
-      val location = tmpResultsFolder.resolve(fileName)
+      val targetFolder = tmpResultsFolder.resolve(treeTask.prefixFolder).resolve(treeTask.archiveId)
+      val location = targetFolder.resolve(fileName)
       assert(
         location.toFile.exists(),
         s"HDF5 file '$location' with parsing results does not exist"
       )
-      validateHDF5(tmpResultsFolder, id, metaInfo, checkSingleCalculationHDFContent(sample))
+      validateHDF5(targetFolder, id, metaInfo, checkSingleCalculationHDFContent(sample))
     }
     succeed
   }
diff --git a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala
index e4f9f976..c27866a7 100644
--- a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala
+++ b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala
@@ -5,14 +5,13 @@ import java.nio.file.Paths
 import eu.nomad_lab.TreeType
 import eu.nomad_lab.h5.CalculationH5
 import eu.nomad_lab.integrated_pipeline.io_integrations.WriteToHDF5ResultsProcessor
-import eu.nomad_lab.integrated_pipeline.messages.{ FileParsingTask, FileTreeScanTask }
+import eu.nomad_lab.integrated_pipeline.messages.FileParsingTask
 import eu.nomad_lab.meta.KnownMetaInfoEnvs
-import org.scalatest.WordSpec
+import org.scalatest.{ Matchers, WordSpec }
 
-class WriteToHDF5ResultsProcessorSpec extends WordSpec {
-
-  val sampleTree = FileTreeScanTask(Paths.get("/foo/bar"), TreeType.Directory)
+class WriteToHDF5ResultsProcessorSpec extends WordSpec with TestDataBuilders with Matchers {
 
+  val sampleTree = aFileTreeScanTask().withBasePath("/foo/bargus").withTreeType(TreeType.Directory)
   val metaData = KnownMetaInfoEnvs.all
 
   def validateHDFContent(expectedMainFileUri: String)(calc: CalculationH5, mainFileUri: String): Unit = {
@@ -30,15 +29,30 @@ class WriteToHDF5ResultsProcessorSpec extends WordSpec {
         val writer = new WriteToHDF5ResultsProcessor(tempDir, metaData)
         val inputs = (1 to 3).map(x => FileParsingTask(sampleTree, Paths.get(s"file$x"), "dummyParser"))
         inputs.foreach(x => writer.processFileParsingResult(createSuccessfulFileParsingResult(x)))
+        val targetFolder = writer.outputLocation(sampleTree)
         inputs.foreach { entry =>
           val calcName = entry.calculationGid
-          val filePath = tempDir.resolve(s"$calcName.h5")
+          val filePath = targetFolder.resolve(s"$calcName.h5")
           assert(filePath.toFile.exists(), s"calculation output HDF5 '$filePath' does not exist")
           val mainFileUri = entry.treeTask.treeBasePath.resolve(entry.relativePath).toUri.toString
-          validateHDF5(tempDir, calcName, metaData, validateHDFContent(mainFileUri))
+          validateHDF5(targetFolder, calcName, metaData, validateHDFContent(mainFileUri))
         }
       }
     }
+
+    "determining the output location" should {
+      "return the appropriate output location for a directory file tree" in {
+        val f = new WriteToHDF5ResultsProcessor(Paths.get("/non/existing/location"), metaData)
+        val fileTree = aFileTreeScanTask().withTreeType(TreeType.Directory).withBasePath("foo/bargus")
+        f.outputLocation(fileTree) should be(Paths.get("/non/existing/location/bar/bargus"))
+      }
+
+      "return the appropriate output location for a zip archive file tree" in {
+        val f = new WriteToHDF5ResultsProcessor(Paths.get("/non/existing/location"), metaData)
+        val fileTree = aFileTreeScanTask().withTreeType(TreeType.Zip).withBasePath(s"foo/R${"x" * 28}.zip")
+        f.outputLocation(fileTree) should be(Paths.get(s"/non/existing/location/Rxx/R${"x" * 28}"))
+      }
+    }
   }
 
 }
-- 
GitLab