From 2b3dee09f498bc4e60f50f4083678ba277d76f9b Mon Sep 17 00:00:00 2001 From: Arvid Ihrig <ihrig@fhi-berlin.mpg.de> Date: Wed, 11 Jul 2018 14:04:24 +0200 Subject: [PATCH] Integrated Pipeline: single-calculation HDF5 writer can be queried for the result output location for a file tree --- .../WriteToHDF5ResultsProcessor.scala | 9 ++++-- .../package.scala | 5 ++-- .../WriteToHDF5ResultsProcessorSpec.scala | 28 ++++++++++++++----- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala b/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala index b7ab0feb..8949117b 100644 --- a/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala +++ b/integrated-pipeline/src/main/scala/eu/nomad_lab/integrated_pipeline/io_integrations/WriteToHDF5ResultsProcessor.scala @@ -1,6 +1,6 @@ package eu.nomad_lab.integrated_pipeline.io_integrations -import java.nio.file.{ Path, Paths } +import java.nio.file.{ Files, Path, Paths } import eu.nomad_lab.integrated_pipeline.OutputType.OutputType import eu.nomad_lab.integrated_pipeline.messages.{ FileParsingResult, FileTreeScanTask, InMemoryResult } @@ -13,7 +13,8 @@ class WriteToHDF5ResultsProcessor(outputLocation: Path, metaInfo: MetaInfoEnv) e override def processFileParsingResult(result: FileParsingResult): Unit = { val id = result.task.calculationGid val fileName = Paths.get(id + ".h5") - val targetPath = outputLocation.resolve(fileName) + val targetPath = outputLocation(result.treeTask).resolve(fileName) + Files.createDirectories(outputLocation(result.treeTask)) //VERIFY: Is this path the appropriate one for single calculation HDFs? (Is there a standard?) val h5file = H5File.create(targetPath, Paths.get("/", id, id)) val h5Backend = H5Backend(metaEnv = metaInfo, h5File = h5file, closeFileOnFinishedParsing = false) @@ -37,5 +38,7 @@ class WriteToHDF5ResultsProcessor(outputLocation: Path, metaInfo: MetaInfoEnv) e override def finishProcessingTreeResults(treeTask: FileTreeScanTask): Unit = () - override def outputLocation(fileTree: FileTreeScanTask): Path = ??? // outputLocation FIXME + override def outputLocation(fileTree: FileTreeScanTask): Path = { + outputLocation.resolve(fileTree.prefixFolder).resolve(fileTree.archiveId) + } } diff --git a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala index 210d3f53..46981987 100644 --- a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala +++ b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_end_to_end_tests/package.scala @@ -94,12 +94,13 @@ package object integrated_pipeline_end_to_end_tests extends TestDataBuilders { val task = aFileParsingTask().withTreeTask(treeTask).withRelativePath(entry._1).build() val id = task.calculationGid val fileName = s"$id.h5" - val location = tmpResultsFolder.resolve(fileName) + val targetFolder = tmpResultsFolder.resolve(treeTask.prefixFolder).resolve(treeTask.archiveId) + val location = targetFolder.resolve(fileName) assert( location.toFile.exists(), s"HDF5 file '$location' with parsing results does not exist" ) - validateHDF5(tmpResultsFolder, id, metaInfo, checkSingleCalculationHDFContent(sample)) + validateHDF5(targetFolder, id, metaInfo, checkSingleCalculationHDFContent(sample)) } succeed } diff --git a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala index e4f9f976..c27866a7 100644 --- a/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala +++ b/integrated-pipeline/src/test/scala/eu/nomad_lab/integrated_pipeline_tests/WriteToHDF5ResultsProcessorSpec.scala @@ -5,14 +5,13 @@ import java.nio.file.Paths import eu.nomad_lab.TreeType import eu.nomad_lab.h5.CalculationH5 import eu.nomad_lab.integrated_pipeline.io_integrations.WriteToHDF5ResultsProcessor -import eu.nomad_lab.integrated_pipeline.messages.{ FileParsingTask, FileTreeScanTask } +import eu.nomad_lab.integrated_pipeline.messages.FileParsingTask import eu.nomad_lab.meta.KnownMetaInfoEnvs -import org.scalatest.WordSpec +import org.scalatest.{ Matchers, WordSpec } -class WriteToHDF5ResultsProcessorSpec extends WordSpec { - - val sampleTree = FileTreeScanTask(Paths.get("/foo/bar"), TreeType.Directory) +class WriteToHDF5ResultsProcessorSpec extends WordSpec with TestDataBuilders with Matchers { + val sampleTree = aFileTreeScanTask().withBasePath("/foo/bargus").withTreeType(TreeType.Directory) val metaData = KnownMetaInfoEnvs.all def validateHDFContent(expectedMainFileUri: String)(calc: CalculationH5, mainFileUri: String): Unit = { @@ -30,15 +29,30 @@ class WriteToHDF5ResultsProcessorSpec extends WordSpec { val writer = new WriteToHDF5ResultsProcessor(tempDir, metaData) val inputs = (1 to 3).map(x => FileParsingTask(sampleTree, Paths.get(s"file$x"), "dummyParser")) inputs.foreach(x => writer.processFileParsingResult(createSuccessfulFileParsingResult(x))) + val targetFolder = writer.outputLocation(sampleTree) inputs.foreach { entry => val calcName = entry.calculationGid - val filePath = tempDir.resolve(s"$calcName.h5") + val filePath = targetFolder.resolve(s"$calcName.h5") assert(filePath.toFile.exists(), s"calculation output HDF5 '$filePath' does not exist") val mainFileUri = entry.treeTask.treeBasePath.resolve(entry.relativePath).toUri.toString - validateHDF5(tempDir, calcName, metaData, validateHDFContent(mainFileUri)) + validateHDF5(targetFolder, calcName, metaData, validateHDFContent(mainFileUri)) } } } + + "determining the output location" should { + "return the appropriate output location for a directory file tree" in { + val f = new WriteToHDF5ResultsProcessor(Paths.get("/non/existing/location"), metaData) + val fileTree = aFileTreeScanTask().withTreeType(TreeType.Directory).withBasePath("foo/bargus") + f.outputLocation(fileTree) should be(Paths.get("/non/existing/location/bar/bargus")) + } + + "return the appropriate output location for a zip archive file tree" in { + val f = new WriteToHDF5ResultsProcessor(Paths.get("/non/existing/location"), metaData) + val fileTree = aFileTreeScanTask().withTreeType(TreeType.Zip).withBasePath(s"foo/R${"x" * 28}.zip") + f.outputLocation(fileTree) should be(Paths.get(s"/non/existing/location/Rxx/R${"x" * 28}")) + } + } } } -- GitLab