Skip to content
Snippets Groups Projects
Commit 27e1dc8e authored by Ihrig, Arvid Conrad (ari)'s avatar Ihrig, Arvid Conrad (ari)
Browse files

Integrated Pipeline: FileTreeScanTasks now require Zip-archives to have a...

Integrated Pipeline: FileTreeScanTasks now require Zip-archives to have a well-defined rawdata archive name
parent eef450a3
No related branches found
No related tags found
No related merge requests found
...@@ -67,6 +67,7 @@ object Main extends StrictLogging { ...@@ -67,6 +67,7 @@ object Main extends StrictLogging {
/** /**
* Generate a settings object, the parameters are taken from the global configuration files and * Generate a settings object, the parameters are taken from the global configuration files and
* can optionally be overwritten by the command-line configuration * can optionally be overwritten by the command-line configuration
* @param config the NOMAD configuration to use as default values
* @param commandLine the given command-line parameters * @param commandLine the given command-line parameters
* @return merged settings * @return merged settings
*/ */
......
...@@ -4,6 +4,12 @@ import java.nio.file.{ Path, Paths } ...@@ -4,6 +4,12 @@ import java.nio.file.{ Path, Paths }
import eu.nomad_lab.TreeType import eu.nomad_lab.TreeType
import scala.util.matching.Regex
/**
 * Constants shared by all file tree scan tasks.
 */
object FileTreeScanTask {
  /**
   * Pattern for the file name of a rawdata zip archive: the letter `R`, followed by exactly 28
   * characters taken from ASCII letters, digits, underscore and hyphen, followed by the literal
   * suffix `.zip`. The single capture group holds the name without the suffix.
   *
   * The letter ranges are written out as `A-Za-z` because the single range `A-z` would also
   * admit the ASCII punctuation located between `Z` and `a`. The dot is escaped so that only a
   * real `.zip` extension is accepted rather than any character followed by `zip`.
   */
  val rawDataArchive: Regex = """(R[A-Za-z0-9_-]{28})\.zip""".r
}
/** /**
* Request a scan to identify all potential calculation files found under the given file tree root * Request a scan to identify all potential calculation files found under the given file tree root
* @param treeBasePath the root of the file-tree * @param treeBasePath the root of the file-tree
...@@ -17,16 +23,21 @@ case class FileTreeScanTask( ...@@ -17,16 +23,21 @@ case class FileTreeScanTask(
/** /**
* Generate the archive ID for the tree-task, this is not necessarily identical to the * Generate the archive ID for the tree-task, this is not necessarily identical to the
* filename of the parsing results. * filename of the parsing results.
* @return the associated internal archive ID * @return the associated internal archive ID and optionally a warning
* @throws UnsupportedOperationException if a zip-archive file tree doesn't match the Id pattern
*/ */
def archiveId: String = treeType match { def archiveId: String = treeType match {
case TreeType.Directory => treeBasePath.getFileName.toString case TreeType.Directory => treeBasePath.getFileName.toString
case TreeType.Zip => treeBasePath.getFileName.toString.stripSuffix(".zip") case TreeType.Zip => treeBasePath.getFileName.toString match {
case FileTreeScanTask.rawDataArchive(id) => id
case path => throw new UnsupportedOperationException(s"$path is not a valid rawdata archive name")
}
} }
/** /**
* The canonical file-name of the HDF5-file containing the parsing results of the tree scan task. * The canonical file-name of the HDF5-file containing the parsing results of the tree scan task.
* @return the canonical file name of the parsing results HDF5 * @return the canonical file name of the parsing results HDF5 and optionally a warning
* @throws UnsupportedOperationException if a zip-archive file tree doesn't match the Id pattern
*/ */
def fileName: Path = { def fileName: Path = {
val id = archiveId val id = archiveId
...@@ -35,4 +46,18 @@ case class FileTreeScanTask( ...@@ -35,4 +46,18 @@ case class FileTreeScanTask(
case TreeType.Zip => s"S${id.substring(1)}.h5" case TreeType.Zip => s"S${id.substring(1)}.h5"
}) })
} }
/**
* The prefix-folder for the results from this file tree. Usually used as a subfolder of the
* general output location to avoid having a single giant folder with all results in it.
* @return the prefix path associated with this file tree
* @throws UnsupportedOperationException if a zip-archive file tree doesn't match the Id pattern
*/
def prefixFolder: Path = {
  // Resolve the archive ID first so that an invalid zip archive name fails before anything else.
  val archive = archiveId
  // The prefix is built from the first three characters of the archive ID; for zip archives the
  // leading character is always written as 'R'.
  val prefix = treeType match {
    case TreeType.Directory => archive.substring(0, 3)
    case TreeType.Zip => "R" + archive.substring(1, 3)
  }
  Paths.get(prefix)
}
} }
package eu.nomad_lab.integrated_pipeline_tests
import java.nio.file.Paths
import eu.nomad_lab.TreeType
import org.scalatest.{ Matchers, WordSpec }
/**
 * Checks how a FileTreeScanTask derives its archive ID, result file name and prefix folder for
 * directory trees and rawdata zip archives, and that a zip archive whose file name does not
 * follow the rawdata naming pattern is rejected with an UnsupportedOperationException.
 */
class FileTreeScanTaskSpec extends WordSpec with Matchers with TestDataBuilders {

  "a FileTreeScanTask" should {

    "generate correct archive IDs for directory file trees" in {
      for (data <- allTestData) {
        val task = createFileTreeScanRequest(data, TreeType.Directory)
        task.archiveId shouldBe data.baseName
      }
    }

    "generate correct archive file names for directory file trees" in {
      for (data <- allTestData) {
        val task = createFileTreeScanRequest(data, TreeType.Directory)
        task.fileName shouldBe Paths.get(s"${data.baseName}.h5")
      }
    }

    "generate correct prefix folder names for directory file trees" in {
      for (data <- allTestData) {
        val task = createFileTreeScanRequest(data, TreeType.Directory)
        task.prefixFolder shouldBe Paths.get(data.baseName.substring(0, 3))
      }
    }

    "generate correct archive IDs for rawdata zip archive file trees" in {
      for (data <- allTestData) {
        val task = createFileTreeScanRequest(data, TreeType.Zip)
        task.archiveId shouldBe data.baseName
      }
    }

    "generate correct archive file names for rawdata zip archive file trees" in {
      for (data <- allTestData) {
        val task = createFileTreeScanRequest(data, TreeType.Zip)
        // the result file swaps the leading character of the archive ID for an 'S'
        task.fileName shouldBe Paths.get(s"S${data.baseName.substring(1)}.h5")
      }
    }

    "generate correct prefix folder names for rawdata zip archive file trees" in {
      for (data <- allTestData) {
        val task = createFileTreeScanRequest(data, TreeType.Zip)
        task.prefixFolder shouldBe Paths.get(data.baseName.substring(0, 3))
      }
    }

    "throw an exception when handling non-rawdata zip archive file trees (unexpected filename)" in {
      val invalidTree = aFileTreeScanTask().withTreeType(TreeType.Zip).withBasePath("/tmp/foo.zip")
      // every accessor that depends on the archive ID has to reject the malformed name
      assertThrows[UnsupportedOperationException](invalidTree.archiveId)
      assertThrows[UnsupportedOperationException](invalidTree.fileName)
      assertThrows[UnsupportedOperationException](invalidTree.prefixFolder)
    }
  }
}
...@@ -56,26 +56,6 @@ class WriteToHDF5MergedResultsProcessorSpec extends WordSpec { ...@@ -56,26 +56,6 @@ class WriteToHDF5MergedResultsProcessorSpec extends WordSpec {
} }
//TODO: move these tests to a new suite for the FileTreeScanTask?
"generate correct archive IDs for different file tree types" in {
val dirTask = FileTreeScanTask(Paths.get("/foo/bar"), TreeType.Directory)
assert(dirTask.archiveId == "bar", "unexpected ID for directory source")
val zipTask = FileTreeScanTask(Paths.get("/foo/Rbar.zip"), TreeType.Zip)
assert(zipTask.archiveId == "Rbar", "unexpected ID for zip source")
Seq(TreeType.Tar, TreeType.File, TreeType.Unknown).foreach { treeType =>
assertThrows[MatchError](FileTreeScanTask(unusedPath, treeType).archiveId)
}
}
"generate correct archive file names for different file tree types" in {
val dirTask = FileTreeScanTask(Paths.get("/foo/bar"), TreeType.Directory)
assert(dirTask.fileName == Paths.get("bar.h5"), "unexpected file name for directory source")
val zipTask = FileTreeScanTask(Paths.get("/foo/Rbar.zip"), TreeType.Zip)
assert(zipTask.fileName == Paths.get("Sbar.h5"), "unexpected file name for zip source")
Seq(TreeType.Tar, TreeType.File, TreeType.Unknown).foreach { treeType =>
assertThrows[MatchError](FileTreeScanTask(unusedPath, treeType).fileName)
}
}
} }
} }
......
...@@ -62,6 +62,8 @@ package object integrated_pipeline_tests { ...@@ -62,6 +62,8 @@ package object integrated_pipeline_tests {
"R1LGuYmO0fvYzWlcgRFzZOqVEdfXi/data/CO-NiMgO.dscf.out" -> turbomoleParserName "R1LGuYmO0fvYzWlcgRFzZOqVEdfXi/data/CO-NiMgO.dscf.out" -> turbomoleParserName
)) ))
val allTestData = Seq(testData1, testData2, testData3, testData4)
def createFileTreeScanRequest(archive: TestTreeData, mode: TreeType): FileTreeScanTask = { def createFileTreeScanRequest(archive: TestTreeData, mode: TreeType): FileTreeScanTask = {
val baseDir = sys.props.get("user.dir").get val baseDir = sys.props.get("user.dir").get
val prefix = archive.baseName.substring(0, 3) val prefix = archive.baseName.substring(0, 3)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment