Commit 27e1dc8e authored by Ihrig, Arvid Conrad (ari)'s avatar Ihrig, Arvid Conrad (ari)
Browse files

Integrated Pipeline: FileTreeScanTasks now require Zip-archives to have a...

Integrated Pipeline: FileTreeScanTasks now require Zip-archives to have a well-defined rawdata archive name
parent eef450a3
......@@ -67,6 +67,7 @@ object Main extends StrictLogging {
/**
* Generate a settings object. The parameters are taken from the global configuration files and
* can optionally be overwritten by the command-line configuration.
* @param config the NOMAD configuration to use as default values
* @param commandLine the given command-line parameters
* @return merged settings
*/
......
......@@ -4,6 +4,12 @@ import java.nio.file.{ Path, Paths }
import eu.nomad_lab.TreeType
import scala.util.matching.Regex
/**
 * Constants shared by all file tree scan tasks.
 */
object FileTreeScanTask {
  /**
   * File-name pattern of a rawdata zip archive: an upper-case 'R' followed by exactly 28
   * characters out of letters, digits, '_' and '-', and the literal suffix ".zip".
   * The single capture group yields the archive ID (the file name without the suffix).
   *
   * The character class spells out both letter ranges because the range 'A-z' would also
   * accept the punctuation characters located between 'Z' and 'a'. The dot is escaped so that
   * only a real ".zip" suffix is accepted.
   */
  val rawDataArchive: Regex = """(R[A-Za-z0-9_-]{28})\.zip""".r
}
/**
* Request a scan to identify all potential calculation files found under the given file tree root
* @param treeBasePath the root of the file-tree
......@@ -17,16 +23,21 @@ case class FileTreeScanTask(
/**
* Generate the archive ID for the tree-task, this is not necessarily identical to the
* filename of the parsing results.
* @return the associated internal archive ID
* @throws UnsupportedOperationException if a zip-archive file tree doesn't match the Id pattern
*/
def archiveId: String = treeType match {
  // Directory trees are identified by the plain name of their root folder.
  case TreeType.Directory => treeBasePath.getFileName.toString
  // Zip trees must follow the rawdata archive naming scheme; the ID is the part captured by
  // the pattern, i.e. the file name without the ".zip" suffix. There must be no other Zip case
  // in front of this one, otherwise the validation below would be unreachable.
  case TreeType.Zip => treeBasePath.getFileName.toString match {
    case FileTreeScanTask.rawDataArchive(id) => id
    case path => throw new UnsupportedOperationException(s"$path is not a valid rawdata archive name")
  }
  // Any other tree type is deliberately not handled here and results in a MatchError.
}
/**
* The canonical file-name of the HDF5-file containing the parsing results of the tree scan task.
* @return the canonical file name of the parsing results HDF5
* @throws UnsupportedOperationException if a zip-archive file tree doesn't match the Id pattern
*/
def fileName: Path = {
val id = archiveId
......@@ -35,4 +46,18 @@ case class FileTreeScanTask(
case TreeType.Zip => s"S${id.substring(1)}.h5"
})
}
/**
* The prefix-folder for the results from this file tree. Usually used as a subfolder of the
* general output location to avoid having a single giant folder with all results in it.
* @return the prefix path associated with this file tree
* @throws UnsupportedOperationException if a zip-archive file tree doesn't match the Id pattern
*/
def prefixFolder: Path = {
  val id = archiveId
  Paths.get(treeType match {
    // Directory names are arbitrary and may be shorter than three characters; take() returns
    // the whole name in that case instead of throwing a StringIndexOutOfBoundsException.
    case TreeType.Directory => id.take(3)
    // NOTE(review): relies on archiveId having validated the zip name against
    // FileTreeScanTask.rawDataArchive, so that the ID is long enough for this substring.
    case TreeType.Zip => s"R${id.substring(1, 3)}"
  })
}
}
package eu.nomad_lab.integrated_pipeline_tests
import java.nio.file.Paths
import eu.nomad_lab.TreeType
import org.scalatest.{ Matchers, WordSpec }
/**
 * Checks the archive ID, the result file name and the prefix folder that a FileTreeScanTask
 * derives from its base path, for directory file trees as well as rawdata zip archives.
 */
class FileTreeScanTaskSpec extends WordSpec with Matchers with TestDataBuilders {

  "a FileTreeScanTask" should {

    "generate correct archive IDs for directory file trees" in {
      for (sample <- allTestData) {
        val task = createFileTreeScanRequest(sample, TreeType.Directory)
        task.archiveId shouldBe sample.baseName
      }
    }

    "generate correct archive file names for directory file trees" in {
      for (sample <- allTestData) {
        val task = createFileTreeScanRequest(sample, TreeType.Directory)
        val expectedName = Paths.get(s"${sample.baseName}.h5")
        task.fileName shouldBe expectedName
      }
    }

    "generate correct prefix folder names for directory file trees" in {
      for (sample <- allTestData) {
        val task = createFileTreeScanRequest(sample, TreeType.Directory)
        val expectedPrefix = Paths.get(sample.baseName.substring(0, 3))
        task.prefixFolder shouldBe expectedPrefix
      }
    }

    "generate correct archive IDs for rawdata zip archive file trees" in {
      for (sample <- allTestData) {
        val task = createFileTreeScanRequest(sample, TreeType.Zip)
        task.archiveId shouldBe sample.baseName
      }
    }

    "generate correct archive file names for rawdata zip archive file trees" in {
      for (sample <- allTestData) {
        val task = createFileTreeScanRequest(sample, TreeType.Zip)
        // the result file swaps the first character of the archive ID for an 'S'
        val expectedName = Paths.get(s"S${sample.baseName.substring(1)}.h5")
        task.fileName shouldBe expectedName
      }
    }

    "generate correct prefix folder names for rawdata zip archive file trees" in {
      for (sample <- allTestData) {
        val task = createFileTreeScanRequest(sample, TreeType.Zip)
        val expectedPrefix = Paths.get(sample.baseName.substring(0, 3))
        task.prefixFolder shouldBe expectedPrefix
      }
    }

    "throw an exception when handling non-rawdata zip archive file trees (unexpected filename)" in {
      val invalidZipTask = aFileTreeScanTask().withTreeType(TreeType.Zip).withBasePath("/tmp/foo.zip")
      // every accessor that depends on the archive ID has to reject the malformed name
      assertThrows[UnsupportedOperationException](invalidZipTask.archiveId)
      assertThrows[UnsupportedOperationException](invalidZipTask.fileName)
      assertThrows[UnsupportedOperationException](invalidZipTask.prefixFolder)
    }
  }
}
......@@ -56,26 +56,6 @@ class WriteToHDF5MergedResultsProcessorSpec extends WordSpec {
}
//TODO: move these tests to a new suite for the FileTreeScanTask?
"generate correct archive IDs for different file tree types" in {
  // directory source: the ID is compared against the folder name
  val fromDirectory = FileTreeScanTask(Paths.get("/foo/bar"), TreeType.Directory)
  assert(fromDirectory.archiveId == "bar", "unexpected ID for directory source")
  // zip source: the ID is compared against the file name without its suffix
  val fromZip = FileTreeScanTask(Paths.get("/foo/Rbar.zip"), TreeType.Zip)
  assert(fromZip.archiveId == "Rbar", "unexpected ID for zip source")
  // the remaining tree types are expected to fail with a MatchError
  for (unsupported <- Seq(TreeType.Tar, TreeType.File, TreeType.Unknown)) {
    assertThrows[MatchError](FileTreeScanTask(unusedPath, unsupported).archiveId)
  }
}
"generate correct archive file names for different file tree types" in {
  // directory source: the result file is compared against "<folder name>.h5"
  val fromDirectory = FileTreeScanTask(Paths.get("/foo/bar"), TreeType.Directory)
  assert(fromDirectory.fileName == Paths.get("bar.h5"), "unexpected file name for directory source")
  // zip source: the result file is compared against the ID with its first character set to 'S'
  val fromZip = FileTreeScanTask(Paths.get("/foo/Rbar.zip"), TreeType.Zip)
  assert(fromZip.fileName == Paths.get("Sbar.h5"), "unexpected file name for zip source")
  // the remaining tree types are expected to fail with a MatchError
  for (unsupported <- Seq(TreeType.Tar, TreeType.File, TreeType.Unknown)) {
    assertThrows[MatchError](FileTreeScanTask(unusedPath, unsupported).fileName)
  }
}
}
}
......
......@@ -62,6 +62,8 @@ package object integrated_pipeline_tests {
"R1LGuYmO0fvYzWlcgRFzZOqVEdfXi/data/CO-NiMgO.dscf.out" -> turbomoleParserName
))
// Collection of the sample data sets testData1 to testData4, for tests that iterate over all of them.
val allTestData = Seq(testData1, testData2, testData3, testData4)
def createFileTreeScanRequest(archive: TestTreeData, mode: TreeType): FileTreeScanTask = {
val baseDir = sys.props.get("user.dir").get
val prefix = archive.baseName.substring(0, 3)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment