package eu.nomad_lab

import java.nio.file.Path

import eu.nomad_lab.JsonSupport.formats
import eu.nomad_lab.TreeType.TreeType
import eu.nomad_lab.h5.CalculationH5
import eu.nomad_lab.integrated_pipeline.{ FileTree, OutputType }
import eu.nomad_lab.integrated_pipeline.OutputType.OutputType
import eu.nomad_lab.integrated_pipeline_tests._
import eu.nomad_lab.meta.MetaInfoEnv
import org.scalatest.Assertions.succeed
import org.scalatest.{ Assertion, Informer }

import scala.collection.mutable
import scala.io.Source

package object integrated_pipeline_end_to_end_tests extends TestDataBuilders {

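  /**
   * Common scaffolding for the end-to-end pipeline tests. Concrete fixtures fix the number of
   * workers, the output format, the tree type and the sample tree; from these, the trait derives
   * the input location under src/test/resources, a self-deleting temporary results folder and,
   * via generateConsoleArgumentList, the command line arguments for a pipeline run.
   */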
  trait Fixture {
    val numWorkers: Int
    val outputType: OutputType
    val treeType: TreeType
    val sample: TestTreeData
    lazy val treeTask: FileTree = createFileTreeScanRequest(sample, treeType)
    lazy val dataRoot: String = treeTask.treeType match {
      case TreeType.Directory => s"src/test/resources/${sample.baseName.substring(0, 3)}/${sample.baseName}"
      case TreeType.Zip => s"src/test/resources/${sample.baseName.substring(0, 3)}/${sample.baseName}.zip"
    }
    lazy val tmpResultsFolder: Path = {
      val prefix = treeTask.treeType match {
        case TreeType.Directory => "directory"
        case TreeType.Zip => "zip-archive"
      }
      val tempDir = generateTempTestDirectory(s"$prefix-to-${outputType.toString.toLowerCase}")
      tempDir.toFile.deleteOnExit()
      tempDir
    }

    def generateConsoleArgumentList(info: Option[Informer]): Array[String] = {
      val command = treeTask.treeType match {
        case TreeType.Directory => "parseDirectory"
        case TreeType.Zip => "parseRawDataArchive"
      }
      val output = "--output=" + (outputType match {
        case OutputType.Json => "Json"
        case OutputType.HDF5 => "HDF5"
        case OutputType.HDF5merged => "HDF5merged"
      })
      val directory = s"""--outputdir="$tmpResultsFolder""""
      val workers = s"--numWorkers=$numWorkers"
      val suppressConfigDump = "--noConfigDump"
      val params = Array(command, output, workers, directory, suppressConfigDump, dataRoot)
      info.foreach(x => x(s"command line arguments: ${params.mkString("'", " ", "'")}"))
      params
    }
  }

  class SingleWorkerFixture(val treeType: TreeType, val outputType: OutputType) extends Fixture {
    override val numWorkers: Int = 1
    override val sample: TestTreeData = testData4
  }

  class MultiWorkerFixture(val treeType: TreeType, val outputType: OutputType,
      val numWorkers: Int) extends Fixture {
    require(numWorkers > 1, "need multiple workers")
    override val sample: TestTreeData = testData3
  }

  class NonExistingTreeFixture(val treeType: TreeType, val outputType: OutputType) extends Fixture {
    override val numWorkers: Int = 1
    override val sample: TestTreeData = TestTreeData("purely-imaginary", Map())
  }
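  // How these pieces fit together (sketch; the actual test suites are not part of this file):
  // pick one of the fixtures above, build the command line arguments, run the pipeline with
  // them, and then verify the generated output with the assertions below, e.g.
  //   val fixture = new SingleWorkerFixture(TreeType.Zip, OutputType.Json)
  //   val args = fixture.generateConsoleArgumentList(None)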

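  /**
   * Checks that every candidate calculation of the sample tree produced a JSON output file at
   * the expected location, that its sections validate against the meta info, and that it was
   * handled by the expected parser.
   */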
  def assertValidityOfGeneratedJsonFiles(sample: TestTreeData, treeTask: FileTree,
    tmpResultsFolder: Path, metaInfo: MetaInfoEnv): Assertion = {
    sample.candidateCalculationsWithParsers.foreach { entry =>
      val task = aFileParsingTask().withTreeTask(treeTask).withRelativePath(entry._1).build()
      val fileName = s"${task.calculationGid}.json"
      val targetFolder = tmpResultsFolder.resolve(treeTask.prefixFolder).resolve(treeTask.archiveId)
      val location = targetFolder.resolve(fileName)
      assert(
        location.toFile.exists(),
        s"json file '$location' with parsing results does not exist"
      )
      val jsonData = JsonUtils.parseReader(Source.fromFile(location.toFile).bufferedReader())
      validateJsonMetaData(jsonData \ "sections", metaInfo)
      assert(
        (jsonData \ "parserInfo" \ "name").extract[String] == entry._2,
        "unexpected parser name in output file"
      )
    }
    succeed
  }

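  /**
   * Checks that every candidate calculation of the sample tree produced an individual HDF5
   * output file at the expected location and that its content passes the single-calculation
   * checks (see checkSingleCalculationHDFContent).
   */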
  def assertValidityOfGeneratedHDF5Files(sample: TestTreeData, treeTask: FileTree,
    tmpResultsFolder: Path, metaInfo: MetaInfoEnv): Assertion = {
    sample.candidateCalculationsWithParsers.foreach { entry =>
      val task = aFileParsingTask().withTreeTask(treeTask).withRelativePath(entry._1).build()
      val id = task.calculationGid
      val archiveId = treeTask.archiveId
      val fileName = s"$id.h5"
      val targetFolder = tmpResultsFolder.resolve(treeTask.prefixFolder).resolve(treeTask.archiveId)
      val location = targetFolder.resolve(fileName)
      assert(
        location.toFile.exists(),
        s"HDF5 file '$location' with parsing results does not exist"
      )
      validateHDF5(targetFolder, archiveId, metaInfo, checkSingleCalculationHDFContent(sample),
        Some(fileName))
    }
    succeed
  }

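  /**
   * Checks that a single merged HDF5 file for the whole tree exists at the expected location
   * and that each calculation it contains maps to exactly one entry of the reference data
   * (see checkMergedCalculationsHDFContent).
   */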
  def assertValidityOfGeneratedMergedHDF5File(sample: TestTreeData, treeTask: FileTree,
    tmpResultsFolder: Path, metaInfo: MetaInfoEnv): Assertion = {
    val id = treeTask.archiveId
    val fileName = treeTask.fileName
    val targetFolder = tmpResultsFolder.resolve(treeTask.prefixFolder)
    val location = targetFolder.resolve(fileName)
    assert(location.toFile.exists(), s"parsing results HDF5 file '$location' does not exist")
    validateHDF5(targetFolder, id, metaInfo,
      checkMergedCalculationsHDFContent(sample, mutable.Set()), Some(fileName.toString))
    succeed
  }

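  /**
   * Verifies an individual calculation group in an HDF5 output file: the main file URI must
   * correspond to a calculation of the reference data, and the "parserInfo" attribute must
   * name the expected parser.
   */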
  private def checkSingleCalculationHDFContent(testData: TestTreeData)(calc: CalculationH5, mainFileUri: String): Unit = {
    val key = testData.candidateCalculationsWithParsers.keySet.find(mainFileUri.endsWith)
    assert(key.nonEmpty, "could not map calculation's main file URI to reference data")
    val expectedParser = testData.candidateCalculationsWithParsers(key.get)
    val attributeId = H5Lib.attributeOpen(calc.calculationGroup, "parserInfo", null)
    val parserInfoStrings = H5Lib.attributeReadStr(attributeId)
    assert(parserInfoStrings.length == 1, "expected exactly one parser info block")
    val parserInfo = JsonUtils.parseStr(parserInfoStrings.head)
    H5Lib.attributeClose(attributeId)
    assert((parserInfo \ "name").extract[String] == expectedParser, "not the expected parser")
  }

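  /**
   * Same check as for single-calculation files, but additionally records the calculations seen
   * so far in the `finished` set to ensure that no calculation appears twice in the merged
   * HDF5 file.
   */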
  private def checkMergedCalculationsHDFContent(testData: TestTreeData, finished: mutable.Set[String])(calc: CalculationH5, mainFileUri: String): Unit = {
    val key = testData.candidateCalculationsWithParsers.keySet.find(mainFileUri.endsWith)
    assert(key.nonEmpty, "could not map calculation's main file URI to reference data")
    assert(finished.add(key.get), s"calculation '${key.get}' was already processed before")
    val expectedParser = testData.candidateCalculationsWithParsers(key.get)
    val attributeId = H5Lib.attributeOpen(calc.calculationGroup, "parserInfo", null)
    val parserInfoStrings = H5Lib.attributeReadStr(attributeId)
    assert(parserInfoStrings.length == 1, "expected exactly one parser info block")
    val parserInfo = JsonUtils.parseStr(parserInfoStrings.head)
    H5Lib.attributeClose(attributeId)
    assert((parserInfo \ "name").extract[String] == expectedParser, "not the expected parser")
  }

}