Commit 2f232e27 authored by Mohamed, Fawzi Roberto (fawzi)
Browse files

Add h5Tool (partially spun out of tool)

parent 4074db16
Pipeline #36809 failed with stages
in 13 minutes and 40 seconds
/*
Copyright 2016-2017 The NOMAD Developers Group
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eu.nomad_lab.h5
import java.io.File
import collection.JavaConverters._
import java.nio.file.Path
import java.nio.file.Paths
import java.nio.file.Files
import java.nio.file.StandardCopyOption
import java.nio.file.SimpleFileVisitor
import java.nio.file.FileVisitResult
import java.nio.file.attribute.BasicFileAttributes
import com.typesafe.scalalogging.StrictLogging
import eu.nomad_lab.LocalEnv
import scala.util.control.NonFatal
object H5Tool extends StrictLogging {
/** Top-level usage text of h5Tool; lists the available sub-commands. */
private val usage = """|Usage:
|h5Tool [--help] <command>
|
| Available commands:
|
| merge, count, extract, rename
|
|Performs the requested operation on Hdf5 files, for more information on the
|various commands pass --help to them.
|""".stripMargin
/**
 * Entry point of the h5Tool command line.
 *
 * The first argument selects the sub-command (merge, count, extract or
 * rename); the remaining arguments are handed to that command unchanged.
 * Without arguments, or with --help / -h as first argument, the general
 * usage is printed. An unknown command is reported, followed by the usage.
 *
 * @param args the raw command line arguments
 */
def main(args: Array[String]): Unit = {
  args.toList match {
    case Nil =>
      println(usage)
    case ("--help" | "-h") :: _ =>
      println(usage)
    case "merge" :: rest =>
      mergeCmd(rest)
    case "count" :: rest =>
      countCmd(rest)
    case "extract" :: rest =>
      extractCmd(rest)
    case "rename" :: rest =>
      renameCmd(rest)
    case command :: _ =>
      println(s"invalid command $command")
      println(usage)
  }
}
/** Help text of the merge sub-command, printed for `merge --help`. */
private val mergeCmdUsage = """Usage:
|h5Tool merge
| [--append-to-merge]
| [--merge-h5 <h5FileOut> [<fileIn1> <fileIn2> ...]]
|
|Merges several hdf5 files in one.
|Unless --append-to-merge is given, an already existing target is deleted
|before merging
|""".stripMargin
/**
 * Implements the merge command of the command line.
 *
 * Recognized arguments:
 *  - `--help` / `-h`: print the merge usage and stop
 *  - `--append-to-merge`: open an already existing target for writing
 *    instead of creating it anew
 *  - `--merge-h5 <target> [<in1> <in2> ...]`: the target file followed by
 *    the files to merge into it; everything after the target is taken as
 *    an input file, so this must be the last option
 *
 * Argument errors are reported on stdout together with the merge usage.
 *
 * @param args the arguments following the `merge` command
 */
def mergeCmd(args: List[String]): Unit = {
  var mergeTarget: Option[String] = None
  var mergeFiles: List[String] = Nil
  var appendToMerge: Boolean = false
  var list = args
  while (list.nonEmpty) {
    val arg = list.head
    list = list.tail
    arg match {
      case "--help" | "-h" =>
        println(mergeCmdUsage)
        return
      case "--merge-h5" =>
        if (list.isEmpty) {
          // report the usage of this command, not the top-level one
          println(s"Error: missing target h5 file after --merge-h5. $mergeCmdUsage")
          return
        }
        mergeTarget = Some(list.head)
        // all remaining arguments are input files
        mergeFiles = list.tail
        list = Nil
      case "--append-to-merge" =>
        appendToMerge = true
      case _ =>
        println(s"Error: unexpected argument $arg in merge command. $mergeCmdUsage")
        return
    }
  }
  mergeTarget match {
    case Some(target) =>
      logger.info(s"will merge $mergeFiles -> $target")
      val targetPath = Paths.get(target)
      // NOTE(review): presumably FileH5.create replaces an already existing
      // file, as promised by the usage text - confirm
      val targetF =
        if (appendToMerge && Files.exists(targetPath))
          FileH5.open(targetPath, write = true)
        else
          FileH5.create(targetPath)
      try {
        for (inF <- mergeFiles) {
          val sourceF = FileH5.open(Paths.get(inF))
          try {
            Merge.fileInFile(sourceF, targetF)
          } finally {
            sourceF.release()
          }
        }
      } finally {
        targetF.release()
      }
    case None =>
      // without a target there is nothing to do: tell the user instead of
      // silently succeeding
      println(s"Error: no merge target given, --merge-h5 <h5FileOut> is required. $mergeCmdUsage")
  }
}
/** Help text of the count sub-command, printed for `count --help`. */
private val countCmdUsage = """h5Tool count
  | [--scan-dir=<scanDirectory>]
  | <pathToH5Archive1> [<pathToH5Archive2>...]
  |
  |Prints the archiveGid and the count of calculations contained in it
  |if --scan-dir is given does it for all the archives in the given directory
  |""".stripMargin

/**
 * Implements the count command of the command line.
 *
 * For every archive found in the given h5 files prints a line with the
 * archiveGid and the number of calculations it contains.
 *
 * Recognized arguments:
 *  - `--help` / `-h`: print the count usage and stop
 *  - `--scan-dir=<dir>`: walk the directory tree and process every file
 *    whose name ends with `.h5` (may be given several times)
 *  - `--`: take all the following arguments as file paths
 *  - anything else is taken as the path of an h5 file
 *
 * Scanned directories are processed first, then the explicitly listed files
 * in the order they were given. A failure on one file is logged and does not
 * stop the processing of the others.
 *
 * @param args the arguments following the `count` command
 */
def countCmd(args: List[String]): Unit = {
  // compiled once, not at every loop iteration
  val scanDirRe = "^--scan-dir=(.*)$".r
  var list: List[String] = args
  var h5Files: Vector[Path] = Vector.empty
  var scanDir: Vector[Path] = Vector.empty
  while (list.nonEmpty) {
    val arg = list.head
    list = list.tail
    arg match {
      case "--help" | "-h" =>
        // usage of this command, not the top-level one
        println(countCmdUsage)
        return
      case scanDirRe(dir) =>
        scanDir = scanDir :+ Paths.get(dir)
      case "--" =>
        h5Files = h5Files ++ list.map { x: String => Paths.get(x) }
        list = Nil
      case a =>
        h5Files = h5Files :+ Paths.get(a)
    }
  }

  // prints archiveGid and calculation count of all the archives of one file
  def process(fPath: Path): Unit = {
    try {
      val f = FileH5.open(fPath, false)
      try {
        for (a <- f.archives()) {
          println(s"${a.archiveGid} ${a.calculations().lengthL}")
        }
      } finally {
        f.release()
      }
    } catch {
      case NonFatal(e) =>
        logger.error(s"failure processing $fPath", e)
    }
  }

  for (dir <- scanDir) {
    Files.walkFileTree(dir, new SimpleFileVisitor[Path] {
      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        if (file.getFileName.toString.endsWith(".h5")) {
          process(file)
        }
        FileVisitResult.CONTINUE
      }

      // the default implementation rethrows and would abort the whole scan;
      // log and go on, like process does for a single file
      override def visitFileFailed(file: Path, exc: java.io.IOException): FileVisitResult = {
        logger.error(s"failure accessing $file while scanning $dir", exc)
        FileVisitResult.CONTINUE
      }
    })
  }
  for (fPath <- h5Files) {
    process(fPath)
  }
}
/** Help text of the extract sub-command, printed for `extract --help`. */
private val extractCmdUsage = """Usage:
|h5Tool extract
| [--archive-gid=<archiveGid>]
| [--extract-uri=<uriOfCalculationToExtract>]
| [--archive-path=<pathToH5ArchiveIn>]
| [--extract-gid=<gidOfCalculationToExtract>]
| [--target-path=<pathToOutputHdf5>]
|""".stripMargin
/**
 * Placeholder for the extract command of the command line.
 *
 * Only the help request is handled: if --help or -h appears anywhere in
 * the arguments the extract usage is printed. All other arguments are
 * ignored and "to do" is printed, as the extraction is not implemented.
 *
 * @param args the arguments following the `extract` command
 */
def extractCmd(args: List[String]): Unit = {
  val helpRequested = args.exists(a => a == "--help" || a == "-h")
  if (helpRequested)
    println(extractCmdUsage)
  else
    println("to do")
}
/** Help text of the rename sub-command, including the format of the renames file. */
private val renameCmdUsage = """Usage:
|h5Tool rename
| [--renames=<renamesFile>]
| [--source-path=<pathToH5Archive>]
| [--target-dir=<pathToOutputDirectory>]
| [--flat-dir]
| [--source-paths [path1 [path2 ...]]]
|
|Performs the renames defined in a rename file formatted as follow:
|
|# meta/path1
| from -> to
| "from " -> " to"
|# meta/path2
|...
|
|Replacements are performed in place unless --target-dir is passed in.
|In that case prefix directories with the first 3 characters of the names
|are created unless one passes --flat-dir.
|""".stripMargin
/**
 * Implements the rename command: performs replacements in the h5 files.
 *
 * Recognized arguments:
 *  - `--help` / `-h`: print the rename usage and stop
 *  - `--renames=<file>`: file with the replacements to perform (may be
 *    given several times, the replacements are accumulated)
 *  - `--source-path=<file>`: an h5 file to process (may be repeated)
 *  - `--source-paths p1 p2 ...`: all following arguments are h5 files
 *  - `--target-dir=<dir>`: work on a copy placed in this directory instead
 *    of modifying the source in place
 *  - `--flat-dir`: put the copies directly in the target directory, without
 *    the sub-directory named after the first 3 characters of the file name
 *
 * A failure while replacing in one file is logged and the remaining files
 * are still processed.
 *
 * @param args the arguments following the `rename` command
 * @throws Exception if an unknown argument is encountered
 */
def renameCmd(args: List[String]): Unit = {
  var list: List[String] = args
  val renamesRe = "^--renames=(.*)$".r
  val sourcePathRe = "^--source-path=(.*)$".r
  val targetDirRe = "^--target-dir=(.*)$".r
  var replacements: Seq[Replacements] = Seq()
  var sourcePaths: Seq[Path] = Seq()
  var targetDir: Option[Path] = None
  var flatDir: Boolean = false
  while (list.nonEmpty) {
    val arg = list.head
    list = list.tail
    arg match {
      case "--help" | "-h" =>
        // usage of this command, not the top-level one
        println(renameCmdUsage)
        return
      case renamesRe(rFile) =>
        replacements = replacements ++ Replacements.fromFile(Paths.get(rFile))
      case targetDirRe(targetD) =>
        targetDir = Some(Paths.get(targetD))
      case sourcePathRe(p) =>
        sourcePaths = sourcePaths :+ Paths.get(p)
      case "--flat-dir" =>
        flatDir = true
      case "--source-paths" =>
        // all remaining arguments are source files
        sourcePaths ++= list.map { x: String => Paths.get(x) }
        list = Nil
      case other =>
        throw new Exception(s"unexpected argument '$other'")
    }
  }
  for (inF <- sourcePaths) {
    val targetPath = targetDir match {
      case Some(dir) =>
        val fileName = inF.getFileName
        val tPath =
          if (flatDir)
            dir.resolve(fileName)
          else
            dir.resolve(fileName.toString.take(3)).resolve(fileName)
        // getParent is null when the target has no parent component
        // (e.g. an empty --target-dir together with --flat-dir)
        Option(tPath.getParent).foreach { parent =>
          if (!Files.exists(parent))
            Files.createDirectories(parent, LocalEnv.directoryPermissionsAttributes)
        }
        // work on a copy, the source stays untouched
        Files.copy(inF, tPath, StandardCopyOption.REPLACE_EXISTING)
        tPath
      case None =>
        inF
    }
    val targetF = FileH5.open(targetPath, write = true)
    try {
      for (repl <- replacements)
        H5Rename.renameStr(targetF, repl)
    } catch {
      case NonFatal(e) =>
        logger.error(s"Failure while doing replacements in file at $targetPath", e)
    } finally {
      // release also when a fatal error escapes the catch above
      targetF.release()
    }
  }
}
}
......@@ -1650,6 +1650,17 @@ lazy val archiveTool = (project in file("archive/archive-tool")).
enablePlugins(BuildInfoPlugin).
settings(gitVersionSettings: _*)
// Command line tool operating on hdf5 files (sources in archive/h5-tool);
// the assembled jar starts eu.nomad_lab.h5.H5Tool.
lazy val h5Tool = (project in file("archive/h5-tool")).
dependsOn(core).
settings(commonSettings: _*).
settings(
name := "h5Tool",
mainClass in assembly := Some("eu.nomad_lab.h5.H5Tool")
).
settings(Revolver.settings: _*).
enablePlugins(BuildInfoPlugin).
settings(gitVersionSettings: _*)
lazy val archiveWebservice = (project in file("archive/archive-webservice")).
dependsOn(core).
dependsOn(webserviceBase).
......@@ -1856,4 +1867,4 @@ lazy val root = (project in file(".")).
core, dbSupport, parsingStatsDb, rawDataDb, repoDb,
fhiAims, abinit, castep, onetep, cp2k, cpmd, nwchem, bigdft, dlPoly, libAtoms, dmol3, exciting, gaussian, gpaw, lammps, amber, gromacs, gromos, namd, charmm, tinker, mopac, octopus, orca, qbox, quantumEspresso, turbomole, vasp, wien2k, elk, fleur, dftbPlus, asap, atk, gamess, gulp, fplo, molcas, phonopy, atomicData, qhp, elastic,
stats, fhiAimsBasis, symmetry, systemType, springer, repoTags, bandStructureNormalizer,
base, normalize, webservice, tool, calculationparser, normalizerWorker, treeparser, kubernetes, integratedpipeline, query, parquet, elasticsearch, repoBase, repoTool, repoDataManager, repoWebservice, archiveTool, archiveWebservice, rawDataInjection, rawDataTool, splitSupport, fileWebservice, metaInfoTool)
base, normalize, webservice, tool, calculationparser, normalizerWorker, treeparser, kubernetes, integratedpipeline, query, parquet, elasticsearch, repoBase, repoTool, repoDataManager, repoWebservice, archiveTool, h5Tool, archiveWebservice, rawDataInjection, rawDataTool, splitSupport, fileWebservice, metaInfoTool)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment