Commit 86f6389f authored by Mohamed, Fawzi Roberto (fawzi)
Browse files

indexing improvements

parent a5a98719
Pipeline #37034 failed with stages
in 47 minutes and 37 seconds
#!/bin/bash
# Launches one archiveTool run per input file found in /work, all in parallel,
# and waits for every run to finish. Each run's stdout and stderr are written
# to out-<input file name>.txt in /work.
#
# Tunables (environment variables, or assignments in an optional /config.sh):
#   baseName     prefix of the input files               (default: toIndex-)
#   globPattern  glob appended to baseName               (default: ??)
#   indexName    if non-empty, passed as --indexName=<indexName>
#   cmdArgs      arguments handed to archiveTool; a literal $f inside it is
#                replaced with the current input file on every iteration
cmd="java -Djava.library.path=/lib -jar /app/archiveTool.jar"
baseName=${baseName:-toIndex-}
globPattern=${globPattern:-??}
indexName=${indexName:-}
if [ -e /config.sh ] ; then
  source /config.sh
fi
# Derive the flag only after /config.sh had a chance to set indexName.
indexFlags=${indexFlags:-}
if [ -n "$indexName" ] ; then
  indexFlags="--indexName=$indexName"
fi
# Keep $f unexpanded here: it only has a value inside the loop below, where
# eval expands it once per input file.
# NOTE(review): the indexArchives usage text documents --archive-gids-file;
# confirm that --source-paths-file is the option the tool really expects.
if [ -z "${cmdArgs:-}" ] ; then
  cmdArgs="indexArchives $indexFlags --source-paths-file=\"\$f\""
fi
# Output files are created relative to the working directory, so bail out
# rather than scattering them somewhere else.
cd /work || exit 1
for f in /work/$baseName$globPattern ; do
  (
    # An unmatched glob is left as the literal pattern; skip it.
    if [[ -e "$f" ]] ; then
      eval "$cmd $cmdArgs" >& "out-$(basename "$f").txt"
    fi
  ) &
done
wait
......@@ -50,6 +50,7 @@ object ArchiveTool extends StrictLogging {
case "createIndex" => createEmptyIndex(list)
case "indexArchives" => indexArchives(list)
case "indexFolders" => indexFolders(list)
case "aliasRemap" => aliasRemap(list)
case command =>
println(s"invalid command $command")
println(usage)
......@@ -65,6 +66,7 @@ object ArchiveTool extends StrictLogging {
| createIndex create a new index but parse no archives
| indexArchives parse a set of archives and add their data to an existing index
| indexFolders parse a set of folders recursively and add their data to an existing index
| aliasRemap remaps the index to the current one
|
| Run archiveTool <command> [--help] to view information about the
| options for each command.""".stripMargin
......@@ -88,6 +90,7 @@ object ArchiveTool extends StrictLogging {
| the current date "archiveYYYY_MM_DD".""".stripMargin
private val usage_index = """archiveTool indexArchives
| [--indexName=<name of new index>]
| [--archive-gids-file=<file with archive gids>]
| archiveGid1 [archiveGid2 [...]]
|
| Parse the given normalized archives and add their data to an
......@@ -100,6 +103,13 @@ object ArchiveTool extends StrictLogging {
| Parse the given folders recursively and add the data from archives in
| them to an existing Nomad Archive index. If no index name is given,
| it defaults to a name with the current date "archiveYYYY_MM_DD".""".stripMargin
// Help text for the aliasRemap command; printed by `aliasRemap --help`.
private val usage_aliasRemap = """archiveTool aliasRemap
  | [--force]
  | [--indexName=<name of new index>]
  |
  | Remaps the alias specified in the config parameter
  | nomad_lab.elastic.indexNameData to the given index
  | (it defaults to a name with the current date "archiveYYYY_MM_DD").""".stripMargin
def buildCompleteIndex(args: List[String]): Unit = {
var list: List[String] = args
......@@ -173,6 +183,7 @@ object ArchiveTool extends StrictLogging {
val today = ZonedDateTime.now(java.time.ZoneId.systemDefault()).toLocalDate
var indexNameData = "archive" + today
var archives = Seq[String]()
// Extracts the file name from --archive-gids-file=<file>. The group must accept
// more than one character: a regex used as a pattern-match extractor has to
// match the whole argument.
val archiveGidsFileRe = "^--archive-gids-file=(.+)$".r
while (list.nonEmpty) {
val arg = list.head
list = list.tail
......@@ -181,6 +192,9 @@ object ArchiveTool extends StrictLogging {
println(usage_index)
return
case indexReData(name) => indexNameData = name
case archiveGidsFileRe(file) =>
val s = scala.io.Source.fromFile(file)
archives ++= s.getLines()
case _ => archives = arg :: list; list = List()
}
}
......@@ -232,6 +246,39 @@ object ArchiveTool extends StrictLogging {
}
}
/** Entry point of the `aliasRemap` command.
  *
  * Recognized arguments:
  *  - `--help` / `-h`: print the aliasRemap usage text and do nothing else
  *  - `--force`: forwarded to [[remapAliases]] as `force = true`
  *  - anything matched by `indexReData`: overrides the target index name
  *
  * Any other argument aborts the command with an error message followed by the
  * aliasRemap usage text. Without an explicit name the target index is
  * "archive" followed by today's date in the system default time zone.
  *
  * @param args command line arguments that follow the command name
  */
def aliasRemap(args: List[String]): Unit = {
  var list: List[String] = args
  var force: Boolean = false
  val today = ZonedDateTime.now(java.time.ZoneId.systemDefault()).toLocalDate
  var indexNameData = "archive" + today
  while (list.nonEmpty) {
    val arg = list.head
    list = list.tail
    arg match {
      case "--help" | "-h" =>
        println(usage_aliasRemap)
        return
      case "--force" => force = true
      case indexReData(name) => indexNameData = name
      case _ =>
        // Show the help of this command, not the one of another command.
        println(s"Error: unexpected argument $arg. $usage_aliasRemap")
        return
    }
  }
  // Without a working connection there is nothing to remap: log and give up.
  val es = try {
    ESManager(KnownMetaInfoEnvs.archive)
  } catch {
    case NonFatal(e) =>
      logger.error("Elastic search connection failed! " + e)
      return
  }
  // Stop the manager even when the remapping throws.
  try {
    remapAliases(es, indexNameData, force = force)
  } finally {
    es.stop()
  }
}
def createNewIndex(es: ESManager, indexName: String)(implicit metaInfo: MetaInfoEnv): Unit = {
val mapping = IndexManifest.getESMapping(es)
val creation = es.client.execute(createIndex(indexName).mappings(mapping))
......@@ -259,7 +306,7 @@ object ArchiveTool extends StrictLogging {
dirs.foreach(processFolder(es, archiveSet, _, indexName))
}
def remapAliases(es: ESManager, indexNameData: String): Unit = {
def remapAliases(es: ESManager, indexNameData: String, force: Boolean = false): Unit = {
val aliasResult = es.client.execute(catAliases()).await()
val liveIndexData = ESManager.defaultSettings(KnownMetaInfoEnvs.archive).indexNameData
logger.info(s"Data Alias to be used: $liveIndexData")
......@@ -273,7 +320,7 @@ object ArchiveTool extends StrictLogging {
val newCount = es.client.execute(catCount(indexNameData)).await.count
logger.info(s"Old Data index: ${rawIndexNameData.get} with $oldCount entries")
logger.info(s"New Data index: $indexNameData with $newCount entries")
if (oldCount > newCount) {
if (oldCount > newCount && !force) {
logger.error(s"New data index contains less entries than the current one!")
return
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment