Commit f668c160 authored by Markus Scheidgen

Removed scripts that are now maintained in nomad-faridi-private.

parent 46da2a27
# Simple python script template for deleting calculations from mongo and elastic
# based on an elastic query
import elasticsearch_dsl as es

from nomad import infrastructure, config, processing

infrastructure.setup_logging()
infrastructure.setup_mongo()
infrastructure.setup_elastic()

# published Octopus calcs without a pid, identified by their mainfile pattern
query = es.Q('match', code_name='Octopus') \
    & ~es.Q('exists', field='pid') \
    & es.Q('wildcard', mainfile='*/inp') \
    & es.Q('match', published=True)

search = es.Search(index=config.elastic.index_name).query(query)
calc_ids = [hit.calc_id for hit in search.scan()]

input('Will delete %d calcs. Press enter to continue ...' % len(calc_ids))


def chunks(l, n):
    for i in range(0, len(l), n):
        print(i)
        yield l[i:i + n]


# delete from mongo in chunks, then delete the matching elastic documents
for chunk in chunks(calc_ids, 1000):
    processing.Calc.objects(calc_id__in=chunk).delete()

search.delete()
#!/bin/sh
set -e
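# build and push the CoE repo tool and webservice images (args: registry username, password)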
echo "log into docker registry..."
docker login gitlab-registry.mpcdf.mpg.de -u $1 -p $2
echo "building images..."
cd dependencies/nomad-lab-base
sbt "project repoTool" docker
sbt "project repoWebservice" docker
echo "pushing images..."
docker push gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/coe-repotool
docker push gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/coe-repowebservice
#!/bin/sh
set -e
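# smoke test: reset nomad, import example calcs, rebuild the CoE repo index, and query it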
coe_config_args="-e REPO_DB_JDBC_URL=jdbc:postgresql://postgres:5432/nomad -e REPO_ELASTIC_URL=elasticsearch://elastic:9200 --network nomad_default"
nomad="nomad -p 8000 -h localhost"
repo_tool="docker run $coe_config_args gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-fair/coe-repotool"
echo "reset nomad"
$nomad reset
curl -XDELETE localhost:9200/repo_index
curl -XDELETE localhost:9200/repo_topics
echo "import example calculations"
$nomad upload --unstage tests/data/proc/examples_vasp.zip
echo "create a new index with coe repoTool"
$repo_tool newIndex --indexName=repo_index --indexNameTopics=repo_topics
# try to search for new calculations
curl http://localhost:8111/repo/search/calculation_groups_oldformat?query=repository_program_name%3DVASP
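-- force-drop the migration database: block new connections, terminate existing ones, then drop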
UPDATE pg_database SET datallowconn = 'false' WHERE datname = 'fairdi_nomad_migration';
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'fairdi_nomad_migration';
DROP DATABASE fairdi_nomad_migration;
import json

from nomad import infrastructure
from nomad import processing

infrastructure.setup_logging()
calcs = infrastructure.setup_mongo().fairdi_nomad_migration.calc
uploads = infrastructure.setup_mongo().fairdi_nomad_migration.upload
packages = infrastructure.setup_mongo().coe_migration.package


def retrieve_remote_data():
    # map each pid to the list of uploads that contain a calc with that pid
    count = 0
    pid_dict = {}
    for calc in calcs.find(
            {'metadata.pid': {'$exists': True}},
            {'metadata.pid': 1, 'upload_id': 1}):
        pid = calc['metadata']['pid']
        upload = calc['upload_id']
        pid_calcs = pid_dict.get(pid)
        if pid_calcs is None:
            pid_calcs = []
            pid_dict[pid] = pid_calcs
        if upload != 'j-IVBd3MQWWvTJ6pg5oaSw':
            pid_calcs.append(upload)
        count += 1
        if count % 100000 == 0:
            print(count)
    with open('pid_dict.json', 'wt') as f:
        json.dump(pid_dict, f)
    return pid_dict


def load_local_data():
    with open('pid_dict.json', 'rt') as f:
        return json.load(f)


try:
    pid_dict = load_local_data()
except Exception:
    pid_dict = retrieve_remote_data()

print('data available ...')


def remove_upload(upload):
    for pid_uploads in pid_dict.values():
        if upload in pid_uploads:
            pid_uploads.remove(upload)


def calc_dups():
    # count per upload how many of its pids are duplicated (dup) or unique (single)
    upload_dict = {}
    for _, pid_uploads in pid_dict.items():
        pid_uploads = list(set(pid_uploads))
        for upload in pid_uploads:
            dup, single = upload_dict.get(upload, (0, 0))
            if len(pid_uploads) >= 2:
                dup += 1
            else:
                single += 1
            upload_dict[upload] = (dup, single)
    return upload_dict


while True:
    more = False  # reset each round, otherwise the loop never terminates
    upload_dict = calc_dups()
    for upload, (dup, single) in upload_dict.items():
        if single == 0:
            # the upload only contains duplicated pids and can be removed entirely
            print('full: ' + upload)
            remove_upload(upload)
            more = True
            break
    if not more:
        # no fully redundant upload left; report the partially duplicated ones
        for upload, (dup, single) in upload_dict.items():
            if dup > 0:
                package_id = uploads.find_one({'_id': upload})['name']
                pkg = packages.find_one({'_id': package_id})
                source_upload_id = pkg['upload_id']
                pkg_path = pkg['package_path']
                print('%s, %s, %s, %s (%d vs %d)' % (
                    source_upload_id, package_id, pkg_path, upload, dup, single))
        break
import multiprocessing as mp
import sys
import os
import os.path
import stat

if __name__ == '__main__':
    def fix_permissions(path):
        # check whether anything directly under path lacks read (or, for
        # directories, execute) permission for others; if so, fix the whole tree
        has_problems = False
        for item in os.listdir(path):
            filepath = os.path.join(path, item)
            if os.path.islink(filepath):
                continue
            stats = os.stat(filepath)
            has_problems |= not bool(stats.st_mode & stat.S_IROTH)
            # S_ISDIR instead of masking with S_IFDIR, which also matches sockets
            if stat.S_ISDIR(stats.st_mode):
                has_problems |= not bool(stats.st_mode & stat.S_IXOTH)
        if has_problems:
            print('fixing problems for %s' % path)
            os.system('find %s -type d -exec chmod +x {} \\;' % path)
            os.system('find %s -exec chmod +r {} \\;' % path)

    if len(sys.argv) == 2:
        path = sys.argv[1]
        paths = [os.path.join(path, item) for item in os.listdir(path)]
    elif len(sys.argv) == 1:
        print('no path given')
        sys.exit(1)
    else:
        paths = sys.argv[1:]

    with mp.Pool(5) as p:
        p.map(fix_permissions, paths)
import pymongo
import zipfile
import sys
import json

from nomad import parsing

client = pymongo.MongoClient()
packages = client['coe_migration']['package']


def check(upload_id, mainfile):
    # read the first 5k of the mainfile from one of the upload's package zips
    content = None
    for package in packages.find(dict(upload_id=upload_id)):
        package_path = package['package_path']
        with zipfile.ZipFile(package_path, 'r') as zf:
            try:
                with zf.open(mainfile, 'r') as f:
                    content = f.read(5000)
            except KeyError:
                pass
    if content is None:
        print('mainfile does not exist')
        sys.exit(1)
    # check whether any parser matches this mainfile
    match = None
    for parser in parsing.parsers:
        if parser.is_mainfile(mainfile, 'text/plain', content, None):
            match = parser
    if match is None:
        try:
            print(content.decode('utf-8'))
        except Exception:
            print('not unicode decodable, probably binary file')
    return match is not None


with open('local/missing_calcs_data.json') as f:
    data = json.load(f)

for cause in data['others'] + data['no_calcs']:
    if 'investigated_cause' not in cause \
            and 'phonopy' not in cause['example_mainfile'] \
            and not check(cause['source_upload_id'], cause['example_mainfile']):
        input(cause)
import re
import unidecode

from nomad import infrastructure

infrastructure.setup_logging()

existing = set()
for user in infrastructure.keycloak.search_user(max=2000):
    if not re.match(r'^[a-zA-Z0-9_\-\.]+$', user.username):
        # need to replace username
        if user.first_name is not None and user.last_name is not None:
            user.username = '%s%s' % (user.first_name[:1], user.last_name)
        elif user.last_name is not None:
            user.username = user.last_name
        elif '@' in user.username:
            user.username = user.username.split('@')[0]
        user.username = unidecode.unidecode(user.username.lower())
        user.username = re.sub(r'[^0-9a-zA-Z_\-\.]+', '', user.username)
        # append a numeric suffix to the base name until the username is unique
        base = user.username
        index = 1
        while user.username in existing:
            user.username = '%s%d' % (base, index)
            index += 1
        existing.add(user.username)
        infrastructure.keycloak._admin_client.update_user(
            user_id=user.user_id, payload=dict(username=user.username))
        print(user.username)
#!/bin/bash
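# create the image pull secret for the MPCDF gitlab registry (args: username, password, email)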
kubectl create secret docker-registry gitlab-mpcdf \
--docker-server https://gitlab-registry.mpcdf.mpg.de \
--docker-username $1 \
--docker-password $2 \
--docker-email $3
#!/bin/sh
# add yum repo
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kube*
EOF
# delete old
yum erase -y kubelet kubeadm kubectl --disableexcludes=kubernetes
# Set SELinux in permissive mode (effectively disabling it)
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
systemctl enable --now kubelet
# firewall
firewall-cmd --permanent --add-port=6443/tcp
firewall-cmd --permanent --add-port=2379-2380/tcp
firewall-cmd --permanent --add-port=10250/tcp
firewall-cmd --permanent --add-port=10251/tcp
firewall-cmd --permanent --add-port=10252/tcp
firewall-cmd --permanent --add-port=10255/tcp
firewall-cmd --reload
modprobe br_netfilter
# routing
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
# start it up
systemctl daemon-reload
systemctl restart kubelet
echo "Still have to use kubeadm init/join"
echo "Run on master:"
echo "kubeadm token create --print-join-command"
echo "Run output here"
\ No newline at end of file
#!/bin/sh
# add yum repo
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kube*
EOF
# delete old
yum erase -y kubelet kubeadm kubectl --disableexcludes=kubernetes
# Set SELinux in permissive mode (effectively disabling it)
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
systemctl enable --now kubelet
# firewall
firewall-cmd --permanent --add-port=6443/tcp
firewall-cmd --permanent --add-port=2379-2380/tcp
firewall-cmd --permanent --add-port=10250/tcp
firewall-cmd --permanent --add-port=10251/tcp
firewall-cmd --permanent --add-port=10252/tcp
firewall-cmd --permanent --add-port=10255/tcp
firewall-cmd --permanent --add-port=8285/udp
firewall-cmd --permanent --add-port=8472/udp
firewall-cmd --reload
modprobe br_netfilter
# routing
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
# start it up
systemctl daemon-reload
systemctl restart kubelet
# init master node with flannel
kubeadm init --pod-network-cidr=10.244.0.0/16
export KUBECONFIG=/etc/kubernetes/admin.conf
sysctl net.bridge.bridge-nf-call-iptables=1
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/a70459be0084506e4ec919aa1c114638878db11b/Documentation/kube-flannel.yml
# allow pods to be scheduled on the master node
kubectl taint nodes --all node-role.kubernetes.io/master-
echo "Run on master node to create join command:"
echo "kubeadm token create --print-join-command"
# increase the memory limits of the flannel ds; the default limits cause a lot
# of OOM kills and further problems down the line
kubectl patch ds -n=kube-system kube-flannel-ds-amd64 -p '{"spec": {"template":{"spec":{"containers": [{"name":"kube-flannel", "resources": {"limits": {"cpu": "250m","memory": "550Mi"},"requests": {"cpu": "100m","memory": "100Mi"}}}]}}}}'
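# fully tear down a kubeadm node: reset, flush iptables/ipvs, remove CNI state,
# and delete the flannel/cni network interfaces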
kubeadm reset
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
ipvsadm --clear
systemctl stop kubelet
systemctl stop docker
rm -rf /var/lib/cni/
rm -rf /var/lib/kubelet/*
rm -rf /etc/cni/
ifconfig cni0 down
ifconfig flannel.1 down
ifconfig docker0 down
ip link delete cni0
ip link delete flannel.1
systemctl start docker
[
    "/nomad/repository/data/extracted/ftp_upload_for_uid_125",
    "/nomad/repository/data/extracted/ftp_upload_for_uid_125_at_2018_03_11_19_24_00",
    "/nomad/smartyyy/download_from_aflowlib_server/wget_download_aflowlib/ftp_upload_for_uid_125_at_2018_03_11_19_24_00"
]
from nomad import infrastructure
from nomad import processing
from nomad import search

infrastructure.setup_logging()
calcs = infrastructure.setup_mongo().fairdi_nomad_migration.calc
index = infrastructure.setup_mongo().coe_migration.source_calc
uploads_col = infrastructure.setup_mongo().fairdi_nomad_migration.upload
infrastructure.setup_elastic()


def check_and_fix(upload):
    example = calcs.find_one({'upload_id': upload, 'metadata.pid': {'$exists': True}})
    if example is None:
        # can happen on multi package uploads
        return
    pid = example['metadata']['pid']
    truth = index.find_one({'_id': pid})
    if truth['metadata']['with_embargo'] != example['metadata']['with_embargo']:
        u = uploads_col.find_one({'_id': upload})
        print('need to fix from user %d, %s package id %s' % (
            example['metadata']['uploader']['id'], upload, u['name']))


for upload in calcs.distinct('upload_id'):
    check_and_fix(upload)
from typing import Dict
import zipfile
import tarfile
import os.path
import sys

from nomad import config

# usage: python <script> <target.tar> <upload_id> [<upload_id> ...]

names: Dict[str, bool] = dict()


def add_upload(tf, upload):
    # stream all raw files of an upload from its zip packages into the tar,
    # skipping names that were already added
    files = [
        os.path.join(config.fs.public, upload[0:2], upload, base)
        for base in ['raw-public.plain.zip', 'raw-restricted.plain.zip']]
    for f in files:
        with zipfile.ZipFile(f) as zf:
            for zipinfo in zf.infolist():
                name = zipinfo.filename
                if name not in names:
                    names[name] = True
                    with zf.open(zipinfo) as bf:
                        tarinfo = tarfile.TarInfo(name)
                        tarinfo.size = zipinfo.file_size
                        tf.addfile(tarinfo, bf)


target = sys.argv[1]
with tarfile.TarFile(target, 'x') as tf:
    for upload in sys.argv[2:]:
        print('adding upload %s' % upload)
        add_upload(tf, upload)
import datetime

from dateutil.parser import parse

from nomad import infrastructure

infrastructure.setup_logging()
mongo = infrastructure.setup_mongo()
calcs = mongo.fairdi_nomad_prod_v0_7.calc
datasets = mongo.fairdi_nomad_prod_v0_7.dataset


def doit(dataset_id):
    example = calcs.find_one({'metadata.datasets': dataset_id})
    if example is None:
        print('no example for %s' % dataset_id)
        return
    user_id = example['metadata']['uploader']
    if 'upload_time' not in example['metadata']:
        print('no upload time in %s' % dataset_id)
        upload_time = datetime.datetime.now()
    else:
        upload_time = example['metadata']['upload_time']
    datasets.update_one(
        {'_id': dataset_id},
        {'$set': {
            'user_id': str(user_id),
            'created': parse(str(upload_time))
        }})


# iterate over the full documents; distinct('_id') only yields the id values,
# so testing field membership on them would never match
for dataset in datasets.find():
    if 'user_id' not in dataset and 'created' not in dataset:
        doit(dataset['_id'])
from dateutil.parser import parse

from nomad import infrastructure

infrastructure.setup_logging()
mongo = infrastructure.setup_mongo()
calcs = mongo.fairdi_nomad_migration.calc
uploads = mongo.fairdi_nomad_migration.upload


def doit(upload):
    example = calcs.find_one({'upload_id': upload})
    user_id = example['metadata']['uploader']['id']
    if 'upload_time' not in example['metadata']:
        print('no upload time in %s' % upload)
        return
    upload_time = example['metadata']['upload_time']
    uploads.update_one(
        {'_id': upload},
        {'$set': {
            'user_id': str(user_id),
            'upload_time': parse(str(upload_time))
        }})


for upload in uploads.distinct('_id'):
    doit(upload)
from nomad import doi, infrastructure, utils
from nomad.datamodel import Dataset

if __name__ == '__main__':
    infrastructure.setup_logging()
    infrastructure.setup_mongo()
    for dataset in Dataset.m_def.m_x('me').objects(doi__exists=True):
        try:
            doi.edit_url(doi=dataset.doi)
        except Exception as e:
            utils.get_logger(__name__).error('could not rewrite doi', exc_info=e)
        else:
            print('Rewrote URL of %s' % dataset.doi)
###
# Disable the threshold based trigger that turns elastic indices read-only on low disk space
PUT http://localhost:9200/_cluster/settings HTTP/1.1
content-type: application/json
{
    "transient": {
        "cluster.routing.allocation.disk.threshold_enabled": false
    }
}
###
# Make the calcs index writeable again after it accidentally became read-only
PUT http://localhost:9200/calcs/_settings HTTP/1.1
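content-type: application/json

# presumed body, not part of the original snippet: clearing the flood-stage
# read-only block is the usual way to re-enable writes on the index
{
    "index.blocks.read_only_allow_delete": null
}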