From c4035ca275cdee4a98f345604c20a2ac44e40987 Mon Sep 17 00:00:00 2001
From: Dinga Wonanke <dinga.wonanke@kit.edu>
Date: Thu, 28 Sep 2023 08:46:11 +0000
Subject: [PATCH] Improved the handling of layered systems in
 PorosityNormalizer.

---
 nomad/normalizing/mof_deconstructor.py | 258 +++++++++++++++++++------
 1 file changed, 200 insertions(+), 58 deletions(-)

diff --git a/nomad/normalizing/mof_deconstructor.py b/nomad/normalizing/mof_deconstructor.py
index 8d0cefa6e0..c9c47497e6 100644
--- a/nomad/normalizing/mof_deconstructor.py
+++ b/nomad/normalizing/mof_deconstructor.py
@@ -231,6 +231,13 @@ def dfsutil_graph_method(graph, temp, node, visited):
     return temp
 
 
+def longest_list(lst):
+    '''
+    return longest list in list of list
+    '''
+    return max(lst, key=len)
+
+
 def remove_unbound_guest(ase_atom):
     '''
     A simple script to remove guest from a metal organic framework.
@@ -264,7 +271,6 @@ def remove_unbound_guest(ase_atom):
             pymat_graph = connected_components(coordination_graph)
             if len(pymat_graph) == 1:
                 polymeric_indices.append(i)
-
         if len(polymeric_indices) > 0:
             Graphs = [StructureGraph.with_local_env_strategy(AseAtomsAdaptor.get_structure(
                 ase_atom[fragments[i]]), JmolNN()) for i in polymeric_indices]
@@ -278,9 +284,12 @@ def remove_unbound_guest(ase_atom):
             mof_indices = []
             for frag_indices in temp_indices:
                 mof_indices.extend(fragments[frag_indices])
-            return mof_indices
+            if len(mof_indices) == 0:
+                return longest_list(fragments)
+            else:
+                return mof_indices
         else:
-            return sum(sum(fragments, []))
+            return sum(fragments, [])
 
 
 def connected_components(graph):
@@ -528,6 +537,68 @@ def find_carbonyl_sulphate(ase_atom):
     return sulphate
 
 
+def find_sulfides(ase_atom):
+    '''
+    A simple aglorimth to search for sulfides.
+     S
+     |
+    -C
+     |
+     S
+
+    Parameters:
+    -----------
+    ase_atom: ASE atom
+
+    Returns
+    -------
+    dictionary of key = carbon index and values = sulphur index
+    '''
+    graph, _ = compute_ase_neighbour(ase_atom)
+    sulfides = {}
+    for atoms in ase_atom:
+        if atoms.symbol == 'C':
+            index = atoms.index
+            sulphure_atoms = [i for i in graph[index]
+                              if ase_atom[i].symbol == 'S']
+            if len(sulphure_atoms) == 2:
+                sulphure_to_metal = sum(
+                    [[j for j in graph[i] if ase_atom[j].symbol in transition_metals()] for i in sulphure_atoms], [])
+                if len(sulphure_to_metal) > 0:
+                    sulfides[index] = sulphure_atoms
+    return sulfides
+
+
+def find_phosphate(ase_atom):
+    '''
+    A simple algorithm to search for Carbonyl sulphate found in the system.
+       O
+       |
+      -P-o
+       |
+       O
+    Parameters:
+    -----------
+    ase_atom: ASE atom
+
+    Returns
+    -------
+    dictionary of key = carbon index and values = oxygen index
+    '''
+    graph, _ = compute_ase_neighbour(ase_atom)
+    phosphate = {}
+    for atoms in ase_atom:
+        if atoms.symbol == 'P':
+            index = atoms.index
+            oxygen = [i for i in graph[index] if ase_atom[i].symbol == 'O']
+            if len(oxygen) >= 1:
+                oxy_metal = sum(
+                    [[j for j in graph[i] if ase_atom[j].symbol in transition_metals()] for i in oxygen], [])
+                if len(oxy_metal) > 0:
+                    phosphate[index] = oxygen
+    return phosphate
+
+
 def secondary_building_units(ase_atom):
     """
     1) Search for all carboxylate that are connected to a metal.
@@ -553,8 +624,13 @@ def secondary_building_units(ase_atom):
     bonds_to_break = []
     carboxylates = find_carboxylates(ase_atom)
     all_sulphates = find_carbonyl_sulphate(ase_atom)
+    all_sulfides = find_sulfides(ase_atom)
+    all_phosphates = find_phosphate(ase_atom)
+    seen_phosphorous = []
+    seen_carbon = []
     for atoms in graph:
         if atoms in list(carboxylates.keys()):
+            seen_carbon.append(atoms)
             connected = graph[atoms]
             all_carbon_indices = [
                 i for i in connected if ase_atom[i].symbol == 'C']
@@ -570,15 +646,34 @@ def secondary_building_units(ase_atom):
                 bonds_to_break.append([atoms] + S_indx)
                 atom_pairs_at_breaking_point[atoms] = S_indx[0]
 
-        if ase_atom[atoms].symbol == 'C':
+        if atoms in list(all_sulfides.keys()):
+            seen_carbon.append(atoms)
             connected = graph[atoms]
-            oxygens = [i for i in connected if ase_atom[i].symbol == 'O']
-            if len(oxygens) == 1:
-                oxy_metal = [
-                    i for i in oxygens if ase_atom[i].symbol in transition_metals()]
-                if len(oxy_metal) == 1:
-                    atom_pairs_at_breaking_point[oxygens[0]] = oxy_metal[0]
-                    bonds_to_break.append([oxygens] + oxy_metal)
+            all_carbon_indices = [
+                i for i in connected if ase_atom[i].symbol == 'C']
+            all_nitrogens = [i for i in connected if ase_atom[i].symbol == 'N']
+            # S_indx = [i for i in connected if ase_atom[i].symbol == 'S']
+            if len(all_carbon_indices) == 1:
+                bonds_to_break.append([atoms] + all_carbon_indices)
+                atom_pairs_at_breaking_point[atoms] = all_carbon_indices[0]
+            if len(all_nitrogens) == 1:
+                bonds_to_break.append([atoms] + all_nitrogens)
+                atom_pairs_at_breaking_point[atoms] = all_nitrogens[0]
+            # if len(S_indx) == 1:
+            #     bonds_to_break.append([atoms] + S_indx)
+            #     atom_pairs_at_breaking_point[atoms] = S_indx[0]
+        if ase_atom[atoms].symbol == 'C':
+            if atoms not in seen_carbon:
+                connected = graph[atoms]
+                oxygens = [i for i in connected if ase_atom[i].symbol == 'O']
+                if len(oxygens) == 1:
+                    oxy_metal = [
+                        i for i in oxygens if ase_atom[i].symbol in transition_metals()]
+                    oxy_metal = [
+                        i for i in oxy_metal if i not in porphyrin_checker]
+                    if len(oxy_metal) == 1:
+                        atom_pairs_at_breaking_point[oxygens[0]] = oxy_metal[0]
+                        bonds_to_break.append([oxygens] + oxy_metal)
 
         if atoms in list(all_sulphates.keys()):
             connected = graph[atoms]
@@ -592,6 +687,7 @@ def secondary_building_units(ase_atom):
                 for oxy in oxygen:
                     metal = [i for i in graph[oxy]
                              if ase_atom[i].symbol in transition_metals()]
+                    metal = [i for i in metal if i not in porphyrin_checker]
                     if len(metal) > 0:
                         for met in metal:
                             atom_pairs_at_breaking_point[oxy] = met
@@ -599,11 +695,13 @@ def secondary_building_units(ase_atom):
 
         if ase_atom[atoms].symbol == 'O':
             seen = sum(list(carboxylates.values())
-                       + list(all_sulphates.values()), [])
+                       + list(all_sulphates.values())
+                       + list(all_phosphates.values()), [])
             if atoms not in seen:
                 connected = graph[atoms]
                 metal = [
                     i for i in connected if ase_atom[i].symbol in transition_metals()]
+                metal = [i for i in metal if i not in porphyrin_checker]
                 Nitrogen = [i for i in connected if ase_atom[i].symbol == 'N']
                 carbon = [i for i in connected if ase_atom[i].symbol
                           == 'C' and i not in list(carboxylates.keys())]
@@ -638,13 +736,32 @@ def secondary_building_units(ase_atom):
                     bonds_to_break.append([atoms] + metal)
 
         if ase_atom[atoms].symbol == 'S':
+            seen = sum(list(all_sulfides.values()), [])
+            if atoms not in seen:
+                connected = graph[atoms]
+                metal = [
+                    i for i in connected if ase_atom[i].symbol in transition_metals()]
+                if len(metal) > 0:
+                    for met in metal:
+                        atom_pairs_at_breaking_point[atoms] = met
+                        bonds_to_break.append([atoms, met])
+
+        if atoms in list(all_phosphates.keys()):
+            seen_phosphorous.append(atoms)
             connected = graph[atoms]
-            metal = [
-                i for i in connected if ase_atom[i].symbol in transition_metals()]
-            if len(metal) > 0:
-                for met in metal:
-                    atom_pairs_at_breaking_point[atoms] = met
-                    bonds_to_break.append([atoms, met])
+            all_carbon_indices = [
+                i for i in connected if ase_atom[i].symbol == 'C']
+            all_nitrogens = [i for i in connected if ase_atom[i].symbol == 'N']
+            S_indx = [i for i in connected if ase_atom[i].symbol == 'S']
+            if len(all_carbon_indices) == 1:
+                bonds_to_break.append([atoms] + all_carbon_indices)
+                atom_pairs_at_breaking_point[atoms] = all_carbon_indices[0]
+            if len(all_nitrogens) == 1:
+                bonds_to_break.append([atoms] + all_nitrogens)
+                atom_pairs_at_breaking_point[atoms] = all_nitrogens[0]
+            if len(S_indx) == 1:
+                bonds_to_break.append([atoms] + S_indx)
+                atom_pairs_at_breaking_point[atoms] = S_indx[0]
 
         if ase_atom[atoms].symbol == 'P':
             '''
@@ -653,23 +770,26 @@ def secondary_building_units(ase_atom):
             2) Look for all it's neigbours
             3) Look for neigbours that are not connected to metal or hydrogen.
             '''
-            connected = graph[atoms]
-            # not_connected_to_metal_or_hygrogen = [[i for i in graph[j] if ase_atom[i].symbol not in transition_metals() or ase_atom[i].symbol != 'H'] for j in connected]
+            # seen = list(all_phosphates.keys())
+            if atoms not in seen_phosphorous:
+                connected = graph[atoms]
+                # not_connected_to_metal_or_hygrogen = [[i for i in graph[j] if ase_atom[i].symbol not in transition_metals() or ase_atom[i].symbol != 'H'] for j in connected]
 
-            metal_oxy = [[i for i in graph[j] if ase_atom[i].symbol in transition_metals()]
-                         for j in connected]
+                metal_oxy = [[i for i in graph[j] if ase_atom[i].symbol in transition_metals()]
+                             for j in connected]
 
-            metal = sum(metal_oxy, [])
-            closest_atoms = sum(
-                [[i for i in graph[j] if i != atoms and not ase_atom[i].symbol in transition_metals()] for j in connected], [])
+                metal = sum(metal_oxy, [])
+                metal = [i for i in metal if i not in porphyrin_checker]
+                closest_atoms = sum(
+                    [[i for i in graph[j] if i != atoms and not ase_atom[i].symbol in transition_metals()] for j in connected], [])
 
-            if len(metal) > 0:
-                all_carbon_indices = sum([[i for i in graph[j] if i in connected]
-                                          for j in closest_atoms], [])
-                for frag in all_carbon_indices:
-                    atom_pairs_at_breaking_point[atoms] = frag
-                    atom_pairs_at_breaking_point[frag] = atoms
-                    bonds_to_break.append([atoms, frag])
+                if len(metal) > 0:
+                    all_carbon_indices = sum([[i for i in graph[j] if i in connected]
+                                              for j in closest_atoms], [])
+                    for frag in all_carbon_indices:
+                        atom_pairs_at_breaking_point[atoms] = frag
+                        atom_pairs_at_breaking_point[frag] = atoms
+                        bonds_to_break.append([atoms, frag])
 
         if ase_atom[atoms].symbol == 'B':
             '''
@@ -681,6 +801,7 @@ def secondary_building_units(ase_atom):
                          for j in connected]
 
             metal_connect = sum(metal_oxy, [])
+            metal = [i for i in metal if i not in porphyrin_checker]
             if len(metal_connect) > 0:
                 for frag in metal_connect:
                     atom_pairs_at_breaking_point[frag[0]] = frag[1]
@@ -737,15 +858,18 @@ def ligands_and_metal_clusters(ase_atom):
     bonds_to_break = []
     carboxylates = find_carboxylates(ase_atom)
     all_sulphates = find_carbonyl_sulphate(ase_atom)
-    Seen_oxygen = []
+    all_sulfides = find_sulfides(ase_atom)
+    all_phosphates = find_phosphate(ase_atom)
+    seen_sulphure = sum(list(all_sulfides.values()), [])
+
     for atoms in graph:
         if atoms in list(carboxylates.keys()):
             oxygen = carboxylates[atoms]
             for oxy in oxygen:
                 metal = [i for i in graph[oxy]
                          if ase_atom[i].symbol in transition_metals()]
+                metal = [i for i in metal if i not in porphyrin_checker]
                 if len(metal) > 0:
-                    Seen_oxygen.append(oxy)
                     for met in metal:
                         bonds_to_break.append([atoms] + [met])
                         atom_pairs_at_breaking_point[atoms] = met
@@ -757,36 +881,52 @@ def ligands_and_metal_clusters(ase_atom):
             for oxy in oxygen:
                 metal = [i for i in graph[oxy]
                          if ase_atom[i].symbol in transition_metals()]
+                metal = [i for i in metal if i not in porphyrin_checker]
                 if len(metal) > 0:
                     for met in metal:
                         bonds_to_break.append([oxy] + [met])
                         atom_pairs_at_breaking_point[oxy] = met
 
-        if ase_atom[atoms].symbol == 'N':
+        if atoms in list(all_sulfides.keys()):
+            sulphure = all_sulfides[atoms]
             connected = graph[atoms]
-            metal = [
-                i for i in connected if ase_atom[i].symbol in transition_metals()]
-            if len(metal) > 0 and atoms not in porphyrin_checker:
-                for met in metal:
-                    bonds_to_break.append([atoms, met])
-                    atom_pairs_at_breaking_point[atoms] = met
+            for sulf in sulphure:
+                metal = [i for i in graph[sulf]
+                         if ase_atom[i].symbol in transition_metals()]
+                metal = [i for i in metal if i not in porphyrin_checker]
+                if len(metal) > 0:
+                    for met in metal:
+                        bonds_to_break.append([sulf, met])
+                        atom_pairs_at_breaking_point[sulf] = met
+
+        if ase_atom[atoms].symbol == 'N':
+            if atoms not in porphyrin_checker:
+                connected = graph[atoms]
+                metal = [
+                    i for i in connected if ase_atom[i].symbol in transition_metals()]
+                if len(metal) > 0 and atoms not in porphyrin_checker:
+                    for met in metal:
+                        bonds_to_break.append([atoms, met])
+                        atom_pairs_at_breaking_point[atoms] = met
                 # atom_pairs_at_breaking_point [metal[0]] = atoms
 
         if ase_atom[atoms].symbol == 'S':
-            connected = graph[atoms]
-            metal = [
-                i for i in connected if ase_atom[i].symbol in transition_metals()]
-            if len(metal) > 0:
+            if atoms not in seen_sulphure:
+                connected = graph[atoms]
+                metal = [
+                    i for i in connected if ase_atom[i].symbol in transition_metals()]
+                metal = [i for i in metal if i not in porphyrin_checker]
                 if len(metal) > 0:
                     for met in metal:
                         bonds_to_break.append([atoms] + [met])
                         atom_pairs_at_breaking_point[atoms] = met
 
         if ase_atom[atoms].symbol == 'O':
-            # if not atoms in Seen_oxygen:
+
             connected = graph[atoms]
             metal = [
                 i for i in connected if ase_atom[i].symbol in transition_metals()]
+            metal = [i for i in metal if i not in porphyrin_checker]
             Nitrogen = [i for i in connected if ase_atom[i].symbol == 'N']
             carbon = [i for i in connected if ase_atom[i].symbol == 'C']
             if len(metal) > 0 and len(carbon) == 1:
@@ -811,18 +951,20 @@ def ligands_and_metal_clusters(ase_atom):
             '''
             Find the carbon closest to P, which is not bonded to a metal and cut
             '''
-            connected = [i for i in graph[atoms]
-                         if ase_atom[i].symbol not in transition_metals()]
-            metal_oxy = [[i for i in graph[j] if ase_atom[i].symbol in transition_metals()]
-                         for j in connected]
-
-            metal = sum(metal_oxy, [])
-            closest_atoms = sum([[[i, j] for i in graph[j] if i != atoms and ase_atom[i].symbol in
-                                  transition_metals()]for j in connected], [])
-
-            for frag in closest_atoms:
-                bonds_to_break.append(frag)
-                atom_pairs_at_breaking_point[frag[0]] = frag[1]
+            if atoms not in list(all_phosphates.keys()):
+                connected = [i for i in graph[atoms]
+                             if ase_atom[i].symbol not in transition_metals()]
+                metal_oxy = [[i for i in graph[j] if ase_atom[i].symbol in transition_metals()]
+                             for j in connected]
+
+                metal = sum(metal_oxy, [])
+                metal = [i for i in metal if i not in porphyrin_checker]
+                closest_atoms = sum([[[i, j] for i in graph[j] if i != atoms and ase_atom[i].symbol in
+                                    transition_metals()]for j in connected], [])
+
+                for frag in closest_atoms:
+                    bonds_to_break.append(frag)
+                    atom_pairs_at_breaking_point[frag[0]] = frag[1]
 
     for bonds in bonds_to_break:
         bond_matrix[bonds[0], bonds[1]] = 0
-- 
GitLab