From 619136d5e88aa35ddfc058acdc0036bae0579b38 Mon Sep 17 00:00:00 2001
From: cboulanger <info@bibliograph.org>
Date: Thu, 17 Oct 2024 14:57:28 +0200
Subject: [PATCH] improve fix_tail func

---
 convert-anystyle-data/lib/gold_standard.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/convert-anystyle-data/lib/gold_standard.py b/convert-anystyle-data/lib/gold_standard.py
index 7f3695a..6da1175 100644
--- a/convert-anystyle-data/lib/gold_standard.py
+++ b/convert-anystyle-data/lib/gold_standard.py
@@ -105,8 +105,9 @@ def fix_tail(elem: etree._Element, indentation="    "):
     # opening and closing quotes
     tail = re.sub(r'(\p{Pi})(\s*\n\s+)', r'\1', tail)
     tail = re.sub(r'(\s*\n\s+)(\p{Pf})', r'\2', tail)
-    # in tails without line break but with whitespace, replace the last whitespace with a linebreak
-    if '\n' not in tail and ' ' in tail:
+    # in tails without a line break that start with whitespace (re.match only tests the start), collapse whitespace runs and replace each remaining space with a line break plus indentation
+    if '\n' not in tail and re.match(r'\s', tail) is not None:
+        tail = re.sub(r'\s+', r' ', tail).strip()
         p = tail.split(" ")
         s = '\n' + indentation_level(elem) * indentation
         tail = s.join(p) 
-- 
GitLab