From 5319453ed6eba2e3dc9e0b96dcac94ff344d0205 Mon Sep 17 00:00:00 2001 From: Christian Boulanger <info@bibliograph.org> Date: Thu, 31 Oct 2024 18:43:21 +0100 Subject: [PATCH] Updating HedgeDoc experiment (not tested) --- hedgedoc/hedgedoc_api.py | 3 + hedgedoc/hedgedoc_sync.py | 212 ++++++++++++++++++++++++++++++++ hedgedoc/hedgedoc_sync_local.py | 119 ------------------ hedgedoc/requirements.txt | 3 +- 4 files changed, 217 insertions(+), 120 deletions(-) create mode 100644 hedgedoc/hedgedoc_sync.py delete mode 100644 hedgedoc/hedgedoc_sync_local.py diff --git a/hedgedoc/hedgedoc_api.py b/hedgedoc/hedgedoc_api.py index b3063db..4b0b9d5 100644 --- a/hedgedoc/hedgedoc_api.py +++ b/hedgedoc/hedgedoc_api.py @@ -14,6 +14,9 @@ class HedgedocClient: def __init__(self, base_url) -> None: self.BASE_URL = base_url + + def base_url(self): + return self.BASE_URL def upload_image(self, image_path): """ diff --git a/hedgedoc/hedgedoc_sync.py b/hedgedoc/hedgedoc_sync.py new file mode 100644 index 0000000..2cf0b13 --- /dev/null +++ b/hedgedoc/hedgedoc_sync.py @@ -0,0 +1,212 @@ +from hedgedoc_api import HedgedocClient, find_image_urls +import argparse +import os +from urllib.parse import urlparse +from platformdirs import user_cache_dir +import json +import time +import threading +from datetime import datetime +import keyboard + +def parse_url(url): + parsed_url = urlparse(url) + base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" + doc_id = parsed_url.path.lstrip('/') # remove leading '/' from path + return base_url, doc_id + +# run a function in an interval but stop when the escape key is pressed +stop_thread = False +def call_interval(func, interval): + def wrapper(): + while True: + if stop_thread: + break + time.sleep(interval) + func() + thread = threading.Thread(target=wrapper) + thread.start() + +def set_stop_thread(e): + global stop_thread + stop_thread = True + +keyboard.on_press_key('esc', set_stop_thread) + + +def parse_time(time_string): + if time_string is None: + return None + return datetime.strptime(time_string, '%Y-%m-%dT%H:%M:%S.%fZ') + + +def flip_dict(input_dict): + flipped_dict = {v: k for k, v in input_dict.items()} + return flipped_dict + + +class HedgedocSyncClient(HedgedocClient): + + doc = None + doc_id = None + doc_file = None + meta_file = None + meta_data = None + verbose = None + updatetime = None + + def __init__(self, basel_url, doc_id=None, verbose = False): + if not doc_id: + raise ValueError("A HedgeDoc document id is required") + + self.cache_dir = user_cache_dir('hedgedoc-sync-local') + self.doc_id = doc_id + self.doc_file = os.path.join(self.cache_dir, f'{self.doc_id}.md') + self.verbose = verbose + self.meta_file = os.path.join(self.cache_dir, f'{self.doc_id}.json') + super().__init__(basel_url) + self.init() + + def init(self): + # load or create metadata file + if os.path.isfile(self.meta_file): + with open(self.meta_file,'r', encoding='utf-8') as f: + self.meta_data = json.load(f) + else: + os.makedirs(os.path.dirname(self.meta_file), exist_ok=True) + self.meta_data = { + "images": {} + } + self.save_metadata() + + # cache document content + os.makedirs(os.path.dirname(self.doc_file), exist_ok=True) + if self.doc_id is not None: + markdown = self.get_note_content(self.doc_id) + self.save_doc(markdown) + + def get_image_map(self): + """Returns the reference to a dict containing the mapping of original to copied images""" + return self.meta_data['images'] + + def document_url(self): + return f'{self.base_url()}/{self.doc_id}' + + def last_update_time(self): + note_metadata = self.get_note_metadata(self.doc_id) + updatetime = note_metadata.get('updatetime', None) or note_metadata.get('createtime', None) + return parse_time(updatetime) if updatetime else None + + def copy_images_and_update_urls(self, markdown, image_map = None): + """checks if the image URLs in the markdown have already been copied and uploads the images if not. + Returns a copy of the markdown document with the replaced URLs and a dict mapping the original + image urls to the local copies.""" + + if image_map is None: + image_map = {} + else: + # make sure to have no side effects + image_map = image_map.copy() + + image_urls = find_image_urls(markdown) + total = len(image_urls) + if total > 0: + for index, image_url in enumerate(image_urls): + new_image_url = image_map.get(image_url, None) + if new_image_url is None: + if self.verbose: + print(f' - Uploading {index+1} of {total} images to {self.base_url()}') + new_image_url = self.upload_image_from_url(image_url) + image_map[image_url] = new_image_url + elif self.verbose: + print(f' - Skipping already uploaded image {index+1} of {total} images') + markdown = markdown.replace(image_url, new_image_url) + return markdown, image_map + + + def save_doc(self, markdown=None): + if markdown is not None: + self.doc = markdown + with open(self.doc_file, 'w', encoding='utf-8') as f: + f.write(self.doc) + if self.verbose: + print(f"Updated document for {self.odc_id}") + + def save_metadata(self): + with open(self.meta_file, 'w', encoding='utf-8') as f: + json.dump(self.meta_data, f) + if self.verbose: + print(f'Updated metadata for {self.meta_doc_id}.') + + + def update_from(self, other): + if type(other) is not HedgedocSyncClient: + raise ValueError("First argument must be a HedgedocSyncClient instance") + + if other.doc_id is None: + raise ValueError("Cannot update as no doc_id set on the other client.") + + if self.verbose: + print(f'Updating {self.document_url()} from {other.document_url()} ...') + + # update image mapping from other instance + image_map = { v: k for k, v in other.get_image_map().items() if k.startswith(self.base_url()) } + image_map = image_map.update(self.get_image_map()) + # replace imgae urls from other with those from self + markdown, image_map = self.copy_images_and_update_urls(other.doc, image_map) + self.get_image_map().update(image_map) + + # create note if not exists + if self.doc_id is None: + new_note_url = self.create_note(markdown.encode()) + self.doc_id = os.path.basename(new_note_url) + if self.verbose: + print(f'Created copy at {self.document_url()}') + else: + print(f'Cannot update document because of current limitations of the HedgeDoc API.') + print(f'Open {other.document_url()} and manually copy and paste content to {self.document_url()}.') + + # save the copy and the updated metadata + self.save_doc(markdown) + self.update_metadata() + + +def main(): + parser = argparse.ArgumentParser(description='Sync two markdown documents hosted in different HedgeDoc instances.') + + # Add arguments to the parser + parser.add_argument('command', help='The command to execute: checkout, checkin') + parser.add_argument('source_url', help='URL of the source document.') + parser.add_argument('target_url', help='URL of the target document. If a domain name without a path, a new document will be created.') + #parser.add_argument('-i', '--interval', type=int, default=10, help='The time interval in which the documents should be synchronized, in seconds. Default is 10 seconds.') + parser.add_argument('-v', '--verbose', action='store_true', help='Provide verbose output.') + + # Parse the arguments + args = parser.parse_args() + source_base_url, source_id = parse_url(args.source_url) + target_base_url, target_id = parse_url(args.target_url) + verbose = args.verbose + + # create the clients + source = HedgedocSyncClient(source_base_url, source_id, verbose=verbose) + target = HedgedocSyncClient(target_base_url, target_id, verbose=verbose) + + # handle the commands + if args.command == "checkout": + target.update_from(source) + print(f'Document has been checked out from {source.document_url()} to {target.document_url()}') + elif args.command == "checkin": + source.update_from(target) + print(f'Document has been checked in from {target.document_url()} to {source.document_url()}') + elif args.command == "compare": + s, t = source.last_update_time(), target.last_update_time() + if s > t: + print(f'{source.document_url()} is newer') + elif t > s: + print(f'{target.document_url()} is newer') + else: + # this will never be called + print('Documents are identical') + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/hedgedoc/hedgedoc_sync_local.py b/hedgedoc/hedgedoc_sync_local.py deleted file mode 100644 index d430a3e..0000000 --- a/hedgedoc/hedgedoc_sync_local.py +++ /dev/null @@ -1,119 +0,0 @@ -from hedgedoc_api import HedgedocClient, find_image_urls -import argparse -import os -from urllib.parse import urlparse -from platformdirs import user_cache_dir -import json - -def parse_url(url): - parsed_url = urlparse(url) - base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" - doc_id = parsed_url.path.lstrip('/') # remove leading '/' from path - return base_url, doc_id - - -def run_sync(source_base_url, source_id, sync_base_url, sync_id=None, verbose = False): - if not source_id: - raise ValueError("A HedgeDoc document id is required") - - if verbose: - print(f'Synchronizing document {source_id} from {source_base_url} to {sync_base_url}...') - - client1 = HedgedocClient(source_base_url) - client2 = HedgedocClient(sync_base_url) - - # get content and save it locally - markdown = client1.get_note_content(source_id) - cache_dir = user_cache_dir('hedgedoc-sync-local') - cache_file = os.path.join(cache_dir, f'{source_id}.md') - - os.makedirs(os.path.dirname(cache_file), exist_ok=True) - with open(cache_file, 'w', encoding='utf-8') as f: - f.write(markdown) - if verbose: - print(f"Cached source document at {cache_file}") - - # save metadata - meta_file = os.path.join(cache_dir, f'{source_id}.json') - if os.path.isfile(meta_file): - with open(meta_file,'r', encoding='utf-8') as f: - sync_info = json.load(f) - # check if sync_ids match - cached_sync_id = sync_info.get('sync_id', None) - if sync_id and cached_sync_id and sync_id != cached_sync_id: - raise ValueError("Mismatch of target document id and cached document id.") - if not sync_id and cached_sync_id: - sync_id = cached_sync_id - else: - sync_info = { - 'images': {} - } - - # copy images - image_urls = find_image_urls(markdown) - total = len(image_urls) - if total > 0: - for index, image_url in enumerate(image_urls): - new_image_url = sync_info['images'].get(image_url, None) - if new_image_url is None: - if verbose: - print(f' - Uploading {index+1} of {total} images') - new_image_url = client2.upload_image_from_url(image_url) - sync_info['images'][image_url] = new_image_url - elif verbose: - print(f' - Skipping already uploaded image {index+1} of {total} images') - markdown = markdown.replace(image_url, new_image_url) - - # save metadata - os.makedirs(os.path.dirname(meta_file), exist_ok=True) - with open(meta_file, 'w', encoding='utf-8') as f: - json.dump(sync_info, f) - if verbose: - print(f'Created metadata file at {meta_file}.') - - # uplaod note with new image urls if not exists - if sync_id is None or sync_id == '': - try: - new_note_url = client2.create_note(markdown) - sync_id = os.path.basename(new_note_url) - except Exception as e: - print(e) - failed_upload_file = os.path.join(cache_dir, 'failed_upload_file.md') - with open(failed_upload_file, 'w', encoding='utf-8') as f: - f.write(markdown) - print(failed_upload_file) - exit(1) - # save changed sync_id - sync_info['sync_id'] = sync_id - with open(meta_file, 'w', encoding='utf-8') as f: - json.dump(sync_info, f) - if verbose: - print(f'Creating synchronized copy at {sync_base_url} with id {sync_id}') - elif verbose: - print(f'Synchronized copy at {sync_base_url} exists with id {sync_id}') - - # save copy with the local image paths - sync_file = os.path.join(cache_dir, sync_id + '.md') - with open(sync_file, 'w', encoding='utf-8') as f: - f.write(markdown) - -def main(): - parser = argparse.ArgumentParser(description='Sync two markdown documents hosted in different HedgeDoc instances.') - - # Add arguments to the parser - parser.add_argument('source_url', help='URL of the source document.') - parser.add_argument('sync_url', help='URL of the synchronized document. If a domain name without a path, a new document will be created.') - parser.add_argument('-i', '--interval', type=int, default=10, help='The time interval in which the documents should be synchronized, in seconds. Default is 10 seconds.') - parser.add_argument('-v', '--verbose', action='store_true', help='Provide verbose output.') - - # Parse the arguments - args = parser.parse_args() - source_base_url, source_id = parse_url(args.source_url) - sync_base_url, sync_id = parse_url(args.sync_url) - verbose = args.verbose - - # run sync - run_sync(source_base_url, source_id, sync_base_url, sync_id, verbose) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/hedgedoc/requirements.txt b/hedgedoc/requirements.txt index 57f9ad3..4fce5a8 100644 --- a/hedgedoc/requirements.txt +++ b/hedgedoc/requirements.txt @@ -1 +1,2 @@ -platformdirs \ No newline at end of file +platformdirs +keyboard \ No newline at end of file -- GitLab