Skip to content
Snippets Groups Projects
Commit abdc73b3 authored by Christian Boulanger
Browse files

Add pandoc experiment

parent 77b3f73f
Branches
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
from md_to_docx import MarkdownToWordConverter

# Convert a local Markdown file to a Word document, printing each step of the run.
converter = MarkdownToWordConverter(verbose=True)
converter.run(source=r'H:\Downloads\test.md', output_docx=r'H:\Downloads\test.docx')
```
%% Output
Checking if Pandoc is installed...
Pandoc found at: C:\Users\boulanger\AppData\Local\Pandoc\pandoc.exe
Converting H:\Downloads\test.md to Word document H:\Downloads\test.docx...
Conversion completed successfully! Output saved as H:\Downloads\test.docx.
import argparse
import os
import platform
import shutil
import subprocess
import sys
import tempfile

import requests
class MarkdownToWordConverter:
    """Convert a local or remote Markdown file to a Word document using Pandoc.

    Progress messages are printed only when the converter is created with
    ``verbose=True``. All failures are reported by raising ``Exception`` with
    a human-readable message.
    """

    def __init__(self, verbose=False):
        """Remember whether progress messages should be printed."""
        self.verbose = verbose

    def log(self, message):
        """Prints a message only if verbose is True."""
        if self.verbose:
            print(message)

    def download_markdown_file(self, url, output_path):
        """Download the Markdown document at ``url`` and save it to ``output_path``.

        Args:
            url: HTTP(S) address of the Markdown file.
            output_path: Path of the file to (over)write with the response text.

        Raises:
            Exception: If the request fails or the server returns an error
                status. The original ``requests`` error is chained as the cause.
        """
        self.log(f"Downloading Markdown file from {url}...")
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise Exception(f"Failed to download the Markdown file: {e}") from e
        # newline='' stops text mode from turning a downloaded "\r\n" into
        # "\r\r\n" on Windows; the content is written exactly as received.
        with open(output_path, 'w', encoding='utf-8', newline='') as file:
            file.write(response.text)
        self.log(f"Markdown file saved as {output_path}.")

    def check_pandoc_installed(self):
        """Verify that a ``pandoc`` executable can be found on the PATH.

        Raises:
            Exception: If no ``pandoc`` executable is found.
        """
        self.log("Checking if Pandoc is installed...")
        # shutil.which needs no external `where`/`which` helper program, so
        # the check cannot itself fail because that helper is missing.
        pandoc_path = shutil.which("pandoc")
        if pandoc_path is None:
            raise Exception("Pandoc is not installed or not in the system PATH.")
        self.log(f"Pandoc found at: {pandoc_path}")

    def convert_markdown_to_word(self, input_path, output_path):
        """Converts a Markdown file to a Word document using Pandoc.

        Args:
            input_path: Path of the Markdown file to read.
            output_path: Path of the Word document to write.

        Raises:
            Exception: If Pandoc cannot be started or exits with a non-zero
                status. The underlying error is chained as the cause.
        """
        self.log(f"Converting {input_path} to Word document {output_path}...")
        # Resolve the executable ourselves and run it without a shell, so
        # characters such as '&' or '|' in the paths are never interpreted
        # by cmd.exe on Windows.
        pandoc = shutil.which("pandoc") or "pandoc"
        try:
            subprocess.run([pandoc, input_path, "-o", output_path], check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            raise Exception(f"Conversion failed: {e}") from e
        self.log(f"Conversion completed successfully! Output saved as {output_path}.")

    def run(self, source, output_docx):
        """Handles the conversion process.

        Args:
            source: URL (``http://`` or ``https://``) or local path of the
                Markdown file.
            output_docx: Path of the Word document to create.

        Raises:
            Exception: If the local file does not exist, the download fails,
                Pandoc is missing, or the conversion fails.
        """
        # Only a file this method created itself is ever deleted afterwards;
        # a user's local file is never touched, whatever its name.
        temp_path = None
        try:
            # Handle remote URL or local file
            if source.startswith(("http://", "https://")):
                # A unique temp file avoids clobbering anything in the
                # current working directory.
                fd, temp_path = tempfile.mkstemp(suffix=".md")
                os.close(fd)
                self.download_markdown_file(source, temp_path)
                input_path = temp_path
            elif not os.path.isfile(source):
                raise Exception(f"Local file '{source}' does not exist.")
            else:
                input_path = source
            # Check if Pandoc is installed
            self.check_pandoc_installed()
            # Convert to Word document
            self.convert_markdown_to_word(input_path, output_docx)
        finally:
            # Clean up temporary file if used
            if temp_path is not None and os.path.isfile(temp_path):
                os.remove(temp_path)
                self.log(f"Temporary file {temp_path} removed.")
def main():
    """Command-line entry point: parse the arguments and run one conversion.

    Any error raised by the converter is printed to stderr and the process
    exits with status 1.
    """
    cli = argparse.ArgumentParser(
        description="Convert a Markdown file (local or remote) to a Word document using Pandoc."
    )
    cli.add_argument("source", help="URL or path to the local Markdown file.")
    cli.add_argument(
        "output_docx",
        help="Path to save the Word document (no default; must be specified).",
    )
    cli.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose output (default: False).",
    )
    options = cli.parse_args()

    job = MarkdownToWordConverter(verbose=options.verbose)
    try:
        job.run(options.source, options.output_docx)
    except Exception as error:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error: {error}", file=sys.stderr)
        raise SystemExit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment