def handler_composite(file_path: str) -> set[Triple]:
    """Dispatch a source file to the parser that matches its extension.

    Args:
        file_path: Path of the file to parse.

    Returns:
        The triples produced by ``handler_maple`` for ``.mpl`` files or by
        ``handler_libxc_c`` for ``.c`` files; an empty set for any other
        extension.
    """
    if file_path.endswith(".mpl"):
        return handler_maple(file_path)
    if file_path.endswith(".c"):
        return handler_libxc_c(file_path)
    # Unsupported file type: contribute no triples instead of failing.
    return set()


def __main__():
    """Parse the Maple and C sources below the Libxc path and post their triples."""
    # set up Neo4j connection
    config.DATABASE_URL = "bolt://{}:{}@{}:{}".format(*get_neo4j_settings())

    # The whole Libxc tree is scanned, so only the root path is needed here;
    # the Maple sub-directory setting is deliberately left unused.
    libxc_path, _maple_subdir = get_libxc_settings()
    source_dir = f"{libxc_path}/"

    # Collect Maple files first, then C files (same order as two separate scans).
    # ``Path.name`` yields the bare file name regardless of the path separator.
    # NOTE(review): rglob descends into sub-directories but only the bare name
    # is passed on — confirm trace_files can locate nested files from it.
    code_files = [
        file.name
        for pattern in ("*.mpl", "*.c")
        for file in Path(source_dir).rglob(pattern)
    ]

    for code_file in code_files:
        print(f"Processing {code_file}...")
        triples = trace_files(code_file, source_dir, handler_composite, triples=set())
        for triple in triples:
            post(triple)
# Names used below that come from the module's imports: ``re``,
# ``typing.Optional`` and, from ``general_parser``: Triple, Block, read_file,
# clean_line, process_lines, file_path_to_name.

# Captures the quoted target of an ``#include "..."`` directive. The character
# class stops at the first closing quote, so a later quoted string on the same
# line (e.g. inside a trailing comment) is not swallowed into the file name.
_INCLUDE_RE = re.compile(r'#include\s+"([^"]+)"')

_MAPLE2C_PREFIX = "maple2c"


# matching functions
def is_file_reference(line: str) -> bool:
    """Return True when *line* starts with ``#include``."""
    return line.startswith("#include")


def is_description_start(line: str) -> bool:
    """Return True when *line* starts with ``const xc_func_info_type``."""
    return line.startswith("const xc_func_info_type")


def is_description_end(line: str) -> bool:
    """Return True when *line* ends with ``};``."""
    return line.endswith("};")


def is_function_redefinition_start(line: str) -> bool:
    """Return True when *line* starts with ``static void``."""
    return line.startswith("static void")


def is_function_redefinition_end(line: str) -> bool:
    """Return True when *line* ends with ``}``."""
    return line.endswith("}")


# Processing functions
def process_file_reference(file_name: str, lines: Block) -> Optional[Triple]:
    """Turn an ``#include "..."`` line into a ``FileReference`` triple.

    Includes whose target starts with ``maple2c`` are mapped to the Maple
    source: the leading ``maple2c`` becomes ``maple`` and a trailing ``.c``
    becomes ``.mpl``. Only that prefix and that suffix are rewritten, so a
    ``2c`` or ``.c`` occurring elsewhere in the path is left intact.

    Args:
        file_name: Name of the file that contains the include.
        lines: Block whose first line holds the include directive.

    Returns:
        ``(file_name, target_file_name, "FileReference")``, or ``None`` when
        the block is empty or its first line has no quoted include target
        (for example ``#include <stdio.h>``).
    """
    if not lines:
        return None

    match = _INCLUDE_RE.search(lines[0])
    if match is None:
        return None

    target_file_name = match.group(1)
    if target_file_name.startswith(_MAPLE2C_PREFIX):
        target_file_name = "maple" + target_file_name[len(_MAPLE2C_PREFIX):]
        if target_file_name.endswith(".c"):
            target_file_name = target_file_name[: -len(".c")] + ".mpl"
        # Drop empty path components so "a//b" collapses to "a/b".
        target_file_name = "/".join(
            part for part in target_file_name.split("/") if part
        )
    return (file_name, target_file_name, "FileReference")


# Composed functions
def handler_libxc_c(file_path: str) -> set[Triple]:
    """Handle a Libxc C file and return its file-reference triples.

    Args:
        file_path: Path of the C file to parse.

    Returns:
        One ``FileReference`` triple per include block that
        ``process_file_reference`` can resolve.
    """
    # presumably read_file applies clean_line to every line — verify in general_parser
    contents = read_file(file_path, clean_line)
    file_name = file_path_to_name(file_path)

    # extract file references; the leftover contents are not used further here
    _remaining, blocks = process_lines(contents, is_file_reference, None)

    triples: set[Triple] = set()
    for block in blocks:
        if triple := process_file_reference(file_name, block):
            triples.add(triple)
    return triples