Start parsing into tokens, add config.toml

2024-02-14 17:05:07 +01:00 · 2024-02-14 17:05:07 +01:00 · bfce84868e
commit bfce84868e
5 changed files with 128 additions and 0 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,13 @@
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# 4 space indentation
+[*.py]
+indent_style = space
+indent_size = 4
+charset = utf-8
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,11 @@
+.DS_Store
+.idea
+*.log
+tmp/
+
+*.py[cod]
+*.egg
+build
+htmlcov
+__pycache__/
+config.toml
--- a/config.py
+++ b/config.py
@@ -0,0 +1,8 @@
+import tomllib
+
+with open("config.toml", "rb") as f:  # tomllib.load requires a binary file object
+    config = tomllib.load(f)  # parsed once, when this module is imported
+
+
+def get(q: str):
+    return config.get(q, None)
--- a/config.toml.example
+++ b/config.toml.example
@@ -0,0 +1 @@
+scripts_path = "/path/to/higurashi/sdk/Scripts/"
--- a/parser.py
+++ b/parser.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
+import os
+import config
+
+def get_functions_from_file(filepath: str) -> list[list[str]]:
+    """
+    Reads a file one character at a time and returns the function calls
+    it finds as a list of lists with the following structure:
+
+    ["<function_name>", "<argument_1>, <argument2>, ..."]
+    """
+    # Each entry is [name, raw argument text]; the arguments are not split
+    tokens = []
+
+    with open(filepath, 'r') as file:
+        insideQuotes = False
+        insideComment = False
+        insideToken = False
+        currentWord = ""
+        currentTokenName = ""
+        currentTokenContent = ""
+        # NOTE: the checks in the loop body depend on their order
+        # TODO: Prettify this
+        while True:
+            char = file.read(1)
+            if len(char) != 1:
+                break
+
+            # currentWord is the run of characters since the last whitespace
+
+            if char in " \n\t\r":
+                currentWord = ""
+            else:
+                currentWord += char
+            # NOTE: done before the comment check, so comment text is collected too
+            if insideToken and char not in "\n\r":
+                currentTokenContent += char
+
+            # Skip the rest of a comment, up to and including its newline
+            if insideComment:
+                if char == "\n":
+                    print("Skipped comment....")
+                    insideComment = False
+
+                continue
+            # Quotes simply toggle; escaped quotes are not recognised
+            if char == '"':
+                insideQuotes = not insideQuotes
+            # Comments and calls are only detected outside of quoted text
+            if not insideQuotes:
+                if currentWord == "//":
+                    insideComment = True
+                # The word ending in "(" becomes the name of the call
+                if char == "(":
+                    currentTokenName = currentWord
+                    insideToken = True
+
+                if insideToken and char == ")":
+                    insideToken = False
+
+                    # We have the whole function call, save the result
+                    tokens.append([
+                            # [:-1] drops the trailing "(" / ")" picked up while scanning
+                            currentTokenName[:-1].strip(),
+                            currentTokenContent[:-1].strip(),
+                    ])
+                    currentTokenName = ""
+                    currentTokenContent = ""
+
+    return tokens
+
+def main():
+    scripts_path = config.get("scripts_path")
+
+    tokens = get_functions_from_file(
+        os.path.join(scripts_path,"onik_005.txt")
+    )
+
+    structure = []
+
+    for token in tokens:
+        #if not token[0].startswith("Output"):
+        if not token[0] == "OutputLineAll":
+            continue
+
+        if "Line_ContinueAfterTyping" in token[1] and "\\n" in token[1]:
+            count = token[1].count("\\n")
+            print(token[1], count)
+            structure.append(["LineBreak", count])
+
+    print(structure)
+
+if __name__  == "__main__":
+    main()
				`@@ -0,0 +1 @@`
				`scripts_path = "/path/to/higurashi/sdk/Scripts/"`