impl: Generate on top of original txt file

2024-02-15 21:50:56 +01:00 · 2024-02-15 21:50:56 +01:00 · 654c5d0d14
parent bfce84868e
commit 654c5d0d14
3 changed files with 102 additions and 22 deletions
--- a/.gitignore
+++ b/.gitignore
@ -9,3 +9,9 @@ build
 htmlcov
 __pycache__/
 config.toml
+
+*.txt
+*.ttc
+*.ttf
+envdata
+venv/
--- a/orig.py
+++ b/orig.py
@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""Overlay the parsed translation onto the original script, writing out.txt."""
+
+from collections import deque
+
+import config
+import parser
+from unidecode import unidecode
+
+# Code point ranges used to recognise lines that start with Japanese text.
+japanese_ranges = [
+    (0x4E00, 0x9FFF),  # Kanji
+    (0x3040, 0x309F),  # Hiragana
+    (0x30A0, 0x30FF),  # Katakana
+    (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
+    (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
+]
+ENCODING = 'shift_jisx0213'
+
+# deque: entries are consumed from the front, so popleft() stays O(1).
+translation = deque(parser.main())
+
+# Both files are managed by `with` so out.txt is closed even on error.
+with open(config.get('original_path'), 'r', encoding=ENCODING) as file, \
+        open('out.txt', 'w', encoding=ENCODING) as outfile:
+    for line in file:
+        found = any(lo <= ord(line[0]) <= hi for lo, hi in japanese_ranges)
+
+        if not (found and translation):
+            # Not a Japanese line, or nothing left to substitute: copy as is.
+            outfile.write(line)
+            continue
+
+        # One translated entry is consumed per "@" or "¥" on the source line.
+        amount = line.count("@") + line.count("¥")
+
+        outfile.write('`')
+        for _ in range(amount):
+            text = unidecode(translation.popleft()[1])
+            outfile.write(text.replace("\\", "¥"))
+        if amount > 0:
+            outfile.write('\n')
--- a/parser.py
+++ b/parser.py
@ -18,8 +18,8 @@ def get_functions_from_file(filepath: str):
        insideComment = False
        insideToken = False
        currentWord = ""
-        currentTokenName = ""
-        currentTokenContent = ""
+        currentToken = []
+        currentTokenParameter = ""

        # TODO: Prettify this
        while True:
@ -35,17 +35,27 @@ def get_functions_from_file(filepath: str):
                currentWord += char

            if insideToken and char not in "\n\r":
-                currentTokenContent += char
+                currentTokenParameter += char

            # Skip comments
            if insideComment:
                if char == "\n":
-                    print("Skipped comment....")
+                    #print("Skipped comment....")
                    insideComment = False

                continue

-            if char == '"':
+            # Keep track of whether or not we're inside quotes
+            # Also handle the case where the quotation marks are
+            # escaped. We do this by seeing if in the current word
+            # there's a backslash in the second to last position.
+            if (
+                char == '"'
+                and (
+                    len(currentWord) <= 1
+                    or currentWord[-2] != "\\"
+                )
+            ):
                insideQuotes = not insideQuotes

            if not insideQuotes:
@ -53,20 +63,24 @@ def get_functions_from_file(filepath: str):
                    insideComment = True

                if char == "(":
-                    currentTokenName = currentWord
+                    # Write the function name
+                    currentToken.append(currentWord[:-1])
                    insideToken = True

+                if char == ",":
+                    currentToken.append(currentTokenParameter[:-1].strip())
+                    currentTokenParameter = ""
+
+
                if insideToken and char == ")":
                    insideToken = False

+                    currentToken.append(currentTokenParameter[:-1].strip())
+                    currentTokenParameter = ""
+
                    # We have the whole function call, save the result
-                    tokens.append([
-                            # Using [1:] and [:-1] to remove the parenthesis
-                            currentTokenName[:-1].strip(),
-                            currentTokenContent[:-1].strip(),
-                    ])
-                    currentTokenName = ""
-                    currentTokenContent = ""
+                    tokens.append(currentToken)
+                    currentToken = []

    return tokens

@ -74,22 +88,40 @@ def main():
    scripts_path = config.get("scripts_path")

    tokens = get_functions_from_file(
-        os.path.join(scripts_path,"onik_005.txt")
+        os.path.join(scripts_path,"onik_000.txt")
    )

    structure = []

    for token in tokens:
-        #if not token[0].startswith("Output"):
-        if not token[0] == "OutputLineAll":
-            continue
+        if token[0] == "OutputLine":
+            dialogue = token[4][1:-1].replace('\\', '')

-        if "Line_ContinueAfterTyping" in token[1] and "\\n" in token[1]:
-            count = token[1].count("\\n")
-            print(token[1], count)
-            structure.append(["LineBreak", count])
+            if token[-1] == "Line_Normal":
+                dialogue += "\\"
+            elif token[-1] == "Line_WaitForInput":
+                dialogue += "@"
+            elif token[-1] == "Line_ContinueAfterTyping":
+                pass
+            else:
+                raise Exception("Unhandled output termination")

-    print(structure)
+            structure.append(["OutputLine", dialogue])
+
+        elif (
+            token[0] == "OutputLineAll"
+            and "Line_ContinueAfterTyping" == token[-1]
+            and "\\n" in token[2]
+        ):
+            count = token[2].count("\\n")
+            #structure.append(["LineBreak", count])
+
+    #for coso in structure:
+    #    if coso[0] == "OutputLine":
+    #        print(coso[1], end="")
+    #    elif coso[0] == "LineBreak":
+    #        print(":".join(["br"] * coso[1]))
+    return structure

 if __name__  == "__main__":
    main()