From c44c7ebc13ad0e597c8583f02221b4ef6c111a84 Mon Sep 17 00:00:00 2001
From: dusk <strangealt@protonmail.com>
Date: Thu, 22 Feb 2024 23:55:11 +0100
Subject: [PATCH] Preliminary proper parser support

---
 src/fix.py      |  8 +++++
 src/flow.py     | 10 +++----
 src/orig.py     | 18 ++++++++++-
 src/parser.py   |  2 +-
 src/scripter.py | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 111 insertions(+), 7 deletions(-)
 create mode 100644 src/scripter.py

diff --git a/src/fix.py b/src/fix.py
index cae8ac7..adcfa5b 100644
--- a/src/fix.py
+++ b/src/fix.py
@@ -8,6 +8,10 @@ def open_onikakushi() -> str:
     outfile = open(outpath, 'w', encoding='shift_jisx0213')
 
     replacements = {
+        # Misc
+        12: 'caption"Onikakushi-hen"',
+        23: 'rmenu "Save",save,"Load",load,"Skip",skip,"Hide UI",windowerase,"Log",lookback,"Exit",reset',
+        # onik_000
         1202: '「ゴ・メ・ン・ナ・サ・イ!sd!w800って言ってみな。@言わないならぁ……！」@',
         1630: '「…圭ちゃんは食らうの初めてだよね。@…今日のはまだ!s100…甘い方…。」!sd',
         1823: '「け!w300……圭ちゃんがしてよ……!s300。おじさんは応援してる……。」!sd@',
@@ -35,6 +39,10 @@ def open_onikakushi() -> str:
         5792: '……くっくっく！!sd　うつけ者めッ！！！@',
         5966:'「ぎゃぎゃ!s250………!s80ぎゃああぁあぁあぁあぁああッ！！！」!sd¥',
         6224:'　とりとめのないおしゃべり。@',
+        # onik_009
+        6645:'quakey 5,200',
+        6646:'………あれ？@　圭一くんと梨花ちゃんは…なんでスタートしないんだろ？　しないんだろ？」@',
+        6647:'　魅音と沙都子は猛ダッシュで会場に散ったが、俺と梨花ちゃんは焦る様子もなく、ただ立ったままだ。¥',
     }
 
     for i, line in enumerate(origfile):
diff --git a/src/flow.py b/src/flow.py
index 081aa49..fc85294 100644
--- a/src/flow.py
+++ b/src/flow.py
@@ -3,11 +3,11 @@
 onik = {
     "gamestart" : [
         'onik_000.txt',
-        'onik_001.txt',
-        'onik_002.txt',
-        'onik_003.txt',
-        'onik_004.txt',
-        'onik_005.txt',
+        # 'onik_001.txt',
+        # 'onik_002.txt',
+        # 'onik_003.txt',
+        # 'onik_004.txt',
+        # 'onik_005.txt',
         # 'onik_009.txt',
         # 'onik_009_02.txt',
         # 'onik_010.txt',
diff --git a/src/orig.py b/src/orig.py
index 1ddd67a..87fa5f7 100755
--- a/src/orig.py
+++ b/src/orig.py
@@ -5,6 +5,7 @@ import sys
 
 from unidecode import unidecode
 
+import scripter
 import config
 import parser
 import flow
@@ -74,7 +75,6 @@ def process_sections():
     outfile.close()
     origfile.close()
 
-
 def get_symbols(line: str) -> (str, list[str]):
     res = []
     start_symbol = ''
@@ -134,13 +134,28 @@ def write_translated(outfile, origfile, translation_file_paths):
         parser.parse_to_csv(transfilepath)
         structure = parser.parse_to_structure(transfilepath)
 
+        testfile = open('test.txt', 'w')
+
         for line in origfile:
+
+            tokens = scripter.parse_line(line)
+            tkns = ''
+            for t in tokens:
+                if len(tkns) > 0:
+                    tkns += ' | '
+                tkns += t.token
+            testfile.write(tkns + '\n')
+
+            continue
+
             global debug_current_line
             debug_current_line += 1
 
             # Check if the current line is a dialogue line or not
 
             if line_should_be_translated(line):
+
+
                 start, symbols = get_symbols(line)
                 print("\n-", debug_current_line, transfilepath, [start], symbols)
 
@@ -183,3 +198,4 @@ def write_translated(outfile, origfile, translation_file_paths):
             else:
                 outfile.write(line)
 
+        testfile.close()
diff --git a/src/parser.py b/src/parser.py
index fb88804..211d3b7 100755
--- a/src/parser.py
+++ b/src/parser.py
@@ -103,7 +103,7 @@ def parse_to_csv(filename: str):
         csv_writer = csv.writer(
             csvfile,
             delimiter=delchar,
-            quoting=csv.QUOTE_ALL,
+            quoting=csv.QUOTE_NONE,
             quotechar=escapechar,
         )
 
diff --git a/src/scripter.py b/src/scripter.py
new file mode 100644
index 0000000..6a1c26a
--- /dev/null
+++ b/src/scripter.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+
+from enum import Enum
+
+
+class TokenType(Enum):
+    # Command tokens represent all non-dialogue keywords.
+    # If commands are chained without text in between,
+    # they will be parsed as a single token.
+    COMMAND = 1
+    TEXT = 2
+
+class ScripterToken():
+    def __init__(self, token: str, type: TokenType):
+        self.token = token
+        self.type = type
+
+
+# Parse and tokenize an Nscripter script line in Japanese mode.
+# The original Japanese mode differentiates between single- and
+# double-byte characters.
+def parse_line(line: str) -> list[ScripterToken]:
+    current_token = ScripterToken('', TokenType.TEXT)
+    token_list = []
+
+    for i, char in enumerate(line):
+        # Comments signify the end of what should be parsed
+        # Newline does not constitute a token, skip
+        if char == ';' or char == '\n':
+            break
+
+        # First character of the token
+        if len(current_token.token) == 0:
+            if is_double_width(char):
+                current_token.type = TokenType.TEXT
+            else:
+                current_token.type = TokenType.COMMAND
+            current_token.token += char
+
+        else:
+            # End of token
+            if current_token.type == TokenType.COMMAND and is_double_width(char):
+                # Add to list and reset the current one
+                token_list.append(current_token)
+                current_token = ScripterToken('', TokenType.TEXT)
+            elif current_token.type == TokenType.TEXT and not is_double_width(char):
+                # Add to list and reset the current one
+                token_list.append(current_token)
+                current_token = ScripterToken('', TokenType.COMMAND)
+
+            current_token.token += char
+
+    # Append last token of the line
+    token_list.append(current_token)
+
+
+    return token_list
+
+
+
+
+def is_double_width(char: str) -> bool:
+
+    japanese_ranges = [
+        (0x4E00, 0x9FFF), # Kanji
+        (0x3040, 0x309F), # Hiragana
+        (0x30A0, 0x30FF), # Katakana
+        (0xFF00, 0xFFEF), # Full-width Roman characters and symbols
+        (0x3000, 0x303F), # CJK symbols and punctuation (including 「」)
+        (0x201c, 0x201d), # The characters “ ”
+        (0x2026, 0x2026), # The character …
+        (9734, 9734), # ☆ (Nscripter treats it like a dw character)
+
+    ]
+
+    for start, end in japanese_ranges:
+        if start <= ord(char[0]) <= end:
+            return True
+
+    return False