Preliminary proper parser support

This commit is contained in:
Dusk 2024-02-22 23:55:11 +01:00
parent f9eda09dea
commit c44c7ebc13
5 changed files with 111 additions and 7 deletions

View File

@ -8,6 +8,10 @@ def open_onikakushi() -> str:
outfile = open(outpath, 'w', encoding='shift_jisx0213') outfile = open(outpath, 'w', encoding='shift_jisx0213')
replacements = { replacements = {
# Misc
12: 'caption"Onikakushi-hen"',
23: 'rmenu "Save",save,"Load",load,"Skip",skip,"Hide UI",windowerase,"Log",lookback,"Exit",reset',
# onik_000
1202: '「ゴ・メ・ン・ナ・サ・イ!sd!w800って言ってみな。@言わないならぁ……!」@', 1202: '「ゴ・メ・ン・ナ・サ・イ!sd!w800って言ってみな。@言わないならぁ……!」@',
1630: '「…圭ちゃんは食らうの初めてだよね。@…今日のはまだ!s100…甘い方…。」!sd', 1630: '「…圭ちゃんは食らうの初めてだよね。@…今日のはまだ!s100…甘い方…。」!sd',
1823: '「け!w300……圭ちゃんがしてよ……!s300。おじさんは応援してる……。」!sd@', 1823: '「け!w300……圭ちゃんがしてよ……!s300。おじさんは応援してる……。」!sd@',
@ -35,6 +39,10 @@ def open_onikakushi() -> str:
5792: '……くっくっく!!sd うつけ者めッ@', 5792: '……くっくっく!!sd うつけ者めッ@',
5966:'「ぎゃぎゃ!s250………!s80ぎゃああぁあぁあぁあぁああッ」!sd¥', 5966:'「ぎゃぎゃ!s250………!s80ぎゃああぁあぁあぁあぁああッ」!sd¥',
6224:' とりとめのないおしゃべり。@', 6224:' とりとめのないおしゃべり。@',
# onik_009
6645:'quakey 5,200',
6646:'………あれ?@ 圭一くんと梨花ちゃんは…なんでスタートしないんだろ? しないんだろ?」@',
6647:' 魅音と沙都子は猛ダッシュで会場に散ったが、俺と梨花ちゃんは焦る様子もなく、ただ立ったままだ。¥',
} }
for i, line in enumerate(origfile): for i, line in enumerate(origfile):

View File

@ -3,11 +3,11 @@
onik = { onik = {
"gamestart" : [ "gamestart" : [
'onik_000.txt', 'onik_000.txt',
'onik_001.txt', # 'onik_001.txt',
'onik_002.txt', # 'onik_002.txt',
'onik_003.txt', # 'onik_003.txt',
'onik_004.txt', # 'onik_004.txt',
'onik_005.txt', # 'onik_005.txt',
# 'onik_009.txt', # 'onik_009.txt',
# 'onik_009_02.txt', # 'onik_009_02.txt',
# 'onik_010.txt', # 'onik_010.txt',

View File

@ -5,6 +5,7 @@ import sys
from unidecode import unidecode from unidecode import unidecode
import scripter
import config import config
import parser import parser
import flow import flow
@ -74,7 +75,6 @@ def process_sections():
outfile.close() outfile.close()
origfile.close() origfile.close()
def get_symbols(line: str) -> (str, list[str]): def get_symbols(line: str) -> (str, list[str]):
res = [] res = []
start_symbol = '' start_symbol = ''
@ -134,13 +134,28 @@ def write_translated(outfile, origfile, translation_file_paths):
parser.parse_to_csv(transfilepath) parser.parse_to_csv(transfilepath)
structure = parser.parse_to_structure(transfilepath) structure = parser.parse_to_structure(transfilepath)
testfile = open('test.txt', 'w')
for line in origfile: for line in origfile:
tokens = scripter.parse_line(line)
tkns = ''
for t in tokens:
if len(tkns) > 0:
tkns += ' | '
tkns += t.token
testfile.write(tkns + '\n')
continue
global debug_current_line global debug_current_line
debug_current_line += 1 debug_current_line += 1
# Check if the current line is a dialogue line or not # Check if the current line is a dialogue line or not
if line_should_be_translated(line): if line_should_be_translated(line):
start, symbols = get_symbols(line) start, symbols = get_symbols(line)
print("\n-", debug_current_line, transfilepath, [start], symbols) print("\n-", debug_current_line, transfilepath, [start], symbols)
@ -183,3 +198,4 @@ def write_translated(outfile, origfile, translation_file_paths):
else: else:
outfile.write(line) outfile.write(line)
testfile.close()

View File

@ -103,7 +103,7 @@ def parse_to_csv(filename: str):
csv_writer = csv.writer( csv_writer = csv.writer(
csvfile, csvfile,
delimiter=delchar, delimiter=delchar,
quoting=csv.QUOTE_ALL, quoting=csv.QUOTE_NONE,
quotechar=escapechar, quotechar=escapechar,
) )

80
src/scripter.py Normal file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env python3
from enum import Enum
class TokenType(Enum):
    """Classification of a run of characters in an NScripter line."""
    # A run of single-byte characters: engine commands and keywords.
    # Consecutive commands with no text between them are parsed as
    # one single token.
    COMMAND = 1
    # A run of double-width (Japanese) characters: dialogue text.
    TEXT = 2
class ScripterToken():
    """One token produced by the NScripter line parser.

    Attributes:
        token: the raw character run making up the token.
        type: the TokenType classifying the run (COMMAND or TEXT).
    """

    def __init__(self, token: str, type: TokenType):
        self.token = token
        self.type = type

    def __repr__(self) -> str:
        # Debug-friendly representation; the parser is driven interactively
        # (test dumps), so tokens should print usefully.
        return f"ScripterToken({self.token!r}, {self.type})"
# Parse and tokenize an NScripter script line in Japanese mode.
# The original Japanese mode differentiates between single-byte
# (command) and double-byte (text) characters.
def parse_line(line: str) -> list[ScripterToken]:
    """Split *line* into alternating COMMAND/TEXT tokens.

    A token is a maximal run of characters of the same width class:
    double-width characters form TEXT tokens, single-width characters
    form COMMAND tokens. Parsing stops at a ';' comment marker or at
    the trailing newline; neither becomes part of any token.

    Returns an empty list for blank or comment-only lines.
    """
    current_token = ScripterToken('', TokenType.TEXT)
    token_list = []
    for char in line:
        # Comments signify the end of what should be parsed;
        # the newline does not constitute a token either.
        if char == ';' or char == '\n':
            break
        if len(current_token.token) == 0:
            # First character of the token decides its class.
            if is_double_width(char):
                current_token.type = TokenType.TEXT
            else:
                current_token.type = TokenType.COMMAND
        elif current_token.type == TokenType.COMMAND and is_double_width(char):
            # Width class flipped: close the COMMAND token, start a TEXT one.
            token_list.append(current_token)
            current_token = ScripterToken('', TokenType.TEXT)
        elif current_token.type == TokenType.TEXT and not is_double_width(char):
            # Width class flipped: close the TEXT token, start a COMMAND one.
            token_list.append(current_token)
            current_token = ScripterToken('', TokenType.COMMAND)
        current_token.token += char
    # Append the last token of the line — but only when it holds content.
    # (Fixes the spurious empty token previously returned for blank or
    # comment-only lines.)
    if current_token.token:
        token_list.append(current_token)
    return token_list
def is_double_width(char: str) -> bool:
    """Return True if NScripter renders *char* as a double-width glyph.

    Only the first character of *char* is examined. The check covers the
    Japanese scripts, full-width forms, CJK punctuation, curly quotes,
    the horizontal ellipsis, and the star NScripter treats as wide.
    """
    code_point = ord(char[0])
    double_width_ranges = (
        (0x4E00, 0x9FFF),  # Kanji
        (0x3040, 0x309F),  # Hiragana
        (0x30A0, 0x30FF),  # Katakana
        (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
        (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
        (0x201C, 0x201D),  # The characters “ ”
        (0x2026, 0x2026),  # The character …
        (0x2606, 0x2606),  # ☆ (NScripter treats it like a dw character)
    )
    return any(low <= code_point <= high for low, high in double_width_ranges)