From c44c7ebc13ad0e597c8583f02221b4ef6c111a84 Mon Sep 17 00:00:00 2001 From: dusk Date: Thu, 22 Feb 2024 23:55:11 +0100 Subject: [PATCH] Preliminary proper parser support --- src/fix.py | 8 +++++ src/flow.py | 10 +++---- src/orig.py | 18 ++++++++++- src/parser.py | 2 +- src/scripter.py | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 src/scripter.py diff --git a/src/fix.py b/src/fix.py index cae8ac7..adcfa5b 100644 --- a/src/fix.py +++ b/src/fix.py @@ -8,6 +8,10 @@ def open_onikakushi() -> str: outfile = open(outpath, 'w', encoding='shift_jisx0213') replacements = { + # Misc + 12: 'caption"Onikakushi-hen"', + 23: 'rmenu "Save",save,"Load",load,"Skip",skip,"Hide UI",windowerase,"Log",lookback,"Exit",reset', + # onik_000 1202: '「ゴ・メ・ン・ナ・サ・イ!sd!w800って言ってみな。@言わないならぁ……!」@', 1630: '「…圭ちゃんは食らうの初めてだよね。@…今日のはまだ!s100…甘い方…。」!sd', 1823: '「け!w300……圭ちゃんがしてよ……!s300。おじさんは応援してる……。」!sd@', @@ -35,6 +39,10 @@ def open_onikakushi() -> str: 5792: '……くっくっく!!sd うつけ者めッ!!!@', 5966:'「ぎゃぎゃ!s250………!s80ぎゃああぁあぁあぁあぁああッ!!!」!sd¥', 6224:' とりとめのないおしゃべり。@', + # onik_009 + 6645:'quakey 5,200', + 6646:'………あれ?@ 圭一くんと梨花ちゃんは…なんでスタートしないんだろ? 
しないんだろ?」@', + 6647:' 魅音と沙都子は猛ダッシュで会場に散ったが、俺と梨花ちゃんは焦る様子もなく、ただ立ったままだ。¥', } for i, line in enumerate(origfile): diff --git a/src/flow.py b/src/flow.py index 081aa49..fc85294 100644 --- a/src/flow.py +++ b/src/flow.py @@ -3,11 +3,11 @@ onik = { "gamestart" : [ 'onik_000.txt', - 'onik_001.txt', - 'onik_002.txt', - 'onik_003.txt', - 'onik_004.txt', - 'onik_005.txt', + # 'onik_001.txt', + # 'onik_002.txt', + # 'onik_003.txt', + # 'onik_004.txt', + # 'onik_005.txt', # 'onik_009.txt', # 'onik_009_02.txt', # 'onik_010.txt', diff --git a/src/orig.py b/src/orig.py index 1ddd67a..87fa5f7 100755 --- a/src/orig.py +++ b/src/orig.py @@ -5,6 +5,7 @@ import sys from unidecode import unidecode +import scripter import config import parser import flow @@ -74,7 +75,6 @@ def process_sections(): outfile.close() origfile.close() - def get_symbols(line: str) -> (str, list[str]): res = [] start_symbol = '' @@ -134,13 +134,28 @@ def write_translated(outfile, origfile, translation_file_paths): parser.parse_to_csv(transfilepath) structure = parser.parse_to_structure(transfilepath) + testfile = open('test.txt', 'w') + for line in origfile: + + tokens = scripter.parse_line(line) + tkns = '' + for t in tokens: + if len(tkns) > 0: + tkns += ' | ' + tkns += t.token + testfile.write(tkns + '\n') + + continue + global debug_current_line debug_current_line += 1 # Check if the current line is a dialogue line or not if line_should_be_translated(line): + + start, symbols = get_symbols(line) print("\n-", debug_current_line, transfilepath, [start], symbols) @@ -183,3 +198,4 @@ def write_translated(outfile, origfile, translation_file_paths): else: outfile.write(line) + testfile.close() diff --git a/src/parser.py b/src/parser.py index fb88804..211d3b7 100755 --- a/src/parser.py +++ b/src/parser.py @@ -103,7 +103,7 @@ def parse_to_csv(filename: str): csv_writer = csv.writer( csvfile, delimiter=delchar, - quoting=csv.QUOTE_ALL, + quoting=csv.QUOTE_NONE, quotechar=escapechar, ) diff --git a/src/scripter.py 
# Patch header (kept verbatim): b/src/scripter.py new file mode 100644 index 0000000..6a1c26a --- /dev/null +++ b/src/scripter.py @@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Tokenizer for Nscripter script lines written in Japanese mode."""

from enum import Enum


class TokenType(Enum):
    """Kind of a token produced by ``parse_line``."""

    # Command tokens represent all non dialogue keywords.
    # If the commands are chained without text in between
    # they will be parsed as a single token.
    COMMAND = 1
    TEXT = 2


class ScripterToken():
    """A run of consecutive characters that share one ``TokenType``."""

    def __init__(self, token: str, type: TokenType):
        # `type` shadows the builtin, but the parameter name is part of the
        # constructor's interface and is kept for callers using keywords.
        self.token = token
        self.type = type

    def __repr__(self) -> str:
        return f"ScripterToken({self.token!r}, {self.type!s})"


# Inclusive code point ranges that is_double_width() reports as double width.
# Built once at import time instead of on every call (one call per character).
_DOUBLE_WIDTH_RANGES = (
    (0x4E00, 0x9FFF),  # Kanji
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    # NOTE(review): this range also contains the half-width katakana
    # U+FF61-U+FF9F -- confirm they should count as double width.
    (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
    (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
    (0x201C, 0x201D),  # The characters “ ”
    (0x2026, 0x2026),  # The character …
    (0x2606, 0x2606),  # ☆ (Nscripter treats it like a dw character)
)


# Parse and tokenize an Nscripter script line in japanese mode
# The original japanese mode differentiates between single and
# double byte characters
def parse_line(line: str) -> list[ScripterToken]:
    """Split *line* into alternating COMMAND and TEXT tokens.

    Characters are grouped into maximal runs: a run of double-width
    characters becomes a TEXT token, a run of any other characters becomes
    a COMMAND token. Parsing stops at the first ``;`` (comment start) or
    newline; neither character is included in any token.

    Returns the tokens in order of appearance. A line with nothing to parse
    (empty, only a newline, or starting with a comment) yields an empty
    list rather than a single empty token.
    """
    token_list = []
    current_token = None

    for char in line:
        # Comments signify the end of what should be parsed
        # Newline does not constitute a token, skip
        if char == ';' or char == '\n':
            break

        if is_double_width(char):
            char_type = TokenType.TEXT
        else:
            char_type = TokenType.COMMAND

        # Start a new token on the first character and whenever the
        # character class flips between single and double width.
        if current_token is None or current_token.type != char_type:
            current_token = ScripterToken('', char_type)
            token_list.append(current_token)

        current_token.token += char

    return token_list


def is_double_width(char: str) -> bool:
    """Return True if the first character of *char* is in a double-width range.

    Only ``char[0]`` is inspected. An empty string returns False instead of
    raising ``IndexError``.
    """
    if not char:
        return False

    code_point = ord(char[0])
    return any(start <= code_point <= end
               for start, end in _DOUBLE_WIDTH_RANGES)