Preliminary proper parser support

This commit is contained in:
Dusk 2024-02-22 23:55:11 +01:00
parent f9eda09dea
commit c44c7ebc13
5 changed files with 111 additions and 7 deletions

View File

@ -8,6 +8,10 @@ def open_onikakushi() -> str:
outfile = open(outpath, 'w', encoding='shift_jisx0213') outfile = open(outpath, 'w', encoding='shift_jisx0213')
replacements = { replacements = {
# Misc
12: 'caption"Onikakushi-hen"',
23: 'rmenu "Save",save,"Load",load,"Skip",skip,"Hide UI",windowerase,"Log",lookback,"Exit",reset',
# onik_000
1202: '「ゴ・メ・ン・ナ・サ・イ!sd!w800って言ってみな。@言わないならぁ……!」@', 1202: '「ゴ・メ・ン・ナ・サ・イ!sd!w800って言ってみな。@言わないならぁ……!」@',
1630: '「…圭ちゃんは食らうの初めてだよね。@…今日のはまだ!s100…甘い方…。」!sd', 1630: '「…圭ちゃんは食らうの初めてだよね。@…今日のはまだ!s100…甘い方…。」!sd',
1823: '「け!w300……圭ちゃんがしてよ……!s300。おじさんは応援してる……。」!sd@', 1823: '「け!w300……圭ちゃんがしてよ……!s300。おじさんは応援してる……。」!sd@',
@ -35,6 +39,10 @@ def open_onikakushi() -> str:
5792: '……くっくっく!!sd うつけ者めッ@', 5792: '……くっくっく!!sd うつけ者めッ@',
5966:'「ぎゃぎゃ!s250………!s80ぎゃああぁあぁあぁあぁああッ」!sd¥', 5966:'「ぎゃぎゃ!s250………!s80ぎゃああぁあぁあぁあぁああッ」!sd¥',
6224:' とりとめのないおしゃべり。@', 6224:' とりとめのないおしゃべり。@',
# onik_009
6645:'quakey 5,200',
6646:'………あれ?@ 圭一くんと梨花ちゃんは…なんでスタートしないんだろ? しないんだろ?」@',
6647:' 魅音と沙都子は猛ダッシュで会場に散ったが、俺と梨花ちゃんは焦る様子もなく、ただ立ったままだ。¥',
} }
for i, line in enumerate(origfile): for i, line in enumerate(origfile):

View File

@ -3,11 +3,11 @@
onik = { onik = {
"gamestart" : [ "gamestart" : [
'onik_000.txt', 'onik_000.txt',
'onik_001.txt', # 'onik_001.txt',
'onik_002.txt', # 'onik_002.txt',
'onik_003.txt', # 'onik_003.txt',
'onik_004.txt', # 'onik_004.txt',
'onik_005.txt', # 'onik_005.txt',
# 'onik_009.txt', # 'onik_009.txt',
# 'onik_009_02.txt', # 'onik_009_02.txt',
# 'onik_010.txt', # 'onik_010.txt',

View File

@ -5,6 +5,7 @@ import sys
from unidecode import unidecode from unidecode import unidecode
import scripter
import config import config
import parser import parser
import flow import flow
@ -74,7 +75,6 @@ def process_sections():
outfile.close() outfile.close()
origfile.close() origfile.close()
def get_symbols(line: str) -> (str, list[str]): def get_symbols(line: str) -> (str, list[str]):
res = [] res = []
start_symbol = '' start_symbol = ''
@ -134,13 +134,28 @@ def write_translated(outfile, origfile, translation_file_paths):
parser.parse_to_csv(transfilepath) parser.parse_to_csv(transfilepath)
structure = parser.parse_to_structure(transfilepath) structure = parser.parse_to_structure(transfilepath)
testfile = open('test.txt', 'w')
for line in origfile: for line in origfile:
tokens = scripter.parse_line(line)
tkns = ''
for t in tokens:
if len(tkns) > 0:
tkns += ' | '
tkns += t.token
testfile.write(tkns + '\n')
continue
global debug_current_line global debug_current_line
debug_current_line += 1 debug_current_line += 1
# Check if the current line is a dialogue line or not # Check if the current line is a dialogue line or not
if line_should_be_translated(line): if line_should_be_translated(line):
start, symbols = get_symbols(line) start, symbols = get_symbols(line)
print("\n-", debug_current_line, transfilepath, [start], symbols) print("\n-", debug_current_line, transfilepath, [start], symbols)
@ -183,3 +198,4 @@ def write_translated(outfile, origfile, translation_file_paths):
else: else:
outfile.write(line) outfile.write(line)
testfile.close()

View File

@ -103,7 +103,7 @@ def parse_to_csv(filename: str):
csv_writer = csv.writer( csv_writer = csv.writer(
csvfile, csvfile,
delimiter=delchar, delimiter=delchar,
quoting=csv.QUOTE_ALL, quoting=csv.QUOTE_NONE,
quotechar=escapechar, quotechar=escapechar,
) )

80
src/scripter.py Normal file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env python3
from enum import Enum
class TokenType(Enum):
    """Classification of a run of characters in an NScripter line."""
    # A run of single-byte characters: engine commands and keywords.
    # Consecutive commands with no text between them are parsed as
    # one single token.
    COMMAND = 1
    # A run of double-width (Japanese) characters: dialogue text.
    TEXT = 2
class ScripterToken():
    """One token produced by the NScripter line parser.

    Attributes:
        token: the raw character run making up the token.
        type: the TokenType classifying the run (COMMAND or TEXT).
    """

    def __init__(self, token: str, type: TokenType):
        self.token = token
        self.type = type

    def __repr__(self) -> str:
        # Debug-friendly representation; the parser is driven interactively
        # (test dumps), so tokens should print usefully.
        return f"ScripterToken({self.token!r}, {self.type})"
# Parse and tokenize an NScripter script line in Japanese mode.
# The original Japanese mode differentiates between single-byte
# (command) and double-byte (text) characters.
def parse_line(line: str) -> list[ScripterToken]:
    """Split *line* into alternating COMMAND/TEXT tokens.

    A token is a maximal run of characters of the same width class:
    double-width characters form TEXT tokens, single-width characters
    form COMMAND tokens. Parsing stops at a ';' comment marker or at
    the trailing newline; neither becomes part of any token.

    Returns an empty list for blank or comment-only lines.
    """
    current_token = ScripterToken('', TokenType.TEXT)
    token_list = []
    for char in line:
        # Comments signify the end of what should be parsed;
        # the newline does not constitute a token either.
        if char == ';' or char == '\n':
            break
        if len(current_token.token) == 0:
            # First character of the token decides its class.
            if is_double_width(char):
                current_token.type = TokenType.TEXT
            else:
                current_token.type = TokenType.COMMAND
        elif current_token.type == TokenType.COMMAND and is_double_width(char):
            # Width class flipped: close the COMMAND token, start a TEXT one.
            token_list.append(current_token)
            current_token = ScripterToken('', TokenType.TEXT)
        elif current_token.type == TokenType.TEXT and not is_double_width(char):
            # Width class flipped: close the TEXT token, start a COMMAND one.
            token_list.append(current_token)
            current_token = ScripterToken('', TokenType.COMMAND)
        current_token.token += char
    # Append the last token of the line — but only when it holds content.
    # (Fixes the spurious empty token previously returned for blank or
    # comment-only lines.)
    if current_token.token:
        token_list.append(current_token)
    return token_list
def is_double_width(char: str) -> bool:
    """Return True if NScripter renders *char* as a double-width glyph.

    Only the first character of *char* is examined. The check covers the
    Japanese scripts, full-width forms, CJK punctuation, curly quotes,
    the horizontal ellipsis, and the star NScripter treats as wide.
    """
    code_point = ord(char[0])
    double_width_ranges = (
        (0x4E00, 0x9FFF),  # Kanji
        (0x3040, 0x309F),  # Hiragana
        (0x30A0, 0x30FF),  # Katakana
        (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
        (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
        (0x201C, 0x201D),  # The characters “ ”
        (0x2026, 0x2026),  # The character …
        (0x2606, 0x2606),  # ☆ (NScripter treats it like a dw character)
    )
    return any(low <= code_point <= high for low, high in double_width_ranges)