Preliminary proper parser support
parent f9eda09dea
commit c44c7ebc13
@@ -8,6 +8,10 @@ def open_onikakushi() -> str:
     outfile = open(outpath, 'w', encoding='shift_jisx0213')
 
     replacements = {
+        # Misc
+        12: 'caption"Onikakushi-hen"',
+        23: 'rmenu "Save",save,"Load",load,"Skip",skip,"Hide UI",windowerase,"Log",lookback,"Exit",reset',
+        # onik_000
         1202: '「ゴ・メ・ン・ナ・サ・イ!sd!w800って言ってみな。@言わないならぁ……!」@',
         1630: '「…圭ちゃんは食らうの初めてだよね。@…今日のはまだ!s100…甘い方…。」!sd',
         1823: '「け!w300……圭ちゃんがしてよ……!s300。おじさんは応援してる……。」!sd@',
@@ -35,6 +39,10 @@ def open_onikakushi() -> str:
         5792: '……くっくっく!!sd うつけ者めッ!!!@',
         5966: '「ぎゃぎゃ!s250………!s80ぎゃああぁあぁあぁあぁああッ!!!」!sd¥',
         6224: ' とりとめのないおしゃべり。@',
+        # onik_009
+        6645: 'quakey 5,200',
+        6646: '………あれ?@ 圭一くんと梨花ちゃんは…なんでスタートしないんだろ? しないんだろ?」@',
+        6647: ' 魅音と沙都子は猛ダッシュで会場に散ったが、俺と梨花ちゃんは焦る様子もなく、ただ立ったままだ。¥',
     }
 
     for i, line in enumerate(origfile):
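The loop that consumes replacements sits outside this hunk, so the following is only a sketch of how a dict keyed by script line number is presumably applied; the 1-based key assumption and the file names are hypothetical, not taken from the commit:

    # Hypothetical sketch: apply replacements keyed by script line number.
    replacements = {12: 'caption"Onikakushi-hen"'}

    with open('onik.txt', encoding='shift_jisx0213') as origfile, \
            open('onik.out', 'w', encoding='shift_jisx0213') as outfile:
        for i, line in enumerate(origfile):
            # Assumes keys are 1-based script line numbers.
            outfile.write(replacements.get(i + 1, line.rstrip('\n')) + '\n')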
src/flow.py (10 changed lines)
@@ -3,11 +3,11 @@
 onik = {
     "gamestart" : [
         'onik_000.txt',
-        'onik_001.txt',
-        'onik_002.txt',
-        'onik_003.txt',
-        'onik_004.txt',
-        'onik_005.txt',
+        # 'onik_001.txt',
+        # 'onik_002.txt',
+        # 'onik_003.txt',
+        # 'onik_004.txt',
+        # 'onik_005.txt',
         # 'onik_009.txt',
         # 'onik_009_02.txt',
         # 'onik_010.txt',
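This narrows the run to onik_000.txt while the new parser is brought up; the later chapters stay commented out until it can handle them. A minimal sketch of how a flow table like this is presumably consumed (the driver loop is an assumption and does not appear in this commit):

    import flow

    # Hypothetical driver: walk the active scripts of the "gamestart" flow.
    for scriptname in flow.onik["gamestart"]:
        print('processing', scriptname)  # with this commit, only 'onik_000.txt'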
src/orig.py (18 changed lines)
@@ -5,6 +5,7 @@ import sys
 
 from unidecode import unidecode
 
+import scripter
 import config
 import parser
 import flow
@@ -74,7 +75,6 @@ def process_sections():
     outfile.close()
     origfile.close()
 
-
 def get_symbols(line: str) -> (str, list[str]):
     res = []
     start_symbol = ''
@@ -134,13 +134,28 @@ def write_translated(outfile, origfile, translation_file_paths):
     parser.parse_to_csv(transfilepath)
     structure = parser.parse_to_structure(transfilepath)
 
+    testfile = open('test.txt', 'w')
+
     for line in origfile:
+
+        tokens = scripter.parse_line(line)
+        tkns = ''
+
+        for t in tokens:
+            if len(tkns) > 0:
+                tkns += ' | '
+            tkns += t.token
+
+        testfile.write(tkns + '\n')
+
+        continue
+
         global debug_current_line
         debug_current_line += 1
 
         # Check if the current line is a dialogue line or not
 
         if line_should_be_translated(line):
 
             start, symbols = get_symbols(line)
             print("\n-", debug_current_line, transfilepath, [start], symbols)
@@ -183,3 +198,4 @@ def write_translated(outfile, origfile, translation_file_paths):
         else:
             outfile.write(line)
 
+    testfile.close()
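The new block dumps the tokenized form of every original line to test.txt, one line per script line with tokens joined by ' | '; the unconditional continue skips the translation path below while the tokenizer is validated. A hedged example of the dump format, assuming scripter.parse_line behaves as in the new src/scripter.py further down (the input line is made up):

    import scripter

    # Hypothetical NScripter-style line: a wait command, dialogue text,
    # then a single-byte click-wait '@'.
    line = '!w800って言ってみな。@\n'
    print(' | '.join(t.token for t in scripter.parse_line(line)))
    # -> !w800 | って言ってみな。 | @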
src/parser.py (2 changed lines)
@@ -103,7 +103,7 @@ def parse_to_csv(filename: str):
     csv_writer = csv.writer(
         csvfile,
         delimiter=delchar,
-        quoting=csv.QUOTE_ALL,
+        quoting=csv.QUOTE_NONE,
         quotechar=escapechar,
     )
 
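With csv.QUOTE_ALL every field was wrapped in quotes; csv.QUOTE_NONE writes fields bare and instead escapes any delimiter occurring inside a field with escapechar (raising csv.Error if none is set). A minimal sketch of the difference, using made-up delimiter and escape characters since delchar and escapechar are defined elsewhere in the file:

    import csv
    import io

    for quoting in (csv.QUOTE_ALL, csv.QUOTE_NONE):
        buf = io.StringIO()
        writer = csv.writer(buf, delimiter='|', quoting=quoting, escapechar='\\')
        writer.writerow(['a|b', 'c'])
        print(buf.getvalue().rstrip())
    # "a|b"|"c"   <- QUOTE_ALL
    # a\|b|c      <- QUOTE_NONE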
src/scripter.py (new file, 80 lines)
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+
+from enum import Enum
+
+
+class TokenType(Enum):
+    # Command tokens represent all non-dialogue keywords.
+    # If commands are chained without text in between,
+    # they will be parsed as a single token.
+    COMMAND = 1
+    TEXT = 2
+
+
+class ScripterToken():
+    def __init__(self, token: str, type: TokenType):
+        self.token = token
+        self.type = type
+
+
+# Parse and tokenize an NScripter script line in Japanese mode.
+# The original Japanese mode differentiates between single-byte
+# and double-byte characters.
+def parse_line(line: str) -> list[ScripterToken]:
+    current_token = ScripterToken('', TokenType.TEXT)
+    token_list = []
+
+    for i, char in enumerate(line):
+        # A comment signifies the end of what should be parsed;
+        # a newline does not constitute a token, so stop either way.
+        if char == ';' or char == '\n':
+            break
+
+        # First character of the token
+        if len(current_token.token) == 0:
+            if is_double_width(char):
+                current_token.type = TokenType.TEXT
+            else:
+                current_token.type = TokenType.COMMAND
+            current_token.token += char
+
+        else:
+            # End of token
+            if current_token.type == TokenType.COMMAND and is_double_width(char):
+                # Add to the list and reset the current one
+                token_list.append(current_token)
+                current_token = ScripterToken('', TokenType.TEXT)
+            elif current_token.type == TokenType.TEXT and not is_double_width(char):
+                # Add to the list and reset the current one
+                token_list.append(current_token)
+                current_token = ScripterToken('', TokenType.COMMAND)
+
+            current_token.token += char
+
+    # Append the last token of the line
+    token_list.append(current_token)
+
+    return token_list
+
+
+def is_double_width(char: str) -> bool:
+
+    japanese_ranges = [
+        (0x4E00, 0x9FFF),  # Kanji
+        (0x3040, 0x309F),  # Hiragana
+        (0x30A0, 0x30FF),  # Katakana
+        (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
+        (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
+        (0x201C, 0x201D),  # The characters “ ”
+        (0x2026, 0x2026),  # The character …
+        (0x2606, 0x2606),  # ☆ (NScripter treats it as a double-width character)
+    ]
+
+    for start, end in japanese_ranges:
+        if start <= ord(char[0]) <= end:
+            return True
+
+    return False
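A short usage sketch of the new tokenizer (the input lines are made up in the style of the script, and assume the module is importable as scripter): single-byte runs come back as COMMAND tokens, double-width runs as TEXT, and anything after ';' is dropped as a comment.

    from scripter import parse_line

    # Everything after ';' is treated as a comment and ignored.
    tokens = parse_line('quakey 5,200;shake the screen\n')
    print([(t.type.name, t.token) for t in tokens])
    # [('COMMAND', 'quakey 5,200')]

    # A double-width character flushes a COMMAND token and starts TEXT,
    # and vice versa.
    tokens = parse_line('!s100…甘い方…。!sd\n')
    print([t.token for t in tokens])
    # ['!s100', '…甘い方…。', '!sd']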