Change over to the new parsing
This commit is contained in:
parent
c44c7ebc13
commit
ea4b17ee40
177
src/orig.py
177
src/orig.py
|
@ -11,39 +11,6 @@ import parser
|
|||
import flow
|
||||
import fix
|
||||
|
||||
# Inclusive code-point ranges whose characters count as Japanese text.
_JAPANESE_RANGES = (
    (0x4E00, 0x9FFF),  # Kanji
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
    (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
    (0x201C, 0x201C),  # The character “
    (0x2026, 0x2026),  # The character …
)

# Line prefixes treated as "special commands" that may be followed by text.
_COMMAND_PREFIXES = ('!s', '!w', '!d', '@', '¥')


def _is_japanese_char(char: str) -> bool:
    """Return True if *char* lies inside one of the _JAPANESE_RANGES."""
    code = ord(char)
    return any(start <= code <= end for start, end in _JAPANESE_RANGES)


def line_should_be_translated(line: str) -> bool:
    """Decide whether a script line should be translated.

    A line qualifies when either:
      * its first character is in one of the Japanese ranges, or
      * it starts with one of the special command prefixes and, ignoring
        everything from the first ';' onwards, contains at least one
        character in the Japanese ranges.

    Args:
        line: One raw line of the script (may include the trailing newline).

    Returns:
        True if the line qualifies, False otherwise. An empty string
        returns False instead of raising IndexError.
    """
    # Guard: indexing line[0] on an empty string would raise IndexError.
    if not line:
        return False

    if _is_japanese_char(line[0]):
        return True

    # NOTE(review): the scan below is only applied to lines that start with
    # a special command; confirm against the original indentation.
    if line.startswith(_COMMAND_PREFIXES):
        # Everything after the first ';' is a comment and is ignored.
        code_part = line.partition(';')[0]
        return any(_is_japanese_char(char) for char in code_part)

    return False
|
||||
|
||||
|
||||
|
||||
debug_current_line = -1
|
||||
|
||||
def process_sections():
|
||||
|
@ -69,61 +36,39 @@ def process_sections():
|
|||
origfile,
|
||||
flow.onik[section_name],
|
||||
)
|
||||
print("finished section: ", section_name)
|
||||
else:
|
||||
outfile.write(line)
|
||||
|
||||
outfile.close()
|
||||
origfile.close()
|
||||
|
||||
def get_symbols(line: str) -> (str, list[str]):
|
||||
res = []
|
||||
start_symbol = ''
|
||||
|
||||
inbetween = 0
|
||||
i = 0
|
||||
while i < len(line):
|
||||
if line[i] in ['@', '/', '¥']:
|
||||
symbol = line[i]
|
||||
i += 1
|
||||
elif line[i:i+3] == '!sd':
|
||||
symbol = line[i:i+3]
|
||||
i += 3
|
||||
elif line[i:i+2] in ['!d', '!w', '!s']:
|
||||
x = i
|
||||
i += 2
|
||||
while i < len(line):
|
||||
if line[i] >= '0' and line[i] <= '9':
|
||||
i += 1
|
||||
continue
|
||||
def swap_line_text(tokens, translation_lines: list[parser.OutputLine]) -> (str, str, int):
|
||||
"""
|
||||
Given a token list and a buffer with lines, replace the text tokens
|
||||
with lines from the line_buffer.
|
||||
|
||||
symbol = line[x:i]
|
||||
break
|
||||
elif line[i] == '!':
|
||||
raise Exception('Unhandled symbol', line)
|
||||
else: # It's not a symbol, it's a regular character
|
||||
#print(line[i])
|
||||
inbetween += 1
|
||||
i += 1
|
||||
continue
|
||||
Returns the swapped token list and the amount of lines consumed.
|
||||
"""
|
||||
|
||||
# Only reaches this if it's a symbol
|
||||
# Lists are pointers to arrays, don't mutate it
|
||||
ret_en = ''
|
||||
ret_jp = ''
|
||||
lines_written = 0
|
||||
|
||||
# Each symbol acts as a separator between dialog texts, if we
|
||||
# have two symbols next to each other, then we print more dialog
|
||||
# than we should. Concatenate consecutive symbols together to
|
||||
# prevent this
|
||||
if inbetween > 0:
|
||||
res.append(symbol)
|
||||
for token in tokens:
|
||||
if token.type == scripter.TokenType.TEXT:
|
||||
ret_en += unidecode(translation_lines[lines_written].text_en)
|
||||
ret_jp += translation_lines[lines_written].text_jp
|
||||
lines_written += 1
|
||||
else:
|
||||
# Symbols at the start should not have dialog before them.
|
||||
# Treat them as a special "start_symbol"
|
||||
if len(res) == 0:
|
||||
start_symbol = symbol
|
||||
else:
|
||||
res[-1] += symbol
|
||||
inbetween = 0
|
||||
ret_en += token.token
|
||||
ret_jp += token.token
|
||||
|
||||
return ret_en, ret_jp, lines_written
|
||||
|
||||
|
||||
return start_symbol, res
|
||||
|
||||
|
||||
# Given a set of translation files, the original file and the output file
|
||||
|
@ -134,68 +79,38 @@ def write_translated(outfile, origfile, translation_file_paths):
|
|||
parser.parse_to_csv(transfilepath)
|
||||
structure = parser.parse_to_structure(transfilepath)
|
||||
|
||||
testfile = open('test.txt', 'w')
|
||||
|
||||
for line in origfile:
|
||||
|
||||
tokens = scripter.parse_line(line)
|
||||
tkns = ''
|
||||
for t in tokens:
|
||||
if len(tkns) > 0:
|
||||
tkns += ' | '
|
||||
tkns += t.token
|
||||
testfile.write(tkns + '\n')
|
||||
|
||||
continue
|
||||
|
||||
# --- Debug ---
|
||||
global debug_current_line
|
||||
debug_current_line += 1
|
||||
print("\n-", debug_current_line, transfilepath, tokens[0])
|
||||
# -------------
|
||||
|
||||
# Check if the current line is a dialogue line or not
|
||||
# Replace the text tokens with the translated ones
|
||||
line_en, line_jp, lines_written = swap_line_text(tokens, structure)
|
||||
# Remove the lines that have been written
|
||||
structure = structure[lines_written:]
|
||||
|
||||
if line_should_be_translated(line):
|
||||
outfile.write('`' + unidecode(line_en) + '\n')
|
||||
|
||||
# --- Debug ---
|
||||
print(">", line_en)
|
||||
print(">", line_jp)
|
||||
print("<", line, end='')
|
||||
# -------------
|
||||
|
||||
start, symbols = get_symbols(line)
|
||||
print("\n-", debug_current_line, transfilepath, [start], symbols)
|
||||
if line_jp+'\n' != line:
|
||||
print()
|
||||
print(" ------------------------------------------------------")
|
||||
print(" ! NO THAT'S WRONG! !")
|
||||
print(" ------------------------------------------------------")
|
||||
sys.exit(1)
|
||||
|
||||
outfile.write('`')
|
||||
outfile.write(start)
|
||||
|
||||
_printed_line_jp = start
|
||||
_printed_line_en = start
|
||||
while True:
|
||||
_printed_line_jp += structure[0].text_jp
|
||||
_printed_line_en += structure[0].text_en
|
||||
outfile.write(
|
||||
unidecode(structure.pop(0).text_en)
|
||||
)
|
||||
|
||||
if len(symbols) > 0:
|
||||
_printed_line_jp += symbols[0]
|
||||
_printed_line_en += symbols[0]
|
||||
outfile.write(symbols.pop(0))
|
||||
|
||||
if len(symbols) <= 0:
|
||||
break
|
||||
|
||||
outfile.write('\n')
|
||||
|
||||
print(">", _printed_line_en)
|
||||
print(">", _printed_line_jp)
|
||||
print("<", line, end='')
|
||||
|
||||
if _printed_line_jp+'\n' != line:
|
||||
print()
|
||||
print("NO THAT'S WRONG!")
|
||||
sys.exit(1)
|
||||
|
||||
# Used up all of the structures, this chapter has ended.
|
||||
# Go to the next one
|
||||
if len(structure) <= 0:
|
||||
break
|
||||
|
||||
else:
|
||||
outfile.write(line)
|
||||
|
||||
testfile.close()
|
||||
# Used up all of the structures, this chapter has ended.
|
||||
# Go to the next one
|
||||
if len(structure) <= 0:
|
||||
print()
|
||||
print(f'- finished "{transfilepath}"')
|
||||
break
|
||||
|
|
|
@ -9,24 +9,34 @@ class TokenType(Enum):
|
|||
# they will be parsed as a single token
|
||||
COMMAND = 1
|
||||
TEXT = 2
|
||||
COMMENT = 3
|
||||
|
||||
class ScripterToken:
    """One token of a parsed script line: its raw text plus its category."""

    def __init__(self, token: str, type: TokenType):
        # `type` keeps the caller-visible parameter name even though it
        # shadows the builtin inside this method.
        self.type = type
        self.token = token

    def __str__(self):
        """Return the token text wrapped in pipes, preceded by a space."""
        return ' |{}|'.format(self.token)
|
||||
|
||||
|
||||
# Parse and tokenize an Nscripter script line in japanese mode
|
||||
# The original japanese mode differentiates between single and
|
||||
# double byte characters
|
||||
def parse_line(line: str) -> list[ScripterToken]:
|
||||
current_token = ScripterToken('', TokenType.TEXT)
|
||||
current_token = ScripterToken('', TokenType.COMMAND)
|
||||
token_list = []
|
||||
|
||||
for i, char in enumerate(line):
|
||||
# Comments signify the end of what should be parsed
|
||||
# Newline does not constitute a token, skip
|
||||
if char == ';' or char == '\n':
|
||||
if char == '\n':
|
||||
break
|
||||
|
||||
if char == ';':
|
||||
token_list.append(current_token)
|
||||
# [:-1] to remove the newline
|
||||
current_token = ScripterToken(line[i:-1], TokenType.COMMENT)
|
||||
break
|
||||
|
||||
# First character of the token
|
||||
|
|
Loading…
Reference in New Issue