hinamizawa-downporter/src/parser.py

167 lines
4.7 KiB
Python
Executable File

#!/usr/bin/env python3
import csv
import os
import config
import flow
class OutputLine:
    """
    One line of text in both languages.

    Attributes:
        text_jp: The Japanese text of the line.
        text_en: The English text of the line.
    """

    def __init__(self, text_jp: str, text_en: str):
        self.text_jp = text_jp
        self.text_en = text_en

    def __repr__(self) -> str:
        # Readable representation for debugging and log output
        return (
            f'{type(self).__name__}('
            f'text_jp={self.text_jp!r}, text_en={self.text_en!r})'
        )
def get_functions_from_file(filepath: str) -> list[list[str]]:
    """
    Gets the function calls from a script file and returns
    a list of lists with the following structure:
        ["<function_name>", "<argument_1>", "<argument_2>", ...]

    The text is scanned character by character. Content inside double
    quotes is never interpreted (so commas, parentheses and "//" inside
    a string literal are kept verbatim), and "//" comments are skipped
    up to the end of the line. String arguments keep their surrounding
    quotes and their escape sequences.

    A call without arguments yields ["<function_name>", ""].

    NOTE: Nested calls are not handled specially; every "(" found
    outside of quotes appends the current word as a name.

    Args:
        filepath: Path of the script file to scan.

    Returns:
        One list per function call, in order of appearance.
    """
    # utf-8-sig decodes UTF-8 and drops a leading BOM when there is one.
    # NOTE(review): assumes the scripts are UTF-8 encoded -- confirm
    with open(filepath, 'r', encoding='utf-8-sig') as file:
        source = file.read()

    tokens = []
    inside_quotes = False
    inside_comment = False
    inside_token = False
    current_word = ""
    current_token = []
    current_parameter = ""

    for char in source:
        # The current word is everything since the last whitespace
        if char in " \n\t\r":
            current_word = ""
        else:
            current_word += char

        # Skip comments (their text must not end up in a parameter)
        if inside_comment:
            if char == "\n":
                inside_comment = False
            continue

        if inside_token and char not in "\n\r":
            current_parameter += char

        # Keep track of whether or not we're inside quotes.
        # Also handle the case where the quotation marks are
        # escaped. We do this by seeing if in the current word
        # there's a backslash in the second to last position.
        if (
            char == '"'
            and (
                len(current_word) <= 1
                or current_word[-2] != "\\"
            )
        ):
            inside_quotes = not inside_quotes

        if inside_quotes:
            continue

        if current_word == "//":
            inside_comment = True
            if inside_token:
                # Both slashes were already added to the parameter
                current_parameter = current_parameter[:-2]

        if char == "(":
            # Write the function name (the word without the "(")
            current_token.append(current_word[:-1])
            inside_token = True

        # Only commas inside a call separate arguments. A comma
        # elsewhere must not add an empty entry in front of the
        # next function name.
        if inside_token and char == ",":
            current_token.append(current_parameter[:-1].strip())
            current_parameter = ""

        if inside_token and char == ")":
            inside_token = False
            current_token.append(current_parameter[:-1].strip())
            current_parameter = ""

            # We have the whole function call, save the result
            tokens.append(current_token)
            current_token = []

    return tokens
def parse_to_csv():
    """
    Writes one CSV file per entry of flow.script_flow.

    For every entry, the script at <scripts_path>/<entry.orig> is scanned
    for OutputLine calls. The calls whose position (counting only
    OutputLine calls, starting at 0) lies between entry.start and
    entry.end, both inclusive, are written to
    <output_path>/trans/<entry.dest> as rows of [text_jp, text_en].

    A warning is printed when the script runs out of OutputLine calls
    before entry.end has been reached.
    """
    for csv_entry in flow.script_flow:
        out_path = config.get('output_path')
        scripts_path = config.get('scripts_path')
        escapechar = config.get('csv_escapechar')
        delchar = config.get('csv_delchar')

        tokens = get_functions_from_file(
            os.path.join(scripts_path, csv_entry.orig)
        )

        csv_path = os.path.join(out_path, 'trans', csv_entry.dest)
        os.makedirs(os.path.dirname(csv_path), exist_ok=True)

        # newline='' is required by the csv module, otherwise the row
        # terminator gets translated a second time on some platforms.
        # NOTE(review): the encoding is left at the platform default so
        # the files stay readable by parse_to_structure; consider pinning
        # the same encoding in both places.
        with open(csv_path, 'w', newline='') as csv_file:
            csv_writer = csv.writer(
                csv_file,
                delimiter=delchar,
                quoting=csv.QUOTE_MINIMAL,
                quotechar=escapechar,
            )

            i = 0        # Index of the current OutputLine call
            written = 0  # Number of rows actually written
            for token in tokens:
                if token[0] != 'OutputLine':
                    continue

                # Only include the range specified by the csv_entry
                if i < csv_entry.start:
                    i += 1
                    continue
                elif i > csv_entry.end:
                    break

                # NOTE: [1:-1] Removes quotes
                # NOTE: replace(' ', '') Is needed for some unity script dialogue
                # NOTE(review): backslashes are removed before '\n' is, so an
                # escaped newline sequence in the script leaves an "n" behind
                # -- confirm this is intended
                text_jp = token[2][1:-1].replace('\\', '').replace('\n', '').replace(' ', '')
                text_en = token[4][1:-1].replace('\\', '').replace('\n', '')

                csv_writer.writerow([text_jp, text_en])
                written += 1
                i += 1

        # i only gets past csv_entry.end when the whole range was found
        if i - 1 != csv_entry.end:
            expected = csv_entry.end - csv_entry.start + 1
            print(f'WARNING: File "{csv_entry.dest}": written {written} of {expected} lines')
def parse_to_structure(filename: str) -> list[OutputLine]:
    """
    Reads a CSV file and returns its rows as OutputLine objects.

    The first column of each row becomes text_jp and the second one
    text_en. Delimiter and quote character come from the 'csv_delchar'
    and 'csv_escapechar' config values. Completely empty rows (blank
    lines in the file) are skipped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        One OutputLine per non-empty row, in file order.

    Raises:
        IndexError: If a non-empty row has fewer than two columns.
    """
    escapechar = config.get('csv_escapechar')
    delchar = config.get('csv_delchar')

    # newline='' is required by the csv module so that line breaks
    # inside quoted fields are handed to the reader untouched
    with open(filename, 'r', newline='') as csvfile:
        csv_reader = csv.reader(
            csvfile,
            delimiter=delchar,
            quotechar=escapechar,
        )

        # csv.reader yields [] for blank lines, those carry no text
        return [
            OutputLine(text_jp=row[0], text_en=row[1])
            for row in csv_reader
            if row
        ]