# hinamizawa-downporter/src/parser.py

#!/usr/bin/env python3

import csv
import os

import config
import flow

class OutputLine:
    """Container pairing the `text_jp` and `text_en` strings of one line."""

    def __init__(self, text_jp: str, text_en: str):
        # Both texts are kept as plain, publicly readable attributes.
        self.text_jp, self.text_en = text_jp, text_en

def get_functions_from_file(filepath: str) -> list[list[str]]:
    """
    Gets the function calls from a file and returns
    a list of lists with the following structure:

    ["<function_name>", "<argument_1>", "<argument_2>", ...]

    The file is scanned one character at a time:

    - A word that starts with `//` begins a comment which lasts until
      the end of the line.
    - `(`, `,` and `)` are ignored while inside double quotes. A quote
      preceded by a backslash does not open or close a quoted string.
    - Arguments are stored verbatim (string arguments keep their quotes
      and escape characters), stripped of surrounding whitespace and
      with raw line breaks removed.

    NOTE: Nested calls are not understood. A call is closed by the first
    unquoted `)` following its `(`, and whatever remains of the outer
    call's argument list is discarded.
    """

    tokens = []

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(filepath, 'r', encoding='utf-8') as file:
        contents = file.read()

    inside_quotes = False
    inside_comment = False
    inside_token = False
    current_word = ""    # Characters seen since the last whitespace
    current_token = []   # [name, arg, arg, ...] of the call being built
    current_param = ""   # Raw text of the argument being read

    for char in contents:
        # Whitespace ends the current word
        if char in " \n\t\r":
            current_word = ""
        else:
            current_word += char

        # Arguments may span several lines; drop the raw line breaks.
        # This runs before the comment check, so comment text located
        # inside a call's parentheses is still collected.
        if inside_token and char not in "\n\r":
            current_param += char

        # Skip comments
        if inside_comment:
            if char == "\n":
                inside_comment = False

            continue

        # Keep track of whether or not we're inside quotes.
        # Also handle the case where the quotation marks are
        # escaped. We do this by seeing if in the current word
        # there's a backslash in the second to last position.
        if char == '"' and (
            len(current_word) <= 1
            or current_word[-2] != "\\"
        ):
            inside_quotes = not inside_quotes

        if inside_quotes:
            continue

        if current_word == "//":
            inside_comment = True

        if char == "(":
            # Write the function name (the word without the "(")
            current_token.append(current_word[:-1])
            inside_token = True

        elif inside_token and char == ",":
            # Only commas inside a call separate arguments. A stray
            # comma elsewhere must not add an empty entry in front of
            # the next call's name.
            current_token.append(current_param[:-1].strip())
            current_param = ""

        elif inside_token and char == ")":
            inside_token = False

            # [:-1] drops the closing parenthesis
            current_token.append(current_param[:-1].strip())
            current_param = ""

            # We have the whole function call, save the result
            tokens.append(current_token)
            current_token = []

    return tokens

def parse_to_csv():
    """
    Writes one CSV file for every entry of `flow.script_flow`.

    For each entry the script `<scripts_path>/<entry.orig>` is tokenized
    and its `OutputLine` calls are numbered from 0 in order of
    appearance. The calls numbered `entry.start` to `entry.end`
    (inclusive) are written to `<output_path>/trans/<entry.dest>` as
    two-column rows built from arguments 2 and 4 of the call
    (`text_jp`, `text_en`).

    A warning is printed when the number of the last processed call does
    not match `entry.end`, e.g. because the script contains fewer
    `OutputLine` calls than expected.
    """
    # The configuration does not depend on the entry; read it once
    out_path = config.get('output_path')
    scripts_path = config.get('scripts_path')

    escapechar = config.get('csv_escapechar')
    delchar = config.get('csv_delchar')

    for csv_entry in flow.script_flow:
        tokens = get_functions_from_file(
            os.path.join(scripts_path, csv_entry.orig)
        )

        csv_path = os.path.join(out_path, 'trans', csv_entry.dest)
        os.makedirs(os.path.dirname(csv_path), exist_ok=True)

        # newline='' lets the csv module control the line endings itself
        # (otherwise rows end in "\r\r\n" on Windows). The encoding is
        # given explicitly so the output does not depend on the platform.
        with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:

            csv_writer = csv.writer(
                csv_file,
                delimiter=delchar,
                quoting=csv.QUOTE_MINIMAL,
                quotechar=escapechar,
            )

            # Number of the current OutputLine call within the script
            i = 0
            for token in tokens:
                if token[0] != 'OutputLine':
                    continue

                # Only include the range specified by the csv_entry
                if i < csv_entry.start:
                    i += 1
                    continue
                elif i > csv_entry.end:
                    break

                # NOTE: [1:-1] Removes quotes
                # NOTE: replace(' ', '') Is needed for some unity script dialogue
                text_jp = token[2][1:-1].replace('\\', '').replace('\n', '').replace(' ', '')
                text_en = token[4][1:-1].replace('\\', '').replace('\n', '')
                csv_writer.writerow([text_jp, text_en])

                i += 1

        # After a complete run i is one past csv_entry.end
        if i-1 != csv_entry.end:
            print(f'WARNING: File "{csv_entry.dest}": written {i-1} of {csv_entry.end} lines')


def parse_to_structure(filename: str) -> list[OutputLine]:
    """
    Reads a CSV file and returns its rows as a list of `OutputLine`s.

    Column 0 of every row becomes `text_jp` and column 1 becomes
    `text_en`. The delimiter and quote character are taken from the
    `csv_delchar` and `csv_escapechar` configuration values.

    Completely empty rows (blank lines) are skipped. A non-empty row
    with fewer than two columns raises `IndexError`.
    """
    escapechar = config.get('csv_escapechar')
    delchar = config.get('csv_delchar')

    # newline='' is required by the csv module to interpret line endings
    # correctly. The encoding is given explicitly so that reading does
    # not depend on the platform's default.
    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        csv_reader = csv.reader(
            csvfile,
            delimiter=delchar,
            quotechar=escapechar,
        )

        # csv.reader yields [] for blank lines; those carry no text
        return [
            OutputLine(text_jp=row[0], text_en=row[1])
            for row in csv_reader
            if row
        ]