hinamizawa-downporter/src/orig.py

#!/usr/bin/env python3

import os
import sys

from unidecode import unidecode

import config
import parser
import flow
import fix

def line_should_be_translated(line: str) -> bool:
    """Tell whether a script line carries text that has to be translated.

    A line qualifies in either of two cases:

    * its first character falls in one of the Japanese code point ranges
      listed below, or
    * it starts with one of the commands ``!s``, ``!w``, ``!d``, ``@`` or
      ``¥`` and contains a character from those ranges before the first
      ``;`` (everything from ``;`` onwards is treated as a comment).

    Args:
        line: One line of the script, with or without its trailing newline.

    Returns:
        True if the line should be translated, False otherwise. An empty
        string is never translated.
    """
    japanese_ranges = (
        (0x4E00, 0x9FFF),  # Kanji
        (0x3040, 0x309F),  # Hiragana
        (0x30A0, 0x30FF),  # Katakana
        (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
        (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
        (0x201c, 0x201c),  # The character “
        (0x2026, 0x2026),  # The character …
    )

    def is_japanese(char: str) -> bool:
        code = ord(char)
        return any(start <= code <= end for start, end in japanese_ranges)

    # Guard against empty input: there is no first character to inspect.
    if not line:
        return False

    if is_japanese(line[0]):
        return True

    # Lines that start with a command may still have dialogue after it.
    if line.startswith(('!s', '!w', '!d', '@', '¥')):
        # Ignore whatever follows the comment marker.
        code_part = line.partition(';')[0]
        return any(is_japanese(c) for c in code_part)

    return False


# Running count of the lines read from the original script. It is incremented
# once per line by process_sections and write_translated and printed in the
# debug output. It starts at -1 so that the first line read is numbered 0.
debug_current_line = -1

def process_sections():
    """Copy the original script to ``out.txt``, translating known sections.

    The original script (whose path is returned by ``fix.open_onikakushi()``)
    is read line by line and written to ``out.txt`` inside the configured
    ``output_path``. Both files use the ``shift_jisx0213`` encoding.

    A line starting with ``*`` opens a section; its name is the text between
    the ``*`` and the first space or the end of the line. When that name is a
    key of ``flow.onik``, the lines that follow are handed to
    ``write_translated`` together with the translation files listed for the
    section. Every other line is copied unchanged.

    Increments the module-level ``debug_current_line`` once per line read.
    """
    global debug_current_line

    output_filepath = os.path.join(config.get('output_path'), 'out.txt')

    # Context managers guarantee both files are closed even when
    # write_translated raises or exits part-way through.
    with open(output_filepath, 'w', encoding='shift_jisx0213') as outfile, \
            open(fix.open_onikakushi(), 'r', encoding='shift_jisx0213') as origfile:
        for line in origfile:
            debug_current_line += 1

            # Every line read here is copied as-is, section headers included.
            outfile.write(line)

            if not line.startswith('*'):
                continue

            section_name = line[1:].split(' ', 1)[0].replace('\n', '')
            if section_name in flow.onik:
                print("entering", section_name)
                # write_translated consumes lines from the same file
                # iterator, so this loop resumes where it stopped.
                write_translated(
                    outfile,
                    origfile,
                    flow.onik[section_name],
                )


def get_symbols(line: str) -> tuple[str, list[str]]:
    """Extract the control symbols that split a script line into dialogue.

    Recognised symbols are the single characters ``@``, ``/`` and ``¥``, the
    command ``!sd``, and the commands ``!d``, ``!w`` and ``!s`` followed by
    any number of ASCII digits.

    Args:
        line: One line of the script.

    Returns:
        A ``(start_symbol, separators)`` pair. ``start_symbol`` is the
        concatenation of every symbol found before the first regular
        character (``''`` if there is none). ``separators`` has one entry per
        run of symbols that follows some regular text; symbols that are
        directly adjacent are merged into a single entry.

    Raises:
        Exception: If a ``!`` is found that does not start a known command.
    """
    res = []
    start_symbol = ''

    inbetween = 0  # regular characters seen since the previous symbol
    i = 0
    length = len(line)
    while i < length:
        if line[i] in ('@', '/', '¥'):
            symbol = line[i]
            i += 1
        elif line.startswith('!sd', i):
            symbol = '!sd'
            i += 3
        elif line.startswith(('!d', '!w', '!s'), i):
            x = i
            i += 2
            # Only ASCII digits belong to the command; full-width digits are
            # regular text.
            while i < length and '0' <= line[i] <= '9':
                i += 1
            # Assigned after the scan so that a command whose digits run up
            # to the end of the line is still captured.
            symbol = line[x:i]
        elif line[i] == '!':
            raise Exception('Unhandled symbol', line)
        else:  # It's not a symbol, it's a regular character
            inbetween += 1
            i += 1
            continue

        # Only reaches this if it's a symbol

        # Each symbol acts as a separator between dialog texts. If two
        # symbols are next to each other we would print more dialog than we
        # should, so consecutive symbols are concatenated together.
        if inbetween > 0:
            res.append(symbol)
        elif res:
            res[-1] += symbol
        else:
            # Symbols at the start have no dialog before them. Accumulate
            # all of them into the special "start_symbol".
            start_symbol += symbol
        inbetween = 0

    return start_symbol, res


# Walk through the lines of one section of the original file and, for every
# line that should be translated, write the English text taken from the given
# translation files instead. Any other line is copied to the output unchanged.
# Translation files are consumed in order: once all entries of one file have
# been used, reading continues with the next file.
def write_translated(outfile, origfile, translation_file_paths):
    global debug_current_line

    for transfilepath in translation_file_paths:
        print(f'- reading "{transfilepath}"')
        parser.parse_to_csv(transfilepath)
        structure = parser.parse_to_structure(transfilepath)

        for line in origfile:
            debug_current_line += 1

            # Non-dialogue lines go straight to the output.
            if not line_should_be_translated(line):
                outfile.write(line)
                continue

            start, symbols = get_symbols(line)
            print("\n-", debug_current_line, transfilepath, [start], symbols)

            outfile.write('`')
            outfile.write(start)

            # Rebuild both versions of the line for the debug output and for
            # the sanity check against the original line below.
            rebuilt_jp = start
            rebuilt_en = start
            while True:
                entry = structure[0]
                rebuilt_jp += entry.text_jp
                rebuilt_en += entry.text_en
                outfile.write(unidecode(structure.pop(0).text_en))

                if symbols:
                    separator = symbols.pop(0)
                    rebuilt_jp += separator
                    rebuilt_en += separator
                    outfile.write(separator)

                if not symbols:
                    break

            outfile.write('\n')

            print(">", rebuilt_en)
            print(">", rebuilt_jp)
            print("<", line, end='')

            # The Japanese pieces glued back together must reproduce the
            # original line exactly, otherwise the files are out of sync.
            if rebuilt_jp + '\n' != line:
                print()
                print("NO THAT'S WRONG!")
                sys.exit(1)

            # Every entry of this translation file has been used, so this
            # chapter has ended. Go to the next one.
            if not structure:
                break
Organize the project a little bit better 2024-02-16 12:48:18 +00:00			`#!/usr/bin/env python3`

Make output folder as configurable 2024-02-18 11:44:16 +00:00			`import os`
Cleaner debugging 2024-02-19 18:30:20 +00:00			`import sys`
Make output folder as configurable 2024-02-18 11:44:16 +00:00
Organize the project a little bit better 2024-02-16 12:48:18 +00:00			`from unidecode import unidecode`

Make output folder as configurable 2024-02-18 11:44:16 +00:00			`import config`
			`import parser`
WIP 18/2/24 2024-02-18 16:11:31 +00:00			`import flow`
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`import fix`
Organize the project a little bit better 2024-02-16 12:48:18 +00:00
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`def line_should_be_translated(line: str) -> bool:`
			`japanese_ranges = [`
			`(0x4E00, 0x9FFF), # Kanji`
			`(0x3040, 0x309F), # Hiragana`
			`(0x30A0, 0x30FF), # Katakana`
			`(0xFF00, 0xFFEF), # Full-width Roman characters and symbols`
			`(0x3000, 0x303F), # CJK symbols and punctuation (including 「」)`
Add japanese ellipsis to japanese chars 2024-02-19 18:29:24 +00:00			`(0x201c, 0x201c), # The character “`
			`(0x2026, 0x2026), # The character …`
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`]`


			`for start, end in japanese_ranges:`
			`if start <= ord(line[0]) <= end:`
			`return True`

			`# if line starts with special commands`
			`if line.startswith(('!s', '!w', '!d', '@', '¥')):`
			`# ignore line after comment`
			`comment_i = line.find(';')`
			`if comment_i != -1:`
			`line = line[:comment_i]`

			`# Check if line has japanese chars`
			`for c in line:`
			`for start, end in japanese_ranges:`
			`if start <= ord(c) <= end:`
			`return True`

			`return False`


Organize the project a little bit better 2024-02-16 12:48:18 +00:00
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`debug_current_line = -1`
Organize the project a little bit better 2024-02-16 12:48:18 +00:00
WIP 18/2/24 2024-02-18 16:11:31 +00:00			`def process_sections():`
Make output folder as configurable 2024-02-18 11:44:16 +00:00			`output_filepath = os.path.join(config.get('output_path'), 'out.txt')`
			`outfile = open(output_filepath, 'w', encoding='shift_jisx0213')`
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`origfile = open(fix.open_onikakushi(), 'r', encoding='shift_jisx0213')`
WIP 18/2/24 2024-02-18 16:11:31 +00:00
			`sections = dict()`

			`for line in origfile:`
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`global debug_current_line`
			`debug_current_line += 1`

WIP 18/2/24 2024-02-18 16:11:31 +00:00			`if line.startswith('*'):`
			`section_name = line[1:].split(' ', 1)[0].replace('\n','')`

			`outfile.write(line)`

			`if section_name in flow.onik:`
			`print("entering", section_name)`
			`write_translated(`
			`outfile,`
			`origfile,`
			`flow.onik[section_name],`
			`)`
			`else:`
			`outfile.write(line)`

			`outfile.close()`
			`origfile.close()`


Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`def get_symbols(line: str) -> (str, list[str]):`
Get symbols from each line 2024-02-18 19:28:09 +00:00			`res = []`
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`start_symbol = ''`
Get symbols from each line 2024-02-18 19:28:09 +00:00
			`inbetween = 0`
			`i = 0`
			`while i < len(line):`
			`if line[i] in ['@', '/', '¥']:`
Clean up a little bit the symbols function 2024-02-18 19:37:04 +00:00			`symbol = line[i]`
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`i += 1`
Get symbols from each line 2024-02-18 19:28:09 +00:00			`elif line[i:i+3] == '!sd':`
Clean up a little bit the symbols function 2024-02-18 19:37:04 +00:00			`symbol = line[i:i+3]`
Get symbols from each line 2024-02-18 19:28:09 +00:00			`i += 3`
			`elif line[i:i+2] in ['!d', '!w', '!s']:`
			`x = i`
			`i += 2`
			`while i < len(line):`
Don't treat wide numbers as numbers 2024-02-19 18:13:45 +00:00			`if line[i] >= '0' and line[i] <= '9':`
Get symbols from each line 2024-02-18 19:28:09 +00:00			`i += 1`
			`continue`

Clean up a little bit the symbols function 2024-02-18 19:37:04 +00:00			`symbol = line[x:i]`
Get symbols from each line 2024-02-18 19:28:09 +00:00			`break`
			`elif line[i] == '!':`
			`raise Exception('Unhandled symbol', line)`
Clean up a little bit the symbols function 2024-02-18 19:37:04 +00:00			`else: # It's not a symbol, it's a regular character`
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`#print(line[i])`
Get symbols from each line 2024-02-18 19:28:09 +00:00			`inbetween += 1`
Clean up a little bit the symbols function 2024-02-18 19:37:04 +00:00			`i += 1`
			`continue`

Actually print separation by symbol 2024-02-18 20:03:55 +00:00			`# Only reaches this if it's a symbol`
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00
			`# Each symbol acts as a separator between dialog texts, if we`
			`# have two symbols next to eachother, then we print more dialog`
			`# than we should. Concatenate consicutive symbols together to`
			`# prevent this`
Clean up a little bit the symbols function 2024-02-18 19:37:04 +00:00			`if inbetween > 0:`
			`res.append(symbol)`
			`else:`
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`# Symbols at the start should not have dialog before them.`
			`# Treat them as a special "start_symbol"`
			`if len(res) == 0:`
			`start_symbol = symbol`
			`else:`
			`res[-1] += symbol`
Clean up a little bit the symbols function 2024-02-18 19:37:04 +00:00			`inbetween = 0`
Get symbols from each line 2024-02-18 19:28:09 +00:00
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`return start_symbol, res`
Get symbols from each line 2024-02-18 19:28:09 +00:00

WIP 18/2/24 2024-02-18 16:11:31 +00:00			`# Given a set of translation files, the original file and the output file`
			`# replace the japanese lines with the translated ones in a given section.`
			`def write_translated(outfile, origfile, translation_file_paths):`
			`for transfilepath in translation_file_paths:`
			`print(f'- reading "{transfilepath}"')`
Use csv file for intermediary parsing 2024-02-21 16:47:29 +00:00			`parser.parse_to_csv(transfilepath)`
WIP 18/2/24 2024-02-18 16:11:31 +00:00			`structure = parser.parse_to_structure(transfilepath)`
Organize the project a little bit better 2024-02-16 12:48:18 +00:00
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`for line in origfile:`
			`global debug_current_line`
			`debug_current_line += 1`

Get symbols from each line 2024-02-18 19:28:09 +00:00			`# Check if the current line is a dialogue line or not`
Organize the project a little bit better 2024-02-16 12:48:18 +00:00
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`if line_should_be_translated(line):`
			`start, symbols = get_symbols(line)`
Cleaner debugging 2024-02-19 18:30:20 +00:00			`print("\n-", debug_current_line, transfilepath, [start], symbols)`
Get symbols from each line 2024-02-18 19:28:09 +00:00
Organize the project a little bit better 2024-02-16 12:48:18 +00:00			outfile.write('`')
Handle symbols/commands at the start of line 2024-02-19 18:08:40 +00:00			`outfile.write(start)`

			`_printed_line_jp = start`
			`_printed_line_en = start`
WIP 18/2/24 2024-02-18 16:11:31 +00:00			`while True:`
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`_printed_line_jp += structure[0].text_jp`
			`_printed_line_en += structure[0].text_en`
Organize the project a little bit better 2024-02-16 12:48:18 +00:00			`outfile.write(`
Actually print separation by symbol 2024-02-18 20:03:55 +00:00			`unidecode(structure.pop(0).text_en)`
WIP 18/2/24 2024-02-18 16:11:31 +00:00			`)`

Actually print separation by symbol 2024-02-18 20:03:55 +00:00			`if len(symbols) > 0:`
Fix until chapter 2 2024-02-18 21:52:25 +00:00			`_printed_line_jp += symbols[0]`
			`_printed_line_en += symbols[0]`
Actually print separation by symbol 2024-02-18 20:03:55 +00:00			`outfile.write(symbols.pop(0))`

			`if len(symbols) <= 0:`
			`break`
WIP 18/2/24 2024-02-18 16:11:31 +00:00
			`outfile.write('\n')`

Fix until chapter 2 2024-02-18 21:52:25 +00:00			`print(">", _printed_line_en)`
			`print(">", _printed_line_jp)`
WIP 18/2/24 2024-02-18 16:11:31 +00:00			`print("<", line, end='')`

Fix until chapter 2 2024-02-18 21:52:25 +00:00			`if _printed_line_jp+'\n' != line:`
Change debug message 2024-02-20 22:19:40 +00:00			`print()`
			`print("NO THAT'S WRONG!")`
Cleaner debugging 2024-02-19 18:30:20 +00:00			`sys.exit(1)`
WIP 18/2/24 2024-02-18 16:11:31 +00:00
			`# Used up all of the structures, this chapter has ended.`
			`# Got to the next one`
			`if len(structure) <= 0:`
			`break`
Organize the project a little bit better 2024-02-16 12:48:18 +00:00
			`else:`
			`outfile.write(line)`