hinamizawa-downporter/src/orig.py

186 lines
5.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2024-02-18 11:44:16 +00:00
import os
2024-02-19 18:30:20 +00:00
import sys
2024-02-18 11:44:16 +00:00
from unidecode import unidecode
2024-02-18 11:44:16 +00:00
import config
import parser
2024-02-18 16:11:31 +00:00
import flow
2024-02-18 21:52:25 +00:00
import fix
def line_should_be_translated(line: str) -> bool:
    """Return True when *line* is a dialogue line that must be translated.

    A line qualifies in either of two cases:

    * its first character lies in one of the code-point ranges listed
      below (Japanese scripts plus a few punctuation characters), or
    * it starts with one of the text commands ``!s``, ``!w``, ``!d``,
      ``@`` or ``¥`` and contains such a character before the first
      ``;`` (everything from ``;`` onwards is treated as a comment).

    An empty string is never translatable.
    """
    japanese_ranges = (
        (0x4E00, 0x9FFF),  # Kanji
        (0x3040, 0x309F),  # Hiragana
        (0x30A0, 0x30FF),  # Katakana
        (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
        (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
        (0x201c, 0x201c),  # The character “
        (0x2026, 0x2026),  # The character …
    )

    def is_japanese(char: str) -> bool:
        code = ord(char)
        return any(start <= code <= end for start, end in japanese_ranges)

    # Guard against empty input: indexing line[0] would raise IndexError.
    if not line:
        return False

    if is_japanese(line[0]):
        return True

    # Lines that start with a text command only count as dialogue when
    # Japanese text follows the command.
    if line.startswith(('!s', '!w', '!d', '@', '¥')):
        # Ignore the comment part of the line, if any.
        code_part = line.partition(';')[0]
        return any(is_japanese(char) for char in code_part)

    return False
2024-02-18 21:52:25 +00:00
# Zero-based index of the line most recently read from the original script.
# It starts at -1 so that the first line read becomes 0. In this file it is
# only incremented by the reading loops and printed in debug output.
debug_current_line = -1
2024-02-18 16:11:31 +00:00
def process_sections():
    """Copy the original script to ``out.txt``, translating known sections.

    The output file is created inside the directory returned by
    ``config.get('output_path')``; the input path comes from
    ``fix.open_onikakushi()``. Both files use the ``shift_jisx0213``
    encoding.

    Every line is copied verbatim. When a line starting with ``*`` names a
    section that is a key of ``flow.onik``, the lines that follow are handed
    to ``write_translated`` together with the translation files registered
    for that section.
    """
    global debug_current_line

    output_filepath = os.path.join(config.get('output_path'), 'out.txt')

    # Context managers make sure both files are flushed and closed even when
    # an exception (or the sys.exit() in write_translated) unwinds the stack.
    with open(output_filepath, 'w', encoding='shift_jisx0213') as outfile, \
            open(fix.open_onikakushi(), 'r',
                 encoding='shift_jisx0213') as origfile:
        for line in origfile:
            debug_current_line += 1

            # Section headers and ordinary lines are both copied as they are.
            outfile.write(line)
            if not line.startswith('*'):
                continue

            # The section name is the text after '*', up to the first space.
            section_name = line[1:].split(' ', 1)[0].replace('\n', '')
            if section_name in flow.onik:
                print("entering", section_name)
                # write_translated reads from the same file iterator, so
                # this loop resumes after the lines it has consumed.
                write_translated(
                    outfile,
                    origfile,
                    flow.onik[section_name],
                )
def get_symbols(line: str) -> tuple[str, list[str]]:
    """Extract the control symbols that separate the dialogue texts of *line*.

    Recognised symbols are the single characters ``@``, ``/`` and ``¥``,
    the literal ``!sd``, and ``!d``, ``!w`` or ``!s`` followed by any number
    of ASCII digits. Every other character counts as dialogue text.

    Returns:
        A ``(start_symbol, separators)`` pair. ``start_symbol`` is the
        concatenation of all symbols found before the first dialogue
        character (``''`` if there are none). ``separators`` holds one
        entry per run of symbols that follows dialogue text; adjacent
        symbols are merged into a single entry so that each entry separates
        exactly two pieces of dialogue.

    Raises:
        Exception: if a ``!`` starts a command that is not recognised.
    """
    res = []
    start_symbol = ''
    inbetween = 0  # dialogue characters seen since the previous symbol
    i = 0
    while i < len(line):
        if line[i] in ('@', '/', '¥'):
            symbol = line[i]
            i += 1
        elif line.startswith('!sd', i):
            symbol = '!sd'
            i += 3
        elif line[i:i + 2] in ('!d', '!w', '!s'):
            begin = i
            i += 2
            # Consume the numeric argument. Only ASCII digits count, so
            # str.isdigit() (which accepts full-width digits) is not used.
            while i < len(line) and '0' <= line[i] <= '9':
                i += 1
            # Assigned after the loop so that a command ending exactly at
            # the end of the string is still recorded.
            symbol = line[begin:i]
        elif line[i] == '!':
            raise Exception('Unhandled symbol', line)
        else:
            # It's not a symbol, it's a regular character.
            inbetween += 1
            i += 1
            continue

        # Only reached for symbols. Each symbol acts as a separator between
        # dialogue texts, so consecutive symbols are concatenated; otherwise
        # more dialogue would be emitted than the line contains.
        if inbetween > 0:
            res.append(symbol)
        elif not res:
            # Symbols at the start have no dialogue before them. Accumulate
            # them all instead of keeping only the last one.
            start_symbol += symbol
        else:
            res[-1] += symbol
        inbetween = 0

    return start_symbol, res
2024-02-18 19:28:09 +00:00
2024-02-18 16:11:31 +00:00
def write_translated(outfile, origfile, translation_file_paths):
    """Replace the Japanese dialogue of one section with its translation.

    For every path in *translation_file_paths* the file is parsed with
    ``parser.parse_to_csv`` and ``parser.parse_to_structure``. Lines are
    then read from *origfile*: non-dialogue lines are copied to *outfile*
    unchanged, while dialogue lines are rebuilt from the parsed entries
    (English text passed through ``unidecode``) interleaved with the
    symbols found in the original line, prefixed with a backtick.

    As a sanity check the Japanese text of the consumed entries is
    reassembled the same way and compared with the original line; on a
    mismatch the process exits with status 1. Reading moves on to the next
    translation file once all entries of the current one have been used.
    """
    global debug_current_line

    for transfilepath in translation_file_paths:
        print(f'- reading "{transfilepath}"')
        parser.parse_to_csv(transfilepath)
        structure = parser.parse_to_structure(transfilepath)

        for line in origfile:
            debug_current_line += 1

            # Anything that is not dialogue is passed through untouched.
            if not line_should_be_translated(line):
                outfile.write(line)
                continue

            start, symbols = get_symbols(line)
            print("\n-", debug_current_line, transfilepath, [start], symbols)

            outfile.write('`')
            outfile.write(start)

            rebuilt_jp = start
            rebuilt_en = start
            while True:
                entry = structure[0]
                rebuilt_jp += entry.text_jp
                rebuilt_en += entry.text_en
                outfile.write(unidecode(structure.pop(0).text_en))

                if symbols:
                    separator = symbols.pop(0)
                    rebuilt_jp += separator
                    rebuilt_en += separator
                    outfile.write(separator)
                if not symbols:
                    break
            outfile.write('\n')

            print(">", rebuilt_en)
            print(">", rebuilt_jp)
            print("<", line, end='')

            # The reassembled Japanese text must reproduce the source line
            # exactly, otherwise the translation is out of sync.
            if rebuilt_jp + '\n' != line:
                print()
                print("NO THAT'S WRONG!")
                sys.exit(1)

            # All entries of this translation file are used up: this
            # chapter has ended, go to the next file.
            if len(structure) <= 0:
                break