# hinamizawa-downporter/src/scripter.py

#!/usr/bin/env python3
from enum import Enum
class TokenType(Enum):
    """Kind of a lexed NScripter script fragment."""

    # All non-dialogue keywords; consecutive commands with no dialogue
    # text between them are collected into a single token.
    COMMAND = 1
    # Double-width dialogue text.
    TEXT = 2
    # Everything from a ';' to the end of the line.
    COMMENT = 3
class ScripterToken():
    """A single lexed fragment of an NScripter script line."""

    def __init__(self, token: str, type: TokenType):
        # Raw text of the fragment and its classification.
        self.token = token
        self.type = type

    def __str__(self):
        # ANSI foreground color per token kind, followed by a reset.
        colors = {
            TokenType.TEXT: '\033[34m',
            TokenType.COMMAND: '\033[32m',
            TokenType.COMMENT: '\033[36m',
        }
        return f'{colors[self.type]}{self.token}\033[0m'

    def __repr__(self):
        return str(self)
def parse_line(line: str) -> list[ScripterToken]:
    """Tokenize one NScripter script line in Japanese mode.

    The original Japanese mode differentiates between single-byte and
    double-byte characters: a run of single-width characters lexes as a
    COMMAND token, a run of double-width characters as a TEXT token, and
    a ';' starts a COMMENT that consumes the rest of the line.

    Returns the tokens in source order. Zero-length tokens (empty line,
    line starting with ';') are not emitted.
    """
    token_list: list[ScripterToken] = []
    current_token = ScripterToken('', TokenType.COMMAND)
    for i, char in enumerate(line):
        # A newline never belongs to any token.
        if char == '\n':
            break
        if char == ';':
            # Flush whatever was being built, then take the remainder of
            # the line as a single comment token.
            if current_token.token:
                token_list.append(current_token)
            # removesuffix only strips a newline that is actually present,
            # unlike a bare [:-1] slice which would eat the last comment
            # character on lines without a trailing newline.
            current_token = ScripterToken(line[i:].removesuffix('\n'),
                                          TokenType.COMMENT)
            break
        if not current_token.token:
            # First character of a token decides its kind.
            current_token.type = (TokenType.TEXT if is_double_width(char)
                                  else TokenType.COMMAND)
        elif current_token.type == TokenType.COMMAND and is_double_width(char):
            # Width flipped: the command token ends, dialogue text begins.
            token_list.append(current_token)
            current_token = ScripterToken('', TokenType.TEXT)
        elif current_token.type == TokenType.TEXT and not is_double_width(char):
            # Width flipped back: dialogue text ends, a command begins.
            token_list.append(current_token)
            current_token = ScripterToken('', TokenType.COMMAND)
        current_token.token += char
    # Append the trailing token of the line, if it holds anything.
    if current_token.token:
        token_list.append(current_token)
    return token_list
def is_double_width(char: str) -> bool:
    """Return True when *char* is treated as double-width by NScripter.

    Every code point above U+00FF counts as double-width — a catch-all
    covering the ranges the format cares about: kanji (U+4E00–U+9FFF),
    hiragana (U+3040–U+309F), katakana (U+30A0–U+30FF), full-width Roman
    characters and symbols (U+FF00–U+FFEF), CJK symbols and punctuation
    including 「」 (U+3000–U+303F), the quotes “ ” (U+201C–U+201D),
    the ellipsis … (U+2026), and ☆ (U+2606).

    Below U+0100 the single exception is '×' (U+00D7), which NScripter
    would otherwise misparse as a command character.
    """
    cp = ord(char[0])
    # The catch-all used to live inside the range loop, where it was
    # loop-invariant and short-circuited every range test above U+00FF;
    # hoisting it keeps the exact same result with one comparison.
    if cp > 0xFF:
        return True
    return cp == 0xD7