100 lines
3.0 KiB
Python
100 lines
3.0 KiB
Python
#!/usr/bin/env python3
|
||
|
||
from enum import Enum
|
||
|
||
|
||
class TokenType(Enum):
    """Kinds of token a script line is split into.

    COMMAND stands for every non-dialogue keyword. Commands that are
    chained with no text in between are parsed as one single COMMAND
    token rather than one token per command.
    """

    COMMAND = 1
    TEXT = 2
    COMMENT = 3
|
||
|
||
class ScripterToken():
    """A piece of a script line together with the kind of token it is.

    Both attributes are plain and mutable:

    token -- the text of the token
    type  -- the TokenType the text was classified as
    """

    def __init__(self, token: str, type: TokenType):
        self.token = token
        self.type = type

    def __str__(self):
        """Return the token text wrapped in an ANSI colour for its type.

        TEXT is blue, COMMAND is green and COMMENT is cyan, each followed
        by the ANSI reset sequence. A token whose type is none of those
        is returned as plain text instead of failing on an unbound
        colour variable.
        """
        if self.type == TokenType.TEXT:
            color = '\033[34m'
        elif self.type == TokenType.COMMAND:
            color = '\033[32m'
        elif self.type == TokenType.COMMENT:
            color = '\033[36m'
        else:
            # Unknown type: no colour to apply, so emit no escape codes.
            return self.token
        return f'{color}{self.token}\033[0m'

    def __repr__(self):
        """Return the same coloured text as ``str()`` does."""
        return self.__str__()
|
||
|
||
|
||
# Parse and tokenize an Nscripter script line in Japanese mode
|
||
# The original Japanese mode differentiates between single and
|
||
# double byte characters
|
||
def parse_line(line: str) -> list[ScripterToken]:
    """Split one script line into COMMAND, TEXT and COMMENT tokens.

    Characters are classified one by one with ``is_double_width``:
    consecutive double-width characters form a TEXT token and consecutive
    other characters form a COMMAND token. A ``;`` starts a COMMENT token
    that runs to the end of the line. Scanning stops at the first
    newline, which is never part of any token.

    The token being built when the scan ends is always appended, so the
    result is never empty: an empty line yields one empty COMMAND token,
    and a line starting with ``;`` yields an empty COMMAND token followed
    by the COMMENT token.

    line -- the script line, with or without a trailing newline

    Returns the tokens in the order they appear in the line.
    """
    current_token = ScripterToken('', TokenType.COMMAND)
    token_list = []

    for i, char in enumerate(line):
        # Newline does not constitute a token; it only ends the line.
        if char == '\n':
            break

        # Comments signify the end of what should be parsed.
        if char == ';':
            token_list.append(current_token)
            # Cut at the newline if there is one. Slicing with [:-1]
            # would drop the last comment character on a line that has
            # no trailing newline.
            comment = line[i:].split('\n', 1)[0]
            current_token = ScripterToken(comment, TokenType.COMMENT)
            break

        if is_double_width(char):
            char_type = TokenType.TEXT
        else:
            char_type = TokenType.COMMAND

        if not current_token.token:
            # First character of the token decides its type.
            current_token.type = char_type
        elif current_token.type != char_type:
            # Width changed: close the current token and start a new one.
            token_list.append(current_token)
            current_token = ScripterToken('', char_type)

        current_token.token += char

    # Append last token of the line
    token_list.append(current_token)

    return token_list
|
||
|
||
|
||
|
||
|
||
# Inclusive code point ranges that are explicitly listed as double-width.
# Every entry except the one for × lies above 0xFF and is therefore already
# covered by the catch-all check in is_double_width; they are kept as
# documentation of the characters the parser cares about.
_DOUBLE_WIDTH_RANGES = (
    (0x4E00, 0x9FFF),  # Kanji
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
    (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
    (0x201c, 0x201d),  # The characters “ ”
    (0x2026, 0x2026),  # The character …
    (9734, 9734),      # ☆ (Nscripter treats it like a dw character)
    (215, 215),        # × (treated as a command when it shouldn't)
)


def is_double_width(char: str) -> bool:
    """Tell whether a character counts as double-width.

    Only the first character of ``char`` is examined. It is double-width
    when its code point is above 0xFF or falls inside one of the listed
    ranges (which is how × at code point 215 qualifies).

    char -- the character to classify; an empty string is not
            double-width

    Returns True for a double-width character, False otherwise.
    """
    if not char:
        # Nothing to classify; avoids an IndexError on char[0].
        return False

    code_point = ord(char[0])

    # Anything outside the single-byte Latin-1 range is double-width.
    if code_point > 0xFF:
        return True

    return any(start <= code_point <= end
               for start, end in _DOUBLE_WIDTH_RANGES)
|