#!/usr/bin/env python3
"""Tokenizer for NScripter script lines written in Japanese mode."""

from enum import Enum


class TokenType(Enum):
    """Kinds of token produced by :func:`parse_line`."""

    # Command tokens represent all non-dialogue keywords.
    # If commands are chained without text in between,
    # they are parsed as a single token.
    COMMAND = 1
    TEXT = 2
    COMMENT = 3


class ScripterToken():
    """A piece of a script line together with its :class:`TokenType`."""

    # ANSI colour escape used when printing a token of each type.
    _COLORS = {
        TokenType.TEXT: '\033[34m',
        TokenType.COMMAND: '\033[32m',
        TokenType.COMMENT: '\033[36m',
    }

    def __init__(self, token: str, type: TokenType):
        self.token = token
        self.type = type

    def __str__(self):
        """Return the token text wrapped in the ANSI colour for its type."""
        # Fall back to no colour instead of failing on an unexpected type.
        color = self._COLORS.get(self.type, '')
        return f'{color}{self.token}\033[0m'

    def __repr__(self):
        return self.__str__()


# Code point ranges listed explicitly as double-width.
_DOUBLE_WIDTH_RANGES = (
    (0x4E00, 0x9FFF),  # Kanji
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    (0xFF00, 0xFFEF),  # Full-width Roman characters and symbols
    (0x3000, 0x303F),  # CJK symbols and punctuation (including 「」)
    (0x201c, 0x201d),  # The characters “ ”
    (0x2026, 0x2026),  # The character …
    (9734, 9734),      # ☆ (Nscripter treats it like a dw character)
    (215, 215),        # × (treated as a command when it shouldnt)
)


def parse_line(line: str) -> list[ScripterToken]:
    """Parse and tokenize an NScripter script line in Japanese mode.

    The original Japanese mode differentiates between single- and
    double-byte characters: runs of double-width characters become TEXT
    tokens and runs of any other characters become COMMAND tokens.  A ``;``
    starts a COMMENT token that extends to the end of the line.

    Parsing stops at the first newline, which is never part of a token.

    The returned list always contains at least one token: an empty line
    yields a single empty COMMAND token, and a line that starts with ``;``
    yields an empty COMMAND token followed by the COMMENT token.
    """
    current_token = ScripterToken('', TokenType.COMMAND)
    token_list = []

    for i, char in enumerate(line):
        # Newline does not constitute a token; nothing after it is parsed.
        if char == '\n':
            break

        # Comments signify the end of what should be parsed.
        if char == ';':
            token_list.append(current_token)
            # Drop the trailing newline only if there is one, so a final
            # line without a newline does not lose its last character.
            comment = line[i:].removesuffix('\n')
            current_token = ScripterToken(comment, TokenType.COMMENT)
            break

        char_type = (
            TokenType.TEXT if is_double_width(char) else TokenType.COMMAND
        )
        if not current_token.token:
            # First character of the token decides its type.
            current_token.type = char_type
        elif current_token.type != char_type:
            # Character width changed: close this token and start a new one.
            token_list.append(current_token)
            current_token = ScripterToken('', char_type)
        current_token.token += char

    # Append last token of the line.
    token_list.append(current_token)
    return token_list


def is_double_width(char: str) -> bool:
    """Return True if the first character of *char* counts as double-width.

    Every code point above 0xFF is treated as double-width, as is any code
    point inside ``_DOUBLE_WIDTH_RANGES``.  Of the listed ranges only ``×``
    (215) lies at or below 0xFF, so the other entries are already covered by
    the first rule.  An empty string is not double-width.
    """
    if not char:
        return False
    code = ord(char[0])
    if code > 0xFF:
        return True
    return any(start <= code <= end for start, end in _DOUBLE_WIDTH_RANGES)