kanji)['reading']; }

    /**
     * Returns the base text of this term with the bracketed readings removed.
     */
    public function getKanji(): string
    {
        return self::parseFurigana($this->kanji)['kanji'];
    }

    /**
     * Splits furigana notation such as "漢[かん] 字[じ]" into base text and reading.
     *
     * Tokens are separated by a single ASCII space; each token is a run of
     * base text optionally followed by its reading in square brackets. A token
     * without brackets contributes its own text to the reading.
     *
     * @return array{kanji: string, reading: ?string} 'reading' is null when
     *         every token's reading is identical to its base text (for example
     *         when no token carries a bracketed reading).
     */
    public static function parseFurigana(string $furigana): array
    {
        // Per match: 0 = whole token, 1 = base text, 2 = "[reading]", 3 = reading.
        preg_match_all('/([^ \[]+)(\[([^\]]*)\])? ?/', $furigana, $matches, PREG_SET_ORDER);

        $matchedKanji = array_map(static fn (array $m): string => $m[1], $matches);
        // Group 3 is absent from the match when the token has no brackets.
        $matchedReading = array_map(static fn (array $m): string => $m[3] ?? $m[1], $matches);

        return [
            'kanji' => implode('', $matchedKanji),
            // Strict comparison: with `==`, numeric-looking strings such as
            // '10' and '1e1' compare equal and the reading would be dropped.
            'reading' => $matchedKanji === $matchedReading ? null : implode('', $matchedReading),
        ];
    }

    /**
     * Serialises this term into a single vocab-definition line.
     *
     * Produced shapes (U+FF1A is the full-width colon):
     *  - no definition:        "<kanji>:_"
     *  - English only:         "<kanji>: <en>"
     *  - Japanese only:        "<kanji>" U+FF1A "<jp>"
     *  - Japanese and English: "<kanji>" U+FF1A "<jp>(<en>)"
     *
     * The Japanese shapes use the full-width colon so that fromVocabDefLine()
     * can tell them apart from an English-only line.
     */
    public function toAnkiVocabDef(): string
    {
        $hasJp = null !== $this->definitionJp;
        $hasEn = null !== $this->definitionEn;

        $suffix = match (true) {
            $hasJp && $hasEn => "\u{FF1A}" . $this->definitionJp . '(' . $this->definitionEn . ')',
            $hasJp => "\u{FF1A}" . $this->definitionJp,
            $hasEn => ': ' . $this->definitionEn,
            default => ':_',
        };

        return $this->kanji . $suffix;
    }

    /**
     * Parses one vocab-definition line into a Term.
     *
     * The text before the separator is the term (with 「」 converted to []),
     * the text after it is the definition. A full-width colon (U+FF1A) marks a
     * Japanese definition, optionally followed by an English one in
     * parentheses; a line that only contains an ASCII colon is English-only.
     * When both separators are present the full-width colon wins.
     *
     * @return Term|null null when the line contains no separator, or when the
     *         trimmed term is not a string.
     */
    private static function fromVocabDefLine(string $vocabDefLine): ?Term
    {
        $term = new Term();

        // ------------------------------------------------------ Get Kanji ---
        // The two lookups must use different needles; with the same needle the
        // English-only path can never be reached.
        $jpStart = mb_strpos($vocabDefLine, "\u{FF1A}");
        $enStart = mb_strpos($vocabDefLine, ':');

        if (false !== $jpStart) {
            $isJapanese = true;
            $separatorAt = $jpStart;
        } elseif (false !== $enStart) {
            $isJapanese = false;
            $separatorAt = $enStart;
        } else {
            // Can't extract term from definition, it doesn't conform to the
            // established pattern.
            return null;
        }

        // Convert 「this」 into [this] and ideographic spaces (U+3000) into
        // ASCII spaces, which is what parseFurigana() splits tokens on.
        $term->kanji = mb_trim(strtr(mb_substr($vocabDefLine, 0, $separatorAt), [
            '「' => '[',
            '」' => ']',
            "\u{3000}" => ' ',
        ]));
        $def = mb_trim(mb_substr($vocabDefLine, $separatorAt + 1));

        if (!is_string($term->kanji)) {
            return null;
        }

        // -------------------------------------------------- No definition ---
        // An empty definition or a lone underscore placeholder (ASCII or
        // full-width, U+FF3F) means there is nothing to store.
        if ('' === $def || '_' === $def || "\u{FF3F}" === $def) {
            $term->definitionJp = null;
            $term->definitionEn = null;

            return $term;
        }

        // --------------------------------------------------- Only english ---
        if (!$isJapanese) {
            $term->definitionJp = null;
            $term->definitionEn = $def;

            return $term;
        }

        // An opening parenthesis means there's both jp and en.
        $parenStart = mb_strpos($def, '(');

        // -------------------------------------------------- Only Japanese ---
        if (false === $parenStart) {
            $term->definitionJp = $def;
            $term->definitionEn = null;

            return $term;
        }

        // -------------------------------------- Both Japanese and English ---
        $term->definitionJp = mb_trim(mb_substr($def, 0, $parenStart));
        // Length -1 drops the last character, which is assumed to be the
        // closing parenthesis.
        $term->definitionEn = mb_trim(mb_substr($def, $parenStart + 1, -1));

        return $term;
    }

    /**
     * Parses a multi-line vocab definition into a list of terms.
     *
     * Each line has its tags stripped and is parsed on its own. Parsing is
     * all-or-nothing: a single line that cannot be parsed (including a blank
     * one) makes the whole call return null.
     *
     * @return list<Term>|null null when the input is blank or any line fails
     *         to parse.
     */
    public static function fromVocabDef(string $vocabDef): ?array
    {
        if ('' === mb_trim($vocabDef)) {
            return null;
        }

        $terms = [];
        // The delimiter pattern below contains a literal line break.
        foreach (preg_split('|
|', $vocabDef) as $line) {
            $term = self::fromVocabDefLine(strip_tags($line));

            // Error parsing term, can't parse using vocabDef
            if (null === $term) {
                return null;
            }

            $terms[] = $term;
        }

        return $terms;
    }
}