kanji)['reading']; }

    /**
     * Returns the written form of the term with every bracketed reading
     * removed (the 'kanji' part of parseFurigana()).
     */
    public function getKanji(): string
    {
        return self::parseFurigana($this->kanji)['kanji'];
    }

    /**
     * Splits a furigana string such as "食[た]べる" into its written form and
     * its reading.
     *
     * A token is a run of characters containing neither a space nor '[',
     * optionally followed by a reading in square brackets and one space.
     * A token without brackets reads as itself.
     *
     * @return array{kanji: string, reading: ?string} 'reading' is null when
     *     every token reads exactly as it is written.
     */
    public static function parseFurigana(string $furigana): array
    {
        // Per match -- 0: whole token, 1: written form (kanji/hiragana),
        // 2: "[reading]" with brackets, 3: reading alone.
        preg_match_all('/([^ \[]+)(\[([^\]]*)\])? ?/', $furigana, $matches, PREG_SET_ORDER);

        $matchedKanji = array_map(fn($x) => $x[1], $matches);
        // Groups 2 and 3 are missing from a match whose token has no brackets,
        // hence the fallback to the written form.
        $matchedReading = array_map(fn($x) => $x[3] ?? $x[1], $matches);

        return [
            'kanji' => join('', $matchedKanji),
            // Strict comparison on purpose: with `==` numeric-looking tokens
            // such as "10" and "1e1" compare equal and the reading was lost.
            'reading' => $matchedKanji === $matchedReading ? null : join('', $matchedReading),
        ];
    }

    /**
     * Serialises the term as one definition line.
     *
     * Output shapes: "kanji:_" (no definition), "kanji: en" (English only),
     * "kanji:jp" (Japanese only) and "kanji:jp(en)" (both).
     */
    public function toAnkiVocabDef()
    {
        $ret = '' . $this->kanji;

        $ret .= match ([null !== $this->definitionJp, null !== $this->definitionEn]) {
            [false, false] => ':_',
            [false, true] => ': ' . $this->definitionEn,
            [true, false] => ':' . $this->definitionJp,
            [true, true] => ':' . $this->definitionJp . '(' . $this->definitionEn . ')',
        };

        return $ret;
    }

    /**
     * Parses one "kanji:definition" line into a Term.
     *
     * The text before the first colon becomes the kanji (with 「」 turned into
     * []). The rest is read as: empty or "_" (no definition), "jp" (Japanese
     * only) or "jp(en)" (Japanese followed by English in parentheses).
     *
     * @return ?Term null when the line contains no colon.
     */
    private static function fromVocabDefLine(string $vocabDefLine): ?Term
    {
        $term = new Term();

        // ------------------------------------------------------ Get Kanji ---
        // NOTE(review): both searches use the same ':' needle, so $enStart can
        // never succeed where $jpStart fails; the elseif below and the
        // "only English" branch are unreachable as written. Presumably one of
        // the two was meant to be a different (full-width?) colon -- confirm.
        $jpStart = mb_strpos($vocabDefLine, ':');
        $enStart = mb_strpos($vocabDefLine, ':');

        // The kanji is whatever precedes the separator. After the split the
        // matching *Start variable is reset to 0 and only serves as a
        // "this separator was found" flag.
        if (false !== $jpStart) {
            $term->kanji = mb_substr($vocabDefLine, 0, $jpStart);
            $def = mb_substr($vocabDefLine, $jpStart + 1, null);
            $jpStart = 0;
        } elseif (false !== $enStart) {
            $term->kanji = mb_substr($vocabDefLine, 0, $enStart);
            $def = mb_substr($vocabDefLine, $enStart + 1, null);
            $enStart = 0;
        } else {
            // Can't extract a term: the line doesn't conform to the
            // established pattern.
            return null;
        }

        // Convert 「this」 into [this]
        $term->kanji = mb_trim(strtr($term->kanji, [
            '「' => '[',
            '」' => ']',
            // NOTE(review): maps a space to itself as written; presumably the
            // key was meant to be a full-width space -- confirm.
            ' ' => ' ',
        ]));
        $def = mb_trim($def);

        // NOTE(review): mb_trim() returns a string, so this guard does not
        // look like it can fire -- confirm what it was meant to reject.
        if (!is_string($term->kanji)) {
            return null;
        }

        // -------------------------------------------------- No definition ---
        if ($def === '' || $def === '_') {
            $term->definitionJp = null;
            $term->definitionEn = null;
            return $term;
        }

        // An opening parenthesis means there is both Japanese and English.
        $parentStart = mb_strpos($def, '(');

        // -------------------------------------------------- Only Japanese ---
        if (false !== $jpStart && false === $parentStart) {
            $term->definitionJp = mb_trim($def);
            $term->definitionEn = null;
            return $term;
        }

        // -------------------------------------- Both Japanese and English ---
        if (false !== $jpStart && false !== $parentStart) {
            $term->definitionJp = mb_trim(mb_substr($def, 0, $parentStart));

            $english = mb_substr($def, $parentStart + 1);
            // Drop the closing parenthesis only when it is really there, so an
            // unclosed "jp(en" no longer loses its last character.
            if (str_ends_with($english, ')')) {
                $english = mb_substr($english, 0, -1);
            }
            $term->definitionEn = mb_trim($english);
            return $term;
        }

        // --------------------------------------------------- Only English ---
        if (false !== $enStart) {
            $term->definitionJp = null;
            $term->definitionEn = mb_trim($def);
            return $term;
        }

        // ------------------------------------------------- Invalid syntax ---
        // Fails loudly with an exception instead of a debug dump-and-die.
        throw new \UnexpectedValueException(
            "Unexpected error, couldn't parse definition line: " . $vocabDefLine
        );
    }

    /**
     * Joins a legacy kanji/definition pair into a single "kanji:definition"
     * line and parses it with fromVocabDefLine().
     */
    private static function fromLegacyLine(string $kanji, string $def): ?Term
    {
        // ': ' when the definition contains a letter, ':' otherwise.
        // NOTE(review): with the `u` modifier [[:alpha:]] is presumably
        // Unicode-aware and would match kana/kanji as well as Latin letters --
        // confirm this is the intended test.
        $separator = preg_match('/[[:alpha:]]/u', $def) ? ': ' : ':';

        // Stick the kanji at the start and see if it makes sense
        return Term::fromVocabDefLine($kanji . $separator . $def);
    }

    /**
     * Extracts every term described by a note's fields.
     *
     * The modern syntax is tried first, one term per line of 'VocabDef':
     *
     *     言葉: word
     *     上げる:上に動くこと。
     *     雨:水粒を降ること。(rain)
     *
     * If any line fails to parse, 'VocabKanji' and 'VocabDef' are instead read
     * as parallel '|'-separated lists (legacy syntax).
     *
     * @param array $fields reads 'VocabDef', and 'VocabKanji' on fallback.
     * @return ?array list of Term, or null when neither syntax applies.
     */
    public static function fromNoteFields(array $fields): ?array
    {
        // -------------------- Trying to extract it with the modern syntax ---
        // NOTE(review): the delimiter is a bare line feed while each line is
        // passed through strip_tags(); confirm the field is not separated by
        // <br> tags instead.
        $terms = [];
        foreach (preg_split("|\n|", $fields['VocabDef']) as $line) {
            $terms[] = self::fromVocabDefLine(strip_tags($line));
        }

        // If there's no nulls, everything went good
        if (!in_array(null, $terms, true)) {
            return $terms;
        }

        // ------------ Extracting failed, try to infer from other syntaxes ---
        $kanjis = explode('|', $fields['VocabKanji']);
        $defs = explode('|', $fields['VocabDef']);

        // Number of legacy definitions is different from number of kanji
        if (count($kanjis) !== count($defs)) {
            return null;
        }

        // Pair by position. Keying by kanji (array_combine) silently dropped
        // repeated kanji and turned numeric-looking kanji into int keys.
        $terms = [];
        foreach ($kanjis as $index => $kanji) {
            $terms[] = self::fromLegacyLine($kanji, $defs[$index]);
        }

        // Any null means the legacy syntax failed as well
        return in_array(null, $terms, true) ? null : $terms;
    }
}