diff --git a/src/Entity/Note.php b/src/Entity/Note.php
index bd44a8f..32bfbf0 100644
--- a/src/Entity/Note.php
+++ b/src/Entity/Note.php
@@ -67,39 +67,12 @@ class Note
         $note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
 
         // Set VocabKanji field
-        $terms = Term::fromVocabDef($note->fields['VocabDef']);
-        if (null !== $terms) {
-            $note->terms = $terms;
-        } else {
-            // Something went wrong when trying to parse the definitions into
-            // terms, that means its format is non-conforming. If there's only
-            // one term that means that it's an old one that wasn't updated.
-            if (
-                str_contains($note->fields['VocabKanji'], '|') or
-                str_contains($note->fields['VocabKanji'], '|')
-            ) {
-                dump("ERROR: Multiple vocab kanjis with no proper definition.");
-                dd($note->fields);
-            }
-
-            if (mb_trim($note->fields['VocabDef']) === '') {
-                $note->fields['VocabDef'] = '_';
-            }
-
-            // Make the "" turn into ":". Select the
-            // appropriate semicolon character for each
-            $separator = ':';
-            if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) {
-                $separator = ':';
-            }
-
-            $note->fields['VocabDef'] = $note->fields['VocabKanji']
-                . $separator
-                . $note->fields['VocabDef'];
-
-            $terms = Term::fromVocabDef($note->fields['VocabDef']);
-            $note->terms = $terms ?? dd($note->fields['VocabDef']);
-        }
+        // Parse the terms out of the note fields. On failure (null) leave
+        // $note->terms untouched so the highlight-based fallback below runs.
+        $terms = Term::fromNoteFields($note->fields);
+        if (null !== $terms) {
+            $note->terms = $terms;
+        }
 
         // If not defined, find them from the highlighted parts in the sentence
         if (empty($note->terms)) {
diff --git a/src/Entity/Term.php b/src/Entity/Term.php
index df8e70d..36a6374 100644
--- a/src/Entity/Term.php
+++ b/src/Entity/Term.php
@@ -127,18 +127,77 @@ class Term
         dd("Unexpected error, couldn't parse definition line", $vocabDefLine);
     }
 
-    public static function fromVocabDef(string $vocabDef): ?array
+    /**
+     * Builds a Term for a legacy note whose definition field holds only the
+     * definition text, by prefixing that text with its kanji and a separator.
+     *
+     * @return Term|null the result of fromVocabDefLine() on the rebuilt line
+     */
+    private static function fromLegacyLine(string $kanji, string $def): ?Term
     {
-        if (mb_trim($vocabDef) === '') return null;
+        // Blank legacy definitions get the "_" placeholder the previous
+        // implementation used, so the rebuilt line keeps a definition part.
+        if (mb_trim($def) === '') {
+            $def = '_';
+        }
+
+        // Select appropriate separator for the job.
+        // NOTE(review): under the /u modifier [[:alpha:]] presumably matches
+        // kana/kanji too, not just Latin letters -- confirm that is intended.
+        $separator = preg_match('/[[:alpha:]]/u', $def)
+            ? ': '
+            : ':';
+
+        // Stick the kanji at the start and see if it makes sense
+        return self::fromVocabDefLine($kanji . $separator . $def);
+    }
+
+    /**
+     * Parses the terms of a note out of its raw fields.
+     *
+     * The 'VocabDef' field is first read with the modern syntax, one
+     * "kanji<separator>definition" entry per line. If any line fails, the
+     * legacy syntax is tried: 'VocabKanji' and 'VocabDef' are both split on
+     * "|" and paired by position.
+     *
+     * @param array $fields note fields; 'VocabDef' and 'VocabKanji' are read
+     *                      and treated as empty strings when missing
+     * @return Term[]|null the parsed terms, or null when neither syntax fits
+     */
+    public static function fromNoteFields(array $fields): ?array
+    {
+        $vocabDef = $fields['VocabDef'] ?? '';
+        $vocabKanji = $fields['VocabKanji'] ?? '';
+
+        // -------------------- Trying to extract it with the modern syntax ---
+        // 言葉: word
+        // 上げる:上に動くこと。
+        // 雨:水粒を降ること。(rain)
+        // A blank definition is never handed to the line parser (the previous
+        // implementation did not either); it goes straight to the legacy path.
+        $lines = mb_trim($vocabDef) === '' ? [] : preg_split('|\n|', $vocabDef);
 
         $terms = [];
-        foreach (preg_split('|\n|', $vocabDef) as $line) {
-            $term = self::fromVocabDefLine(strip_tags($line));
-            // Error parsing term, can't parse using vocabDef
-            if (null === $term) return null;
-            $terms[] = $term;
+        foreach ($lines as $line) {
+            $terms[] = self::fromVocabDefLine(strip_tags($line));
         };
+        // At least one line, and every line parsed: everything went good
+        if ($terms !== [] && !in_array(null, $terms, true)) return $terms;
 
-        return $terms;
+        // ------------ Extracting failed, try to infer from other syntaxes ---
+        $kanjis = explode('|', $vocabKanji);
+        $defs = explode('|', $vocabDef);
+        // Number of legacy definitions is different from number of kanji
+        if (count($kanjis) !== count($defs)) return null;
+
+        // Pair by position rather than with array_combine(), which merges
+        // duplicate kanji and turns numeric-looking ones into int keys.
+        $terms = [];
+        foreach ($kanjis as $i => $kanji) {
+            $terms[] = self::fromLegacyLine($kanji, $defs[$i]);
+        }
+
+        // Any null means a legacy entry could not be parsed: give up
+        return in_array(null, $terms, true) ? null : $terms;
     }
 }