From ee2525797ebab23d8037cb0686fe892f8069576c Mon Sep 17 00:00:00 2001 From: Dendy Faist Date: Thu, 6 Feb 2025 19:06:41 +0900 Subject: [PATCH] feat: More intelligent term detection mechanism supporting plain def. --- src/Entity/Note.php | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/src/Entity/Note.php b/src/Entity/Note.php index 158e90e..be7bde6 100644 --- a/src/Entity/Note.php +++ b/src/Entity/Note.php @@ -67,8 +67,39 @@ class Note $note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']); // Set VocabKanji field - //$vocabKanji = explode('|', $note->fields['VocabKanji']); - $note->terms = self::parseVocabDef($note->fields['VocabDef']); + $terms = self::parseVocabDef($note->fields['VocabDef']); + if (null !== $terms) { + $note->terms = $terms; + } else { + // Something went wrong when trying to parse the definitions into + // terms, that means its format is non-conforming. If there's only + // one term that means that it's an old one that wasn't updated. + if ( + str_contains($note->fields['VocabKanji'], '|') or + str_contains($note->fields['VocabKanji'], '|') + ) { + dump("ERROR: Multiple vocab kanjis with no proper definition."); + dd($note->fields); + } + + if (mb_trim($note->fields['VocabDef']) === '') { + $note->fields['VocabDef'] = '_'; + } + + // Make the "" turn into ":". Select the + // appropriate semicolon character for each + $separator = ':'; + if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) { + $separator = ':'; + } + + $note->fields['VocabDef'] = $note->fields['VocabKanji'] + . $separator + . $note->fields['VocabDef']; + + $terms = self::parseVocabDef($note->fields['VocabDef']); + $note->terms = $terms ?? dd($note->fields['VocabDef']); + } // If not defined, find them from the highlighted parts in the sentence if (empty($note->terms)) { @@ -130,15 +161,15 @@ class Note ]; } - public static function parseVocabDef(string $vocabDef): array + public static function parseVocabDef(string $vocabDef): ?array { - if (mb_trim($vocabDef) == "") return []; + if (mb_trim($vocabDef) === '') return null; $terms = []; - foreach (preg_split('|
|', $vocabDef) as $line) { $term = Term::fromVocabDefLine(strip_tags($line)); - if (null === $term) dd("error parsing term", $line); + // Error parsing term, can't parse using vocabDef + if (null === $term) return null; $terms[] = $term; };