feat: More intelligent term detection mechanism supporting plain def.

This commit is contained in:
Dendy 2025-02-06 19:06:41 +09:00
parent 9a59845473
commit ee2525797e
1 changed files with 37 additions and 6 deletions

View File

@ -67,8 +67,39 @@ class Note
$note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
// Set VocabKanji field
//$vocabKanji = explode('', $note->fields['VocabKanji']);
$note->terms = self::parseVocabDef($note->fields['VocabDef']);
$terms = self::parseVocabDef($note->fields['VocabDef']);
if (null !== $terms) {
$note->terms = $terms;
} else {
// Something went wrong when trying to parse the definitions into
// terms, which means the format is non-conforming. If there's only
// one term, that means it's an old one that wasn't updated.
if (
str_contains($note->fields['VocabKanji'], '') or
str_contains($note->fields['VocabKanji'], '|')
) {
dump("ERROR: Multiple vocab kanjis with no proper definition.");
dd($note->fields);
}
if (mb_trim($note->fields['VocabDef']) === '') {
$note->fields['VocabDef'] = '_';
}
// Make the "<def>" turn into "<kanji>:<def>". Select the
// appropriate colon character for each
$separator = '';
if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) {
$separator = ':';
}
$note->fields['VocabDef'] = $note->fields['VocabKanji']
. $separator
. $note->fields['VocabDef'];
$terms = self::parseVocabDef($note->fields['VocabDef']);
$note->terms = $terms ?? dd($note->fields['VocabDef']);
}
// If not defined, find them from the highlighted parts in the sentence
if (empty($note->terms)) {
@ -130,15 +161,15 @@ class Note
];
}
public static function parseVocabDef(string $vocabDef): array
public static function parseVocabDef(string $vocabDef): ?array
{
if (mb_trim($vocabDef) == "") return [];
if (mb_trim($vocabDef) === '') return null;
$terms = [];
foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) {
$term = Term::fromVocabDefLine(strip_tags($line));
if (null === $term) dd("error parsing term", $line);
// Error parsing term, can't parse using vocabDef
if (null === $term) return null;
$terms[] = $term;
};