feat: More intelligent term detection mechanism supporting plain def.
This commit is contained in:
parent
9a59845473
commit
ee2525797e
|
@ -67,8 +67,39 @@ class Note
|
|||
$note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
|
||||
|
||||
// Set VocabKanji field
|
||||
//$vocabKanji = explode('|', $note->fields['VocabKanji']);
|
||||
$note->terms = self::parseVocabDef($note->fields['VocabDef']);
|
||||
$terms = self::parseVocabDef($note->fields['VocabDef']);
|
||||
if (null !== $terms) {
|
||||
$note->terms = $terms;
|
||||
} else {
|
||||
// Something went wrong when trying to parse the definitions into
|
||||
// terms, which means their format is non-conforming. If there's only
|
||||
// one term that means that it's an old one that wasn't updated.
|
||||
if (
|
||||
str_contains($note->fields['VocabKanji'], '|') or
|
||||
str_contains($note->fields['VocabKanji'], '|')
|
||||
) {
|
||||
dump("ERROR: Multiple vocab kanjis with no proper definition.");
|
||||
dd($note->fields);
|
||||
}
|
||||
|
||||
if (mb_trim($note->fields['VocabDef']) === '') {
|
||||
$note->fields['VocabDef'] = '_';
|
||||
}
|
||||
|
||||
// Make the "<def>" turn into "<kanji>:<def>". Select the
|
||||
// appropriate colon character for each
|
||||
$separator = ':';
|
||||
if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) {
|
||||
$separator = ':';
|
||||
}
|
||||
|
||||
$note->fields['VocabDef'] = $note->fields['VocabKanji']
|
||||
. $separator
|
||||
. $note->fields['VocabDef'];
|
||||
|
||||
$terms = self::parseVocabDef($note->fields['VocabDef']);
|
||||
$note->terms = $terms ?? dd($note->fields['VocabDef']);
|
||||
}
|
||||
|
||||
// If not defined, find them from the highlighted parts in the sentence
|
||||
if (empty($note->terms)) {
|
||||
|
@ -130,15 +161,15 @@ class Note
|
|||
];
|
||||
}
|
||||
|
||||
public static function parseVocabDef(string $vocabDef): array
|
||||
public static function parseVocabDef(string $vocabDef): ?array
|
||||
{
|
||||
if (mb_trim($vocabDef) == "") return [];
|
||||
if (mb_trim($vocabDef) === '') return null;
|
||||
|
||||
$terms = [];
|
||||
|
||||
foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) {
|
||||
$term = Term::fromVocabDefLine(strip_tags($line));
|
||||
if (null === $term) dd("error parsing term", $line);
|
||||
// Error parsing term, can't parse using vocabDef
|
||||
if (null === $term) return null;
|
||||
$terms[] = $term;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue