feat: More intelligent term detection mechanism supporting plain def.
This commit is contained in:
parent
9a59845473
commit
ee2525797e
|
@ -67,8 +67,39 @@ class Note
|
||||||
$note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
|
$note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
|
||||||
|
|
||||||
// Set VocabKanji field
|
// Set VocabKanji field
|
||||||
//$vocabKanji = explode('|', $note->fields['VocabKanji']);
|
$terms = self::parseVocabDef($note->fields['VocabDef']);
|
||||||
$note->terms = self::parseVocabDef($note->fields['VocabDef']);
|
if (null !== $terms) {
|
||||||
|
$note->terms = $terms;
|
||||||
|
} else {
|
||||||
|
// Something went wrong when trying to parse the definitions into
|
||||||
|
// terms, that means its format is non-conforming. If there's only
|
||||||
|
// one term, that means it's an old one that wasn't updated.
|
||||||
|
if (
|
||||||
|
str_contains($note->fields['VocabKanji'], '|') or
|
||||||
|
str_contains($note->fields['VocabKanji'], '|')
|
||||||
|
) {
|
||||||
|
dump("ERROR: Multiple vocab kanjis with no proper definition.");
|
||||||
|
dd($note->fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mb_trim($note->fields['VocabDef']) === '') {
|
||||||
|
$note->fields['VocabDef'] = '_';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make the "<def>" turn into "<kanji>:<def>". Select the
|
||||||
|
// appropriate colon character for each definition.
|
||||||
|
$separator = ':';
|
||||||
|
if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) {
|
||||||
|
$separator = ':';
|
||||||
|
}
|
||||||
|
|
||||||
|
$note->fields['VocabDef'] = $note->fields['VocabKanji']
|
||||||
|
. $separator
|
||||||
|
. $note->fields['VocabDef'];
|
||||||
|
|
||||||
|
$terms = self::parseVocabDef($note->fields['VocabDef']);
|
||||||
|
$note->terms = $terms ?? dd($note->fields['VocabDef']);
|
||||||
|
}
|
||||||
|
|
||||||
// If not defined, find them from the highlighted parts in the sentence
|
// If not defined, find them from the highlighted parts in the sentence
|
||||||
if (empty($note->terms)) {
|
if (empty($note->terms)) {
|
||||||
|
@ -130,15 +161,15 @@ class Note
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function parseVocabDef(string $vocabDef): array
|
public static function parseVocabDef(string $vocabDef): ?array
|
||||||
{
|
{
|
||||||
if (mb_trim($vocabDef) == "") return [];
|
if (mb_trim($vocabDef) === '') return null;
|
||||||
|
|
||||||
$terms = [];
|
$terms = [];
|
||||||
|
|
||||||
foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) {
|
foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) {
|
||||||
$term = Term::fromVocabDefLine(strip_tags($line));
|
$term = Term::fromVocabDefLine(strip_tags($line));
|
||||||
if (null === $term) dd("error parsing term", $line);
|
// Error parsing term, can't parse using vocabDef
|
||||||
|
if (null === $term) return null;
|
||||||
$terms[] = $term;
|
$terms[] = $term;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue