feat: Interpret terms from non-conformant definitions
This commit is contained in:
parent
6fc6307f6e
commit
c19250e29e
|
@ -67,39 +67,7 @@ class Note
|
||||||
$note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
|
$note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
|
||||||
|
|
||||||
// Set VocabKanji field
|
// Set VocabKanji field
|
||||||
$terms = Term::fromVocabDef($note->fields['VocabDef']);
|
$terms = Term::fromNoteFields($note->fields);
|
||||||
if (null !== $terms) {
|
|
||||||
$note->terms = $terms;
|
|
||||||
} else {
|
|
||||||
// Something went wrong when trying to parse the definitions into
|
|
||||||
// terms, that means its format is non-conforming. If there's only
|
|
||||||
// one term that means that it's an old one that wasn't updated.
|
|
||||||
if (
|
|
||||||
str_contains($note->fields['VocabKanji'], '|') or
|
|
||||||
str_contains($note->fields['VocabKanji'], '|')
|
|
||||||
) {
|
|
||||||
dump("ERROR: Multiple vocab kanjis with no proper definition.");
|
|
||||||
dd($note->fields);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mb_trim($note->fields['VocabDef']) === '') {
|
|
||||||
$note->fields['VocabDef'] = '_';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make the "<def>" turn into "<kanji>:<def>". Select the
|
|
||||||
// appropriate semicolon character for each
|
|
||||||
$separator = ':';
|
|
||||||
if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) {
|
|
||||||
$separator = ':';
|
|
||||||
}
|
|
||||||
|
|
||||||
$note->fields['VocabDef'] = $note->fields['VocabKanji']
|
|
||||||
. $separator
|
|
||||||
. $note->fields['VocabDef'];
|
|
||||||
|
|
||||||
$terms = Term::fromVocabDef($note->fields['VocabDef']);
|
|
||||||
$note->terms = $terms ?? dd($note->fields['VocabDef']);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If not defined, find them from the highlighted parts in the sentence
|
// If not defined, find them from the highlighted parts in the sentence
|
||||||
if (empty($note->terms)) {
|
if (empty($note->terms)) {
|
||||||
|
|
|
@ -127,18 +127,46 @@ class Term
|
||||||
dd("Unexpected error, couldn't parse definition line", $vocabDefLine);
|
dd("Unexpected error, couldn't parse definition line", $vocabDefLine);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function fromVocabDef(string $vocabDef): ?array
|
/** Turns a def like "<def>" into "<kanji>:<def>" */
|
||||||
|
private static function fromLegacyLine(string $kanji, string $def): ?Term
|
||||||
{
|
{
|
||||||
if (mb_trim($vocabDef) === '') return null;
|
// Select the appropriate colon separator for the job
|
||||||
|
$separator = preg_match('/[[:alpha:]]/u', $def)
|
||||||
|
? ': '
|
||||||
|
: ':';
|
||||||
|
|
||||||
|
// Stick the kanji at the start and see if it makes sense
|
||||||
|
return Term::fromVocabDefLine($kanji . $separator . $def);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static function fromNoteFields(array $fields): ?array
|
||||||
|
{
|
||||||
|
// -------------------- Trying to extract it with the modern syntax ---
|
||||||
|
// 言葉: word
|
||||||
|
// 上げる:上に動くこと。
|
||||||
|
// 雨:水粒を降ること。(rain)
|
||||||
|
|
||||||
$terms = [];
|
$terms = [];
|
||||||
foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) {
|
foreach (preg_split('|<br ?/?>|', $fields['VocabDef']) as $line) {
|
||||||
$term = self::fromVocabDefLine(strip_tags($line));
|
$terms[] = self::fromVocabDefLine(strip_tags($line));
|
||||||
// Error parsing term, can't parse using vocabDef
|
|
||||||
if (null === $term) return null;
|
|
||||||
$terms[] = $term;
|
|
||||||
};
|
};
|
||||||
|
// If there are no nulls, everything went well
|
||||||
|
if (!in_array(null, $terms, true)) return $terms;
|
||||||
|
|
||||||
return $terms;
|
|
||||||
|
// ------------ Extracting failed, try to infer from other syntaxes ---
|
||||||
|
|
||||||
|
$kanjis = explode('|', $fields['VocabKanji']);
|
||||||
|
$defs = explode('|', $fields['VocabDef']);
|
||||||
|
// Number of legacy definitions is different from number of kanji
|
||||||
|
if (count($kanjis) !== count($defs)) return null;
|
||||||
|
|
||||||
|
$terms = [];
|
||||||
|
foreach (array_combine($kanjis, $defs) as $kanji => $def) {
|
||||||
|
$terms[] = self::fromLegacyLine($kanji, $def);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search for nulls, if found, it's owari da
|
||||||
|
return in_array(null, $terms, true) ? null : $terms;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue