]*)>(.*?)<\/span>/i';
// Attribute string identifying a span that highlights a term's kanji.
// fromAnki() compares it strictly (===) against the first capture group of
// HIGHLIGHT_PATTERN, so it has to match the captured text exactly.
const HIGHLIGHT_ATTR_KANJI = 'style="color: rgb(255, 78, 8);"';
/**
 * Returns the identifier stored on this note.
 *
 * @return int The value of the `id` property.
 */
public function getId(): int
{
    return $this->id;
}
/**
 * Tells whether one of this note's terms has exactly the given kanji.
 *
 * The lookup compares against each term's raw `kanji` property, which
 * fromAnki() may have suffixed with a bracketed reading.
 *
 * @param string $kanji Text to look for.
 *
 * @return bool True when at least one term matches exactly, false otherwise.
 */
public function hasTerm(string $kanji): bool
{
    foreach ($this->terms as $term) {
        assert($term instanceof Term);

        // Strict comparison on purpose: with `==` PHP compares two numeric
        // strings by value, so e.g. "10" and "1e1" would count as the same
        // term.
        if ($term->kanji === $kanji) {
            return true;
        }
    }

    return false;
}
/**
 * Builds a note from a note info array as delivered by Anki.
 *
 * The array must provide the keys `noteId`, `mod`, `profile`, `tags`,
 * `modelName`, `cards` and `fields`; every entry of `fields` must carry a
 * `value`. The fields `Notes` and `VocabFurigana` are always read, and
 * `SentKanji` is read when no terms could be built from the fields.
 *
 * @param array $noteInfo Note description with the keys listed above.
 *
 * @return self The populated note.
 */
public static function fromAnki(array $noteInfo): self
{
    $note = new self();

    [
        'noteId' => $note->id,
        'mod' => $note->mod,
        'profile' => $note->profile,
        'tags' => $note->tags,
        'modelName' => $note->model,
        'cards' => $note->cardIds,
    ] = $noteInfo;

    // Every entry of the fields array comes with an order next to its
    // value. PHP arrays are ordered dictionaries and already keep the
    // incoming order, so the order value is simply dropped here.
    //
    // REVIEW: Having said that, sorting the array by that order before
    // discarding it might be advisable.
    $note->fields = array_map(
        static fn($field) => $field['value'],
        $noteInfo['fields'],
    );
    $note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);

    // Build the terms from the note fields.
    $note->terms = Term::fromNoteFields($note->fields);

    // If none are defined, derive them from the highlighted parts of the
    // sentence.
    if (empty($note->terms)) {
        // 1. Collect every span in the text.
        preg_match_all(
            self::HIGHLIGHT_PATTERN,
            $note->fields['SentKanji'],
            $matches,
            PREG_SET_ORDER,
        );

        // 2. Keep only the spans carrying the kanji highlight attribute.
        foreach ($matches as $match) {
            if ($match[1] !== self::HIGHLIGHT_ATTR_KANJI) {
                continue;
            }

            $term = new Term();
            $term->kanji = mb_trim($match[2]);
            $term->definitionEn = null;
            $term->definitionJp = null;
            $note->terms[] = $term;
        }
    }

    // Placeholder ('_') and empty readings become null. explode() only
    // yields strings, so the strict membership test is safe.
    $readings = array_map(
        static fn($reading) => in_array($reading, ['_', ''], true) ? null : $reading,
        explode('|', $note->fields['VocabFurigana']),
    );

    // Append the reading from the furigana field to every term that has
    // none yet. Terms are objects, so iterating by value still updates the
    // instances held in $note->terms and leaves no dangling reference
    // behind the loop.
    foreach ($note->terms as $index => $term) {
        $reading = $readings[$index] ?? null;

        if (null === $term->getReading() && null !== $reading) {
            $term->kanji .= '[' . $reading . ']';
        }
    }

    return $note;
}
/**
 * Converts this note into an array holding its id and its vocabulary
 * fields.
 *
 * `VocabKanji` and `VocabFurigana` are the terms' kanji and readings
 * joined with `|` (a missing reading is written as `_`); `VocabDef` is the
 * terms' definitions joined with a space followed by a newline.
 *
 * @return array Array with the keys `id` and `fields`.
 */
public function toAnki(): array
{
    $vocabKanji = implode('|', array_map(
        static fn(Term $term) => $term->getKanji(),
        $this->terms,
    ));

    $vocabFurigana = implode('|', array_map(
        static fn(Term $term) => $term->getReading() ?? '_',
        $this->terms,
    ));

    $vocabDef = implode(" \n", array_map(
        static fn(Term $term) => $term->toAnkiVocabDef(),
        $this->terms,
    ));

    return [
        'id' => $this->id,
        'fields' => [
            'VocabKanji' => $vocabKanji,
            'VocabFurigana' => $vocabFurigana,
            'VocabDef' => $vocabDef,
        ],
    ];
}
public function parseMediaInfo(string $notes): ?array
{
$matches = null;
// Parse the notes fields. It can be in the form of
// series-name_S01 EP07 (11h22m33s44ms)
// or
// movie-name EP (11h22m33s44ms)
if (1 !== preg_match(
'/(?[a-z\-_]+)(_S)?(?\d*) EP(?\d*) \((?