]*)>(.*?)<\/span>/i';

    /**
     * Exact attribute string of a highlight span that marks a kanji term.
     * fromAnki() compares it against the first capture of HIGHLIGHT_PATTERN.
     */
    const HIGHLIGHT_ATTR_KANJI = 'style="color: rgb(255, 78, 8);"';

    /**
     * Returns the id of this note.
     */
    public function getId(): int
    {
        return $this->id;
    }

    /**
     * Tells whether one of the note's terms has exactly the given kanji string.
     *
     * The comparison is strict, so numeric-looking strings such as "10" and
     * "1e1" are not considered equal.
     *
     * @param string $kanji Value compared against each term's `kanji` property.
     */
    public function hasTerm(string $kanji): bool
    {
        foreach ($this->terms as $term) {
            assert($term instanceof Term);

            if ($term->kanji === $kanji) {
                return true;
            }
        }

        return false;
    }

    /**
     * Builds a note from a note-info array.
     *
     * The keys noteId, mod, profile, tags, modelName, cards and fields are
     * read; every entry of `fields` must carry a 'value' key. The 'Notes' and
     * 'VocabFurigana' fields are always read, 'SentKanji' only when no terms
     * could be built from the fields.
     *
     * @param array $noteInfo Note description; presumably one entry of an
     *                        AnkiConnect notesInfo response - confirm against the caller.
     */
    public static function fromAnki(array $noteInfo): self
    {
        $note = new self();
        [
            'noteId' => $note->id,
            'mod' => $note->mod,
            'profile' => $note->profile,
            'tags' => $note->tags,
            'modelName' => $note->model,
            'cards' => $note->cardIds,
        ] = $noteInfo;

        // Only the 'value' of each field entry is kept. PHP arrays preserve
        // insertion order, so the incoming field order is retained without
        // storing the entries' own order information.
        //
        // REVIEW: sorting the entries by their order before discarding it
        // might still be advisable.
        $note->fields = array_map(
            static fn ($field) => $field['value'],
            $noteInfo['fields'],
        );
        $note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);

        // Terms built from the note fields take precedence.
        $note->terms = Term::fromNoteFields($note->fields);

        // Otherwise derive them from the highlighted parts of the sentence.
        if (empty($note->terms)) {
            // 1. Collect every span in the sentence text.
            preg_match_all(
                self::HIGHLIGHT_PATTERN,
                $note->fields['SentKanji'],
                $matches,
                PREG_SET_ORDER,
            );

            // 2. Keep only the spans carrying the kanji highlight attribute.
            foreach ($matches as $match) {
                if ($match[1] === self::HIGHLIGHT_ATTR_KANJI) {
                    $term = new Term();
                    $term->kanji = mb_trim($match[2]);
                    $term->definitionEn = null;
                    $term->definitionJp = null;
                    $note->terms[] = $term;
                }
            }
        }

        // Placeholder readings ('_' or an empty string) become null.
        // NOTE(review): '_' appears twice in the list as seen here; if one
        // entry was meant to be a different character (e.g. a full-width
        // underscore), restore it.
        $readings = array_map(
            static fn ($x) => in_array($x, ['_', '_', ''], true) ? null : $x,
            explode('|', $note->fields['VocabFurigana']),
        );

        // Append the reading from the furigana field to every term that has
        // none yet. Terms are objects, so they are mutated through their
        // handle and no by-reference iteration is needed.
        foreach ($note->terms as $key => $term) {
            if (null === $term->getReading() && isset($readings[$key])) {
                $term->kanji .= '[' . $readings[$key] . ']';
            }
        }

        return $note;
    }

    /**
     * Serialises the note into an array holding its id and the three
     * vocabulary fields, each joined from the note's terms.
     *
     * @return array{id: int, fields: array{VocabKanji: string, VocabFurigana: string, VocabDef: string}}
     */
    public function toAnki(): array
    {
        return [
            'id' => $this->id,
            'fields' => [
                'VocabKanji' => join('|', array_map(
                    fn(Term $x) => $x->getKanji(),
                    $this->terms,
                )),
                // Terms without a reading are written as the '_' placeholder.
                'VocabFurigana' => join('|', array_map(
                    fn(Term $x) => $x->getReading() ?? '_',
                    $this->terms,
                )),
                // NOTE(review): the separator literal below contains a raw
                // line break followed by "\n", exactly as it appears in this
                // view - confirm that this is the intended separator.
                'VocabDef' => join("
\n", array_map(
                    fn(Term $x) => $x->toAnkiVocabDef(),
                    $this->terms,
                )),
            ],
        ];
    }

    public function parseMediaInfo(string $notes): ?array
    {
        $matches = null;

        // Parse the notes fields. It can be in the form of
        // series-name_S01 EP07 (11h22m33s44ms)
        // or
        // movie-name EP (11h22m33s44ms)
        if (1 !== preg_match(
            '/(?[a-z\-_]+)(_S)?(?\d*) EP(?\d*) \((?