feat: Add Korean cards & Korean production card creation

This commit is contained in:
Dendy 2025-09-28 09:48:17 +02:00
parent 8f24f23130
commit bc2b3ac0b4
9 changed files with 360 additions and 99 deletions

View File

@ -0,0 +1,72 @@
<?php
namespace App\Command;
use App\Entity\KoreanProductionNote;
use App\Entity\KoreanSentenceNote;
use App\Service\AnkiService;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Console command that creates Korean production notes out of the terms found
 * in the existing Korean sentence notes.
 *
 * Every term of every sentence note is visited in order; a production note is
 * created for each term that does not have one yet, until `count` new notes
 * have been collected.
 */
#[AsCommand('app:create:korean:production', 'Create new Korean production Anki cards')]
class CreateKoreanProductionCommand extends Command
{
    public function __construct(
        private AnkiService $ankiService,
    ) {
        parent::__construct();
    }

    protected function configure(): void
    {
        $this->addArgument(
            'count',
            InputArgument::REQUIRED,
            'Amount of cards to make',
        );
    }

    /**
     * @return int Command::INVALID when `count` is not a positive integer,
     *             Command::FAILURE when at least one note could not be added,
     *             Command::SUCCESS otherwise
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        // Validate before talking to Anki: the collection loop below only
        // checks the limit after appending, so a limit below 1 would still
        // create one note.
        $limit = intval($input->getArgument('count'));
        if ($limit < 1) {
            $output->writeln('<error>The "count" argument must be a positive integer.</error>');

            return Command::INVALID;
        }

        $sentenceNotes = $this->ankiService->getAllFromClass(KoreanSentenceNote::class);
        $productionNotes = $this->ankiService->getAllFromClass(KoreanProductionNote::class);

        // Set of the terms that already have a production note
        $knownTerms = [];
        foreach ($productionNotes as $productionNote) {
            $knownTerms[$productionNote->getTerm()->getKanji()] = true;
        }

        // Collect the new notes, marking each term as known so the same term
        // appearing in several sentences only produces one note
        $newNotes = [];
        foreach ($sentenceNotes as $sentenceNote) {
            foreach ($sentenceNote->getTerms() as $term) {
                $termStr = $term->getKanji();
                if (isset($knownTerms[$termStr])) {
                    continue;
                }

                $knownTerms[$termStr] = true;
                $newNotes[] = KoreanProductionNote::fromNote($sentenceNote, $term);
                if (count($newNotes) >= $limit) {
                    break 2;
                }
            }
        }

        // addNote() reports success with a boolean, keep track of the failures
        $failed = 0;
        foreach ($newNotes as $newNote) {
            if (!$this->ankiService->addNote($newNote)) {
                $failed++;
            }
        }
        if ($failed > 0) {
            $output->writeln(sprintf(
                '<error>%d of %d notes could not be added.</error>',
                $failed,
                count($newNotes),
            ));
        }

        $output->writeln(sprintf('max usage: %0.2f MiB', memory_get_peak_usage() / 1024 / 1024));
        $output->writeln(sprintf('current usage: %0.2f MiB', memory_get_usage() / 1024 / 1024));

        return $failed > 0 ? Command::FAILURE : Command::SUCCESS;
    }
}

View File

@ -0,0 +1,75 @@
<?php
namespace App\Entity;
/**
 * Note of the "Korean production" Anki model: a sentence in which a single
 * term is highlighted, together with that term's definition and audio.
 */
class KoreanProductionNote extends Note
{
    public const MODEL_NAME = 'Korean production';
    public const DECK = '한국어::받아쓰기';

    private ?array $mediaInfo = [];
    private ?Term $term = null;

    /**
     * Build a new, not yet stored, production note from an existing note.
     *
     * The properties of $origNote that are accessible from this class are
     * copied over, then the term-related fields are set from $term and the
     * identity of the note (id, model, card ids) is reset.
     */
    public static function fromNote(Note $origNote, Term $term): self
    {
        $note = new self();
        foreach (get_object_vars($origNote) as $prop => $value) {
            $note->$prop = $value;
        }

        // Related fields are updated using the setter
        $note->setTerm($term);

        // Reset relations and basic data
        $note->id = null;
        $note->model = self::MODEL_NAME;
        $note->cardIds = [];

        return $note;
    }

    // -------------------------------------------------- Getters & setters ---
    /** @throws \LogicException When the note has no term yet */
    public function getTerm(): Term
    {
        return $this->term
            ?? throw new \LogicException('The production note has no term set');
    }

    /**
     * Set the term of the note, updating the vocabulary fields and
     * highlighting the term in the "Sent" field.
     */
    public function setTerm(Term $term): static
    {
        $this->fields['Vocab'] = $term->getKanji();
        $this->fields['VocabDef'] = $term->toAnkiVocabDef();
        // Term::$audio is a typed property without a default value: reading it
        // before it was assigned throws, `??` covers that case as well as null
        $this->fields['VocabAudio'] = $term->audio ?? '';
        $this->fields['Sent'] = Note::stringHighlight(
            $this->fields['Sent'] ?? '',
            $term->getKanji(),
        );
        $this->term = $term;

        return $this;
    }

    // ------------------------------------------------------- Anki-related ---
    /**
     * @param array<string, string> $noteInfo
     * @throws \Exception When the note is of another model or has no term
     */
    public static function fromAnki(array $noteInfo): static
    {
        $note = parent::fromAnki($noteInfo);
        if ($note->getModel() !== self::MODEL_NAME) {
            throw new \Exception('Trying to parse wrong model');
        }

        // A note without a "Notes" field simply has no media info
        $note->mediaInfo = Note::parseMediaInfo($note->fields['Notes'] ?? '');

        // A production note is about exactly one term: the first one found
        $note->term = Term::fromNoteFields($note->fields)[0] ?? null;
        if ($note->term === null) {
            throw new \Exception("Couldn't get term for Korean production card");
        }

        return $note;
    }
}

View File

@ -0,0 +1,123 @@
<?php
namespace App\Entity;
/**
 * Note of the "Korean sentences" Anki model: a sentence and the list of terms
 * that appear in it.
 */
class KoreanSentenceNote extends Note
{
    public const MODEL_NAME = 'Korean sentences';

    private ?array $mediaInfo = [];
    /** @var list<Term> */
    private array $terms = [];

    // -------------------------------------------------- Getters & setters ---
    /** @return list<Term> */
    public function getTerms(): array
    {
        return $this->terms;
    }

    /** @param list<Term> $terms */
    public function setTerms(array $terms): static
    {
        $this->terms = $terms;

        return $this;
    }

    // ------------------------------------------------------- Anki-related ---
    /**
     * @param array<string, string> $noteInfo
     * @throws \Exception When the note is of another model
     */
    public static function fromAnki(array $noteInfo): static
    {
        $note = parent::fromAnki($noteInfo);
        if ($note->getModel() !== self::MODEL_NAME) {
            throw new \Exception('Trying to parse wrong model');
        }

        // A note without a "Notes" field simply has no media info
        $note->mediaInfo = Note::parseMediaInfo($note->fields['Notes'] ?? '');

        // Read the terms from the vocabulary fields
        $note->terms = Term::fromNoteFields($note->fields);

        // If unable to, create them from the highlighted parts in the sentence
        if ($note->terms === []) {
            foreach ($note->getHighlightedKanji() as $highlighted) {
                $term = new Term();
                $term->kanji = $highlighted;
                $term->definitionEn = null;
                $term->definitionJp = null;
                // Typed properties without default must be assigned before
                // they are read, e.g. when a production note is built from
                // this term
                $term->audio = null;
                $note->terms[] = $term;
            }
        }

        return $note;
    }

    /**
     * Serialise the note, with the vocabulary fields rebuilt from the terms.
     *
     * NOTE(review): array_merge() replaces a whole 'fields' entry coming from
     * parent::toAnki() with the three fields below — confirm this is intended.
     * NOTE(review): these are the field names of the Japanese model — confirm
     * the "Korean sentences" model uses the same ones.
     */
    public function toAnki(): array
    {
        return array_merge(parent::toAnki(), [
            'fields' => [
                'VocabKanji' => join('', array_map(
                    static fn(Term $x) => $x->getKanji(),
                    $this->terms,
                )),
                'VocabFurigana' => join('', array_map(
                    static fn(Term $x) => $x->getReading() ?? '_',
                    $this->terms,
                )),
                'VocabDef' => join("<br>\n", array_map(
                    static fn(Term $x) => $x->toAnkiVocabDef(),
                    $this->terms,
                )),
            ],
        ]);
    }

    // ---------------------------------------------------- Derived methods ---
    /** Whether one of the terms of the note is exactly $kanji */
    public function hasTerm(string $kanji): bool
    {
        foreach ($this->terms as $term) {
            if ($term->kanji === $kanji) {
                return true;
            }
        }

        return false;
    }

    /** Whether the sentence contains a part highlighted as a term */
    public function isSentKanjiHighlighted(): bool
    {
        return str_contains(
            $this->getSentence(),
            self::HIGHLIGHT_ATTR_KANJI,
        );
    }

    /**
     * Return the highlighted terms of the sentence.
     *
     * @return list<string>
     */
    public function getHighlightedKanji(): array
    {
        // 1. Get all spans in the text
        $matches = [];
        preg_match_all(
            self::HIGHLIGHT_PATTERN,
            $this->getSentence(),
            $matches,
            PREG_SET_ORDER,
        );

        // 2. Keep the ones that carry the term highlight attribute
        $highlighted = [];
        foreach ($matches as $match) {
            if ($match[1] === self::HIGHLIGHT_ATTR_KANJI) {
                $highlighted[] = mb_trim($match[2]);
            }
        }

        return $highlighted;
    }

    /**
     * The sentence of the note.
     *
     * "SentKanji" is read first; "Sent" is the field the Korean production
     * note reads from the fields copied out of a sentence note. A note with
     * neither yields an empty sentence instead of passing null to the string
     * functions.
     */
    private function getSentence(): string
    {
        return $this->fields['SentKanji'] ?? $this->fields['Sent'] ?? '';
    }
}

View File

@ -98,7 +98,7 @@ class Note
// -------------------------------------------------- Utility functions ---
protected static function stringHighlight(string $haystack, string $needle)
protected static function stringHighlight(string $haystack, string $needle): string
{
$replace = sprintf(
'<span %s>%s</span>',
@ -108,4 +108,47 @@ class Note
return str_replace($needle, $replace, strip_tags($haystack));
}
/**
 * Parse the media information stored in the notes field.
 *
 * The field can be in the form of
 *     series-name_S01 EP07 (11h22m33s44ms)
 * or
 *     movie-name EP (11h22m33s44ms)
 *
 * @return array{name: string, time: \DateInterval, season: ?int, episode: ?int}|null
 *     null when $notes is not in one of the forms above
 */
protected static function parseMediaInfo(string $notes): ?array
{
    $matches = null;
    if (1 !== preg_match(
        '/(?<name>[0-9A-Za-z\-_]+)(_S)?(?<season>\d*) EP(?<episode>\d*) \((?<time>.*)\)/n',
        $notes,
        $matches,
    )) {
        return null;
    }

    // Every component of the time is optional, a missing one counts as 0
    $component = static function (string $pattern) use ($matches): int {
        $found = [];

        return 1 === preg_match($pattern, $matches['time'], $found)
            ? intval($found[1])
            : 0;
    };

    $time = new \DateInterval('PT0S');
    $time->h = $component('/(\d+)h/');
    // "m" must not be followed by "s": otherwise the milliseconds would be
    // read as minutes when the time has no minutes ("33s44ms")
    $time->i = $component('/(\d+)m(?!s)/');
    $time->s = $component('/(\d+)s/');
    // DateInterval::$f is a fraction of a second: 44ms is 0.044
    $time->f = $component('/(\d+)ms/') / 1000;

    // Only the named matches are kept, numbers are cast to integers
    return [
        'name' => $matches['name'],
        'time' => $time,
        // NOTE: intval returns 0 if not a number, which is false-like
        'season' => intval($matches['season']) ?: null,
        'episode' => intval($matches['episode']) ?: null,
    ];
}
}

View File

@ -5,7 +5,7 @@ namespace App\Entity;
class SentenceListeningNote extends Note
{
const MODEL_NAME = 'Japanese sentences listening';
const DECK = 'jp::production';
const DECK = '日本語::漢字';
private ?array $mediaInfo = [];
private ?Term $term = null;
@ -18,11 +18,9 @@ class SentenceListeningNote extends Note
}
public function setTerm(Term $term): static
{
$this->fields['VocabKanji'] = $term->getKanji();
$this->fields['VocabFurigana'] = $term->getReading();
$this->fields['VocabDef'] = $term->toAnkiVocabDef();
$this->fields['SentFurigana'] = ''; // We don't want to keep this
$this->fields['SentKanji'] = $this->stringHighlight(
$this->fields['Vocab'] = $term->getKanji();
$this->fields['VocabDef'] = $term->toAnkiVocabDef();
$this->fields['SentKanji'] = $this->stringHighlight(
$this->fields['SentKanji'],
$term->getKanji(),
);
@ -41,7 +39,7 @@ class SentenceListeningNote extends Note
throw new \Exception('Trying to parse wrong model');
}
$note->mediaInfo = self::parseMediaInfo($note->fields['Notes']);
$note->mediaInfo = Note::parseMediaInfo($note->fields['Notes']);
// Set VocabKanji field
$note->term = Term::fromNoteFields($note->fields)[0] ?? null;
@ -84,47 +82,4 @@ class SentenceListeningNote extends Note
self::HIGHLIGHT_ATTR_KANJI,
);
}
/**
 * Parse the media information stored in the notes field.
 *
 * The field can be in the form of
 *     series-name_S01 EP07 (11h22m33s44ms)
 * or
 *     movie-name EP (11h22m33s44ms)
 *
 * @return array{name: string, time: \DateInterval, season: ?int, episode: ?int}|null
 *     null when $notes is not in one of the forms above
 */
private static function parseMediaInfo(string $notes): ?array
{
    $matches = null;
    if (1 !== preg_match(
        '/(?<name>[0-9A-Za-z\-_]+)(_S)?(?<season>\d*) EP(?<episode>\d*) \((?<time>.*)\)/n',
        $notes,
        $matches,
    )) {
        return null;
    }

    // Every component of the time is optional, a missing one counts as 0
    $component = static function (string $pattern) use ($matches): int {
        $found = [];

        return 1 === preg_match($pattern, $matches['time'], $found)
            ? intval($found[1])
            : 0;
    };

    $time = new \DateInterval('PT0S');
    $time->h = $component('/(\d+)h/');
    // "m" must not be followed by "s": otherwise the milliseconds would be
    // read as minutes when the time has no minutes ("33s44ms")
    $time->i = $component('/(\d+)m(?!s)/');
    $time->s = $component('/(\d+)s/');
    // DateInterval::$f is a fraction of a second: 44ms is 0.044
    $time->f = $component('/(\d+)ms/') / 1000;

    // Only the named matches are kept, numbers are cast to integers
    return [
        'name' => $matches['name'],
        'time' => $time,
        // NOTE: intval returns 0 if not a number, which is false-like
        'season' => intval($matches['season']) ?: null,
        'episode' => intval($matches['episode']) ?: null,
    ];
}
}

View File

@ -34,7 +34,7 @@ class SentenceNote extends Note
throw new \Exception('Trying to parse wrong model');
}
$note->mediaInfo = self::parseMediaInfo($note->fields['Notes']);
$note->mediaInfo = Note::parseMediaInfo($note->fields['Notes']);
// Set VocabKanji field
$note->terms = Term::fromNoteFields($note->fields);
@ -133,47 +133,4 @@ class SentenceNote extends Note
return $ret;
}
/**
 * Parse the media information stored in the notes field.
 *
 * The field can be in the form of
 *     series-name_S01 EP07 (11h22m33s44ms)
 * or
 *     movie-name EP (11h22m33s44ms)
 *
 * @return array{name: string, time: \DateInterval, season: ?int, episode: ?int}|null
 *     null when $notes is not in one of the forms above
 */
private static function parseMediaInfo(string $notes): ?array
{
    $matches = null;
    if (1 !== preg_match(
        '/(?<name>[0-9A-Za-z\-_]+)(_S)?(?<season>\d*) EP(?<episode>\d*) \((?<time>.*)\)/n',
        $notes,
        $matches,
    )) {
        return null;
    }

    // Every component of the time is optional, a missing one counts as 0
    $component = static function (string $pattern) use ($matches): int {
        $found = [];

        return 1 === preg_match($pattern, $matches['time'], $found)
            ? intval($found[1])
            : 0;
    };

    $time = new \DateInterval('PT0S');
    $time->h = $component('/(\d+)h/');
    // "m" must not be followed by "s": otherwise the milliseconds would be
    // read as minutes when the time has no minutes ("33s44ms")
    $time->i = $component('/(\d+)m(?!s)/');
    $time->s = $component('/(\d+)s/');
    // DateInterval::$f is a fraction of a second: 44ms is 0.044
    $time->f = $component('/(\d+)ms/') / 1000;

    // Only the named matches are kept, numbers are cast to integers
    return [
        'name' => $matches['name'],
        'time' => $time,
        // NOTE: intval returns 0 if not a number, which is false-like
        'season' => intval($matches['season']) ?: null,
        'episode' => intval($matches['episode']) ?: null,
    ];
}
}

View File

@ -7,6 +7,7 @@ class Term
public ?string $kanji;
public ?string $definitionJp;
public ?string $definitionEn;
public ?string $audio;
public function getReading(): ?string
{
@ -20,7 +21,7 @@ class Term
/**
* Get the kanji version & the reading for a given term
*
*
* TODO: Make this smarter & handle mixing of kanji & hiradana
*
* @return array{'kanji': string, 'reading': null|string}
@ -148,6 +149,8 @@ class Term
*/
public static function fromNoteFields(array $fields): array
{
$audios = explode('|', $fields['VocabAudio'] ?? '');
// -------------------- Trying to extract it with the modern syntax ---
// 言葉: word
// 上げる:上に動くこと。
@ -157,13 +160,22 @@ class Term
foreach (preg_split('|<br ?/?>|', $fields['VocabDef']) as $line) {
$terms[] = self::fromVocabDefLine(strip_tags($line));
};
// Assign audio
if (count($audios) === count($terms) and $terms[0] !== null) {
foreach (array_keys($audios) as $key) {
if ($terms[$key] === null) dd($fields);
$terms[$key]->audio = mb_trim(strip_tags($audios[$key]));
}
}
// If there's no nulls, everything went good
if (!in_array(null, $terms, true)) return $terms;
// ------------ Extracting failed, try to infer from other syntaxes ---
$kanjis = explode('', $fields['VocabKanji']);
$kanjis = explode('', $fields['VocabKanji'] ?? $fields['Vocab'] ?? '');
$defs = explode('', $fields['VocabDef']);
// Number of legacy definitions is different from number of kanji
if (count($kanjis) !== count($defs)) return [];

View File

@ -7,7 +7,7 @@ use App\Utils\Number;
class UnicodeNote extends Note
{
const string MODEL_NAME = 'Unicode';
const string DECK = 'jp::unicode';
const string DECK = '日本語::unicode';
private ?int $codepoint = null;
/** @var Term[] */

View File

@ -2,6 +2,8 @@
namespace App\Service;
use App\Entity\KoreanProductionNote;
use App\Entity\KoreanSentenceNote;
use App\Entity\Note;
use App\Entity\SentenceListeningNote;
use App\Entity\SentenceNote;
@ -38,8 +40,10 @@ class AnkiService
/** The note's id is updated on success.
* @return bool True on success
*/
public function addNote(Note &$note, string $deckName): bool
public function addNote(Note &$note, ?string $deckName = null): bool
{
$deckName ??= constant(get_class($note) . '::DECK');
$note->setId($this->request('addNote', ['note' => [
'deckName' => $deckName,
'modelName' => $note->getModel(),
@ -72,6 +76,24 @@ class AnkiService
return $this->request('findNotes', ['query' => $query]);
}
/**
 * Find the ids of all the notes whose model is the MODEL_NAME constant of
 * the given class.
 *
 * @param string $class Name of a class declaring a MODEL_NAME constant
 * @return list<int>
 */
public function getAllIdsFromClass(string $class): array
{
    $modelName = constant("$class::MODEL_NAME");

    return $this->request('findNotes', [
        'query' => sprintf('"note:%s"', $modelName),
    ]);
}
/**
 * Fetch all the notes whose model is the one declared by the given class:
 * their ids are looked up first, then the notes are loaded from those ids.
 *
 * @template T of object
 * @param class-string<T> $class
 * @return list<T>
 */
public function getAllFromClass(string $class): array
{
    return $this->getNotes($this->getAllIdsFromClass($class));
}
public function getAllSentenceNoteIds(): array
{
return $this->request(
@ -119,6 +141,8 @@ class AnkiService
UnicodeNote::MODEL_NAME => UnicodeNote::fromAnki($noteInfo),
SentenceNote::MODEL_NAME => SentenceNote::fromAnki($noteInfo),
SentenceListeningNote::MODEL_NAME => SentenceListeningNote::fromAnki($noteInfo),
KoreanSentenceNote::MODEL_NAME => KoreanSentenceNote::fromAnki($noteInfo),
KoreanProductionNote::MODEL_NAME => KoreanProductionNote::fromAnki($noteInfo),
default => throw new \Exception(sprintf(
'Unrecognized Note "%s" of type "%s"',
$noteInfo['noteId'],