272 lines
9.4 KiB
PHP
272 lines
9.4 KiB
PHP
<?php
|
||
|
||
namespace App\Command;
|
||
|
||
use App\Entity\SentenceListeningNote;
|
||
use App\Entity\SentenceNote;
|
||
use App\Entity\Term;
|
||
use App\Service\AnkiService;
|
||
use App\Utils\Japanese;
|
||
use App\Utils\Progress;
|
||
|
||
use Symfony\Component\Console\Attribute\AsCommand;
|
||
use Symfony\Component\Console\Command\Command;
|
||
use Symfony\Component\Console\Input\InputArgument;
|
||
use Symfony\Component\Console\Input\InputInterface;
|
||
use Symfony\Component\Console\Output\OutputInterface;
|
||
|
||
/**
 * Console command `app:create:listening`: picks terms out of the existing
 * SentenceNote collection and creates a SentenceListeningNote for each of
 * them, up to the amount requested through the `count` argument.
 *
 * Terms are ranked so that the ones introducing the fewest not-yet-studied
 * kanji come first (see {@see self::execute()} for the full ordering).
 *
 * NOTE(review): the class is named "Production" and notes are added with the
 * 'production' argument, while the command name and description say
 * "listening" — confirm which naming is the intended one.
 */
#[AsCommand('app:create:listening', 'Create new listening Anki Cards')]
class CreateProductionCommand extends Command
{
    public function __construct(
        private AnkiService $ankiService,
    ) {
        parent::__construct();
    }

    /**
     * Collects the distinct Han-script characters of a string, in order of
     * first appearance.
     *
     * @return list<string>
     *
     * @throws \RuntimeException when the regular expression engine fails
     *                           (e.g. on malformed UTF-8 input)
     */
    private static function extractKanji(string $str): array
    {
        if (preg_match_all('/\p{Script=Han}/u', $str, $matches) === false) {
            throw new \RuntimeException('Failed to extract kanji: ' . preg_last_error_msg());
        }

        // array_unique() preserves the original keys and therefore leaves
        // gaps; reindex so the result really is the documented list.
        return array_values(array_unique($matches[0]));
    }

    /**
     * Strips every character that is not in the Han script.
     *
     * @throws \RuntimeException when the regular expression engine fails
     *                           (e.g. on malformed UTF-8 input)
     */
    private static function getOnlyKanji(string $str): string
    {
        $onlyKanji = preg_replace('/[^\p{Script=Han}]/u', '', $str);

        // preg_replace() returns null on failure, which would otherwise
        // surface as an opaque TypeError on the string return type.
        if ($onlyKanji === null) {
            throw new \RuntimeException('Failed to strip non-kanji characters: ' . preg_last_error_msg());
        }

        return $onlyKanji;
    }

    /**
     * Registers every character of $subject in the $ref set and reports
     * whether at least one of them was not in the set before the call.
     *
     * @param array<string, 0> $ref     set of already seen characters, keyed
     *                                  by character; modified in place
     * @param string           $subject characters to check, one per position
     *
     * @return bool true when $subject contained a character missing from $ref
     */
    private static function kanjiDiff(array &$ref, string $subject): bool
    {
        $hasUnseenKanji = false;

        foreach (mb_str_split($subject) as $char) {
            // Direct key lookup instead of scanning every key of $ref.
            if (array_key_exists($char, $ref)) {
                continue;
            }

            $ref[$char] = 0;
            $hasUnseenKanji = true;
        }

        return $hasUnseenKanji;
    }

    /**
     * Declares the required `count` argument.
     */
    protected function configure(): void
    {
        $this->addArgument(
            'count',
            InputArgument::REQUIRED,
            'Amount of cards to make',
        );
    }

    /**
     * Creates a SentenceListeningNote for $term, based on an existing
     * SentenceNote that mentions the term.
     *
     * The source note is searched by the `SentKanji` field first and by the
     * `VocabKanji` field as a fallback; the last id returned by the search
     * is the one used.
     *
     * @throws \RuntimeException when no source note matches the term or when
     *                           the new note could not be added
     */
    protected function createProductionNoteFromTerm(Term $term): void
    {
        $noteIds = $this->ankiService->findNotesIds(sprintf(
            '"SentKanji:*%s*" "note:%s"',
            $term->getKanji(),
            SentenceNote::MODEL_NAME,
        ));

        if (count($noteIds) <= 0) {
            $noteIds = $this->ankiService->findNotesIds(sprintf(
                '"VocabKanji:*%s*" "note:%s"',
                $term->getKanji(),
                SentenceNote::MODEL_NAME,
            ));
        }

        // Without this guard array_key_last() returns null and the lookup
        // below reads an undefined offset, passing null on to getNote().
        if (count($noteIds) <= 0) {
            throw new \RuntimeException(sprintf(
                'No "%s" note found for term "%s"',
                SentenceNote::MODEL_NAME,
                $term->getKanji(),
            ));
        }

        $sNote = $this->ankiService->getNote($noteIds[array_key_last($noteIds)]);

        $newSlNote = SentenceListeningNote::fromNote($sNote, $term);
        if (!$this->ankiService->addNote($newSlNote, 'production')) {
            throw new \RuntimeException('Failed to add note!');
        }
    }

    /**
     * Fetches every SentenceNote through the Anki service, printing a short
     * status line with the amount of notes found.
     *
     * @return list<SentenceNote>
     */
    private function getAllSentenceNotes(): array
    {
        printf('Getting all SentenceNote...');
        $allIds = $this->ankiService->getAllSentenceNoteIds();
        $allNotes = $this->ankiService->getNotes($allIds);
        printf(" OK (%d)\n", count($allNotes));

        return $allNotes;
    }

    /**
     * Fetches every SentenceListeningNote through the Anki service, printing
     * a short status line with the amount of notes found.
     *
     * @return list<SentenceListeningNote>
     */
    private function getAllSentenceListeningNotes(): array
    {
        printf('Getting all SentenceListeningNote...');
        $allListeningIds = $this->ankiService->getAllSentenceListeningNoteIds();
        $allListeningNotes = $this->ankiService->getNotes($allListeningIds);
        printf(" OK (%d)\n", count($allListeningNotes));

        return $allListeningNotes;
    }

    /**
     * Runs the command:
     *
     *  1. index every distinct term of all SentenceNotes by its kanji;
     *  2. count in how many sentences each known kanji appears;
     *  3. rate the terms against the kanji counts reported by
     *     AnkiService::getKnownSlnKanjiCounts() and drop the terms for which
     *     Japanese::kanjiDiff() reports nothing;
     *  4. drop the terms whose kanji are all covered by existing listening
     *     notes or by a better-ranked candidate;
     *  5. create notes for the first `count` remaining terms.
     *
     * Progress and results are written with printf().
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $allSentenceNotes = $this->getAllSentenceNotes();
        $allListeningNotes = $this->getAllSentenceListeningNotes();

        // Index of all the Terms indexed by its TermKanji
        $allTerms = []; // ["パレートの法則" => App\Entity\Term]
        // Set of known Kanji Characters
        $knownKanji = []; // ["法" => 0, "則" => 0]
        // Amount of new kanji per term (filled in while rating, 0 until then)
        $termCounts = []; // ["パレートの法則" => 1]

        printf('Indexing all terms...');
        foreach ($allSentenceNotes as $note) {
            // Terms are objects, so storing the handle is enough; no
            // references are needed (and none can be left dangling).
            foreach ($note->getTerms() as $term) {
                $termKanji = $term->getKanji();

                // Deduplicate list
                if (array_key_exists($termKanji, $allTerms)) {
                    continue;
                }

                // Actual indexing
                $allTerms[$termKanji] = $term;
                // Just simple initialization
                $termCounts[$termKanji] = 0;
                foreach (self::extractKanji($termKanji) as $kanji) {
                    $knownKanji[$kanji] = 0;
                }
            }
        }
        printf(" OK (%d)\n", count($knownKanji));

        // Populate $knownKanji ["例" => 378, ...];
        // NOTE(review): the resulting frequencies are not read anywhere
        //               further down in this method.
        // TODO: Move this into its own function to prevent side-effects.
        //       Maybe it could be simplified into something like
        //       $this->anki->getKanji('origField', 'countField'), where the
        //       count field can be null so the result is just a set.
        $progress = new Progress('Getting frequenciees', count($allSentenceNotes));
        foreach ($allSentenceNotes as $note) {
            $progress->tick();

            // Sanitize sentence (remove those pesky \u{200E})
            $sentence = str_replace(
                "\u{200E}",
                '',
                strip_tags($note->getFields()['SentKanji']),
            );

            foreach ($knownKanji as $kanji => &$count) {
                if (str_contains($sentence, $kanji)) {
                    $count++;
                }
            }
            // Break the reference: the loops below reuse the name $count and
            // would otherwise overwrite the last entry of $knownKanji.
            unset($count);
        }
        // TODO: Make progress a function with a callback? That way scope
        //       inside and side-effects are easy to control & track
        unset($progress);
        printf("\n");

        // Build the values to be used in the ordering process
        // TODO: It kinda feels wrong that $termCounts is used in this special
        //       manner while $seenScore is separate. Make a generic orderer?
        printf('Rating terms...');
        $studiedKanji = $this->ankiService->getKnownSlnKanjiCounts('ASC');
        // Loop-invariant, so computed once instead of once per term.
        $studiedKanjiList = array_keys($studiedKanji);
        $seenScore = [];  // ["パレートの法則" => sum of the studied counts of its kanji]
        $kanjiCounts = []; // ["パレートの法則" => amount of entries in its kanji list]
        foreach ($allTerms as $key => $term) {
            $diff = Japanese::kanjiDiff($term->getKanji(), $studiedKanjiList);

            if (count($diff) <= 0) {
                unset($allTerms[$key], $termCounts[$key]);
                continue;
            }

            $kanjiList = Japanese::getKanjiList($key);
            $kanjiCounts[$key] = count($kanjiList);

            // Build $seenScore
            $seenScore[$key] = 0;
            foreach ($kanjiList as $kanji) {
                $seenScore[$key] += $studiedKanji[$kanji] ?? 0;
            }

            // Build $termCounts
            $termCounts[$key] = count($diff);
        }

        // Ordering in having:
        //  1. Least new Kanji (ideally we just want 1 new kanji)
        //  2. Most Kanji (most amount of unique kanji)
        //  3. Least studied kanji
        // A single composite comparator states the priorities explicitly
        // instead of chaining three sorts that depend on sort stability.
        uksort(
            $termCounts,
            static fn ($a, $b): int =>
                [$termCounts[$a], $kanjiCounts[$b], $seenScore[$a]]
                <=> [$termCounts[$b], $kanjiCounts[$a], $seenScore[$b]],
        );
        printf(" OK\n");

        // Have into account the ones that have already been created.
        // This will not only skip them but also update the general array for
        // already seen kanji.
        $seenKanji = [];
        printf('Filtering out terms with no new kanji...');
        // First pass: Get the list of the kanji we've seen
        foreach ($allListeningNotes as $listeningNote) {
            $noteKanji = self::getOnlyKanji($listeningNote->getTerm()->getKanji());
            self::kanjiDiff($seenKanji, $noteKanji);
        }
        // Second pass: Remove terms with no new kanji at all. kanjiDiff()
        // also records the kanji of every kept term, so a later term that
        // only repeats them is dropped as well.
        foreach (array_keys($termCounts) as $termKey) {
            if (!self::kanjiDiff($seenKanji, self::getOnlyKanji((string) $termKey))) {
                unset($termCounts[$termKey], $allTerms[$termKey]);
            }
        }
        printf(" OK\n");

        // unset() keeps the order of the remaining entries, so the ranking
        // computed above is still valid here.
        printf("\n");

        $newNotesCount = intval($input->getArgument('count'));

        foreach ($termCounts as $termKey => $newKanjiCount) {
            if ($newNotesCount <= 0) {
                break;
            }

            printf("%s %d | %d\n", "{$termKey}:", $newKanjiCount, $seenScore[$termKey]);

            $this->createProductionNoteFromTerm($allTerms[$termKey]);
            $newNotesCount -= 1;
        }

        // "total" is the amount of candidate terms left after filtering, not
        // the amount of notes created by this run.
        printf(
            <<<FMNT
            total: %d cards
            max usage: %0.2f MiB
            current usage: %0.2f MiB\n
            FMNT,
            count($termCounts),
            memory_get_peak_usage() / 1024 / 1024,
            memory_get_usage() / 1024 / 1024,
        );

        return Command::SUCCESS;
    }
}
|