anker/src/Command/CreateProductionCommand.php

232 lines
7.4 KiB
PHP

<?php
namespace App\Command;
use App\Entity\SentenceListeningNote;
use App\Entity\SentenceNote;
use App\Entity\Term;
use App\Service\AnkiService;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Console command that creates "production" notes for the most valuable terms.
 *
 * The command loads every sentence note and every sentence-listening note
 * through AnkiService, indexes the terms found in the sentence notes, counts
 * in how many sentences each kanji of those terms occurs, scores every term
 * from its kanji counts, drops the terms that would not introduce any kanji
 * that has not been seen yet, and finally creates notes for the best `count`
 * remaining terms.
 */
#[AsCommand(
    name: 'app:create-production',
    description: 'Add a short description for your command',
)]
class CreateProductionCommand extends Command
{
    public function __construct(
        private AnkiService $ankiService,
    ) {
        parent::__construct();
    }

    /**
     * Returns the distinct Han-script characters found in a string.
     *
     * Keys are the ones produced by array_unique(), so the result is not
     * guaranteed to be a sequential list.
     *
     * @return array<int, string>
     *
     * @throws \RuntimeException when the regular expression engine fails
     *                           (for example on malformed UTF-8)
     */
    private static function extractKanji(string $str): array
    {
        if (preg_match_all('/\p{Script=Han}/u', $str, $matches) === false) {
            throw new \RuntimeException('Failed to extract kanji: ' . preg_last_error_msg());
        }

        return array_unique($matches[0]);
    }

    /**
     * Removes every character that is not a Han-script character.
     *
     * @throws \RuntimeException when the regular expression engine fails
     *                           (for example on malformed UTF-8)
     */
    private static function getOnlyKanji(string $str): string
    {
        $onlyKanji = preg_replace('/[^\p{Script=Han}]/u', '', $str);
        if ($onlyKanji === null) {
            throw new \RuntimeException('Failed to filter kanji: ' . preg_last_error_msg());
        }

        return $onlyKanji;
    }

    /**
     * Registers the characters of $subject in $ref and reports whether any
     * of them was missing from it.
     *
     * @param array<array-key, int> $ref     set of already seen characters, keyed by
     *                                       character; every missing character is
     *                                       added with the value 0
     * @param string                $subject characters to check, one per code point
     *
     * @return bool true when at least one character was not yet in $ref
     */
    private static function kanjiDiff(array &$ref, string $subject): bool
    {
        $hasUnseenKanji = false;
        foreach (mb_str_split($subject) as $character) {
            // Direct key lookup instead of scanning every key of $ref.
            if (array_key_exists($character, $ref)) {
                continue;
            }
            $ref[$character] = 0;
            $hasUnseenKanji = true;
        }

        return $hasUnseenKanji;
    }

    /**
     * Declares the optional `count` argument (defaults to 1).
     */
    protected function configure(): void
    {
        $this->addArgument('count', InputArgument::OPTIONAL, 'Amount of cards to make', 1);
    }

    /**
     * Creates a note in the 'production' deck for the given term.
     *
     * The source sentence note is searched by the term in the SentKanji
     * field first and, when nothing matches, in the VocabKanji field. The
     * last note id returned by the search is used.
     *
     * @throws \RuntimeException when no sentence note contains the term or
     *                           when the note cannot be added
     */
    protected function createProductionNoteFromTerm(Term $term): void
    {
        $noteIds = $this->ankiService->findNotesIds(sprintf(
            '"SentKanji:*%s*" "note:%s"',
            $term->getKanji(),
            SentenceNote::MODEL_NAME,
        ));
        if (count($noteIds) <= 0) {
            $noteIds = $this->ankiService->findNotesIds(sprintf(
                '"VocabKanji:*%s*" "note:%s"',
                $term->getKanji(),
                SentenceNote::MODEL_NAME,
            ));
        }
        if (count($noteIds) <= 0) {
            // array_key_last() returns null on an empty array; fail with a
            // clear message instead of passing an undefined index along.
            throw new \RuntimeException(sprintf(
                'No sentence note found for term "%s"',
                $term->getKanji(),
            ));
        }

        $sNote = $this->ankiService->getNote($noteIds[array_key_last($noteIds)]);
        $newSlNote = SentenceListeningNote::fromNote($sNote, $term);
        if (!$this->ankiService->addNote($newSlNote, 'production')) {
            throw new \RuntimeException('Failed to add note!');
        }
    }

    /**
     * Runs the command: index terms, score them, and create the notes.
     *
     * @throws \RuntimeException when a fetched sentence note is not a
     *                           SentenceNote or a note cannot be created
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        printf('Getting all SentenceCards...');
        $allIds = $this->ankiService->getAllSentenceNoteIds();
        $allNotes = $this->ankiService->getNotes($allIds);
        printf(" OK (%d)\n", count($allNotes));

        printf('Getting all SentenceListeningCards...');
        $allListeningIds = $this->ankiService->getAllSentenceListeningNoteIds();
        $allListeningNotes = $this->ankiService->getNotes($allListeningIds);
        printf(" OK (%d)\n", count($allListeningNotes));

        printf('Indexing all terms...');
        $knownTerms = [];  // term kanji => Term (first occurrence wins)
        $knownKanji = [];  // single kanji => number of sentences containing it
        $termCounts = [];  // term kanji => score
        foreach ($allNotes as $note) {
            if (!$note instanceof SentenceNote) {
                throw new \RuntimeException(sprintf(
                    'Expected SentenceNote, got %s',
                    get_debug_type($note),
                ));
            }
            // Iterate by value: Term objects are handles, and by-reference
            // iteration would leave a dangling reference that later loops
            // reusing the variable name would write through.
            foreach ($note->getTerms() as $noteTerm) {
                assert($noteTerm instanceof Term);
                $termKey = $noteTerm->getKanji();
                if (array_key_exists($termKey, $knownTerms)) {
                    continue;
                }
                $termCounts[$termKey] = 0;
                $knownTerms[$termKey] = $noteTerm;
                foreach (self::extractKanji($termKey) as $kanji) {
                    $knownKanji[$kanji] = 0;
                }
            }
        }
        printf(" OK (%d)\n", count($knownTerms));

        // NOTE(review): Progress is not imported, so it resolves to
        // App\Command\Progress — confirm that class exists.
        $progress = new Progress('Getting frequenciees', count($allNotes));
        $kanjiList = array_keys($knownKanji);
        foreach ($allNotes as $note) {
            $progress->tick();
            assert($note instanceof SentenceNote);
            $sentKanji = str_replace(
                "\u{200E}",
                '',
                strip_tags($note->getFields()['SentKanji'])
            );
            // Each kanji is counted at most once per sentence.
            foreach ($kanjiList as $kanji) {
                if (str_contains($sentKanji, (string) $kanji)) {
                    $knownKanji[$kanji]++;
                }
            }
        }
        printf("\n");

        printf('Rating terms...');
        // First pass: the score of a term is the sum of the sentence counts
        // of its distinct kanji, each scaled by 1 / (number of kanji in the
        // term) and rounded up.
        foreach ($knownTerms as $termKey => $knownTerm) {
            $termKanji = self::getOnlyKanji($knownTerm->getKanji());
            $weight = 1 / max(mb_strlen($termKanji), 1);
            // Every kanji of an indexed term is a key of $knownKanji, so
            // visiting the term's own kanji is equivalent to scanning the
            // whole kanji table for matches.
            foreach (self::extractKanji($termKanji) as $kanji) {
                $termCounts[$termKey] += ceil(($knownKanji[$kanji] ?? 0) * $weight);
            }
        }
        // Best score first; the second pass below depends on this order.
        arsort($termCounts);

        // Take the already created listening notes into account: their
        // kanji count as seen, which also filters out their terms below.
        $seenKanji = [];
        foreach ($allListeningNotes as $listeningNote) {
            assert($listeningNote instanceof SentenceListeningNote);
            self::kanjiDiff(
                $seenKanji,
                self::getOnlyKanji($listeningNote->getTerm()->getKanji()),
            );
        }

        // Second pass: drop the terms that bring no unseen kanji. Kept terms
        // mark their kanji as seen for the lower-scored terms that follow.
        foreach (array_keys($termCounts) as $termKey) {
            if (!self::kanjiDiff($seenKanji, self::getOnlyKanji((string) $termKey))) {
                unset($termCounts[$termKey]);
            }
        }
        printf(" OK\n");
        printf("\n");

        $newNotesCount = intval($input->getArgument('count'));
        foreach ($termCounts as $termKey => $score) {
            if ($newNotesCount <= 0) {
                break;
            }
            $termKanji = self::getOnlyKanji((string) $termKey);
            printf("%s: %d\n", $termKey, $score);
            $len = mb_strlen($termKanji);
            foreach (mb_str_split($termKanji) as $iKanji) {
                printf(" - %s: %0.2f\n", $iKanji, $knownKanji[$iKanji] / $len);
            }
            $this->createProductionNoteFromTerm($knownTerms[$termKey]);
            $newNotesCount -= 1;
        }

        printf(
            <<<FMNT
            total: %d cards
            max usage: %0.2f MiB
            current usage: %0.2f MiB\n
            FMNT,
            count($termCounts),
            memory_get_peak_usage() / 1024 / 1024,
            memory_get_usage() / 1024 / 1024,
        );

        return Command::SUCCESS;
    }
}