// NOTE(review): this text is a git diff that adds src/Command/CreateProductionCommand.php,
// collapsed onto two physical lines; each " + " marks the start of an added source line.
// Two spans appear to have been lost in extraction and are deliberately left untouched:
//   - everything between the hunk header "@@ -0,0 +1,209 @@ +" and "$value) {" (file header,
//     class declaration and the opening of the first visible method), and
//   - the heredoc argument after "printf( <<" near the end of execute().
//
// Tail of the first visible method (presumably kanjiDiff(), which execute() calls - confirm):
//   when $subKanji equals one of the compared keys ($refKanji) it is skipped via "continue 2";
//   otherwise $ref[$subKanji] is set to 0 and $hasUnseenKanji becomes true. The method returns
//   $hasUnseenKanji. Whether $ref is taken by reference is not visible here - TODO confirm,
//   since execute() passes $seenKanji and relies on the boolean result across calls.
//
// configure(): registers nothing; the generated addArgument/addOption template is commented out.
//
// execute(InputInterface $input, OutputInterface $output): int
//   1. Loads all sentence note ids and the notes themselves through $this->ankiService.
//   2. Indexes terms: the first occurrence of each $term->getKanji() string is stored in
//      $knownTerms, gets a 0 entry in $termCounts, and every value yielded by
//      self::extractKanji() gets a 0 entry in $knownKanji. Throws \Exception when a note is
//      not a SentenceNote.
//   3. For every note, strips tags and U+200E from the 'SentKanji' field and increments the
//      counter of each $knownKanji key contained in that text; prints a progress line.
//   4. Rates terms: for each known kanji contained in self::getOnlyKanji(term), adds
//      ceil(count * 1 / max(mb_strlen(termKanji), 1)) to the term's entry in $termCounts.
//   5. Sorts $termCounts descending (arsort) and unsets every term for which
//      self::kanjiDiff($seenKanji, $termKanji) returns false (the inline comment says
//      "penalize", but the entry is removed).
//   6. Sorts ascending (asort), prints each remaining term with its score and, per character of
//      its kanji string, $knownKanji[char] / length; then dumps the remaining term count.
//   7. Returns Command::SUCCESS.
//
// NOTE(review): "foreach (... as &$term)" and "foreach (... => &$count)" leave $term and $count
//   bound as references after their loops; neither is unset, and both names are reused by later
//   by-value foreach loops, which assign through the leftover reference. The PHP manual warns
//   about exactly this and recommends unset() after a by-reference foreach - verify the last
//   element of $knownKanji / the referenced term is not being overwritten.
// NOTE(review): $total is count($knownTerms) but is printed and compared against $i, which
//   counts iterations over $allNotes - confirm the intended denominator.
// NOTE(review): the instanceof checks written with assert() are skipped when assertions are
//   disabled.
diff --git a/src/Command/CreateProductionCommand.php b/src/Command/CreateProductionCommand.php new file mode 100644 index 0000000..78835be --- /dev/null +++ b/src/Command/CreateProductionCommand.php @@ -0,0 +1,209 @@ + $value) { + if ($subKanji === $refKanji) continue 2; + } + + $ref[$subKanji] = 0; + $hasUnseenKanji = true; + } + + return $hasUnseenKanji; + } + + /* configure(): registers no arguments or options; the generated template below is commented out. */ protected function configure(): void + { + //$this + // ->addArgument('arg1', InputArgument::OPTIONAL, 'Argument description') + // ->addOption('option1', null, InputOption::VALUE_NONE, 'Option description') + //; + } + + /* execute(): loads all sentence notes, counts per-kanji sentence frequency, scores and filters terms, prints the ranking, returns Command::SUCCESS. */ protected function execute(InputInterface $input, OutputInterface $output): int + { + printf('Getting all SentenceCards...'); + $allIds = $this->ankiService->getAllSentenceNoteIds(); + $allNotes = $this->ankiService->getNotes($allIds); + printf(" OK (%d)\n", count($allNotes)); + + printf('Indexing all terms...'); + $knownTerms = []; + $knownKanji = []; + $termCounts = []; + foreach ($allNotes as $note) { + if (!$note instanceof SentenceNote) throw new \Exception(sprintf( + 'Expected SentenceNote, got %s', + $note::class, + )); + + foreach ($note->getTerms() as &$term) { + assert($term instanceof Term); + + if (key_exists($term->getKanji(), $knownTerms)) continue; + $termCounts[$term->getKanji()] = 0; + $knownTerms[$term->getKanji()] = &$term; + foreach (self::extractKanji($term->getKanji()) as $kanji) { + $knownKanji[$kanji] = 0; + } + } + } + printf(" OK (%d)\n", count($knownTerms)); + + $total = count($knownTerms); /* NOTE(review): $total is the term count, but $i below counts notes - confirm the intended denominator. */ + $i = 0; + foreach ($allNotes as $note) { + $i += 1; + if ($i % 12 === 0 or $i === $total) { + printf( + "\33[2K\r% 7d/% 7d | %.2f GiB | Getting frequencies", + $i, + $total, + memory_get_usage() / 1024 / 1024 / 1024 + ); + } + + assert($note instanceof SentenceNote); + + $sentKanji = str_replace( + "\u{200E}", + '', + strip_tags($note->getFields()['SentKanji']) + ); + + //foreach ($knownTerms as &$term) { + // assert($term instanceof Term); + + // if 
(str_contains($sentKanji, $term->getKanji())) { + // $termCounts[$term->getKanji()] += 1; + // } + //} + + foreach ($knownKanji as $kanji => &$count) { /* NOTE(review): $count stays bound as a reference after this loop (never unset) and is reused by value in later foreach loops. */ + if (str_contains($sentKanji, $kanji)) { + $count++; + } + } + } + printf("\n"); + + $seenKanji = []; + //uksort($knownTerms, function ($a, $b) { + // //return strlen(self::getOnlyKanji($b)) <=> strlen(self::getOnlyKanji($a)); // descending order + // return strlen($b) <=> strlen($a); // ascending order + //}); + + + printf('Rating terms...'); + foreach ($knownTerms as $term) { + $termKanji = self::getOnlyKanji($term->getKanji()); + $weight = 1 / max(mb_strlen($termKanji), 1); + + // First pass: Calculate the weight + foreach ($knownKanji as $kanji => $count) { + if (str_contains($termKanji, $kanji)) { + $termCounts[$term->getKanji()] += ceil($count * $weight); + } + } + } + + arsort($termCounts); + + foreach ($termCounts as $term => $count) { + $termKanji = self::getOnlyKanji($term); + + // Second pass: Penalize terms with no new kanji at all + if (!self::kanjiDiff($seenKanji, $termKanji)) { + unset($termCounts[$term]); + //unset($knownTerms[$term->getKanji()]); + //$termCounts[$term->getKanji()] = 0; + } + } + printf(" OK\n"); + + asort($termCounts); + foreach ($termCounts as $term => $count) { + $termKanji = self::getOnlyKanji($term); + printf("%s: %d\n", $term, $count); + + $len = mb_strlen($termKanji); + for ($i = 0; $i < $len; $i++) { + $iKanji = mb_substr($termKanji, $i, 1); + printf(" - %s: %0.2f\n", $iKanji, $knownKanji[$iKanji] / $len); + } + + printf("\n"); + } + + dump(count($termCounts)); + + printf( + <<getArgument('arg1'); + + //if ($arg1) { + // $io->note(sprintf('You passed an argument: %s', $arg1)); + //} + + //if ($input->getOption('option1')) { + // // ... + //} + + //$io->success('You have a new command! Now make it your own! Pass --help to see your options.'); + + return Command::SUCCESS; + } +}