*/
    private static function extractKanji(string $str): array
    {
        // The "u" modifier makes PCRE treat pattern and subject as UTF-8;
        // preg_match_all() returns false when the engine fails.
        if (preg_match_all('/\p{Script=Han}/u', $str, $matches) === false) {
            throw new \RuntimeException(
                'Failed to extract kanji: ' . preg_last_error_msg(),
            );
        }

        // array_unique() keeps the key of each first occurrence, so the
        // result is not guaranteed to be a gap-free 0..n-1 list.
        return array_unique($matches[0]);
    }

    /**
     * Removes every character of $str that does not match \p{Script=Han}.
     *
     * @param string $str Text to filter.
     *
     * @return string The remaining characters, in their original order.
     *
     * @throws \RuntimeException When the regex engine fails (preg_replace() returned null).
     */
    private static function getOnlyKanji(string $str): string
    {
        $onlyKanji = preg_replace('/[^\p{Script=Han}]/u', '', $str);

        // preg_replace() returns null on failure; surface the real reason
        // instead of letting the string return type raise a TypeError.
        if ($onlyKanji === null) {
            throw new \RuntimeException(
                'Failed to filter kanji: ' . preg_last_error_msg(),
            );
        }

        return $onlyKanji;
    }

    /**
     * Registers every character of $subject that is not yet a key of $ref.
     *
     * Each previously unknown character is added to $ref with the value 0.
     *
     * @param array<string, int> $ref     Set of seen characters, keyed by the
     *                                    character itself; modified in place.
     * @param string             $subject Text whose characters are checked
     *                                    one by one.
     *
     * @return bool true when at least one character was added to $ref,
     *              false when every character was already present (or
     *              $subject is empty).
     */
    private static function kanjiDiff(array &$ref, string $subject): bool
    {
        $hasUnseenKanji = false;

        foreach (mb_str_split($subject) as $subKanji) {
            // Direct key lookup instead of scanning array_keys($ref) for
            // every character. Unlike a strict comparison against the keys,
            // this also matches integer-like keys, which PHP stores as ints.
            if (array_key_exists($subKanji, $ref)) {
                continue;
            }

            $ref[$subKanji] = 0;
            $hasUnseenKanji = true;
        }

        return $hasUnseenKanji;
    }

    /**
     * Declares the command's single required argument, "count".
     */
    protected function configure(): void
    {
        $this->addArgument(
            'count',
            InputArgument::REQUIRED,
            'Amount of cards to make',
        );
    }

    /**
     * Creates a SentenceListeningNote for $term and adds it through the Anki
     * service under 'production'.
     *
     * The source note is searched for by the term's kanji in the "SentKanji"
     * field first and, if nothing matches, in the "VocabKanji" field. The
     * last id returned by the search is the one used.
     *
     * @param Term $term Term to create the note for.
     *
     * @throws \RuntimeException When no source note matches the term, or when
     *                           the Anki service fails to add the new note.
     */
    protected function createProductionNoteFromTerm(Term $term): void
    {
        $noteIds = $this->ankiService->findNotesIds(sprintf(
            '"SentKanji:*%s*" "note:%s"',
            $term->getKanji(),
            SentenceNote::MODEL_NAME,
        ));

        if (count($noteIds) === 0) {
            $noteIds = $this->ankiService->findNotesIds(sprintf(
                '"VocabKanji:*%s*" "note:%s"',
                $term->getKanji(),
                SentenceNote::MODEL_NAME,
            ));
        }

        // Without this guard array_key_last() yields null and an undefined
        // key would be read and handed to getNote().
        if (count($noteIds) === 0) {
            throw new \RuntimeException(sprintf(
                'No "%s" note found for term "%s"',
                SentenceNote::MODEL_NAME,
                $term->getKanji(),
            ));
        }

        $sNote = $this->ankiService->getNote($noteIds[array_key_last($noteIds)]);
        $newSlNote = SentenceListeningNote::fromNote($sNote, $term);

        if (!$this->ankiService->addNote($newSlNote, 'production')) {
            throw new \RuntimeException('Failed to add note!');
        }
    }

    /**
     * Fetches every sentence note through the Anki service, printing progress
     * and the resulting count to standard output.
     *
     * @return list Notes as returned by the Anki service's getNotes().
     */
    private function getAllSentenceNotes(): array
    {
        printf('Getting all SentenceNote...');
        $allIds = $this->ankiService->getAllSentenceNoteIds();
        $allNotes = $this->ankiService->getNotes($allIds);
        printf(" OK (%d)\n", count($allNotes));

        return $allNotes;
    }

    /**
     * Fetches every sentence listening note through the Anki service,
     * printing progress and the resulting count to standard output.
     *
     * @return list Notes as returned by the Anki service's getNotes().
     */
    private function getAllSentenceListeningNotes(): array
    {
        printf('Getting all SentenceListeningNote...');
        $allListeningIds = $this->ankiService->getAllSentenceListeningNoteIds();
        $allListeningNotes = $this->ankiService->getNotes($allListeningIds);
        printf(" OK (%d)\n", count($allListeningNotes));

        return $allListeningNotes;
    }

protected function execute(InputInterface $input, OutputInterface $output): int { $allSentenceNotes = $this->getAllSentenceNotes(); $allListeningNotes = $this->getAllSentenceListeningNotes(); // Index of all the Terms indexed by its TermKanji $allTerms = []; // ["パレートの法則" => App\Entity\Term] // Set of known Kanji Characters $knownKanji = []; // ["法" => 0, "則" => 0] // How many times it appears (not as a term, but in KanjiSentence) $termCounts = []; // ["パレートの法則" => 1] printf('Indexing all terms...'); foreach ($allSentenceNotes as $note) { foreach ($note->getTerms() as &$term) { // Deduplicate list if (key_exists($term->getKanji(), $allTerms)) continue; // Actual indexing $allTerms[$term->getKanji()] = &$term; // Just simple intialization $termCounts[$term->getKanji()] = 0; foreach (self::extractKanji($term->getKanji()) as $kanji) { $knownKanji[$kanji] = 0; } // Please put me into a function unset($term); // Prevent things being reassigned } } printf(" OK (%d)\n", count($knownKanji)); // Populate $knownKanji ["例" => 378, ...]; // TODO: Move this into own function to prevent side-effects. It's // looping through the whole thing again anyway, so there's no // need for it in here. // // Maybe while you're at it, it could be simplified into a // function like $this->anki->getKanji('origField', 'countField') // Where count can be null so it's just a Set $progress = new Progress('Getting frequenciees', count($allSentenceNotes)); foreach ($allSentenceNotes as $note) { $progress->tick(); // Sanitize sentence (remove those pesky \u{200E}) $_sentKanji = str_replace( "\u{200E}", '', strip_tags($note->getFields()['SentKanji']) ); foreach ($knownKanji as $kanji => &$count) { if (str_contains($_sentKanji, $kanji)) $count++; } } // TODO: Make progress a function with a callback? 
That way scope inside // and side-effects are easy to control & track unset($progress); printf("\n"); // Build the values to be used in the ordering process // TODO: It kinda feels wrong that $termCounts is used in this special // manner while $seenScore is separate. Does it make sense to // build them at this stage? Make a generic orderer? // // $termOrdering = ['first' => 32, 'second' => 34, 'apple' => 2]; // // At first we just built the term list, then we generate an // ordering array where the list is ordered printf('Rating terms...'); $studiedKanji = $this->ankiService->getKnownSlnKanjiCounts('ASC'); $seenScore = []; foreach ($allTerms as $key => $term) { $diff = Japanese::kanjiDiff($term->getKanji(), array_keys($studiedKanji)); if (count($diff) <= 0) { unset($allTerms[$key]); unset($termCounts[$key]); continue; } // Build $seenScore $seenScore[$key] = 0; foreach (Japanese::getKanjiList($key) as $_kanji) { $seenScore[$key] += $studiedKanji[$_kanji] ?? 0; } // Build $termCounts $termCounts[$term->getKanji()] = count($diff); } // Ordering in having: // 1. Least new Kanji (ideally we just one 1 new kanji) // 2. Most Kanji (most amount of unique kanji) // 3. Least studied kanji uksort($termCounts, fn($a, $b) => $seenScore[$a] <=> $seenScore[$b]); uksort($termCounts, function ($a, $b) { $aLen = count(Japanese::getKanjiList($a)); $bLen = count(Japanese::getKanjiList($b)); return $bLen <=> $aLen; }); asort($termCounts, SORT_NUMERIC); printf(" OK\n"); // Have into account the ones that have already been created. // This will not only skip them but also update the general array for // already seen kanji. 
$seenKanji = []; printf('Filtering out terms with no new kanji...'); // First pass: Get the list of the kanji we've seen foreach ($allListeningNotes as $listeningNote) { $termKanji = self::getOnlyKanji($listeningNote->getTerm()->getKanji()); self::kanjiDiff($seenKanji, $termKanji); } // Second pass: Remove terms with no new kanji at all foreach ($termCounts as $term => $count) { $termKanji = self::getOnlyKanji($term); if (!self::kanjiDiff($seenKanji, $termKanji)) { unset($termCounts[$term]); unset($allTerms[$term]); } } printf(" OK\n"); asort($termCounts, SORT_NUMERIC); printf("\n"); $newNotesCount = intval($input->getArgument('count')); foreach ($termCounts as $term => $count) { if ($newNotesCount <= 0) break; $termKanji = self::getOnlyKanji($term); printf("%s %d | %d\n", "{$term}:", $count, $seenScore[$term]); //$len = mb_strlen($termKanji); //for ($i = 0; $i < $len; $i++) { // $iKanji = mb_substr($termKanji, $i, 1); // printf(" - %s: %0.2f\n", $iKanji, $knownKanji[$iKanji] / $len); //} $this->createProductionNoteFromTerm($allTerms[$term]); $newNotesCount -= 1; }; printf( <<