Compare commits

..

No commits in common. "a838347ff5fd089edf93934edc44b03e379f7cb5" and "da9d4d344ac934d754850c2b7eeb85cb35b86081" have entirely different histories.

11 changed files with 63 additions and 318 deletions

4
.gitignore vendored
View File

@ -23,7 +23,3 @@
/public/assets/
/assets/vendor/
###< symfony/asset-mapper ###
###> phpstan/phpstan ###
phpstan.neon
###< phpstan/phpstan ###

View File

@ -96,8 +96,6 @@
}
},
"require-dev": {
"phpstan/phpstan": "^2.1",
"phpstan/phpstan-symfony": "^2.0",
"phpunit/phpunit": "^9.5",
"symfony/browser-kit": "7.1.*",
"symfony/css-selector": "7.1.*",

131
composer.lock generated
View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "14aafa03aea05ab652ae5863659345dd",
"content-hash": "bfa3fd6c661ba7df71cffe78211433ff",
"packages": [
{
"name": "composer/semver",
@ -7868,135 +7868,6 @@
},
"time": "2022-02-21T01:04:05+00:00"
},
{
"name": "phpstan/phpstan",
"version": "2.1.22",
"source": {
"type": "git",
"url": "https://github.com/phpstan/phpstan.git",
"reference": "41600c8379eb5aee63e9413fe9e97273e25d57e4"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/phpstan/phpstan/zipball/41600c8379eb5aee63e9413fe9e97273e25d57e4",
"reference": "41600c8379eb5aee63e9413fe9e97273e25d57e4",
"shasum": ""
},
"require": {
"php": "^7.4|^8.0"
},
"conflict": {
"phpstan/phpstan-shim": "*"
},
"bin": [
"phpstan",
"phpstan.phar"
],
"type": "library",
"autoload": {
"files": [
"bootstrap.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "PHPStan - PHP Static Analysis Tool",
"keywords": [
"dev",
"static analysis"
],
"support": {
"docs": "https://phpstan.org/user-guide/getting-started",
"forum": "https://github.com/phpstan/phpstan/discussions",
"issues": "https://github.com/phpstan/phpstan/issues",
"security": "https://github.com/phpstan/phpstan/security/policy",
"source": "https://github.com/phpstan/phpstan-src"
},
"funding": [
{
"url": "https://github.com/ondrejmirtes",
"type": "github"
},
{
"url": "https://github.com/phpstan",
"type": "github"
}
],
"time": "2025-08-04T19:17:37+00:00"
},
{
"name": "phpstan/phpstan-symfony",
"version": "2.0.7",
"source": {
"type": "git",
"url": "https://github.com/phpstan/phpstan-symfony.git",
"reference": "392f7ab8f52a0a776977be4e62535358c28e1b15"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/phpstan/phpstan-symfony/zipball/392f7ab8f52a0a776977be4e62535358c28e1b15",
"reference": "392f7ab8f52a0a776977be4e62535358c28e1b15",
"shasum": ""
},
"require": {
"ext-simplexml": "*",
"php": "^7.4 || ^8.0",
"phpstan/phpstan": "^2.1.13"
},
"conflict": {
"symfony/framework-bundle": "<3.0"
},
"require-dev": {
"php-parallel-lint/php-parallel-lint": "^1.2",
"phpstan/phpstan-phpunit": "^2.0",
"phpstan/phpstan-strict-rules": "^2.0",
"phpunit/phpunit": "^9.6",
"psr/container": "1.1.2",
"symfony/config": "^5.4 || ^6.1",
"symfony/console": "^5.4 || ^6.1",
"symfony/dependency-injection": "^5.4 || ^6.1",
"symfony/form": "^5.4 || ^6.1",
"symfony/framework-bundle": "^5.4 || ^6.1",
"symfony/http-foundation": "^5.4 || ^6.1",
"symfony/messenger": "^5.4",
"symfony/polyfill-php80": "^1.24",
"symfony/serializer": "^5.4",
"symfony/service-contracts": "^2.2.0"
},
"type": "phpstan-extension",
"extra": {
"phpstan": {
"includes": [
"extension.neon",
"rules.neon"
]
}
},
"autoload": {
"psr-4": {
"PHPStan\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Lukáš Unger",
"email": "looky.msc@gmail.com",
"homepage": "https://lookyman.net"
}
],
"description": "Symfony Framework extensions and rules for PHPStan",
"support": {
"issues": "https://github.com/phpstan/phpstan-symfony/issues",
"source": "https://github.com/phpstan/phpstan-symfony/tree/2.0.7"
},
"time": "2025-07-22T09:40:57+00:00"
},
{
"name": "phpunit/php-code-coverage",
"version": "9.2.32",

View File

@ -1,8 +0,0 @@
parameters:
level: 6
paths:
- bin/
- config/
- public/
- src/
- tests/

View File

@ -8,12 +8,13 @@ use App\Entity\Term;
use App\Service\AnkiService;
use App\Utils\Japanese;
use App\Utils\Progress;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
name: 'app:create-production',
@ -27,7 +28,6 @@ class CreateProductionCommand extends Command
parent::__construct();
}
/** @return list<string> */
private static function extractKanji(string $str): array
{
preg_match_all('/\p{Script=Han}/u', $str, $matches);
@ -39,7 +39,6 @@ class CreateProductionCommand extends Command
return preg_replace('/[^\p{Script=Han}]/u', '', $str);
}
/** @param array<string, 0> $ref */
private static function kanjiDiff(array &$ref, string $subject): bool
{
$len = mb_strlen($subject);
@ -48,7 +47,7 @@ class CreateProductionCommand extends Command
for ($i = 0; $i < $len; $i++) {
$subKanji = mb_substr($subject, $i, 1);
foreach (array_keys($ref) as $refKanji) {
foreach ($ref as $refKanji => $value) {
if ($subKanji === $refKanji) continue 2;
}
@ -61,11 +60,10 @@ class CreateProductionCommand extends Command
protected function configure(): void
{
$this->addArgument(
'count',
InputArgument::REQUIRED,
'Amount of cards to make',
);
$this
->addArgument('count', InputArgument::OPTIONAL, 'Amount of cards to make', 1);
//->addOption('option1', null, InputOption::VALUE_NONE, 'Option description')
;
}
protected function createProductionNoteFromTerm(Term $term): void
@ -114,105 +112,71 @@ class CreateProductionCommand extends Command
protected function execute(InputInterface $input, OutputInterface $output): int
{
$allSentenceNotes = $this->getAllSentenceNotes();
$allSentenceNotes = $this->getAllSentenceNotes();
$allListeningNotes = $this->getAllSentenceListeningNotes();
// Index of all the Terms indexed by its TermKanji
$allTerms = []; // ["パレートの法則" => App\Entity\Term]
// Set of known Kanji Characters
$knownKanji = []; // ["法" => 0, "則" => 0]
// How many times it appears (not as a term, but in KanjiSentence)
$termCounts = []; // ["パレートの法則" => 1]
// --------- Getting list<SentenceNote> into array<TermKanji, Term> ---
$allTerms = [];
$knownKanji = [];
$termCounts = [];
printf('Indexing all terms...');
foreach ($allSentenceNotes as $note) {
foreach ($note->getTerms() as &$term) {
// Deduplicate list
assert($term instanceof Term);
if (key_exists($term->getKanji(), $allTerms)) continue;
// Actual indexing
$allTerms[$term->getKanji()] = &$term;
// Just simple initialization
$termCounts[$term->getKanji()] = 0;
$allTerms[$term->getKanji()] = &$term;
foreach (self::extractKanji($term->getKanji()) as $kanji) {
$knownKanji[$kanji] = 0;
}
}
}
printf(" OK (%d)\n", count($knownKanji));
printf(" OK (%d)\n", count($allTerms));
// Populate $knownKanji ["例" => 378, ...];
// TODO: Move this into own function to prevent side-effects. It's
// looping through the whole thing again anyway, so there's no
// need for it in here.
//
// Maybe while you're at it, it could be simplified into a
// function like $this->anki->getKanji('origField', 'countField')
// Where count can be null so it's just a Set
$progress = new Progress('Getting frequenciees', count($allSentenceNotes));
foreach ($allSentenceNotes as $note) {
$progress->tick();
// Sanitize sentence (remove those pesky \u{200E})
$_sentKanji = str_replace(
$sentKanji = str_replace(
"\u{200E}",
'',
strip_tags($note->getFields()['SentKanji'])
);
foreach ($knownKanji as $kanji => &$count) {
if (str_contains($_sentKanji, $kanji)) $count++;
if (str_contains($sentKanji, $kanji)) $count++;
}
}
// TODO: Make progress a function with a callback? That way scope inside
// and side-effects are easy to control & track
unset($progress);
printf("\n");
// Build the values to be used in the ordering process
// TODO: It kinda feels wrong that $termCounts is used in this special
// manner while $seenScore is separate. Does it make sense to
// build them at this stage? Make a generic orderer?
//
// $termOrdering = ['first' => 32, 'second' => 34, 'apple' => 2];
//
// At first we just built the term list, then we generate an
// ordering array where the list is ordered
$seenKanji = $this->ankiService->getKnownSlnKanjiCounts();
printf('Rating terms...');
$studiedKanji = $this->ankiService->getKnownSlnKanjiCounts('ASC');
$seenScore = [];
foreach ($allTerms as $key => $term) {
$diff = Japanese::kanjiDiff($term->getKanji(), array_keys($studiedKanji));
$count = Japanese::kanjiDiff(
$term->getKanji(),
array_keys($seenKanji),
);
if (count($diff) <= 0) {
if ($count <= 0) {
unset($allTerms[$key]);
unset($termCounts[$key]);
continue;
}
// Build $seenScore
$seenScore[$key] = 0;
foreach (Japanese::getKanjiList($key) as $_kanji) {
$seenScore[$key] += $studiedKanji[$_kanji] ?? 0;
}
// Build $termCounts
$termCounts[$term->getKanji()] = count($diff);
$termCounts[$term->getKanji()] = $count;
}
// Ordering in having:
// 1. Least new Kanji (ideally we want just 1 new kanji)
// 2. Most Kanji (most amount of unique kanji)
// 3. Least studied kanji
uksort($termCounts, fn($a, $b) => $seenScore[$a] <=> $seenScore[$b]);
uksort($termCounts, function ($a, $b) {
$aLen = count(Japanese::getKanjiList($a));
$bLen = count(Japanese::getKanjiList($b));
$aLen = mb_strlen(Japanese::getOnlyKanji($a));
$bLen = mb_strlen(Japanese::getOnlyKanji($b));
return $bLen <=> $aLen;
});
asort($termCounts, SORT_NUMERIC);
asort($termCounts);
printf(" OK\n");
// Take into account the ones that have already been created.
// This will not only skip them but also update the general array for
// already seen kanji.
@ -220,6 +184,8 @@ class CreateProductionCommand extends Command
printf('Filtering out terms with no new kanji...');
// First pass: Get the list of the kanji we've seen
foreach ($allListeningNotes as $listeningNote) {
assert($listeningNote instanceof SentenceListeningNote);
$termKanji = self::getOnlyKanji($listeningNote->getTerm()->getKanji());
self::kanjiDiff($seenKanji, $termKanji);
}
@ -243,8 +209,11 @@ class CreateProductionCommand extends Command
foreach ($termCounts as $term => $count) {
if ($newNotesCount <= 0) break;
// FIXME: This shouldn't happen at all
if (!$allTerms[$term] instanceof Term) continue;
$termKanji = self::getOnlyKanji($term);
printf("%s %d | %d\n", "{$term}", $count, $seenScore[$term]);
printf("%s: %d\n", $term, $count);
//$len = mb_strlen($termKanji);
//for ($i = 0; $i < $len; $i++) {
@ -252,7 +221,7 @@ class CreateProductionCommand extends Command
// printf(" - %s: %0.2f\n", $iKanji, $knownKanji[$iKanji] / $len);
//}
//$this->createProductionNoteFromTerm($allTerms[$term]);
$this->createProductionNoteFromTerm($allTerms[$term]);
$newNotesCount -= 1;
};

View File

@ -73,14 +73,6 @@ class AnkiController extends AbstractController
]);
}
#[Route('/kanji', 'kanji', methods: 'GET')]
public function kanji()
{
$thing = $this->ankiService->getKnownSnKanjiCounts();
asort($thing, SORT_DESC);
return new Response(implode('', array_keys($thing)));
}
#[Route('/note/{nid}/get', name: 'get_note')]
public function get_note(int $nid)
{

View File

@ -11,12 +11,10 @@ class SentenceNote extends Note
// -------------------------------------------------- Getters & setters ---
/** @return list<Term> */
public function getTerms(): array
{
return $this->terms;
}
/** @param list<Term> $terms */
public function setTerms(array $terms): static
{
$this->terms = $terms;

View File

@ -18,21 +18,15 @@ class Term
return self::parseFurigana($this->kanji)['kanji'];
}
/**
* Get the kanji version & the reading for a given term
*
* TODO: Make this smarter & handle mixing of kanji & hiragana
*
* @return array{'kanji': string, 'reading': null|string}
* */
public static function parseFurigana(string $furigana): array
{
// 0: all, 1: (kanji/hiragana), 2: ([reading, ...]), 3: (reading)
// 0: all, 1: (kanji/hiragana), 2: ([reading]), 3: (reading)
preg_match_all('/([^ \[]+)(\[([^\]]*)\])? ?/', $furigana, $matches, PREG_SET_ORDER);
$matchedKanji = array_map(fn($x) => $x[1], $matches);
$matchedReading = array_map(fn($x) => $x[3] ?? $x[1], $matches);
return [
'kanji' => join('', $matchedKanji),
'reading' => $matchedKanji == $matchedReading
@ -41,7 +35,7 @@ class Term
];
}
public function toAnkiVocabDef(): string
public function toAnkiVocabDef()
{
$ret = '<span ' . Note::HIGHLIGHT_ATTR_KANJI . '>' . $this->kanji;
@ -85,7 +79,10 @@ class Term
'」' => ']',
' ' => ' ',
]));
$def = mb_trim($def);
$def = mb_trim($def);
if (!is_string($term->kanji)) {
return null;
}
// -------------------------------------------------- No definition ---
@ -142,10 +139,6 @@ class Term
return Term::fromVocabDefLine($kanji . $separator . $def);
}
/**
* @param array<string, string> $fields
* @return list<Term>
*/
public static function fromNoteFields(array $fields): array
{
// -------------------- Trying to extract it with the modern syntax ---

View File

@ -134,7 +134,7 @@ class AnkiService
}
/** @return array<string, int> */
public function getKnownSlnKanjiCounts(?string $order = null): array
public function getKnownSlnKanjiCounts(): array
{
$allListeningIds = $this->getAllSentenceListeningNoteIds();
$ret = [];
@ -152,34 +152,6 @@ class AnkiService
}
}
uasort($ret, function (int $a, int $b) use ($order) {
return $order === 'ASC' ? $a <=> $b : $b <=> $a;
});
return $ret;
}
/** @return array<string, int> */
public function getKnownSnKanjiCounts(): array
{
$allListeningIds = $this->getAllSentenceNoteIds();
$ret = [];
foreach ($this->getNotes($allListeningIds) as $sNote) {
assert($sNote instanceof SentenceNote);
foreach ($sNote->getTerms() as $term) {
$termKanji = Japanese::getOnlyKanji($term->getKanji());
$len = mb_strlen($termKanji);
for ($i = 0; $i < $len; $i++) {
$kanji = mb_substr($termKanji, $i, 1);
$ret[$kanji] ??= 0;
$ret[$kanji]++;
}
}
}
return $ret;
}
}

View File

@ -6,48 +6,33 @@ class Japanese
{
public static function getOnlyKanji(string $str): string
{
return preg_replace('/[^\p{Script=Han}]/u', '', $str) ?? '';
return preg_replace('/[^\p{Script=Han}]/u', '', $str);
}
/** @return \Generator<int, string> */
public static function mbIterate(
string $str,
int $start = 0,
int $length = 1,
?string $encoding = null,
): \Generator {
while (($char = mb_substr($str, $start++, $length, $encoding)) !== '') {
yield $char;
}
}
/** @return list<string> */
public static function getKanjiList(string $str): array
{
$ret = [];
foreach (self::mbIterate(self::getOnlyKanji($str)) as $kanji) {
$ret[$kanji] = 0;
}
return array_keys($ret);
}
/**
* Get the list of kanji in a given string that are not present in the given kanji set.
* Only kanji are considered, not katakana, hiragana or any other symbols.
/** Get the number of kanji of a string that are not in the given set
* of kanji
*
* Only kanji are considered, not katakana, hiragana or any other symbols.
*
* @param list<string> $kanjiSet
* @return list<string>
*/
public static function kanjiDiff(string $str, array $kanjiSet): array
public static function kanjiDiff(string $str, array $kanjiSet): int
{
$ret = [];
$ret = 0;
foreach (self::mbIterate(self::getOnlyKanji($str)) as $kanji) {
if (!array_search($kanji, $kanjiSet)) $ret[$kanji] = 0;
$strKanji = self::getOnlyKanji($str);
$len = mb_strlen($strKanji);
for ($i = 0; $i < $len; $i++) {
$kanji = mb_substr($strKanji, $i, 1);
if (!array_search($kanji, $kanjiSet)) {
$ret++;
}
}
return array_keys($ret);
//dump($str, $strKanji, $ret);
//echo "\n";
return $ret;
}
}

View File

@ -1,13 +1,4 @@
{
"doctrine/deprecations": {
"version": "1.1",
"recipe": {
"repo": "github.com/symfony/recipes",
"branch": "main",
"version": "1.0",
"ref": "87424683adc81d7dc305eefec1fced883084aab9"
}
},
"doctrine/doctrine-bundle": {
"version": "2.13",
"recipe": {
@ -35,18 +26,6 @@
"migrations/.gitignore"
]
},
"phpstan/phpstan": {
"version": "2.1",
"recipe": {
"repo": "github.com/symfony/recipes-contrib",
"branch": "main",
"version": "1.0",
"ref": "5e490cc197fb6bb1ae22e5abbc531ddc633b6767"
},
"files": [
"phpstan.dist.neon"
]
},
"phpunit/phpunit": {
"version": "9.6",
"recipe": {