feat: Separate SentenceNote into its own extension of Note

This commit is contained in:
Dendy 2025-02-09 23:15:57 +09:00
parent 8c1613187a
commit eadd8a01ea
4 changed files with 249 additions and 150 deletions

View File

@ -3,6 +3,8 @@
namespace App\Controller;
use App\Entity\Note;
use App\Entity\SentenceNote;
use App\Entity\Term;
use App\Service\AnkiService;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Response;
@ -23,11 +25,35 @@ class AnkiController extends AbstractController
/**
 * Main route. In its visible form this is a debugging scratchpad: it loads
 * notes through AnkiService, walks the terms of every SentenceNote, echoes
 * the sentence and its highlighted kanji, and ends the script with die().
 *
 * NOTE(review): this method declares a Response return type but no return
 * statement is visible; execution ends at die() (or earlier, at dd()).
 *
 * @throws \Exception when a fetched note is not a SentenceNote
 */
#[Route('/', name: 'main')]
public function index(): Response
{
// NOTE(review): $allIds is assigned twice in a row; only the second value
// (getAllSentenceNoteIds()) is used below. This looks like the before/after
// lines of a change shown together - confirm which one is live.
$allIds = $this->ankiService->getAllNoteIds();
$allIds = $this->ankiService->getAllSentenceNoteIds();
$allNotes = $this->ankiService->getNotes($allIds);
// NOTE(review): dd() is presumably the dump-and-die helper; if so nothing
// below this line runs - confirm whether it is meant to remain.
dd($allNotes);
// Map of term kanji => Note; holds at most one entry per distinct kanji.
$kanjiNotes = [];
foreach ($allNotes as $note) {
// Only SentenceNote instances are accepted here.
if (!$note instanceof SentenceNote) throw new \Exception(sprintf(
'Expected SentenceNote, got %s',
$note::class,
));
foreach ($note->getTerms() as $term) {
assert($term instanceof Term);
// Skip kanji that already have an entry.
if (key_exists($term->getKanji(), $kanjiNotes)) continue;
$newNote = new Note();
// Debug output: the raw SentKanji field followed by a dump of the
// highlighted kanji extracted from it.
echo $note->getFields()['SentKanji'];
echo '<br><small>';
echo var_dump($note->getHighlightedKanji());
echo '</small><br><br>';
//echo $term->getKanji();
//echo ' | ';
$kanjiNotes[$term->getKanji()] = $newNote;
}
}
// Terminates the script after the debug output above.
die();
}
#[Route('/nonconforming', name: 'nonconforming')]

View File

@ -2,58 +2,47 @@
namespace App\Entity;
//use App\Repository\NoteRepository;
//use Doctrine\ORM\Mapping as ORM;
//#[ORM\Entity(repositoryClass: NoteRepository::class)]
class Note
{
//#[ORM\Id]
//#[ORM\GeneratedValue]
//#[ORM\Column]
private int $id;
private int $mod;
private array $terms = [];
private string $profile;
private array $tags = [];
private string $model;
// Maybe these don't make sense to keep, but leaving them here just in
// case, for handiness' sake
private array $fields = [];
private ?array $mediaInfo = null;
private array $cardIds;
const HIGHLIGHT_PATTERN = '/<span\s+([^>]*)>(.*?)<\/span>/i';
const HIGHLIGHT_ATTR_KANJI = 'style="color: rgb(255, 78, 8);"';
private readonly int $id;
private readonly int $mod;
private readonly string $model;
private string $profile;
private array $cardIds = [];
protected array $fields = [];
private array $tags = [];
// -------------------------------------------------- Getters & setters ---
public function getId(): int
{
return $this->id;
}
public function getTerms(): array
public function getModel(): string
{
return $this->terms;
return $this->model;
}
public function getFields(): array
{
return $this->fields;
}
public function hasTerm(string $kanji): bool
public function setFields(array $fields): static
{
foreach ($this->terms as $term) {
assert($term instanceof Term);
if ($term->kanji == $kanji) return true;
}
return false;
$this->fields = $fields;
return $this;
}
public static function fromAnki(array $noteInfo): self
// ------------------------------------------------------- Anki-related ---
public static function fromAnki(array $noteInfo): static
{
$note = new self();
$note = new static();
[
'noteId' => $note->id,
@ -72,127 +61,18 @@ class Note
// the order would be advisable.
$note->fields = array_map(fn($x) => $x['value'], $noteInfo['fields']);
$note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
// Set VocabKanji field
$note->terms = Term::fromNoteFields($note->fields);
// If unable to, create them from the highlighted parts in the sentence
if (empty($note->terms)) {
foreach ($note->getHighlightedKanji() as $highlighedKanji) {
$term = new Term();
$term->kanji = $highlighedKanji;
$term->definitionEn = null;
$term->definitionJp = null;
$note->terms[] = $term;
}
}
// Set to null whatever is null
$readings = array_map(
fn($x) => in_array($x, ['_', '_', '']) ? null : $x,
explode('', $note->fields['VocabFurigana']),
);
// Set readings from furigana field
foreach ($note->terms as $key => &$term) {
if (null === $term->getReading()) {
if (null !== ($readings[$key] ?? null)) {
$term->kanji .= '[' . $readings[$key] . ']';
}
}
}
return $note;
}
/** Return an array of strings with the highlighted kanji in the SentKanji */
public function getHighlightedKanji(): array
{
$ret = [];
$matches = [];
// 1. Get all spans in the text
preg_match_all(
self::HIGHLIGHT_PATTERN,
$this->fields['SentKanji'],
$matches,
PREG_SET_ORDER,
);
// 2. Check the ones that match with the kanji color
foreach ($matches as $match) {
if ($match[1] === self::HIGHLIGHT_ATTR_KANJI) {
$ret[] = mb_trim($match[2]);
}
}
return $ret;
}
public function toAnki(): array
{
return [
'id' => $this->id,
'fields' => [
'VocabKanji' => join('', array_map(
fn(Term $x) => $x->getKanji(),
$this->terms,
)),
'VocabFurigana' => join('', array_map(
fn(Term $x) => $x->getReading() ?? '_',
$this->terms,
)),
'VocabDef' => join("<br>\n", array_map(
fn(Term $x) => $x->toAnkiVocabDef(),
$this->terms,
)),
],
];
}
public function parseMediaInfo(string $notes): ?array
{
$matches = null;
// Parse the notes fields. It can be in the form of
// series-name_S01 EP07 (11h22m33s44ms)
// or
// movie-name EP (11h22m33s44ms)
if (1 !== preg_match(
'/(?<name>[a-z\-_]+)(_S)?(?<season>\d*) EP(?<episode>\d*) \((?<time>.*)\)/n',
$notes,
$matches,
)) {
return null;
}
// Remove number-indexed matches, cast numbers to integers
$matches = [
'name' => $matches['name'],
'time' => $matches['time'],
// NOTE: intval returns 0 if not a number, which is false-like
'season' => intval($matches['season']) ?: null,
'episode' => intval($matches['episode']) ?: null,
];
// Parse time into a DateInterval and replace it in the matches array
$time = new \DateInterval('PT0S');
preg_match('/(\d+)ms/', $matches['time'], $milliseconds);
preg_match('/(\d+)s/', $matches['time'], $seconds);
preg_match('/(\d+)m/', $matches['time'], $minutes);
preg_match('/(\d+)h/', $matches['time'], $hours);
if ($milliseconds[1] ?? false) $time->f = $milliseconds[1] * 1000;
if ($seconds[1] ?? false) $time->s = $seconds[1];
if ($minutes[1] ?? false) $time->i = $minutes[1];
if ($hours[1] ?? false) $time->h = $hours[1];
$matches['time'] = $time;
return $matches;
}
// ---------------------------------------------------- Derived methods ---
public function getCreatedAt(): \DateTimeImmutable
{

177
src/Entity/SentenceNote.php Normal file
View File

@ -0,0 +1,177 @@
<?php
namespace App\Entity;
/**
 * Note specialisation for the Anki model named by MODEL_NAME.
 *
 * On top of what Note provides, it keeps the vocabulary terms of the sentence
 * and the media info parsed from the "Notes" field.
 */
class SentenceNote extends Note
{
    /** Anki model name this class is able to parse. */
    public const MODEL_NAME = 'Japanese sentences';

    /** Result of parseMediaInfo(); null when the "Notes" field is not parseable. */
    private ?array $mediaInfo = [];

    /** @var Term[] Vocabulary terms attached to this sentence. */
    private array $terms = [];

    // -------------------------------------------------- Getters & setters ---

    /** @return Term[] */
    public function getTerms(): array
    {
        return $this->terms;
    }

    /** @param Term[] $terms */
    public function setTerms(array $terms): static
    {
        $this->terms = $terms;

        return $this;
    }

    // ------------------------------------------------------- Anki-related ---

    /**
     * Build a SentenceNote from the note info array handed over by Anki.
     *
     * @param array $noteInfo Note info, passed through to parent::fromAnki()
     *
     * @throws \Exception when the parsed note is not of model MODEL_NAME
     */
    public static function fromAnki(array $noteInfo): static
    {
        $note = parent::fromAnki($noteInfo);

        if ($note->getModel() !== self::MODEL_NAME) {
            throw new \Exception('Trying to parse wrong model');
        }

        $note->mediaInfo = self::parseMediaInfo($note->fields['Notes']);

        // Terms come from the note fields first...
        $note->terms = Term::fromNoteFields($note->fields);

        // ...and, if there are none, from the highlighted parts of the sentence.
        if (empty($note->terms)) {
            foreach ($note->getHighlightedKanji() as $highlightedKanji) {
                $term = new Term();
                $term->kanji = $highlightedKanji;
                $term->definitionEn = null;
                $term->definitionJp = null;
                $note->terms[] = $term;
            }
        }

        // Placeholder readings ('_' or empty) become null.
        // NOTE(review): the delimiter passed to explode() is an empty string
        // here, which makes explode() throw a ValueError on PHP 8. The same
        // empty delimiter is used by join() in toAnki(), and '_' is listed
        // twice below. These literals look like they lost a character; the
        // intended values cannot be recovered from this file - confirm.
        $readings = array_map(
            fn($x) => in_array($x, ['_', '_', ''], true) ? null : $x,
            explode('', $note->fields['VocabFurigana']),
        );

        // Append the reading from the furigana field to terms that lack one.
        // Terms are objects, so no by-reference iteration is needed (and no
        // dangling reference is left behind after the loop).
        foreach ($note->terms as $key => $term) {
            if (null === $term->getReading() && null !== ($readings[$key] ?? null)) {
                $term->kanji .= '[' . $readings[$key] . ']';
            }
        }

        return $note;
    }

    /**
     * Array representation to send back to Anki.
     *
     * The result of parent::toAnki() is merged with a 'fields' entry built
     * from the terms; array_merge() replaces a parent 'fields' entry as a
     * whole rather than merging it key by key.
     */
    public function toAnki(): array
    {
        return array_merge(parent::toAnki(), [
            'fields' => [
                'VocabKanji' => join('', array_map(
                    fn(Term $x) => $x->getKanji(),
                    $this->terms,
                )),
                'VocabFurigana' => join('', array_map(
                    fn(Term $x) => $x->getReading() ?? '_',
                    $this->terms,
                )),
                'VocabDef' => join("<br>\n", array_map(
                    fn(Term $x) => $x->toAnkiVocabDef(),
                    $this->terms,
                )),
            ],
        ]);
    }

    // ---------------------------------------------------- Derived methods ---

    /**
     * Whether one of the terms has exactly the given value in its kanji
     * property.
     *
     * NOTE(review): this compares the raw kanji property, to which
     * fromAnki() may have appended "[reading]" - confirm that is intended.
     */
    public function hasTerm(string $kanji): bool
    {
        foreach ($this->terms as $term) {
            assert($term instanceof Term);
            // Strict comparison: numeric-looking strings must not be
            // compared as numbers.
            if ($term->kanji === $kanji) {
                return true;
            }
        }

        return false;
    }

    /** Whether the SentKanji field contains the kanji highlight attribute. */
    public function isSentKanjiHighlighted(): bool
    {
        return str_contains(
            $this->fields['SentKanji'],
            self::HIGHLIGHT_ATTR_KANJI,
        );
    }

    /**
     * Return an array of strings with the highlighted kanji in the SentKanji.
     *
     * HIGHLIGHT_PATTERN and HIGHLIGHT_ATTR_KANJI are not declared in this
     * class; they are expected to be inherited from Note.
     *
     * @return string[]
     */
    public function getHighlightedKanji(): array
    {
        $ret = [];
        $matches = [];

        // 1. Get all spans in the text
        preg_match_all(
            self::HIGHLIGHT_PATTERN,
            $this->fields['SentKanji'],
            $matches,
            PREG_SET_ORDER,
        );

        // 2. Keep the ones whose attributes are exactly the kanji color
        foreach ($matches as $match) {
            if ($match[1] === self::HIGHLIGHT_ATTR_KANJI) {
                $ret[] = mb_trim($match[2]);
            }
        }

        return $ret;
    }

    /**
     * Parse the "Notes" field into media information.
     *
     * Accepted forms:
     *     series-name_S01 EP07 (11h22m33s44ms)
     *     movie-name EP (11h22m33s44ms)
     *
     * @return array|null Keys 'name' (string), 'time' (\DateInterval),
     *                    'season' (?int) and 'episode' (?int); null when the
     *                    text does not match
     */
    private static function parseMediaInfo(string $notes): ?array
    {
        $matches = [];

        // The name is matched lazily and the season suffix is its own
        // optional group: the name character class also accepts '_', 'S' and
        // digits, so a greedy name would swallow "_S01" and the season would
        // never be captured.
        $found = preg_match(
            '/(?<name>[0-9A-Za-z\-_]+?)(?:_S(?<season>\d+))? EP(?<episode>\d*) \((?<time>.*)\)/',
            $notes,
            $matches,
        );

        if (1 !== $found) {
            return null;
        }

        return [
            'name' => $matches['name'],
            'time' => self::parseTimestamp($matches['time']),
            // NOTE: intval returns 0 if not a number, which is false-like
            'season' => intval($matches['season']) ?: null,
            'episode' => intval($matches['episode']) ?: null,
        ];
    }

    /** Turn a timestamp such as "11h22m33s44ms" into a DateInterval. */
    private static function parseTimestamp(string $timestamp): \DateInterval
    {
        $interval = new \DateInterval('PT0S');

        if (1 === preg_match('/(\d+)h/', $timestamp, $hours)) {
            $interval->h = (int) $hours[1];
        }

        // The lookahead keeps the "44m" of "44ms" from being read as minutes
        // when the timestamp has no minutes component.
        if (1 === preg_match('/(\d+)m(?!s)/', $timestamp, $minutes)) {
            $interval->i = (int) $minutes[1];
        }

        if (1 === preg_match('/(\d+)s/', $timestamp, $seconds)) {
            $interval->s = (int) $seconds[1];
        }

        // DateInterval::$f is expressed as a fraction of a second.
        if (1 === preg_match('/(\d+)ms/', $timestamp, $milliseconds)) {
            $interval->f = (int) $milliseconds[1] / 1000;
        }

        return $interval;
    }
}

View File

@ -3,6 +3,7 @@
namespace App\Service;
use App\Entity\Note;
use App\Entity\SentenceNote;
use Symfony\Contracts\HttpClient\HttpClientInterface;
class AnkiService
@ -30,7 +31,7 @@ class AnkiService
return $result;
}
public function getAllNoteIds(): array
public function getAllSentenceNoteIds(): array
{
return $this->request(
'findNotes',
@ -60,19 +61,34 @@ class AnkiService
/**
 * Build note objects for the given note ids from what getNotesInfo() returns.
 *
 * NOTE(review): two return statements are visible; as written the second is
 * unreachable. They look like the before/after lines of a change shown
 * together (Note::fromAnki vs. self::parseNoteInfo) - confirm which is live.
 *
 * @param array $nids Note ids, passed to getNotesInfo()
 */
public function getNotes(array $nids): array
{
return array_map(Note::fromAnki(...), $this->getNotesInfo($nids));
return array_map(self::parseNoteInfo(...), $this->getNotesInfo($nids));
}
/**
 * Build the note object for a single note id from what getNoteInfo() returns.
 *
 * NOTE(review): the first return makes the lines after it unreachable as
 * written; they look like the before/after lines of a change shown together.
 * The later variant throws when getNoteInfo() yields null and otherwise
 * delegates to parseNoteInfo() - confirm which variant is live.
 *
 * @param int $nid Note id, passed to getNoteInfo()
 */
public function getNote(int $nid): ?Note
{
return Note::fromAnki($this->getNoteInfo($nid));
$noteInfo = $this->getNoteInfo($nid)
?? throw new \Exception("Note $nid not found.");
return self::parseNoteInfo($noteInfo);
}
/**
 * Turn one note info array into the Note subclass that handles its model.
 *
 * @param array $noteInfo Note info; its 'modelName' entry selects the class
 *
 * @throws \Exception when no class handles the model name
 */
private static function parseNoteInfo(array $noteInfo): Note
{
    $modelName = $noteInfo['modelName'];

    // Strict comparison, same as the match expression this replaces.
    if ($modelName === SentenceNote::MODEL_NAME) {
        return SentenceNote::fromAnki($noteInfo);
    }

    $message = sprintf(
        'Unrecognized Note "%s" of type "%s"',
        $noteInfo['noteId'],
        $modelName,
    );

    throw new \Exception($message);
}
/**
 * Return the note with the numerically largest id, fetched via getNote().
 *
 * NOTE(review): $latestId is assigned twice in a row; only the second value
 * (from getAllSentenceNoteIds()) is used. This looks like the before/after
 * lines of a change shown together - confirm which one is live.
 */
public function getLatestNote(): ?Note
{
// NoteIDs are just timestamps in milliseconds, so the latest is just
// the biggest numerically
$latestId = max($this->getAllNoteIds());
$latestId = max($this->getAllSentenceNoteIds());
return $this->getNote($latestId);
}