chore: Move Term Class to its own file
This commit is contained in:
parent
39e6bc9af5
commit
e5f893a43d
|
@ -148,124 +148,3 @@ class Note
|
||||||
return \DateTimeImmutable::createFromFormat('U', $this->mod);
|
return \DateTimeImmutable::createFromFormat('U', $this->mod);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * A vocabulary entry: a headword plus optional Japanese and English definitions.
 *
 * The headword is kept in bracketed furigana notation, e.g. "食[た]べる", which
 * parseFurigana() splits into the plain text and its reading.
 */
class Term
{
    /**
     * Characters separating the headword from a Japanese / an English definition.
     *
     * NOTE(review): both constants hold the same character in this copy of the
     * code, which makes the English-only path of fromVocabDefLine() unreachable.
     * Presumably the Japanese separator is meant to be the fullwidth colon
     * (U+FF1A) and was folded to ASCII somewhere; confirm against the
     * repository before changing it.
     */
    private const JP_SEPARATOR = ':';
    private const EN_SEPARATOR = ':';

    /** Headword in bracketed furigana notation; null until it has been set. */
    public ?string $kanji = null;

    /** Japanese definition, or null when there is none. */
    public ?string $definitionJp = null;

    /** English definition, or null when there is none. */
    public ?string $definitionEn = null;

    /**
     * Reading of the headword with every annotated segment replaced by its
     * furigana, or null when the headword carries no differing reading.
     */
    public function getReading(): ?string
    {
        // A null headword is treated as empty instead of raising a TypeError.
        return self::parseFurigana($this->kanji ?? '')['reading'];
    }

    /**
     * Headword with all "[reading]" annotations removed.
     */
    public function getKanji(): string
    {
        return self::parseFurigana($this->kanji ?? '')['kanji'];
    }

    /**
     * Splits bracketed furigana notation into plain text and reading.
     *
     * Each segment is a run of characters other than space and "[", optionally
     * followed by "[reading]" and one optional space.
     *
     * @param string $furigana e.g. "食[た]べる"
     *
     * @return array{kanji: string, reading: ?string} "reading" is null when no
     *                                                segment has a reading that
     *                                                differs from its base text
     */
    public static function parseFurigana(string $furigana): array
    {
        // Per match: 0 = whole segment, 1 = base text, 2 = "[reading]", 3 = reading.
        // No "u" modifier: the delimiters are ASCII, so byte-wise matching never
        // splits a multi-byte character.
        preg_match_all('/([^ \[]+)(\[([^\]]*)\])? ?/', $furigana, $matches, PREG_SET_ORDER);

        $baseParts = array_map(static fn (array $m): string => $m[1], $matches);
        // Group 3 is absent when a segment has no annotation; fall back to the base text.
        $readingParts = array_map(static fn (array $m): string => $m[3] ?? $m[1], $matches);

        return [
            'kanji' => implode('', $baseParts),
            // Strict comparison: loose == would treat numeric strings such as
            // "10" and "1e1" as equal and wrongly drop the reading.
            'reading' => $baseParts === $readingParts
                ? null
                : implode('', $readingParts),
        ];
    }

    /**
     * Renders the term as one HTML vocabulary line: the headword inside a
     * <span> carrying Note::HIGHLIGHT_ATTR_KANJI, then the definition(s).
     * A lone "_" stands in when there is no definition at all.
     *
     * @return string
     */
    public function toAnkiVocabDef()
    {
        $ret = '<span ' . Note::HIGHLIGHT_ATTR_KANJI . '>' . $this->kanji;

        // Keyed on [has Japanese definition, has English definition].
        $ret .= match ([null !== $this->definitionJp, null !== $this->definitionEn]) {
            [false, false] => ':</span>_',
            [false, true] => ':</span> ' . $this->definitionEn,
            [true, false] => ':</span>' . $this->definitionJp,
            [true, true] => ':</span>' . $this->definitionJp . '<span style="color: #aacebe;">(' . $this->definitionEn . ')</span>',
        };

        return $ret;
    }

    /**
     * Parses one "headword<separator>definition" line into a Term.
     *
     * Recognised definition shapes after the separator:
     *  - empty or "_"        : no definition
     *  - "jp"                : Japanese only
     *  - "jp (en)"           : Japanese followed by English in parentheses
     *  - anything, when only the English separator matched: English only
     *
     * @param string $vocabDefLine plain-text line (presumably already stripped
     *                             of HTML; this method does not strip tags)
     *
     * @return Term|null the parsed term, or null when the line contains no
     *                   separator and therefore cannot be parsed
     */
    public static function fromVocabDefLine(string $vocabDefLine): ?Term
    {
        // ------------------------------------------------------ Get Kanji ---

        $jpStart = mb_strpos($vocabDefLine, self::JP_SEPARATOR);
        $enStart = mb_strpos($vocabDefLine, self::EN_SEPARATOR);

        // The Japanese separator wins when both are present.
        if (false !== $jpStart) {
            $separatorAt = $jpStart;
            $isJapanese = true;
        } elseif (false !== $enStart) {
            $separatorAt = $enStart;
            $isJapanese = false;
        } else {
            // No separator: previously this read an uninitialised property and
            // an undefined variable; report "not parseable" instead.
            return null;
        }

        $term = new self();

        // Convert 「this」 into [this].
        // NOTE(review): the ' ' => ' ' entry is a no-op as written; presumably the
        // key was an ideographic space (U+3000) - confirm against the repository.
        $term->kanji = mb_trim(strtr(mb_substr($vocabDefLine, 0, $separatorAt), [
            '「' => '[',
            '」' => ']',
            ' ' => ' ',
        ]));
        $def = mb_trim(mb_substr($vocabDefLine, $separatorAt + 1));

        // -------------------------------------------------- No definition ---

        // NOTE(review): the original compared against '_' twice; presumably one
        // of them was the fullwidth low line (U+FF3F) - confirm.
        if ('' === $def || '_' === $def) {
            return $term;
        }

        // --------------------------------------------------- Only English ---

        if (!$isJapanese) {
            $term->definitionEn = $def;

            return $term;
        }

        // -------------------------------------------------- Only Japanese ---

        $parenStart = mb_strpos($def, '(');
        if (false === $parenStart) {
            $term->definitionJp = $def;

            return $term;
        }

        // -------------------------------------- Both Japanese and English ---

        $term->definitionJp = mb_trim(mb_substr($def, 0, $parenStart));

        $english = mb_substr($def, $parenStart + 1);
        // Drop the closing parenthesis, but only when it is really there, so a
        // malformed line does not lose its last character.
        if (str_ends_with($english, ')')) {
            $english = mb_substr($english, 0, -1);
        }
        $term->definitionEn = mb_trim($english);

        return $term;
    }
}
|
|
||||||
|
|
|
@ -0,0 +1,124 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Entity;
|
||||||
|
|
||||||
|
/**
 * A vocabulary entry: a headword plus optional Japanese and English definitions.
 *
 * The headword is kept in bracketed furigana notation, e.g. "食[た]べる", which
 * parseFurigana() splits into the plain text and its reading.
 */
class Term
{
    /**
     * Characters separating the headword from a Japanese / an English definition.
     *
     * NOTE(review): both constants hold the same character in this copy of the
     * code, which makes the English-only path of fromVocabDefLine() unreachable.
     * Presumably the Japanese separator is meant to be the fullwidth colon
     * (U+FF1A) and was folded to ASCII somewhere; confirm against the
     * repository before changing it.
     */
    private const JP_SEPARATOR = ':';
    private const EN_SEPARATOR = ':';

    /** Headword in bracketed furigana notation; null until it has been set. */
    public ?string $kanji = null;

    /** Japanese definition, or null when there is none. */
    public ?string $definitionJp = null;

    /** English definition, or null when there is none. */
    public ?string $definitionEn = null;

    /**
     * Reading of the headword with every annotated segment replaced by its
     * furigana, or null when the headword carries no differing reading.
     */
    public function getReading(): ?string
    {
        // A null headword is treated as empty instead of raising a TypeError.
        return self::parseFurigana($this->kanji ?? '')['reading'];
    }

    /**
     * Headword with all "[reading]" annotations removed.
     */
    public function getKanji(): string
    {
        return self::parseFurigana($this->kanji ?? '')['kanji'];
    }

    /**
     * Splits bracketed furigana notation into plain text and reading.
     *
     * Each segment is a run of characters other than space and "[", optionally
     * followed by "[reading]" and one optional space.
     *
     * @param string $furigana e.g. "食[た]べる"
     *
     * @return array{kanji: string, reading: ?string} "reading" is null when no
     *                                                segment has a reading that
     *                                                differs from its base text
     */
    public static function parseFurigana(string $furigana): array
    {
        // Per match: 0 = whole segment, 1 = base text, 2 = "[reading]", 3 = reading.
        // No "u" modifier: the delimiters are ASCII, so byte-wise matching never
        // splits a multi-byte character.
        preg_match_all('/([^ \[]+)(\[([^\]]*)\])? ?/', $furigana, $matches, PREG_SET_ORDER);

        $baseParts = array_map(static fn (array $m): string => $m[1], $matches);
        // Group 3 is absent when a segment has no annotation; fall back to the base text.
        $readingParts = array_map(static fn (array $m): string => $m[3] ?? $m[1], $matches);

        return [
            'kanji' => implode('', $baseParts),
            // Strict comparison: loose == would treat numeric strings such as
            // "10" and "1e1" as equal and wrongly drop the reading.
            'reading' => $baseParts === $readingParts
                ? null
                : implode('', $readingParts),
        ];
    }

    /**
     * Renders the term as one HTML vocabulary line: the headword inside a
     * <span> carrying Note::HIGHLIGHT_ATTR_KANJI, then the definition(s).
     * A lone "_" stands in when there is no definition at all.
     *
     * @return string
     */
    public function toAnkiVocabDef()
    {
        $ret = '<span ' . Note::HIGHLIGHT_ATTR_KANJI . '>' . $this->kanji;

        // Keyed on [has Japanese definition, has English definition].
        $ret .= match ([null !== $this->definitionJp, null !== $this->definitionEn]) {
            [false, false] => ':</span>_',
            [false, true] => ':</span> ' . $this->definitionEn,
            [true, false] => ':</span>' . $this->definitionJp,
            [true, true] => ':</span>' . $this->definitionJp . '<span style="color: #aacebe;">(' . $this->definitionEn . ')</span>',
        };

        return $ret;
    }

    /**
     * Parses one "headword<separator>definition" line into a Term.
     *
     * Recognised definition shapes after the separator:
     *  - empty or "_"        : no definition
     *  - "jp"                : Japanese only
     *  - "jp (en)"           : Japanese followed by English in parentheses
     *  - anything, when only the English separator matched: English only
     *
     * @param string $vocabDefLine plain-text line (presumably already stripped
     *                             of HTML; this method does not strip tags)
     *
     * @return Term|null the parsed term, or null when the line contains no
     *                   separator and therefore cannot be parsed
     */
    public static function fromVocabDefLine(string $vocabDefLine): ?Term
    {
        // ------------------------------------------------------ Get Kanji ---

        $jpStart = mb_strpos($vocabDefLine, self::JP_SEPARATOR);
        $enStart = mb_strpos($vocabDefLine, self::EN_SEPARATOR);

        // The Japanese separator wins when both are present.
        if (false !== $jpStart) {
            $separatorAt = $jpStart;
            $isJapanese = true;
        } elseif (false !== $enStart) {
            $separatorAt = $enStart;
            $isJapanese = false;
        } else {
            // No separator: previously this read an uninitialised property and
            // an undefined variable; report "not parseable" instead.
            return null;
        }

        $term = new self();

        // Convert 「this」 into [this].
        // NOTE(review): the ' ' => ' ' entry is a no-op as written; presumably the
        // key was an ideographic space (U+3000) - confirm against the repository.
        $term->kanji = mb_trim(strtr(mb_substr($vocabDefLine, 0, $separatorAt), [
            '「' => '[',
            '」' => ']',
            ' ' => ' ',
        ]));
        $def = mb_trim(mb_substr($vocabDefLine, $separatorAt + 1));

        // -------------------------------------------------- No definition ---

        // NOTE(review): the original compared against '_' twice; presumably one
        // of them was the fullwidth low line (U+FF3F) - confirm.
        if ('' === $def || '_' === $def) {
            return $term;
        }

        // --------------------------------------------------- Only English ---

        if (!$isJapanese) {
            $term->definitionEn = $def;

            return $term;
        }

        // -------------------------------------------------- Only Japanese ---

        $parenStart = mb_strpos($def, '(');
        if (false === $parenStart) {
            $term->definitionJp = $def;

            return $term;
        }

        // -------------------------------------- Both Japanese and English ---

        $term->definitionJp = mb_trim(mb_substr($def, 0, $parenStart));

        $english = mb_substr($def, $parenStart + 1);
        // Drop the closing parenthesis, but only when it is really there, so a
        // malformed line does not lose its last character.
        if (str_ends_with($english, ')')) {
            $english = mb_substr($english, 0, -1);
        }
        $term->definitionEn = mb_trim($english);

        return $term;
    }
}
|
Loading…
Reference in New Issue