/**
 * A vocabulary entry: a headword written in bracket furigana notation plus
 * optional Japanese and English definitions.
 *
 * In the headword, each space-separated segment may carry its reading in
 * square brackets right after the base text, e.g. `食[た]べる`.
 */
class Term
{
    /** Headword in bracket furigana notation; null until one is assigned. */
    public ?string $kanji = null;

    /** Japanese definition; null when the term has none. */
    public ?string $definitionJp = null;

    /** English definition; null when the term has none. */
    public ?string $definitionEn = null;

    /**
     * Returns the headword with every bracketed reading substituted for its
     * base text, or null when no segment has a reading that differs from its
     * base text (including when no headword is set).
     */
    public function getReading(): ?string
    {
        return self::parseFurigana($this->kanji ?? '')['reading'];
    }

    /**
     * Returns the headword with all bracketed readings and segment-separating
     * spaces removed; an empty string when no headword is set.
     */
    public function getKanji(): string
    {
        return self::parseFurigana($this->kanji ?? '')['kanji'];
    }

    /**
     * Splits a furigana-annotated string into its base text and its reading.
     *
     * @param string $furigana Text such as `食[た]べる`; segments may be separated by a single space.
     *
     * @return array{kanji: string, reading: ?string} `reading` is null when it would be identical to `kanji`.
     */
    public static function parseFurigana(string $furigana): array
    {
        // Groups: 0 = whole segment, 1 = base text, 2 = "[reading]", 3 = reading.
        preg_match_all('/([^ \[]+)(\[([^\]]*)\])? ?/', $furigana, $segments, PREG_SET_ORDER);

        $bases = array_map(static fn (array $segment): string => $segment[1], $segments);
        // With PREG_SET_ORDER a segment without brackets has no index 3, so its
        // base text doubles as its reading.
        $readings = array_map(static fn (array $segment): string => $segment[3] ?? $segment[1], $segments);

        return [
            'kanji' => implode('', $bases),
            // Strict comparison: with `==`, numeric-looking strings such as
            // '10' and '1e1' would be considered equal and the reading lost.
            'reading' => $bases === $readings ? null : implode('', $readings),
        ];
    }

    /**
     * Serialises the term as one vocabulary-definition line.
     *
     * Output shapes: `kanji:_` (no definition), `kanji: english`,
     * `kanji:japanese` and `kanji:japanese(english)`.
     */
    public function toAnkiVocabDef(): string
    {
        $definition = match (true) {
            null !== $this->definitionJp && null !== $this->definitionEn
                => ':' . $this->definitionJp . '(' . $this->definitionEn . ')',
            null !== $this->definitionJp => ':' . $this->definitionJp,
            null !== $this->definitionEn => ': ' . $this->definitionEn,
            default => ':_',
        };

        return ($this->kanji ?? '') . $definition;
    }

    /**
     * Parses one vocabulary-definition line of the form `kanji:definition`.
     *
     * The text before the first `:` becomes the headword (with 「」 converted
     * to []); the text after it is the definition. A definition of `_` or an
     * empty definition means "none". Otherwise everything before the first
     * `(` is the Japanese definition and the parenthesised remainder is the
     * English one.
     *
     * NOTE(review): a line written by toAnkiVocabDef() for an English-only
     * term (`kanji: meaning`) is read back here as a Japanese-only
     * definition, because both forms use the same `:` separator — confirm
     * whether a distinct separator was intended for one of them.
     *
     * @param string $vocabDefLine The raw line to parse.
     *
     * @return Term|null The parsed term, or null when the line has no `:` separator.
     */
    public static function fromVocabDefLine(string $vocabDefLine): ?Term
    {
        $separatorAt = mb_strpos($vocabDefLine, ':');
        if (false === $separatorAt) {
            // Nothing to split on: report "not a definition line" instead of
            // failing later on a missing headword.
            return null;
        }

        $term = new Term();

        // Convert 「this」 into [this] so the headword uses bracket notation.
        $term->kanji = mb_trim(strtr(mb_substr($vocabDefLine, 0, $separatorAt), [
            '「' => '[',
            '」' => ']',
        ]));

        $definition = mb_trim(mb_substr($vocabDefLine, $separatorAt + 1));

        // Explicit "no definition" placeholder (or nothing after the separator).
        if ('' === $definition || '_' === $definition) {
            return $term;
        }

        $parenAt = mb_strpos($definition, '(');
        if (false === $parenAt) {
            // No parenthesis: the whole definition is the Japanese one.
            $term->definitionJp = $definition;

            return $term;
        }

        $term->definitionJp = mb_trim(mb_substr($definition, 0, $parenAt));

        $english = mb_substr($definition, $parenAt + 1);
        // Drop the closing parenthesis only when it is really there, so an
        // unterminated "(meaning" does not lose its last character.
        if (str_ends_with($english, ')')) {
            $english = mb_substr($english, 0, -1);
        }
        $term->definitionEn = mb_trim($english);

        return $term;
    }
}