anker/src/Entity/Term.php

145 lines
4.7 KiB
PHP
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace App\Entity;
/**
 * A vocabulary term with optional Japanese and English definitions.
 *
 * A term can be rendered to an HTML fragment with toAnkiVocabDef() and parsed
 * back from one or more such lines with fromVocabDef().
 */
class Term
{
    /**
     * The term in furigana notation as understood by parseFurigana(),
     * e.g. "食[た]べ 物[もの]". Null until set.
     */
    public ?string $kanji = null;

    /** Japanese definition, or null when there is none. */
    public ?string $definitionJp = null;

    /** English definition, or null when there is none. */
    public ?string $definitionEn = null;

    /**
     * Reading of the term, assembled from the bracketed furigana.
     *
     * @return string|null Null when the term carries no reading that differs
     *                     from its base text (or when no term is set).
     */
    public function getReading(): ?string
    {
        // $kanji is nullable but parseFurigana() requires a string.
        return self::parseFurigana($this->kanji ?? '')['reading'];
    }

    /**
     * Base text of the term with all furigana annotations removed.
     *
     * @return string Empty string when no term is set.
     */
    public function getKanji(): string
    {
        return self::parseFurigana($this->kanji ?? '')['kanji'];
    }

    /**
     * Splits furigana notation ("base[reading]" segments, optionally separated
     * by a single space) into the plain base text and the full reading.
     *
     * @param string $furigana Text such as "食[た]べ 物[もの]".
     *
     * @return array{kanji: string, reading: ?string} 'reading' is null when
     *         every segment's reading equals its base text.
     */
    public static function parseFurigana(string $furigana): array
    {
        // Groups per match — 0: whole segment, 1: base text (kanji/kana),
        // 2: "[reading]" including brackets, 3: reading without brackets.
        // With PREG_SET_ORDER, trailing unmatched groups are omitted, hence
        // the "?? $x[1]" fallback for segments without a bracketed reading.
        preg_match_all('/([^ \[]+)(\[([^\]]*)\])? ?/', $furigana, $matches, PREG_SET_ORDER);

        $matchedKanji = array_map(static fn (array $x): string => $x[1], $matches);
        $matchedReading = array_map(static fn (array $x): string => $x[3] ?? $x[1], $matches);

        return [
            'kanji' => implode('', $matchedKanji),
            // Strict comparison: with "==" numeric-looking segments such as
            // "01" and "1" compare equal and the reading would be dropped.
            'reading' => $matchedKanji === $matchedReading
                ? null
                : implode('', $matchedReading),
        ];
    }

    /**
     * Renders the term and its definitions as a single HTML line.
     *
     * Layout by available definitions:
     *  - none:          <span …>TERM</span>_
     *  - English only:  <span …>TERM:</span> EN
     *  - Japanese only: <span …>TERM</span>JP
     *  - both:          <span …>TERM</span>JP<span style=…>(EN)</span>
     *
     * NOTE(review): fromVocabDefLine() searches for a separator between the
     * term and a Japanese definition, but no such separator is visible in the
     * literals below. The file is flagged as containing invisible Unicode
     * characters — confirm these literals against the raw file.
     */
    public function toAnkiVocabDef(): string
    {
        $ret = '<span ' . Note::HIGHLIGHT_ATTR_KANJI . '>' . $this->kanji;
        $ret .= match ([null !== $this->definitionJp, null !== $this->definitionEn]) {
            [false, false] => '</span>_',
            [false, true] => ':</span> ' . $this->definitionEn,
            [true, false] => '</span>' . $this->definitionJp,
            [true, true] => '</span>' . $this->definitionJp . '<span style="color: #aacebe;">(' . $this->definitionEn . ')</span>',
        };

        return $ret;
    }

    /**
     * Parses one tag-stripped definition line into a Term.
     *
     * The line is split at the Japanese separator if present, otherwise at the
     * full-width colon ':' that introduces an English-only definition.
     *
     * NOTE(review): the Japanese separator needle renders as an empty string
     * in the reviewed copy of this file; it presumably holds an invisible
     * Unicode character. Confirm against the raw file and prefer an explicit
     * "\u{...}" escape so it survives editors and copy/paste. With a genuinely
     * empty needle, mb_strpos() returns 0 on PHP 8, so every line would be
     * treated as Japanese with an empty term.
     *
     * @param string $vocabDefLine A single line without HTML tags.
     *
     * @return Term|null Null when the line contains neither separator.
     */
    private static function fromVocabDefLine(string $vocabDefLine): ?Term
    {
        // ---------------------------------------------- Locate separator ---
        $jpStart = mb_strpos($vocabDefLine, '');
        $enStart = mb_strpos($vocabDefLine, ':');

        // The Japanese separator wins when both are present.
        if (false !== $jpStart) {
            $splitAt = $jpStart;
            $isJapanese = true;
        } elseif (false !== $enStart) {
            $splitAt = $enStart;
            $isJapanese = false;
        } else {
            // Can't extract the term: the line doesn't follow the pattern.
            return null;
        }

        // ------------------------------------------------------ Get Kanji ---
        $term = new Term();
        // Convert 「this」 into [this].
        // NOTE(review): the first key of the last pair presumably was a
        // non-ASCII space (e.g. full-width); as rendered here it is a no-op.
        $term->kanji = mb_trim(strtr(mb_substr($vocabDefLine, 0, $splitAt), [
            '「' => '[',
            '」' => ']',
            ' ' => ' ',
        ]));
        // "+ 1" skips the separator, which is assumed to be one character.
        $def = mb_trim(mb_substr($vocabDefLine, $splitAt + 1));

        // -------------------------------------------------- No definition ---
        // NOTE(review): the two "_" alternatives look identical here; one is
        // presumably a confusable (e.g. full-width) underscore in the raw file.
        if ($def === '' || $def === '_' || $def === '_') {
            $term->definitionJp = null;
            $term->definitionEn = null;

            return $term;
        }

        // --------------------------------------------------- Only English ---
        if (!$isJapanese) {
            $term->definitionJp = null;
            $term->definitionEn = $def;

            return $term;
        }

        // An opening parenthesis means an English gloss follows the Japanese.
        $parenStart = mb_strpos($def, '(');

        // -------------------------------------------------- Only Japanese ---
        if (false === $parenStart) {
            $term->definitionJp = $def;
            $term->definitionEn = null;

            return $term;
        }

        // -------------------------------------- Both Japanese and English ---
        $term->definitionJp = mb_trim(mb_substr($def, 0, $parenStart));
        $english = mb_substr($def, $parenStart + 1);
        // Drop the closing parenthesis only when it is really there, so a
        // missing ")" no longer eats the last character of the definition.
        if (str_ends_with($english, ')')) {
            $english = mb_substr($english, 0, -1);
        }
        $term->definitionEn = mb_trim($english);

        return $term;
    }

    /**
     * Parses a multi-line vocabulary definition, one term per line, with
     * lines separated by <br>, <br/> or <br /> and other tags stripped.
     *
     * @param string $vocabDef HTML fragment containing the definitions.
     *
     * @return list<Term>|null Null when the input is blank or when any line
     *                         cannot be parsed.
     */
    public static function fromVocabDef(string $vocabDef): ?array
    {
        if (mb_trim($vocabDef) === '') {
            return null;
        }

        $terms = [];
        foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) {
            $term = self::fromVocabDefLine(strip_tags($line));
            // One unparsable line invalidates the whole definition.
            if (null === $term) {
                return null;
            }
            $terms[] = $term;
        }

        return $terms;
    }
}