feat: Interpret terms from non-conformant definitions
This commit is contained in:
		
							parent
							
								
									6fc6307f6e
								
							
						
					
					
						commit
						c19250e29e
					
				|  | @ -67,39 +67,7 @@ class Note | ||||||
|         $note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']); |         $note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']); | ||||||
| 
 | 
 | ||||||
|         // Set VocabKanji field
 |         // Set VocabKanji field
 | ||||||
|         $terms = Term::fromVocabDef($note->fields['VocabDef']); |         $terms = Term::fromNoteFields($note->fields); | ||||||
|         if (null !== $terms) { |  | ||||||
|             $note->terms = $terms; |  | ||||||
|         } else { |  | ||||||
|             // Something went wrong when trying to parse the definitions into
 |  | ||||||
|             // terms, that means its format is non-conforming. If there's only
 |  | ||||||
|             // one term that means that it's an old one that wasn't updated.
 |  | ||||||
|             if ( |  | ||||||
|                 str_contains($note->fields['VocabKanji'], '|') or |  | ||||||
|                 str_contains($note->fields['VocabKanji'], '|') |  | ||||||
|             ) { |  | ||||||
|                 dump("ERROR: Multiple vocab kanjis with no proper definition."); |  | ||||||
|                 dd($note->fields); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             if (mb_trim($note->fields['VocabDef']) === '') { |  | ||||||
|                 $note->fields['VocabDef'] = '_'; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // Make the "<def>" turn into "<kanji>:<def>". Select the
 |  | ||||||
|             // appropriate semicolon character for each
 |  | ||||||
|             $separator = ':'; |  | ||||||
|             if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) { |  | ||||||
|                 $separator = ':'; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             $note->fields['VocabDef'] = $note->fields['VocabKanji'] |  | ||||||
|                 . $separator |  | ||||||
|                 . $note->fields['VocabDef']; |  | ||||||
| 
 |  | ||||||
|             $terms = Term::fromVocabDef($note->fields['VocabDef']); |  | ||||||
|             $note->terms = $terms ?? dd($note->fields['VocabDef']); |  | ||||||
|         } |  | ||||||
| 
 | 
 | ||||||
|         // If not defined, find them from the highlighted parts in the sentence
 |         // If not defined, find them from the highlighted parts in the sentence
 | ||||||
|         if (empty($note->terms)) { |         if (empty($note->terms)) { | ||||||
|  |  | ||||||
|  | @ -127,18 +127,46 @@ class Term | ||||||
|         dd("Unexpected error, couldn't parse definition line", $vocabDefLine); |         dd("Unexpected error, couldn't parse definition line", $vocabDefLine); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     public static function fromVocabDef(string $vocabDef): ?array |     /** Turns a def like "<def>" into "<kanji>:<def>" */ | ||||||
|  |     private static function fromLegacyLine(string $kanji, string $def): ?Term | ||||||
|     { |     { | ||||||
|         if (mb_trim($vocabDef) === '') return null; |         // Select the appropriate colon separator for the job
 | ||||||
|  |         $separator = preg_match('/[[:alpha:]]/u', $def) | ||||||
|  |             ? ': ' | ||||||
|  |             : ':'; | ||||||
|  | 
 | ||||||
|  |         // Stick the kanji at the start and see if it makes sense
 | ||||||
|  |         return Term::fromVocabDefLine($kanji . $separator . $def); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     public static function fromNoteFields(array $fields): ?array | ||||||
|  |     { | ||||||
|  |         // -------------------- Trying to extract it with the modern syntax ---
 | ||||||
|  |         // 言葉: word
 | ||||||
|  |         // 上げる:上に動くこと。
 | ||||||
|  |         // 雨:水粒を降ること。(rain)
 | ||||||
| 
 | 
 | ||||||
|         $terms = []; |         $terms = []; | ||||||
|         foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) { |         foreach (preg_split('|<br ?/?>|', $fields['VocabDef']) as $line) { | ||||||
|             $term = self::fromVocabDefLine(strip_tags($line)); |             $terms[] = self::fromVocabDefLine(strip_tags($line)); | ||||||
|             // Error parsing term, can't parse using vocabDef
 |  | ||||||
|             if (null === $term) return null; |  | ||||||
|             $terms[] = $term; |  | ||||||
|         }; |         }; | ||||||
|  |         // If there's no nulls, everything went good
 | ||||||
|  |         if (!in_array(null, $terms, true)) return $terms; | ||||||
| 
 | 
 | ||||||
|         return $terms; | 
 | ||||||
|  |         // ------------ Extracting failed, try to infer from other syntaxes ---
 | ||||||
|  | 
 | ||||||
|  |         $kanjis = explode('|', $fields['VocabKanji']); | ||||||
|  |         $defs   = explode('|', $fields['VocabDef']); | ||||||
|  |         // Number of legacy definitions is different from number of kanji
 | ||||||
|  |         if (count($kanjis) !== count($defs)) return null; | ||||||
|  | 
 | ||||||
|  |         $terms = []; | ||||||
|  |         foreach (array_combine($kanjis, $defs) as $kanji => $def) { | ||||||
|  |             $terms[] = self::fromLegacyLine($kanji, $def); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Search for nulls, if found, it's owari da
 | ||||||
|  |         return in_array(null, $terms, true) ? null : $terms; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue