feat: More intelligent term detection mechanism supporting plain def.
This commit is contained in:
		
							parent
							
								
									9a59845473
								
							
						
					
					
						commit
						ee2525797e
					
				| 
						 | 
				
			
			@ -67,8 +67,39 @@ class Note
 | 
			
		|||
        $note->mediaInfo = $note->parseMediaInfo($note->fields['Notes']);
 | 
			
		||||
 | 
			
		||||
        // Set VocabKanji field
 | 
			
		||||
        //$vocabKanji = explode('|', $note->fields['VocabKanji']);
 | 
			
		||||
        $note->terms = self::parseVocabDef($note->fields['VocabDef']);
 | 
			
		||||
        $terms = self::parseVocabDef($note->fields['VocabDef']);
 | 
			
		||||
        if (null !== $terms) {
 | 
			
		||||
            $note->terms = $terms;
 | 
			
		||||
        } else {
 | 
			
		||||
            // Something went wrong when trying to parse the definitions into
 | 
			
		||||
            // terms, that means its format is non-conforming. If there's only
 | 
			
		||||
            // one term that means that it's an old one that wasn't updated.
 | 
			
		||||
            if (
 | 
			
		||||
                str_contains($note->fields['VocabKanji'], '|') or
 | 
			
		||||
                str_contains($note->fields['VocabKanji'], '|')
 | 
			
		||||
            ) {
 | 
			
		||||
                dump("ERROR: Multiple vocab kanjis with no proper definition.");
 | 
			
		||||
                dd($note->fields);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (mb_trim($note->fields['VocabDef']) === '') {
 | 
			
		||||
                $note->fields['VocabDef'] = '_';
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // Make the "<def>" turn into "<kanji>:<def>". Select the
 | 
			
		||||
            // appropriate colon character for each
 | 
			
		||||
            $separator = ':';
 | 
			
		||||
            if (preg_match('/[[:alpha:]]/u', $note->fields['VocabDef'])) {
 | 
			
		||||
                $separator = ':';
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            $note->fields['VocabDef'] = $note->fields['VocabKanji']
 | 
			
		||||
                . $separator
 | 
			
		||||
                . $note->fields['VocabDef'];
 | 
			
		||||
 | 
			
		||||
            $terms = self::parseVocabDef($note->fields['VocabDef']);
 | 
			
		||||
            $note->terms = $terms ?? dd($note->fields['VocabDef']);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // If not defined, find them from the highlighted parts in the sentence
 | 
			
		||||
        if (empty($note->terms)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -130,15 +161,15 @@ class Note
 | 
			
		|||
        ];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static function parseVocabDef(string $vocabDef): array
 | 
			
		||||
    public static function parseVocabDef(string $vocabDef): ?array
 | 
			
		||||
    {
 | 
			
		||||
        if (mb_trim($vocabDef) == "") return [];
 | 
			
		||||
        if (mb_trim($vocabDef) === '') return null;
 | 
			
		||||
 | 
			
		||||
        $terms = [];
 | 
			
		||||
 | 
			
		||||
        foreach (preg_split('|<br ?/?>|', $vocabDef) as $line) {
 | 
			
		||||
            $term = Term::fromVocabDefLine(strip_tags($line));
 | 
			
		||||
            if (null === $term) dd("error parsing term", $line);
 | 
			
		||||
            // Error parsing term, can't parse using vocabDef
 | 
			
		||||
            if (null === $term) return null;
 | 
			
		||||
            $terms[] = $term;
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue