package org.exmaralda.tagging;

import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.exmaralda.exakt.exmaraldaSearch.COMACorpusWalker;
import org.jdom.Document;
import org.jdom.JDOMException;

/* loaded from: input_file:org/exmaralda/tagging/CustomizedCorpusTreeTagger.class */
/**
 * Corpus walker that hands every segmented transcription of a COMA corpus to a
 * {@link TreeTagger} and, optionally, integrates the tagger output back into the
 * transcription file via a {@link SextantIntegrator}.
 *
 * <p>Tokens are normalised before tagging by stripping every colon (see the
 * anonymous {@link TreeTaggableSegmentedTranscription} subclass in
 * {@link #processTranscription(Document)}).
 */
public class CustomizedCorpusTreeTagger extends COMACorpusWalker {
    /** Minimum prefix length accepted by {@link File#createTempFile(String, String)}. */
    private static final int MIN_TEMP_PREFIX_LENGTH = 3;

    TreeTagger treeTagger;
    String segmentationXPath;
    String tokenXPath;
    /** If true, tagger output is written next to the transcription; otherwise to a temp file. */
    boolean writeSextantFiles;
    /** If true, the tagger output is integrated into the transcription after tagging. */
    boolean integrate;
    /** Text appended to the transcription's base name to form the output file name. */
    String suffix;

    // Defaults used by main(); left non-final because external code may reassign them.
    public static String TTC = "c:\\TreeTagger";
    public static String PF = "c:\\TreeTagger\\lib\\german-utf8.par";
    public static String ENC = "UTF-8";
    public static String[] OPT = {"-token", "-lemma", "-sgml", "-no-unknown", "", ""};

    /**
     * Reads the corpus and stores the tagging configuration.
     *
     * @param file              the COMA corpus file, passed to {@code readCorpus}
     * @param treeTagger        tagger that every transcription is handed to
     * @param segmentationXPath XPath passed to {@link TreeTaggableSegmentedTranscription}
     * @param tokenXPath        XPath passed to {@link TreeTaggableSegmentedTranscription}
     * @param writeSextantFiles whether the output file is kept next to the transcription
     * @param integrate         whether the output is integrated into the transcription
     * @param suffix            text appended to the base name of each output file
     * @throws IOException   if reading the corpus fails
     * @throws JDOMException if reading the corpus fails
     */
    public CustomizedCorpusTreeTagger(File file, TreeTagger treeTagger, String segmentationXPath, String tokenXPath, boolean writeSextantFiles, boolean integrate, String suffix) throws IOException, JDOMException {
        readCorpus(file);
        this.treeTagger = treeTagger;
        this.segmentationXPath = segmentationXPath;
        this.tokenXPath = tokenXPath;
        this.writeSextantFiles = writeSextantFiles;
        this.integrate = integrate;
        this.suffix = suffix;
    }

    /**
     * Determines where the tagger output for a transcription file is written.
     *
     * <p>The base name is the file name up to (not including) its last dot. When
     * {@link #writeSextantFiles} is set, the result is {@code <base><suffix>.esa} in the
     * transcription's directory; otherwise a temporary {@code .esa} file is created and
     * registered for deletion on JVM exit.
     *
     * @param file the transcription file
     * @return the file the tagger output should be written to
     * @throws IOException if the temporary file cannot be created
     */
    File getOutputFile(File file) throws IOException {
        String baseName = file.getName();
        int lastDot = baseName.lastIndexOf('.');
        if (lastDot >= 0) {
            baseName = baseName.substring(0, lastDot);
        }
        String outputStem = baseName + this.suffix;
        if (this.writeSextantFiles) {
            return new File(file.getParentFile(), outputStem + ".esa");
        }
        // File.createTempFile rejects prefixes shorter than three characters with an
        // IllegalArgumentException, so pad short stems instead of failing.
        StringBuilder prefix = new StringBuilder(outputStem);
        while (prefix.length() < MIN_TEMP_PREFIX_LENGTH) {
            prefix.append('_');
        }
        File tempFile = File.createTempFile(prefix.toString(), ".esa");
        tempFile.deleteOnExit();
        return tempFile;
    }

    /**
     * Tags the transcription located at {@code currentPath} and, if {@link #integrate}
     * is set, integrates the output and writes the document back to {@code currentPath}.
     *
     * @param document the transcription document currently visited by the walker
     * @throws JDOMException on JDOM errors, or wrapping any {@link IOException} raised
     *                       while tagging or integrating (the original exception is kept
     *                       as the cause)
     */
    @Override
    public void processTranscription(Document document) throws JDOMException {
        try {
            File transcriptionFile = new File(this.currentPath);
            TreeTaggableSegmentedTranscription taggable = new TreeTaggableSegmentedTranscription(document, transcriptionFile, this.segmentationXPath, this.tokenXPath) {
                /** Strips every colon from the token before it is handed to the tagger. */
                @Override
                public String normalizeToken(String token) {
                    return token.replace(":", "");
                }
            };
            File outputFile = getOutputFile(transcriptionFile);
            this.treeTagger.tag(taggable, outputFile);
            if (this.integrate) {
                SextantIntegrator integrator = new SextantIntegrator(this.currentPath);
                integrator.integrate(outputFile.getAbsolutePath());
                integrator.writeDocument(this.currentPath);
            }
        } catch (IOException e) {
            // The overridden signature only permits JDOMException; chain the IOException
            // as the cause so its stack trace reaches whoever handles the failure.
            throw new JDOMException(e.getLocalizedMessage(), e);
        }
    }

    /**
     * Fires the corpus-init progress event and walks all segmented transcriptions,
     * which invokes {@link #processTranscription(Document)} for each of them.
     *
     * @throws IOException   if walking the corpus fails
     * @throws JDOMException if walking the corpus or tagging a transcription fails
     */
    public void tagCorpus() throws IOException, JDOMException {
        fireCorpusInit(0.1d, "Initialising tagger...");
        walk(SEGMENTED_TRANSCRIPTIONS);
    }

    /**
     * Developer entry point: tags a hard-coded corpus using the static default tagger
     * settings, writing and integrating {@code _POS} output files.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        try {
            File corpusFile = new File("C:\\Users\\Schmidt\\ownCloud\\Shared\\ModiKo\\Datengrundlage\\MoDiKo-Gesamtkorpus-1\\modiko-gesamtkorpus-1.coma");
            TreeTagger tagger = new TreeTagger(TTC, PF, ENC, OPT);
            CustomizedCorpusTreeTagger corpusTagger = new CustomizedCorpusTreeTagger(
                    corpusFile,
                    tagger,
                    TaggingProfiles.GENERIC_WORDS_PUNCTUATION_SEGMENTATION_XPATH,
                    TaggingProfiles.GENERIC_WORDS_PUNCTUATION_TOKEN_XPATH,
                    true,
                    true,
                    "_POS");
            corpusTagger.tagCorpus();
        } catch (JDOMException e) {
            Logger.getLogger(CustomizedCorpusTreeTagger.class.getName()).log(Level.SEVERE, null, e);
        } catch (IOException e) {
            Logger.getLogger(CustomizedCorpusTreeTagger.class.getName()).log(Level.SEVERE, null, e);
        }
    }
}
