package org.exmaralda.tagging;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* loaded from: input_file:org/exmaralda/tagging/CleanDerekoLexicon.class */
/**
 * Splits a tab-separated lexicon file into a "good" and a "bad" output file.
 *
 * <p>Each input line is split on tabs. An entry is rejected ("bad") when it does not have
 * exactly four columns or when it trips one of the filters in {@link #isBad(String[])}.
 * For the surviving entries, the tags {@code PTKANT} and {@code ITJ} are rewritten to
 * {@code NGIRR}, and for a handful of tags a copy of the entry carrying the counterpart tag
 * is appended (see {@link #counterpartTag(String)}).
 *
 * <p>NOTE(review): the columns appear to be word form, lemma, part-of-speech tag and
 * frequency (column 1 is compared against {@code "UNKNOWN"}, column 2 against tag names, and
 * the files end in {@code .freq}) -- confirm against the DeReKo file format.
 */
public class CleanDerekoLexicon {
    /** Number of tab-separated columns a well-formed entry must have. */
    private static final int FIELD_COUNT = 4;

    /** Unicode byte order mark; stripped when it is the first character of the first line. */
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    // Precompiled once; the regular expressions are the ones previously passed to String.matches.
    private static final Pattern STARTS_UPPER = Pattern.compile("^[A-ZÄÖÜ].*");
    private static final Pattern STARTS_LOWER = Pattern.compile("^[a-zäöü].*");
    private static final Pattern HAS_DIGIT = Pattern.compile(".*[0-9].*");
    private static final Pattern HAS_PERCENT_OR_DOLLAR = Pattern.compile(".*[\\%\\$].*");
    private static final Pattern SINGLE_UPPER = Pattern.compile("^[A-ZÄÖÜ]$");
    private static final Pattern ALL_UPPER = Pattern.compile("[A-ZÄÖÜ]+");

    /** Tags (column 2) that cause an entry to be rejected regardless of its other columns. */
    private static final Set<String> REJECTED_TAGS = new HashSet<>(Arrays.asList(
            "ADV", "KOKOM", "KON", "KOUI", "KOUS", "PWAV", "SEQ", "SPELL", "NGHES", "NGONO",
            "NGIRR", "PTKVZ", "APPR", "SEQU", "PTKMA", "NGAKW", "ART", "FM", "XY",
            "VVIMP", "PAV", "PROAV"));

    String INPUT = "Z:\\TAGGING\\WÖRTERBÜCHER\\DeReKo-2014-II-MainArchive-STT.100000.freq";
    String OUTPUT_GOOD = "Z:\\TAGGING\\WÖRTERBÜCHER\\DeReKo_GOOD.freq";
    String OUTPUT_BAD = "Z:\\TAGGING\\WÖRTERBÜCHER\\DeReKo_BAD.freq";
    ArrayList<String[]> allEntries = new ArrayList<>();
    ArrayList<String[]> badEntries = new ArrayList<>();
    ArrayList<String[]> additionalEntries = new ArrayList<>();

    /**
     * Runs the clean-up and logs any I/O failure at {@code SEVERE} level.
     *
     * @param strArr either exactly three paths (input file, "good" output file, "bad" output
     *               file) or anything else, in which case the built-in default paths are used
     */
    public static void main(String[] strArr) {
        CleanDerekoLexicon cleaner = new CleanDerekoLexicon();
        if (strArr != null && strArr.length == 3) {
            cleaner.INPUT = strArr[0];
            cleaner.OUTPUT_GOOD = strArr[1];
            cleaner.OUTPUT_BAD = strArr[2];
        }
        try {
            cleaner.doit();
        } catch (IOException e) {
            // FileNotFoundException and UnsupportedEncodingException are IOExceptions too.
            Logger.getLogger(CleanDerekoLexicon.class.getName()).log(Level.SEVERE, null, e);
        }
    }

    /**
     * Reads {@code INPUT}, partitions and expands the entries, and writes both output files.
     *
     * @throws IOException if the input cannot be read or an output cannot be written
     */
    private void doit() throws IOException {
        readEntries();
        partitionEntries();
        expandEntries();
        this.allEntries.addAll(this.additionalEntries);
        write(this.allEntries, this.OUTPUT_GOOD);
        write(this.badEntries, this.OUTPUT_BAD);
    }

    /** Reads every line of {@code INPUT} as UTF-8 and stores its tab-split columns in {@code allEntries}. */
    private void readEntries() throws IOException {
        System.out.println("Started reading document");
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(this.INPUT)), StandardCharsets.UTF_8))) {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                // Guard against an empty first line before peeking at its first character.
                if (firstLine && !line.isEmpty() && line.charAt(0) == BYTE_ORDER_MARK) {
                    line = line.substring(1);
                }
                firstLine = false;
                this.allEntries.add(line.split("\t"));
            }
        }
        System.out.println("Document read.");
    }

    /** Moves every rejected entry from {@code allEntries} to {@code badEntries}, preserving order. */
    private void partitionEntries() {
        ArrayList<String[]> kept = new ArrayList<>(this.allEntries.size());
        for (String[] entry : this.allEntries) {
            if (isBad(entry)) {
                this.badEntries.add(entry);
            } else {
                kept.add(entry);
            }
        }
        // Refill the existing list instead of removing entries one by one (which is quadratic).
        this.allEntries.clear();
        this.allEntries.addAll(kept);
    }

    /**
     * Decides whether an entry is rejected.
     *
     * @param entry the tab-split columns of one input line
     * @return {@code true} if the entry does not have exactly four columns or matches any filter
     */
    private static boolean isBad(String[] entry) {
        if (entry.length != FIELD_COUNT) {
            return true;
        }
        String form = entry[0];
        String lemma = entry[1];
        String tag = entry[2];

        if (lemma.equalsIgnoreCase("UNKNOWN")) {
            return true;
        }
        if (REJECTED_TAGS.contains(tag) || tag.startsWith("$")) {
            return true;
        }

        boolean noun = tag.equals("NE") || tag.equals("NN");
        boolean upperInitial = STARTS_UPPER.matcher(form).matches();

        // Capitalisation must agree with the tag: only NE/NN may start with an upper-case
        // letter, and NE/NN must not start with a lower-case letter.
        if (!noun && upperInitial) {
            return true;
        }
        if (noun && STARTS_LOWER.matcher(form).matches()) {
            return true;
        }
        // Upper-case column 0 paired with a lower-case column 1.
        if (upperInitial && STARTS_LOWER.matcher(lemma).matches()) {
            return true;
        }
        // Digits, '%', '$' or '.' anywhere in column 0.
        if (HAS_DIGIT.matcher(form).matches()
                || HAS_PERCENT_OR_DOLLAR.matcher(form).matches()
                || form.contains(".")) {
            return true;
        }
        // A single upper-case letter, or (unless tagged NE) a run of upper-case letters only.
        if (SINGLE_UPPER.matcher(form).matches()) {
            return true;
        }
        return !tag.equals("NE") && ALL_UPPER.matcher(form).matches();
    }

    /**
     * Rewrites {@code PTKANT}/{@code ITJ} tags to {@code NGIRR} in place and collects, in
     * {@code additionalEntries}, a copy with the counterpart tag for every entry that has one.
     */
    private void expandEntries() {
        for (String[] entry : this.allEntries) {
            if (entry[2].equals("PTKANT") || entry[2].equals("ITJ")) {
                entry[2] = "NGIRR";
            }
            String counterpart = counterpartTag(entry[2]);
            if (counterpart != null) {
                this.additionalEntries.add(new String[]{entry[0], entry[1], counterpart, entry[3]});
            }
        }
    }

    /**
     * Returns the tag under which an entry is additionally listed.
     *
     * @param tag the tag of an accepted entry
     * @return the counterpart tag, or {@code null} if the entry is not duplicated
     */
    private static String counterpartTag(String tag) {
        switch (tag) {
            case "PIS":
                return "PIAT";
            case "PIAT":
                return "PIS";
            case "PDS":
                return "PDAT";
            case "PDAT":
                return "PDS";
            case "PPOSAT":
                return "PPOSS";
            case "PPOSS":
                return "PPOSAT";
            case "VVINF":
                return "VVFIN";
            default:
                return null;
        }
    }

    /**
     * Writes the entries to a file as UTF-8, columns joined by tabs, one entry per line
     * terminated by the platform line separator. An entry with no columns writes nothing.
     *
     * @param arrayList the entries to write
     * @param str       path of the file to create or overwrite
     * @throws IOException if the file cannot be opened or written
     */
    private void write(ArrayList<String[]> arrayList, String str) throws IOException {
        System.out.println("started writing document... ");
        byte[] tab = "\t".getBytes(StandardCharsets.UTF_8);
        byte[] lineEnd = System.getProperty("line.separator").getBytes(StandardCharsets.UTF_8);
        try (OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(str)))) {
            for (String[] fields : arrayList) {
                for (int i = 0; i < fields.length; i++) {
                    out.write(fields[i].getBytes(StandardCharsets.UTF_8));
                    out.write(i + 1 < fields.length ? tab : lineEnd);
                }
            }
        }
        System.out.println("document written.");
    }
}
