package normalization;

import ac.biu.nlp.nlp.general.configuration.ConfigurationException;
import ac.biu.nlp.nlp.general.configuration.ConfigurationParams;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:normalization/NumberNormalizer.class */
/**
 * Rewrites number expressions found in English text (digit strings, spelled-out
 * number words, and a few currency / magnitude abbreviations such as {@code bn},
 * {@code m} and {@code k}) into plain numerals.
 *
 * <p>Word-to-digit rewrite rules are read from a tab-separated rules file, one
 * {@code <regex>\t<replacement>} pair per line; lines starting with {@code #}
 * are comments. Each instance keeps its own rule table.
 *
 * <p>Numbers are rendered with {@link NumberFormat#getInstance()} (grouping
 * disabled), so the output format follows the JVM default locale.
 */
public class NumberNormalizer {
    private static final String VOCAB_EXP = "hundred|thousand|million|billion|trillion";
    private static final String VOCAB_TENS = "twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety";
    private static final String VOCAB_DIGITS = "one|two|three|four|five|six|seven|eight|nine|zero";
    private static final String VOCAB_NUM = VOCAB_TENS + "|" + VOCAB_DIGITS
            + "|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen";
    private static final String VOCAB = "(" + VOCAB_EXP + "|" + VOCAB_NUM + ")";

    /** Two digit runs separated only by whitespace (e.g. {@code "555 1234"}). */
    private static final Pattern SPACED_DIGITS = Pattern.compile("(\\d+)\\s+(\\d+)");
    /** A digit run followed by a comma and exactly-three-digit group (e.g. {@code "1,234"}). */
    private static final Pattern GROUPED_THOUSANDS = Pattern.compile("(\\d+),(\\d{3})");
    /** The temporary marker inserted by {@link #SPACED_DIGITS} handling. */
    private static final Pattern PLACEHOLDER = Pattern.compile("(\\d+)%%%(\\d+)");
    /** An internal magnitude token such as {@code #000#}. */
    private static final Pattern MAGNITUDE_TOKEN = Pattern.compile("#0+#");
    /** Magnitude tokens handled by {@link #calcExp}, largest first. */
    private static final String[] MAGNITUDES = {"#000000000000#", "#000000000#", "#000000#", "#000#"};

    /**
     * Rewrite rules (regex -> replacement) for this instance. Kept as a
     * {@link Hashtable} on purpose: rules are applied in the table's iteration
     * order, and switching the map type would change that order.
     */
    private final Hashtable<String, String> rules = new Hashtable<>();
    NumberFormat nf;
    static final int MAX_LENGTH = 1000;

    /**
     * Creates a normalizer using the rules in {@code file}.
     *
     * @param file tab-separated rules file
     * @throws IOException if the file cannot be read or contains a malformed rule line
     */
    public NumberNormalizer(File file) throws IOException {
        loadRules(file);
        initNumberFormat();
    }

    /**
     * Creates a normalizer using the rules file named by the
     * {@code number-rules-file} configuration parameter.
     *
     * @param configurationParams configuration holding the {@code number-rules-file} entry
     * @throws IOException if the file cannot be read or contains a malformed rule line
     * @throws ConfigurationException if the configuration lookup fails
     */
    public NumberNormalizer(ConfigurationParams configurationParams) throws IOException, ConfigurationException {
        loadRules(configurationParams.getFile("number-rules-file"));
        initNumberFormat();
    }

    /** Sets up the formatter used to print normalized values (no digit grouping). */
    private void initNumberFormat() {
        this.nf = NumberFormat.getInstance();
        this.nf.setGroupingUsed(false);
    }

    /**
     * Normalizes the number expressions in {@code str}.
     *
     * @param str text to normalize
     * @return the text with number expressions rewritten as numerals
     * @throws Exception declared for backward compatibility with existing callers
     */
    public String normalize(String str) throws Exception {
        return normalizeLine(str);
    }

    /**
     * Reads the rules file into {@link #rules}. Blank lines and lines starting
     * with {@code #} are skipped.
     *
     * @throws IOException on read failure, or when a rule line has no tab-separated replacement
     */
    private void loadRules(File file) throws IOException {
        // try-with-resources so the reader is closed even when a line fails to parse.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                String trimmed = line.trim();
                if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                    continue;
                }
                String[] fields = trimmed.split("\t");
                if (fields.length < 2) {
                    throw new IOException("Malformed rule at " + file + ":" + lineNumber
                            + " (expected <pattern><TAB><replacement>): " + trimmed);
                }
                rules.put(fields[0], fields[1]);
            }
        }
    }

    /**
     * Normalizes one piece of text.
     *
     * <p>Text longer than {@link #MAX_LENGTH} is split at the first space at or
     * after index 500 and the halves are normalized independently; if no such
     * space exists within the first {@code MAX_LENGTH} characters the text is
     * returned with only its line-break/tab characters replaced by spaces.
     */
    private String normalizeLine(String str) {
        String text = str.replaceAll("[\n\r\f\t]", " ");
        if (text.length() > MAX_LENGTH) {
            int cut = text.indexOf(" ", 500);
            if (cut == -1 || cut > MAX_LENGTH) {
                return text;
            }
            return normalizeLine(text.substring(0, cut)) + normalizeLine(text.substring(cut));
        }

        // Protect whitespace-separated digit runs so they are not read as a single
        // number expression. Looping is required because one pass only handles
        // non-overlapping pairs ("1 2 3" needs two passes).
        while (SPACED_DIGITS.matcher(text).find()) {
            text = SPACED_DIGITS.matcher(text).replaceAll("$1%%%$2");
        }
        // Drop thousands separators; again one pass removes only alternate commas.
        while (GROUPED_THOUSANDS.matcher(text).find()) {
            text = GROUPED_THOUSANDS.matcher(text).replaceAll("$1$2");
        }

        String prepared = text
                .replaceAll("(\\d+),(\\d{2})", "$1\\.$2")
                .replaceAll("(?i)(euro|pound|yen|cent)(\\d+(\\.\\d+)?)", "$1 $2")
                .replaceAll("(?i)(\\d+(\\.\\d+)?)bn\\b", "$1 billion")
                .replaceAll("(?i)(\\$|\\$US |USD )(\\d+(\\.\\d+)?)m\\b", "$2 million dollars")
                .replaceAll("(?i)(\\d+(\\.\\d+)?)m (dollars|US dollars|USD|\\$US)\\b", "$1 million dollars")
                .replaceAll("(?i)(\\d+(\\.\\d+)?)k\\b", "$1 thousand")
                .replaceAll("(?i)(" + VOCAB_NUM + "|" + VOCAB_EXP + ")-(" + VOCAB_EXP + ")", "$1 $2")
                .replaceAll("(?i)(" + VOCAB_TENS + ")-(" + VOCAB_DIGITS + ")", "$1 $2")
                .replaceAll("(?i)(\\d+(\\.\\d+)?|" + VOCAB_NUM + "|" + VOCAB_EXP + ") (" + VOCAB_EXP + ")s\\b", "$1 $3")
                .replaceAll("(?i)(\\b" + VOCAB_EXP + "\\b) and (\\b" + VOCAB_NUM + "\\b|\\d+)", "$1 $2");

        // Normalize one number expression per pass. Each pass extends the regex
        // prefix with the value just produced so the next search starts after it;
        // the loop stops when a pass produces nothing.
        List<String> step = normalizeString("", prepared);
        String produced = step.get(0);
        String current = step.get(1);
        String prefix = ".*?" + produced + ".*?";
        while (!produced.isEmpty()) {
            step = normalizeString(prefix, current);
            produced = step.get(0);
            current = step.get(1);
            prefix = prefix + produced + ".*?";
        }

        String result = current
                .replaceAll("(?i)(\\$|USD |\\$US)\\b((((" + VOCAB + "\\b)|(\\d+(\\.\\d+)?))\\s*)+)", "$2 dollars ")
                .replaceAll("(\\d+)\\.0+\\b", "$1")
                .replaceAll("(?i)(euro|pound|yen|cent) (\\d+(\\.\\d+)?)", "$2 $1")
                .replaceAll("\\b1( |-)", "one$1");

        // Restore the protected digit runs. The search must be a find(): a
        // whole-string match would only succeed when the text *ends* with a
        // placeholder and would otherwise leave "%%%" in the output.
        while (PLACEHOLDER.matcher(result).find()) {
            result = PLACEHOLDER.matcher(result).replaceAll("$1 $2");
        }
        return result.replaceAll(" +", " ");
    }

    /**
     * Finds the first number expression in {@code str2} located after the regex
     * prefix {@code str} and replaces its occurrences with the formatted value(s).
     *
     * @param str regex fragment that must match before the expression
     * @param str2 text to search
     * @return a two-element list: the formatted value(s), each preceded by a
     *         space (empty string when nothing was normalized), and the
     *         resulting text
     */
    private List<String> normalizeString(String str, String str2) {
        List<String> result = new LinkedList<>();
        String formatted = "";
        Matcher matcher = Pattern.compile("(?i)" + str + "\\b.*?(-|(?:minus ))?(((\\b" + VOCAB + "\\b|\\d+(\\.\\d+)?)\\s*)+)").matcher(str2);
        if (matcher.find()) {
            boolean negative = matcher.group(1) != null;
            int sign = negative ? -1 : 1;
            // Group 2 can only start with a number word or digit, never with "minus".
            String number = matcher.group(2).trim();
            String matched = negative ? "minus " + number : number;
            // Digit strings with a leading zero (other than "0.xxx") are left untouched.
            if (!number.startsWith("0") || number.startsWith("0.")) {
                for (String value : normalizeNum(number)) {
                    formatted = formatted + " " + this.nf.format(Double.parseDouble(value) * sign);
                }
                // NOTE(review): the matched text is used as a regex here, so a '.'
                // in it matches any character. Left unchanged to preserve behavior.
                str2 = str2.replaceAll("\\b" + matched + "\\b", formatted.trim());
            }
        }
        result.add(formatted);
        result.add(str2);
        return result;
    }

    /**
     * For every match of {@code str} in {@code str2}, replaces the text of group 1
     * with the integer sum of groups 2 and 3.
     *
     * @param str regex with three capturing groups (whole pair, first number, second number)
     * @param str2 text to process
     */
    private String mergeElms(String str, String str2) {
        String merged = str2;
        // The matcher keeps scanning the original text while replacements are
        // applied to the working copy.
        Matcher matcher = Pattern.compile(str).matcher(str2);
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                int sum = Integer.parseInt(matcher.group(2)) + Integer.parseInt(matcher.group(3));
                merged = merged.replace(matcher.group(1), String.valueOf(sum));
            }
        }
        return merged;
    }

    /**
     * Converts a number expression (digits and/or number words) to numeric values.
     *
     * <p>The loaded rules are applied first, then magnitude words are turned into
     * internal {@code #0...0#} tokens which are combined arithmetically.
     *
     * @param str the number expression
     * @return the value(s) found, each as a {@code double} rendered with
     *         {@link String#valueOf(double)}; an expression that reads as several
     *         consecutive numbers yields several entries
     * @throws NumberFormatException if a token cannot be parsed as a number
     */
    public List<String> normalizeNum(String str) {
        List<String> values = new LinkedList<>();
        for (String rule : rules.keySet()) {
            str = str.replaceAll("(?i)\\b" + rule + "\\b", rules.get(rule));
        }
        String tokens = str
                .replaceAll("(?i)\\btrillions?\\b", "#000000000000#")
                .replaceAll("(?i)\\bbillions?\\b", "#000000000#")
                .replaceAll("(?i)\\bmillions?\\b", "#000000#")
                .replaceAll("(?i)\\bthousands?\\b", "#000#")
                .replaceAll("(?i)\\bhundreds?\\b", "#00#");

        // A decimal coefficient directly followed by a magnitude ("2.5 #000000#")
        // is resolved immediately; the whole expression becomes that product.
        Matcher decimalTimesMagnitude = Pattern.compile("(\\d+(\\.\\d+)) (#000+#)\\B").matcher(tokens);
        if (decimalTimesMagnitude.find()) {
            double coefficient = Double.parseDouble(decimalTimesMagnitude.group(1));
            long multiplier = Long.parseLong("1" + decimalTimesMagnitude.group(3).replace("#", ""));
            tokens = String.valueOf((long) (coefficient * multiplier));
        }

        tokens = tokens.replaceAll("(\\d+),(\\d+)", "$1$2");
        // A bare leading magnitude ("hundred ...") gets an implicit coefficient of 1.
        if (tokens.matches("^#00+#.*")) {
            tokens = " 1 " + tokens;
        }

        double total = 0.0d;
        String remaining = mergeElms("# ((\\d+) (\\d+))$",
                mergeElms("# ((\\d+) (\\d+)) #",
                        mergeElms("^((\\d+) (\\d+)) #", tokens)));
        for (String magnitude : MAGNITUDES) {
            List<String> partial = calcExp(remaining, magnitude);
            total += Double.parseDouble(partial.get(0));
            remaining = partial.get(1);
        }

        List<String> parts = new LinkedList<>();
        String[] words = remaining.trim().split(" ");
        if (words.length > 1) {
            // A boundary between two separate numbers is assumed after word i when
            // neither neighbour is a magnitude token and the next word is at least
            // as long as the current one.
            List<Integer> boundaries = new LinkedList<>();
            for (int i = 0; i < words.length - 1; i++) {
                String currentWord = words[i];
                String nextWord = words[i + 1];
                if (!MAGNITUDE_TOKEN.matcher(currentWord).matches()
                        && !MAGNITUDE_TOKEN.matcher(nextWord).matches()
                        && nextWord.length() >= currentWord.length()) {
                    boundaries.add(i);
                }
            }
            int next = 0;
            for (int boundary : boundaries) {
                StringBuilder part = new StringBuilder();
                while (next <= boundary) {
                    part.append(" ").append(words[next]);
                    next++;
                }
                parts.add(part.toString().replace("#", "").trim());
            }
            StringBuilder tail = new StringBuilder();
            for (int i = next; i < words.length; i++) {
                tail.append(" ").append(words[i]);
            }
            parts.add(tail.toString().replace("#", "").trim());
        } else {
            parts.add(remaining);
        }

        // The magnitude total belongs to the first part only.
        for (String part : parts) {
            values.add(String.valueOf(total + calcFinalRemainder(part)));
            total = 0.0d;
        }
        return values;
    }

    /**
     * Evaluates the {@code <coefficient> <magnitude> <rest>} portion of
     * {@code str} for the magnitude token {@code str2}.
     *
     * @param str tokenized expression
     * @param str2 magnitude token, e.g. {@code #000#}
     * @return a two-element list: the coefficient multiplied by the magnitude
     *         (or {@code "0"} when the token does not occur between whitespace),
     *         and the text still to be processed
     */
    private List<String> calcExp(String str, String str2) {
        List<String> result = new LinkedList<>();
        Matcher matcher = Pattern.compile("(.+?)\\s" + str2 + "\\s(.*)").matcher(str);
        if (!matcher.find()) {
            result.add("0");
            result.add(str);
            return result;
        }
        String[] terms = matcher.group(1).replace("#", "").replace(" 0", "0").trim().split(" ");
        // Summed right-to-left, matching the original evaluation order.
        double coefficient = 0.0d;
        for (int i = terms.length - 1; i >= 0; i--) {
            coefficient += Double.parseDouble(terms[i].trim().replace("#", ""));
        }
        // Always multiply numerically. Appending the magnitude token to the
        // printed coefficient breaks as soon as the coefficient is rendered in
        // exponent notation (e.g. "1.2345678E7#000#" is not parseable).
        double multiplier = Double.parseDouble("1" + str2.replace("#", ""));
        result.add(String.valueOf(coefficient * multiplier));
        result.add(matcher.group(2));
        return result;
    }

    /**
     * Sums the space-separated numeric terms of {@code str}. A term beginning
     * with {@code 0} is first glued onto the preceding term, and {@code #}
     * characters and empty terms are ignored.
     */
    private double calcFinalRemainder(String str) {
        String[] terms = str.replace(" 0", "0").trim().split(" ");
        double sum = 0.0d;
        // Summed right-to-left, matching the original evaluation order.
        for (int i = terms.length - 1; i >= 0; i--) {
            String term = terms[i].replace("#", "").trim();
            if (term.length() != 0) {
                sum += Double.parseDouble(term);
            }
        }
        return sum;
    }
}
