package com.archeus.dex.html;

import com.archeus.FormatedText;
import com.archeus.FormatedTextHelper;
import com.archeus.FormatedWord;
import com.archeus.TextAttr;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.html.dom.HTMLElementImpl;
import org.apache.html.dom.HTMLParagraphElementImpl;
import org.apache.xerces.dom.TextImpl;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

/* loaded from: input_file:com/archeus/dex/html/HtmlParser.class */
/**
 * Converts an HTML document into a flat list of {@link FormatedText} runs.
 *
 * <p>The document is parsed into a DOM with the NekoHTML {@link DOMParser}; the tree is then
 * walked depth-first. Every text node becomes one {@link FormatedText} carrying the bold/italic
 * state derived from its {@code <b>}/{@code <i>} ancestors, and every paragraph element
 * contributes a {@code "\n"} run before its content.
 */
public class HtmlParser {
    /**
     * Command-line entry point: parses the HTML file named by the first argument and prints
     * each word returned by {@link FormatedTextHelper#wordList} on its own line.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the path of the HTML file
     * @throws IllegalArgumentException if no file path is supplied
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length == 0) {
            throw new IllegalArgumentException("Usage: HtmlParser <html-file>");
        }
        // Decode explicitly as UTF-8: InputSource.setEncoding() is ignored once a character
        // stream is supplied, so a plain FileReader would silently fall back to the platform
        // charset. try-with-resources also guarantees the file handle is released.
        try (Reader reader =
                new InputStreamReader(new FileInputStream(strArr[0]), StandardCharsets.UTF_8)) {
            InputSource inputSource = new InputSource(reader);
            inputSource.setEncoding("UTF-8");
            Iterator<FormatedWord> words =
                    FormatedTextHelper.wordList(parseFile(inputSource)).iterator();
            while (words.hasNext()) {
                System.out.println(words.next());
            }
        }
    }

    /**
     * Parses the given HTML input and returns its text runs in document order.
     *
     * @param inputSource the HTML input to parse
     * @return a new, mutable list of the text runs found in the document
     * @throws Exception if the underlying parser fails
     */
    public static ArrayList<FormatedText> parseFile(InputSource inputSource) throws Exception {
        DOMParser domParser = new DOMParser();
        domParser.parse(inputSource);
        ArrayList<FormatedText> runs = new ArrayList<>();
        parse(domParser.getDocument(), 0, runs);
        return runs;
    }

    /**
     * Accumulates formatting flags from {@code node} and all of its ancestors.
     *
     * <p>Sets bold on {@code textAttr} if any HTML element on the path to the root is named
     * {@code B}, and italic if any is named {@code I}. Flags are only ever switched on, never
     * cleared. A {@code null} node is a no-op.
     *
     * @param node the node whose ancestor chain is inspected; may be {@code null}
     * @param textAttr the attribute object to update
     */
    public static void getFormat(Node node, TextAttr textAttr) {
        for (Node current = node; current != null; current = current.getParentNode()) {
            if (!(current instanceof HTMLElementImpl)) {
                continue;
            }
            // equalsIgnoreCase is locale-independent; toUpperCase() with the default locale
            // turns "i" into a dotted capital under Turkish/Azeri locales and would miss <i>.
            String tagName = current.getNodeName();
            if ("B".equalsIgnoreCase(tagName)) {
                textAttr.setBold(true);
            }
            if ("I".equalsIgnoreCase(tagName)) {
                textAttr.setItalic(true);
            }
        }
    }

    /**
     * Walks the subtree rooted at {@code node} depth-first and appends its text runs.
     *
     * <p>A text node yields one run with its content and inherited formatting; a paragraph
     * element yields a {@code "\n"} run with default attributes before its children are
     * visited. A {@code null} node is ignored.
     *
     * @param node root of the subtree to visit; may be {@code null}
     * @param i value forwarded unchanged to every created {@link FormatedText}
     * @param arrayList destination list that receives the runs in document order
     */
    public static void parse(Node node, int i, ArrayList<FormatedText> arrayList) {
        if (node == null) {
            return;
        }
        if (node instanceof TextImpl) {
            TextAttr textAttr = new TextAttr();
            getFormat(node, textAttr);
            arrayList.add(new FormatedText(node.getTextContent(), textAttr, i));
        } else if (node instanceof HTMLParagraphElementImpl) {
            arrayList.add(new FormatedText("\n", new TextAttr(), i));
        }
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            parse(child, i, arrayList);
        }
    }
}
