package com.zzsn.thinktank.util;

import org.w3c.dom.*;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Evaluates XPath expressions against DOM content and flattens the result to text.
 *
 * <p>{@link #parser(Object, String)} returns the raw evaluation result,
 * {@link #parserStr(Object, String)} turns that result into a single cleaned-up
 * string, and {@link #xmlGetDocument(String)} builds a DOM {@link Document} from
 * XML text.
 */
public class PageBuilderParser {

    private static final Logger LOGGER = Logger.getLogger(PageBuilderParser.class.getName());

    /**
     * Result types attempted by {@link #parser(Object, String)}, in order. The first
     * type for which evaluation does not throw wins.
     */
    private static final QName[] RESULT_TYPES = {
            XPathConstants.NODESET,
            XPathConstants.NODE,
            XPathConstants.STRING,
            XPathConstants.NUMBER,
            XPathConstants.BOOLEAN
    };

    /** Separator placed between the texts of consecutive nodes of a node-set result. */
    private static final String NODE_TEXT_SEPARATOR = " ";

    /** Byte-order mark as it appears at the start of an already-decoded string. */
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES =
            "http://xml.org/sax/features/external-general-entities";
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES =
            "http://xml.org/sax/features/external-parameter-entities";
    private static final String FEATURE_LOAD_EXTERNAL_DTD =
            "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    /**
     * Evaluates {@code path} against {@code doc} and renders the result as text.
     *
     * <p>A node-set is rendered as the text of each node joined by a single space,
     * a single node as its text, and a string, number or boolean through
     * {@code toString()}. The rendered text is then passed through
     * {@code StringUtil.trimWhiteSpace} and {@code StringUtil.normalizeHtmlTransf}.
     *
     * @param doc  the XPath evaluation context (typically a DOM node or document)
     * @param path the XPath expression
     * @return the rendered text, or the empty string when {@link #parser(Object, String)}
     *         returns {@code null}
     * @throws XPathExpressionException if {@code path} cannot be compiled
     */
    public String parserStr(Object doc, String path)
            throws XPathExpressionException {
        Object result = parser(doc, path);
        if (result == null) {
            return "";
        }

        String text = "";
        // NodeList is checked before Node on purpose: some DOM implementations use
        // one class for both, and such a result must be treated as a list.
        if (result instanceof NodeList) {
            text = joinNodeTexts((NodeList) result);
        } else if (result instanceof Node) {
            text = getNodeText((Node) result);
        } else if (result instanceof String
                || result instanceof Number
                || result instanceof Boolean) {
            text = result.toString();
        }

        text = StringUtil.trimWhiteSpace(text);
        return StringUtil.normalizeHtmlTransf(text);
    }

    /**
     * Compiles {@code path} and evaluates it against {@code doc}.
     *
     * <p>Evaluation is attempted as NODESET, NODE, STRING, NUMBER and BOOLEAN, in
     * that order, and the first attempt that does not throw supplies the result.
     * This lets callers pass both node-selecting expressions and scalar
     * expressions without declaring the expected type.
     *
     * @param doc  the XPath evaluation context (typically a DOM node or document)
     * @param path the XPath expression
     * @return the evaluation result ({@link NodeList}, {@link Node}, {@link String},
     *         {@link Number} or {@link Boolean}); {@code null} when {@code path} is
     *         {@code null} or blank, or when evaluation fails for every result type
     * @throws XPathExpressionException if {@code path} cannot be compiled
     */
    public Object parser(Object doc, String path) throws XPathExpressionException {
        if (path == null || path.trim().isEmpty()) {
            return null;
        }

        XPath xPath = XPathFactory.newInstance().newXPath();
        XPathExpression expression = xPath.compile(path);

        XPathExpressionException lastFailure = null;
        for (QName resultType : RESULT_TYPES) {
            try {
                return expression.evaluate(doc, resultType);
            } catch (XPathExpressionException e) {
                // Result cannot be produced as this type; fall through to the next one.
                lastFailure = e;
            }
        }

        // Best effort: callers have always received null here, so keep that contract
        // but report the failure through the logger instead of printStackTrace().
        LOGGER.log(Level.WARNING,
                "XPath evaluation failed for every result type: " + path, lastFailure);
        return null;
    }

    /**
     * Returns the text carried by {@code node}.
     *
     * <p>Text nodes yield their text content, elements the concatenated text of
     * their descendants, attributes their value and CDATA sections their node
     * value. Any other node type, and {@code null}, yields the result of cleaning
     * the empty string. The text is passed through
     * {@code StringUtil.trimWhiteSpace} and {@code StringUtil.normalizeHtmlTransf}.
     *
     * @param node the node to read; may be {@code null}
     * @return the cleaned text of the node
     */
    public String getNodeText(Node node) {
        String text = "";
        if (node != null) {
            switch (node.getNodeType()) {
                case Node.TEXT_NODE:
                    text = node.getTextContent();
                    break;
                case Node.ELEMENT_NODE:
                    text = getNodeTextRec((Element) node);
                    break;
                case Node.ATTRIBUTE_NODE:
                    text = ((Attr) node).getValue();
                    break;
                case Node.CDATA_SECTION_NODE:
                    text = node.getNodeValue();
                    break;
                default:
                    break;
            }
        }
        text = StringUtil.trimWhiteSpace(text);
        return StringUtil.normalizeHtmlTransf(text);
    }

    /** Renders every node of {@code nodes} and joins the texts with a single space. */
    private String joinNodeTexts(NodeList nodes) {
        StringBuilder joined = new StringBuilder();
        int count = nodes.getLength();
        for (int i = 0; i < count; i++) {
            if (i > 0) {
                joined.append(NODE_TEXT_SEPARATOR);
            }
            joined.append(getNodeText(nodes.item(i)));
        }
        return joined.toString();
    }

    /**
     * Concatenates, in document order and without separators, the trimmed text of
     * every text node below {@code elem}.
     *
     * <p>NOTE(review): only text and element children are visited, so CDATA
     * sections nested inside an element contribute nothing here even though
     * {@link #getNodeText(Node)} reads a CDATA section passed to it directly.
     * Confirm whether that is intended before changing it.
     */
    private String getNodeTextRec(Element elem) {
        StringBuilder collected = new StringBuilder();
        NodeList children = elem.getChildNodes();
        int count = children.getLength();
        for (int i = 0; i < count; i++) {
            Node child = children.item(i);
            short type = child.getNodeType();
            if (type == Node.TEXT_NODE) {
                collected.append(StringUtil.trimWhiteSpace(child.getTextContent()));
            } else if (type == Node.ELEMENT_NODE) {
                collected.append(getNodeTextRec((Element) child));
            }
        }
        return collected.toString();
    }

    /**
     * Parses XML text into a DOM {@link Document}.
     *
     * <p>The text is handed to the parser as characters rather than re-encoded to
     * bytes, so the result does not depend on the platform default charset or on
     * the encoding named in the XML declaration. A leading byte-order mark left
     * over from decoding is removed first.
     *
     * <p>Because page content is untrusted, the parser is configured with secure
     * processing enabled and with external entities and external DTD loading
     * disabled. DOCTYPE declarations themselves are still accepted.
     *
     * @param pageBody the XML text to parse
     * @return the parsed document
     * @throws ParserConfigurationException if the parser cannot be created or does
     *                                      not support the hardening features
     * @throws SAXException                 if {@code pageBody} is not well-formed XML
     * @throws IOException                  if reading the text fails
     * @throws NullPointerException         if {@code pageBody} is {@code null}
     */
    public static Document xmlGetDocument(String pageBody)
            throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        factory.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
        factory.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
        factory.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
        DocumentBuilder builder = factory.newDocumentBuilder();

        String xml = pageBody;
        // A byte stream lets the parser skip a BOM; a character stream does not, so
        // drop it here to keep BOM-prefixed input parseable.
        if (!xml.isEmpty() && xml.charAt(0) == BYTE_ORDER_MARK) {
            xml = xml.substring(1);
        }
        return builder.parse(new InputSource(new StringReader(xml)));
    }
}
