/*
 * Decompiled with CFR 0.152.
 */
package com.centit.support.office;

import com.centit.support.network.HttpExecutor;
import com.centit.support.office.exception.ExtractorTextException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.Document;
import javax.swing.text.rtf.RTFEditorKit;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xdgf.usermodel.XmlVisioDocument;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextRun;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.HtmlPage;
import org.htmlparser.visitors.NodeVisitor;

/**
 * Static helpers that extract plain text from office documents, PDF, plain
 * text, RTF and HTML sources.
 *
 * <p>Every overload that accepts an {@link InputStream} takes ownership of the
 * stream and closes it before returning, whether extraction succeeds or fails.
 * The {@code File} and {@code String} overloads open the file and delegate to
 * the stream overload. Most methods also echo the extracted text to
 * {@code System.out} as a diagnostic.
 */
public final class FileTextExtractor {
    /** First two bytes of a UTF-8 byte order mark (EF BB), packed big-endian. */
    private static final int PREFIX_UTF8_BOM = 0xEFBB;
    /** UTF-16 little-endian byte order mark (FF FE), packed big-endian. */
    private static final int PREFIX_UTF16_LE_BOM = 0xFFFE;
    /** UTF-16 big-endian byte order mark (FE FF), packed big-endian. */
    private static final int PREFIX_UTF16_BE_BOM = 0xFEFF;
    /** The two ASCII characters backslash + 'u' (5C 75), packed big-endian. */
    private static final int PREFIX_BACKSLASH_U = 0x5C75;

    /**
     * Extracts the text of a Word document stored in a file.
     *
     * @param file the Word file to read
     * @return the document text, or {@code null} if the file is neither OLE2 nor OOXML
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorWord(File file) throws ExtractorTextException, IOException {
        // The stream overload owns the stream and closes it.
        return FileTextExtractor.extractorWord(new FileInputStream(file));
    }

    /**
     * Extracts the text of a Word document located at the given path.
     *
     * @param filePath path of the Word file
     * @return the document text, or {@code null} if the file is neither OLE2 nor OOXML
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorWord(String filePath) throws ExtractorTextException, IOException {
        return FileTextExtractor.extractorWord(new File(filePath));
    }

    /**
     * Extracts the text of a Word document from a stream, choosing the binary
     * (HWPF) or OOXML (XWPF) reader based on the stream header.
     *
     * @param input the document stream; always closed by this method
     * @return the document text, or {@code null} if the header matches neither format
     * @throws IOException if reading or parsing fails
     */
    public static String extractorWord(InputStream input) throws ExtractorTextException, IOException {
        String content = null;
        try {
            // Header sniffing needs to un-read bytes, so wrap streams that cannot mark/reset.
            if (!input.markSupported()) {
                input = new PushbackInputStream(input, 8);
            }
            if (POIFSFileSystem.hasPOIFSHeader(input)) {
                WordExtractor extractor = new WordExtractor(new HWPFDocument(input));
                content = extractor.getText();
            } else if (POIXMLDocument.hasOOXMLHeader(input)) {
                XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(input));
                content = extractor.getText();
            }
        } finally {
            // Close on failure as well, so a parse error does not leak the stream.
            if (input != null) {
                input.close();
            }
        }
        System.out.println("Word_Text:\n" + content);
        return content;
    }

    /**
     * Extracts the text of an Excel workbook stored in a file.
     *
     * @param file the workbook file to read
     * @return the workbook text, or {@code null} if the format is not recognised
     * @throws IOException if the file cannot be opened or read
     * @throws InvalidFormatException if the OOXML package is malformed
     */
    public static String extractorExcel(File file) throws ExtractorTextException, IOException, InvalidFormatException {
        return FileTextExtractor.extractorExcel(new FileInputStream(file));
    }

    /**
     * Extracts the text of an Excel workbook located at the given path.
     *
     * @param filePath path of the workbook file
     * @return the workbook text, or {@code null} if the format is not recognised
     * @throws IOException if the file cannot be opened or read
     * @throws InvalidFormatException if the OOXML package is malformed
     */
    public static String extractorExcel(String filePath) throws ExtractorTextException, IOException, InvalidFormatException {
        return FileTextExtractor.extractorExcel(new File(filePath));
    }

    /**
     * Extracts the text of an Excel workbook from a stream. Binary workbooks are
     * extracted with formulas (not cached results) and sheet names included;
     * OOXML workbooks use the extractor defaults.
     *
     * @param input the workbook stream; always closed by this method
     * @return the workbook text, or {@code null} if the header matches neither format
     * @throws IOException if reading or parsing fails
     * @throws InvalidFormatException if the OOXML package is malformed
     */
    public static String extractorExcel(InputStream input) throws ExtractorTextException, IOException, InvalidFormatException {
        String content = null;
        try {
            if (!input.markSupported()) {
                input = new PushbackInputStream(input, 8);
            }
            if (POIFSFileSystem.hasPOIFSHeader(input)) {
                ExcelExtractor extractor = new ExcelExtractor(new HSSFWorkbook(input));
                extractor.setFormulasNotResults(true);
                extractor.setIncludeSheetNames(true);
                content = extractor.getText();
            } else if (POIXMLDocument.hasOOXMLHeader(input)) {
                XSSFWorkbook workbook = new XSSFWorkbook(OPCPackage.open(input));
                content = new XSSFExcelExtractor(workbook).getText();
            } else {
                // Message reads "version not supported".
                System.out.println("\u7248\u672c\u4e0d\u652f\u6301");
            }
        } finally {
            if (input != null) {
                input.close();
            }
        }
        System.out.println("Excel_Text:\n" + content);
        return content;
    }

    /**
     * Extracts the text of a PowerPoint presentation stored in a file.
     *
     * @param file the presentation file to read
     * @return the presentation text, or an empty string if the format is not recognised
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPpt(File file) throws ExtractorTextException, IOException, InvalidFormatException {
        return FileTextExtractor.extractorPpt(new FileInputStream(file));
    }

    /**
     * Extracts the text of a PowerPoint presentation located at the given path.
     *
     * @param filePath path of the presentation file
     * @return the presentation text, or an empty string if the format is not recognised
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPpt(String filePath) throws ExtractorTextException, IOException, InvalidFormatException {
        return FileTextExtractor.extractorPpt(new File(filePath));
    }

    /**
     * Extracts the text of a PowerPoint presentation from a stream. For OOXML
     * decks each text run is followed by a tab and each slide by a newline.
     *
     * @param input the presentation stream; always closed by this method
     * @return the presentation text, or an empty string if the header matches neither format
     * @throws IOException if reading or parsing fails
     */
    public static String extractorPpt(InputStream input) throws ExtractorTextException, IOException {
        String content = "";
        try {
            if (!input.markSupported()) {
                input = new PushbackInputStream(input, 8);
            }
            if (POIFSFileSystem.hasPOIFSHeader(input)) {
                PowerPointExtractor extractor = new PowerPointExtractor(input);
                try {
                    content = extractor.getText();
                } finally {
                    extractor.close();
                }
            } else if (POIXMLDocument.hasOOXMLHeader(input)) {
                XMLSlideShow ppt = new XMLSlideShow(input);
                // StringBuilder avoids re-copying the whole text for every run.
                StringBuilder text = new StringBuilder();
                for (XSLFSlide slide : ppt.getSlides()) {
                    for (XSLFShape shape : slide.getShapes()) {
                        if (!(shape instanceof XSLFTextShape)) {
                            continue;
                        }
                        for (XSLFTextParagraph paragraph : (XSLFTextShape) shape) {
                            for (XSLFTextRun run : paragraph) {
                                text.append(run.getRawText()).append("\t");
                            }
                        }
                    }
                    text.append("\n");
                }
                content = text.toString();
            } else {
                // Message reads "version not supported".
                System.out.println("\u7248\u672c\u4e0d\u652f\u6301");
            }
        } finally {
            if (input != null) {
                input.close();
            }
        }
        System.out.println("Ppt_Text:\n" + content);
        return content;
    }

    /**
     * Extracts the text of an XML-based Visio drawing stored in a file.
     *
     * @param file the Visio file to read
     * @return the drawing text
     * @throws IOException if the file cannot be opened, read or parsed
     */
    public static String extractorVisio(File file) throws ExtractorTextException, IOException {
        String content = "";
        InputStream istream = new FileInputStream(file);
        try {
            XDGFVisioExtractor extractor = new XDGFVisioExtractor(new XmlVisioDocument(istream));
            content = extractor.getText();
        } finally {
            istream.close();
        }
        System.out.println("Visio_Text:\n" + content);
        return content;
    }

    /**
     * Extracts the text of an Outlook message stored in a file.
     *
     * @param file the message file to read
     * @return the message text
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorOutLook(File file) throws ExtractorTextException, IOException {
        return FileTextExtractor.extractorOutLook(new FileInputStream(file));
    }

    /**
     * Extracts the text of an Outlook message located at the given path.
     *
     * @param filePath path of the message file
     * @return the message text
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorOutLook(String filePath) throws ExtractorTextException, IOException {
        return FileTextExtractor.extractorOutLook(new File(filePath));
    }

    /**
     * Extracts the text of an Outlook message from a stream.
     *
     * @param input the message stream; always closed by this method
     * @return the message text
     * @throws IOException if reading or parsing fails
     */
    public static String extractorOutLook(InputStream input) throws ExtractorTextException, IOException {
        String content = "";
        try {
            NPOIFSFileSystem poifs = new NPOIFSFileSystem(input);
            try {
                OutlookTextExtactor extractor = new OutlookTextExtactor(poifs);
                try {
                    content = extractor.getText();
                } finally {
                    extractor.close();
                }
            } finally {
                poifs.close();
            }
        } finally {
            if (input != null) {
                input.close();
            }
        }
        System.out.println("OutLook_Text:\n" + content);
        return content;
    }

    /**
     * Extracts the text of a PDF document stored in a file.
     *
     * @param file the PDF file to read
     * @return the concatenated text of all pages
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPdf(File file) throws ExtractorTextException, IOException {
        return FileTextExtractor.extractorPdf(new FileInputStream(file));
    }

    /**
     * Extracts the text of a PDF document located at the given path.
     *
     * @param filePath path of the PDF file
     * @return the concatenated text of all pages
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPdf(String filePath) throws ExtractorTextException, IOException {
        return FileTextExtractor.extractorPdf(new File(filePath));
    }

    /**
     * Extracts the text of a PDF document from a stream, page by page in order,
     * with no separator inserted between pages.
     *
     * @param input the PDF stream; always closed by this method
     * @return the concatenated text of all pages
     * @throws IOException if reading or parsing fails
     */
    public static String extractorPdf(InputStream input) throws ExtractorTextException, IOException {
        StringBuilder text = new StringBuilder();
        try {
            PdfReader reader = new PdfReader(input);
            try {
                int pageCount = reader.getNumberOfPages();
                // PDF page numbers are 1-based.
                for (int page = 1; page <= pageCount; ++page) {
                    text.append(PdfTextExtractor.getTextFromPage(reader, page));
                }
            } finally {
                reader.close();
            }
        } finally {
            if (input != null) {
                input.close();
            }
        }
        String content = text.toString();
        System.out.println("Pdf_Text:\n" + content);
        return content;
    }

    /**
     * Extracts the content of a plain-text file.
     *
     * @param file the text file to read
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorTxt(File file) throws ExtractorTextException, IOException {
        return FileTextExtractor.extractorTxt(new FileInputStream(file));
    }

    /**
     * Extracts the content of a plain-text file located at the given path.
     *
     * @param filePath path of the text file
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorTxt(String filePath) throws ExtractorTextException, IOException {
        return FileTextExtractor.extractorTxt(new File(filePath));
    }

    /**
     * Reads a plain-text stream, guessing its charset from the first two bytes:
     * a UTF-8 BOM selects UTF-8, FF FE selects "Unicode" (UTF-16 with BOM
     * detection), FE FF selects UTF-16BE, a leading backslash-u selects ASCII,
     * and anything else falls back to GBK.
     *
     * <p>The sniffed bytes are pushed back before decoding so no content is
     * lost; only a byte order mark that the chosen decoder would otherwise emit
     * as a U+FEFF character is skipped.
     *
     * @param input the text stream; always closed by this method
     * @return the decoded text with every line terminated by {@code '\n'}
     * @throws IOException if reading fails
     */
    public static String extractorTxt(InputStream input) throws ExtractorTextException, IOException {
        StringBuilder sb = new StringBuilder();
        BufferedInputStream bin = new BufferedInputStream(input);
        try {
            // Peek at up to three bytes, then rewind so decoding starts at byte 0.
            bin.mark(4);
            int first = bin.read();
            int second = bin.read();
            int third = bin.read();
            bin.reset();

            String eCode;
            int bomBytesToSkip = 0;
            switch ((first << 8) + second) {
                case PREFIX_UTF8_BOM: {
                    eCode = "UTF-8";
                    // Java's UTF-8 decoder does not strip the BOM; drop it when complete.
                    if (third == 0xBF) {
                        bomBytesToSkip = 3;
                    }
                    break;
                }
                case PREFIX_UTF16_LE_BOM: {
                    // The "Unicode" (UTF-16) decoder consumes the BOM itself to pick the byte order.
                    eCode = "Unicode";
                    break;
                }
                case PREFIX_UTF16_BE_BOM: {
                    eCode = "UTF-16BE";
                    // The fixed-endian decoder would emit the BOM as U+FEFF.
                    bomBytesToSkip = 2;
                    break;
                }
                case PREFIX_BACKSLASH_U: {
                    eCode = "ASCII";
                    break;
                }
                default: {
                    eCode = "GBK";
                }
            }
            for (int i = 0; i < bomBytesToSkip; ++i) {
                bin.read();
            }

            // Decode from the buffered stream: it holds the bytes already pulled from input.
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, eCode));
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
        } finally {
            // Closing the buffered wrapper also closes the underlying input.
            bin.close();
        }
        String content = sb.toString();
        System.out.println("txt_Text:\n" + content);
        return content;
    }

    /**
     * Extracts the text of an RTF document stored in a file.
     *
     * @param file the RTF file to read
     * @return the document body text
     * @throws IOException if the file cannot be opened or read
     * @throws BadLocationException if the parsed document cannot be read back
     */
    public static String extractorRtf(File file) throws ExtractorTextException, IOException, BadLocationException {
        return FileTextExtractor.extractorRtf(new FileInputStream(file));
    }

    /**
     * Extracts the text of an RTF document located at the given path.
     *
     * @param filePath path of the RTF file
     * @return the document body text
     * @throws IOException if the file cannot be opened or read
     * @throws BadLocationException if the parsed document cannot be read back
     */
    public static String extractorRtf(String filePath) throws ExtractorTextException, IOException, BadLocationException {
        return FileTextExtractor.extractorRtf(new File(filePath));
    }

    /**
     * Extracts the text of an RTF document from a stream using the Swing RTF
     * editor kit.
     *
     * @param input the RTF stream; always closed by this method
     * @return the document body text
     * @throws IOException if reading fails
     * @throws BadLocationException if the parsed document cannot be read back
     */
    public static String extractorRtf(InputStream input) throws IOException, BadLocationException {
        String bodyText;
        try {
            DefaultStyledDocument styledDoc = new DefaultStyledDocument();
            new RTFEditorKit().read(input, (Document) styledDoc, 0);
            bodyText = styledDoc.getText(0, styledDoc.getLength());
        } finally {
            if (input != null) {
                input.close();
            }
        }
        System.out.println("txt_Rtf:\n" + bodyText);
        return bodyText;
    }

    /**
     * Extracts the plain text of the body of an HTML resource.
     *
     * @param urlString the resource handed to {@code org.htmlparser.Parser}
     * @return the body text, or {@code null} if fetching or parsing fails
     *         (the failure is reported on {@code System.err})
     */
    public static String extractorHTML(String urlString) throws ExtractorTextException, IOException {
        try {
            String text = FileTextExtractor.bodyPlainText(new Parser(urlString));
            System.out.println("test_HTML\n" + text);
            return text;
        }
        catch (Exception e) {
            // Best-effort contract: keep returning null, but do not hide the cause.
            System.err.println("extractorHTML failed for " + urlString + ": " + e);
            return null;
        }
    }

    /**
     * Downloads a page with {@code HttpExecutor} and extracts the plain text of
     * its HTML body.
     *
     * @param urlString the URL to fetch
     * @return the body text, or {@code null} if fetching or parsing fails
     *         (the failure is reported on {@code System.err})
     */
    public static String getWebUrl(String urlString) {
        try {
            // NOTE(review): assumes createHttpClient() returns a fresh client per call,
            // so closing it here is safe -- confirm against HttpExecutor.
            CloseableHttpClient client = (CloseableHttpClient) HttpExecutor.createHttpClient();
            String inputHTML;
            try {
                inputHTML = HttpExecutor.simpleGet(client, urlString, (String) null);
            } finally {
                client.close();
            }
            Parser parser = new Parser();
            parser.setInputHTML(inputHTML);
            return FileTextExtractor.bodyPlainText(parser);
        }
        catch (Exception e) {
            // Best-effort contract: keep returning null, but do not hide the cause.
            System.err.println("getWebUrl failed for " + urlString + ": " + e);
            return null;
        }
    }

    /** Runs the parser and concatenates the plain text of every node in the HTML body. */
    private static String bodyPlainText(Parser parser) throws Exception {
        HtmlPage visitor = new HtmlPage(parser);
        parser.visitAllNodesWith((NodeVisitor) visitor);
        NodeList nodes = visitor.getBody();
        StringBuilder content = new StringBuilder();
        int size = nodes.size();
        for (int i = 0; i < size; ++i) {
            Node node = nodes.elementAt(i);
            content.append(node.toPlainTextString());
        }
        return content.toString();
    }
}

