package org.apache.tika.parser.pkg;

import cn.hutool.core.text.StrPool;
import com.itextpdf.text.html.HtmlTags;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.compress.PasswordRequiredException;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.StreamingNotSupportedException;
import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.config.Field;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:WEB-INF/lib/tika-parsers-1.28.4.jar:org/apache/tika/parser/pkg/PackageParser.class */
public class PackageParser extends AbstractEncodingDetectorParser {
    private static final long serialVersionUID = -5331043266963888708L;

    // Media types of the archive formats this parser distinguishes between.
    private static final MediaType ZIP = MediaType.APPLICATION_ZIP;
    private static final MediaType JAR = MediaType.application("java-archive");
    private static final MediaType AR = MediaType.application("x-archive");
    private static final MediaType ARJ = MediaType.application("x-arj");
    private static final MediaType CPIO = MediaType.application("x-cpio");
    private static final MediaType DUMP = MediaType.application("x-tika-unix-dump");
    private static final MediaType TAR = MediaType.application("x-tar");
    private static final MediaType SEVENZ = MediaType.application("x-7z-compressed");

    // NOTE(review): TIKA_OOXML, GTAR and KMZ are not referenced anywhere else in this class.
    private static final MediaType TIKA_OOXML = MediaType.application("x-tika-ooxml");
    private static final MediaType GTAR = MediaType.application("x-gtar");
    private static final MediaType KMZ = MediaType.application("vnd.google-earth.kmz");

    // The set returned by getSupportedTypes().
    private static final Set<MediaType> SUPPORTED_TYPES = MediaType.set(ZIP, JAR, AR, ARJ, CPIO, DUMP, TAR, SEVENZ);

    // Content types that updateMediaType() leaves in place instead of overwriting
    // them with the detected container type.
    static final Set<MediaType> PACKAGE_SPECIALIZATIONS = loadPackageSpecializations();

    // Read limit handed to InputStream.mark() in parse() before the entries are read
    // (104857600 bytes = 100 * 1024 * 1024).
    private static final int MARK_LIMIT = 104857600;

    // When true, parseEntry() runs the encoding detector over the raw bytes of zip entry
    // names. Both constructors set it to true; setDetectCharsetsInEntryNames() overrides it.
    private boolean detectCharsetsInEntryNames;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/tika-parsers-1.28.4.jar:org/apache/tika/parser/pkg/PackageParser$SevenZWrapper.class */
    /**
     * Presents a {@link SevenZFile} through the {@link ArchiveInputStream} API so that
     * the rest of the parser can iterate it like any other archive stream. Every
     * operation is forwarded unchanged to the wrapped file.
     */
    public static class SevenZWrapper extends ArchiveInputStream {
        /** The 7z archive that all calls are forwarded to. */
        private SevenZFile archive;

        private SevenZWrapper(SevenZFile source) {
            this.archive = source;
        }

        /** Forwards to {@link SevenZFile#read()}. */
        @Override // org.apache.commons.compress.archivers.ArchiveInputStream, java.io.InputStream
        public int read() throws IOException {
            return archive.read();
        }

        /** Forwards to {@link SevenZFile#read(byte[])}. */
        @Override // java.io.InputStream
        public int read(byte[] buffer) throws IOException {
            return archive.read(buffer);
        }

        /** Forwards to {@link SevenZFile#read(byte[], int, int)}. */
        @Override // java.io.InputStream
        public int read(byte[] buffer, int offset, int length) throws IOException {
            return archive.read(buffer, offset, length);
        }

        /** Forwards to {@link SevenZFile#getNextEntry()}. */
        @Override // org.apache.commons.compress.archivers.ArchiveInputStream
        public ArchiveEntry getNextEntry() throws IOException {
            return archive.getNextEntry();
        }

        /** Closes the wrapped 7z file. */
        @Override // java.io.InputStream, java.io.Closeable, java.lang.AutoCloseable
        public void close() throws IOException {
            archive.close();
        }
    }

    /**
     * Builds the set of content types that are more specific than the plain container
     * type detected by this parser.
     *
     * @return an unmodifiable set holding the parsed form of every name listed below
     */
    static final Set<MediaType> loadPackageSpecializations() {
        String[] typeNames = {
            "application/bizagi-modeler",
            "application/epub+zip",
            "application/java-archive",
            "application/vnd.adobe.air-application-installer-package+zip",
            "application/vnd.android.package-archive",
            "application/vnd.apple.iwork",
            "application/vnd.apple.keynote",
            "application/vnd.apple.numbers",
            "application/vnd.apple.pages",
            "application/vnd.etsi.asic-e+zip",
            "application/vnd.etsi.asic-s+zip",
            "application/vnd.google-earth.kmz",
            "application/vnd.mindjet.mindmanager",
            "application/vnd.ms-excel.addin.macroenabled.12",
            "application/vnd.ms-excel.sheet.binary.macroenabled.12",
            "application/vnd.ms-excel.sheet.macroenabled.12",
            "application/vnd.ms-excel.template.macroenabled.12",
            "application/vnd.ms-powerpoint.addin.macroenabled.12",
            "application/vnd.ms-powerpoint.presentation.macroenabled.12",
            "application/vnd.ms-powerpoint.slide.macroenabled.12",
            "application/vnd.ms-powerpoint.slideshow.macroenabled.12",
            "application/vnd.ms-powerpoint.template.macroenabled.12",
            "application/vnd.ms-visio.drawing",
            "application/vnd.ms-visio.drawing.macroenabled.12",
            "application/vnd.ms-visio.stencil",
            "application/vnd.ms-visio.stencil.macroenabled.12",
            "application/vnd.ms-visio.template",
            "application/vnd.ms-visio.template.macroenabled.12",
            "application/vnd.ms-word.document.macroenabled.12",
            "application/vnd.ms-word.template.macroenabled.12",
            "application/vnd.ms-xpsdocument",
            "application/vnd.oasis.opendocument.formula",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.openxmlformats-officedocument.presentationml.slide",
            "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
            "application/vnd.openxmlformats-officedocument.presentationml.template",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
            "application/x-ibooks+zip",
            "application/x-itunes-ipa",
            "application/x-tika-iworks-protected",
            "application/x-tika-java-enterprise-archive",
            "application/x-tika-java-web-archive",
            "application/x-tika-ooxml",
            "application/x-tika-ooxml-protected",
            "application/x-tika-visio-ooxml",
            "application/x-xliff+zip",
            "application/x-xmind",
            "model/vnd.dwfx+xps",
            "application/vnd.sun.xml.calc",
            "application/vnd.sun.xml.writer",
            "application/vnd.sun.xml.writer.template",
            "application/vnd.sun.xml.draw",
            "application/vnd.sun.xml.impress",
            "application/vnd.openofficeorg.autotext",
            "application/vnd.adobe.indesign-idml-package",
            "application/x-gtar"
        };
        // Typed set: the raw HashSet used previously forced an unchecked conversion
        // to Set<MediaType> at the return statement.
        Set<MediaType> specializations = new HashSet<>();
        for (String typeName : typeNames) {
            specializations.add(MediaType.parse(typeName));
        }
        return Collections.unmodifiableSet(specializations);
    }

    /**
     * Maps an archive stream to a media type based on the stream's concrete class.
     *
     * @param archiveInputStream the archive stream to classify
     * @return the matching archive media type, or {@link MediaType#OCTET_STREAM} when
     *         the stream is none of the classes checked here
     */
    @Deprecated
    static MediaType getMediaType(ArchiveInputStream archiveInputStream) {
        // The jar check is kept ahead of the zip check, as in the original expression.
        if (archiveInputStream instanceof JarArchiveInputStream) {
            return JAR;
        }
        if (archiveInputStream instanceof ZipArchiveInputStream) {
            return ZIP;
        }
        if (archiveInputStream instanceof ArArchiveInputStream) {
            return AR;
        }
        if (archiveInputStream instanceof CpioArchiveInputStream) {
            return CPIO;
        }
        if (archiveInputStream instanceof DumpArchiveInputStream) {
            return DUMP;
        }
        if (archiveInputStream instanceof TarArchiveInputStream) {
            return TAR;
        }
        if (archiveInputStream instanceof SevenZWrapper) {
            return SEVENZ;
        }
        return MediaType.OCTET_STREAM;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Maps an archiver name to a media type.
     *
     * @param str the archiver name; {@code null} is accepted
     * @return the matching archive media type, or {@link MediaType#OCTET_STREAM} when
     *         the name is {@code null} or not one of the names checked here
     */
    public static MediaType getMediaType(String str) {
        if ("jar".equals(str)) {
            return JAR;
        }
        if ("zip".equals(str)) {
            return ZIP;
        }
        if (ArchiveStreamFactory.AR.equals(str)) {
            return AR;
        }
        if (ArchiveStreamFactory.ARJ.equals(str)) {
            return ARJ;
        }
        if (ArchiveStreamFactory.CPIO.equals(str)) {
            return CPIO;
        }
        if (ArchiveStreamFactory.DUMP.equals(str)) {
            return DUMP;
        }
        if (ArchiveStreamFactory.TAR.equals(str)) {
            return TAR;
        }
        if (ArchiveStreamFactory.SEVEN_Z.equals(str)) {
            return SEVENZ;
        }
        return MediaType.OCTET_STREAM;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Tells whether a media type is one of the two zip-based container types known
     * to this class.
     *
     * @param mediaType the type to test; must not be {@code null}
     * @return {@code true} if the type equals the zip or the jar media type
     */
    public static boolean isZipArchive(MediaType mediaType) {
        if (mediaType.equals(ZIP)) {
            return true;
        }
        return mediaType.equals(JAR);
    }

    /**
     * Creates a parser through the no-argument superclass constructor, with charset
     * detection for entry names switched on.
     */
    public PackageParser() {
        detectCharsetsInEntryNames = true;
    }

    /**
     * Creates a parser that hands the given encoding detector to the superclass, with
     * charset detection for entry names switched on.
     *
     * @param encodingDetector the detector passed to the superclass constructor
     */
    public PackageParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
        detectCharsetsInEntryNames = true;
    }

    /**
     * Returns the archive media types handled by this parser.
     *
     * @param parseContext not used by this implementation
     * @return the fixed set of zip, jar, ar, arj, cpio, dump, tar and 7z media types
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Opens the stream as an archive, records the container media type and passes
     * every non-directory entry to the embedded document extractor.
     *
     * <p>The archive stream and all temporary resources are released on every exit
     * path, including failures while opening a 7z file or starting the XHTML
     * document. The caller's stream is never closed by this method.
     *
     * @param inputStream the document stream; wrapped in a {@link BufferedInputStream}
     *        when it does not support mark/reset
     * @param contentHandler receives the XHTML output
     * @param metadata metadata of the container document; its content type may be updated
     * @param parseContext may supply an {@link ArchiveStreamFactory}, a
     *        {@link PasswordProvider} and the embedded document extractor
     * @throws IOException if reading the stream or releasing a resource fails
     * @throws SAXException if the content handler fails
     * @throws TikaException if the archive cannot be unpacked; an
     *         {@link EncryptedDocumentException} if a password is required
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        // mark/reset is needed for the 7z fallback and for the data-descriptor retry.
        if (!inputStream.markSupported()) {
            inputStream = new BufferedInputStream(inputStream);
        }
        TemporaryResources temporaryResources = new TemporaryResources();
        ArchiveInputStream archive = null;
        XHTMLContentHandler xHTMLContentHandler;
        try {
            String entryEncoding = null;
            try {
                ArchiveStreamFactory archiveStreamFactory = (ArchiveStreamFactory) parseContext.get(ArchiveStreamFactory.class, new ArchiveStreamFactory());
                entryEncoding = archiveStreamFactory.getEntryEncoding();
                // Closing the archive stream must not close the caller's stream.
                archive = archiveStreamFactory.createArchiveInputStream(new CloseShieldInputStream(inputStream));
            } catch (StreamingNotSupportedException e) {
                if (!e.getFormat().equals(ArchiveStreamFactory.SEVEN_Z)) {
                    throw new TikaException("Unknown non-streaming format " + e.getFormat(), e);
                }
                archive = openSevenZ(inputStream, temporaryResources, metadata, parseContext);
            } catch (ArchiveException e) {
                throw new TikaException("Unable to unpack document stream", e);
            }

            updateMediaType(archive, metadata);
            EmbeddedDocumentExtractor embeddedDocumentExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
            xHTMLContentHandler = new XHTMLContentHandler(contentHandler, metadata);
            xHTMLContentHandler.startDocument();

            // Remember this position so a zip can be re-read from here if needed.
            inputStream.mark(MARK_LIMIT);
            // Counts the entries handled in the first pass so a retry can skip them.
            AtomicInteger entryCount = new AtomicInteger(0);
            try {
                parseEntries(false, archive, metadata, embeddedDocumentExtractor, xHTMLContentHandler, entryCount);
            } catch (UnsupportedZipFeatureException e) {
                if (e.getFeature() == UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR) {
                    // Re-read the zip with a stream that accepts stored entries
                    // that use a data descriptor.
                    archive.close();
                    inputStream.reset();
                    archive = new ZipArchiveInputStream(new CloseShieldInputStream(inputStream), entryEncoding, true, true);
                    parseEntries(true, archive, metadata, embeddedDocumentExtractor, xHTMLContentHandler, entryCount);
                }
            }
        } finally {
            // Single cleanup point; the nested finally keeps a failing archive close
            // from skipping the temporary resources.
            try {
                if (archive != null) {
                    archive.close();
                }
            } finally {
                temporaryResources.close();
            }
        }
        xHTMLContentHandler.endDocument();
    }

    /**
     * Opens the stream as a 7z archive. The stream is reset and wrapped in a
     * {@link TikaInputStream} bound to the given temporary resources, and the 7z
     * file is opened from the {@link java.io.File} that wrapper provides.
     *
     * @param inputStream the mark-supporting document stream
     * @param temporaryResources holder the {@link TikaInputStream} is bound to; the
     *        caller releases it
     * @param metadata passed to the password provider, if one is configured
     * @param parseContext may supply a {@link PasswordProvider}
     * @return the 7z archive presented as an {@link ArchiveInputStream}
     * @throws IOException if the stream cannot be reset or the 7z file cannot be opened
     * @throws TikaException an {@link EncryptedDocumentException} if the archive
     *         requires a password that was not supplied
     */
    private static ArchiveInputStream openSevenZ(InputStream inputStream, TemporaryResources temporaryResources, Metadata metadata, ParseContext parseContext) throws IOException, TikaException {
        // NOTE(review): relies on a mark already being set on the stream at this
        // point; this method sets none itself - confirm against ArchiveStreamFactory.
        inputStream.reset();
        TikaInputStream tikaInputStream = TikaInputStream.get(inputStream, temporaryResources);
        String password = null;
        PasswordProvider passwordProvider = (PasswordProvider) parseContext.get(PasswordProvider.class);
        if (passwordProvider != null) {
            password = passwordProvider.getPassword(metadata);
        }
        try {
            SevenZFile sevenZFile = password == null
                    ? new SevenZFile(tikaInputStream.getFile())
                    : new SevenZFile(tikaInputStream.getFile(), password.getBytes("UnicodeLittleUnmarked"));
            return new SevenZWrapper(sevenZFile);
        } catch (PasswordRequiredException e) {
            throw new EncryptedDocumentException(e);
        }
    }

    /**
     * Walks the archive and hands every non-directory entry to
     * {@link #parseEntry}.
     *
     * <p>On the first pass ({@code z == false}) the counter is incremented once per
     * entry handled. On a retry pass ({@code z == true}) that many leading entries
     * are skipped, so entries already emitted are not emitted twice.
     *
     * @param z {@code true} when re-reading the archive after a data-descriptor failure
     * @param archiveInputStream the archive to iterate
     * @param metadata metadata of the container document
     * @param embeddedDocumentExtractor receives the readable entries
     * @param xHTMLContentHandler receives the XHTML output
     * @param atomicInteger counter of entries handled in the first pass; decremented
     *        while skipping in the retry pass
     * @throws TikaException an {@link EncryptedDocumentException} for password or
     *         encryption failures; a plain {@code TikaException} for any unsupported
     *         zip feature other than encryption or data descriptor
     * @throws IOException if reading the archive fails; a data-descriptor
     *         {@link UnsupportedZipFeatureException} is rethrown unchanged
     * @throws SAXException if the content handler fails
     */
    private void parseEntries(boolean z, ArchiveInputStream archiveInputStream, Metadata metadata, EmbeddedDocumentExtractor embeddedDocumentExtractor, XHTMLContentHandler xHTMLContentHandler, AtomicInteger atomicInteger) throws TikaException, IOException, SAXException {
        try {
            ArchiveEntry entry = archiveInputStream.getNextEntry();
            while (entry != null) {
                if (z && atomicInteger.get() > 0) {
                    // This entry was handled in the first pass: skip it and restart
                    // the loop so the null check runs on the following entry.
                    atomicInteger.decrementAndGet();
                    entry = archiveInputStream.getNextEntry();
                    continue;
                }
                if (!entry.isDirectory()) {
                    parseEntry(archiveInputStream, entry, embeddedDocumentExtractor, metadata, xHTMLContentHandler);
                }
                if (!z) {
                    atomicInteger.incrementAndGet();
                }
                entry = archiveInputStream.getNextEntry();
            }
        } catch (PasswordRequiredException e) {
            throw new EncryptedDocumentException(e);
        } catch (UnsupportedZipFeatureException e) {
            if (e.getFeature() == UnsupportedZipFeatureException.Feature.ENCRYPTION) {
                throw new EncryptedDocumentException(e);
            }
            if (e.getFeature() != UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR) {
                throw new TikaException("UnsupportedZipFeature", e);
            }
            // Data-descriptor failures go back to parse(), which retries the archive.
            throw e;
        }
    }

    /**
     * Writes the container type detected from the archive stream into the
     * {@code Content-Type} metadata field. Nothing is written when the detected type
     * is the generic octet-stream type, or when the field already holds one of the
     * {@link #PACKAGE_SPECIALIZATIONS}.
     *
     * @param archiveInputStream the opened archive stream
     * @param metadata the metadata object to update
     */
    private void updateMediaType(ArchiveInputStream archiveInputStream, Metadata metadata) {
        MediaType detected = getMediaType(archiveInputStream);
        if (detected.equals(MediaType.OCTET_STREAM)) {
            return;
        }
        String declared = metadata.get("Content-Type");
        MediaType declaredType = declared == null ? null : MediaType.parse(declared);
        boolean keepDeclared = declaredType != null && PACKAGE_SPECIALIZATIONS.contains(declaredType);
        if (!keepDeclared) {
            metadata.set("Content-Type", detected.toString());
        }
    }

    /**
     * Handles a single archive entry. A readable entry gets an XHTML marker and is
     * handed to the embedded document extractor when the extractor accepts it. For an
     * unreadable entry an exception is recorded in the container metadata (except for
     * an unencrypted zip entry, where nothing is recorded) and the entry name is
     * written as a paragraph.
     *
     * @param archiveInputStream the archive positioned at the entry
     * @param archiveEntry the entry to handle
     * @param embeddedDocumentExtractor receives the entry content
     * @param metadata metadata of the container document
     * @param xHTMLContentHandler receives the XHTML output
     * @throws SAXException if the content handler fails
     * @throws IOException if reading fails; also an
     *         {@link UnsupportedZipFeatureException} for an unreadable zip entry that
     *         uses a data descriptor with method 0
     * @throws TikaException if the embedded parse fails
     */
    private void parseEntry(ArchiveInputStream archiveInputStream, ArchiveEntry archiveEntry, EmbeddedDocumentExtractor embeddedDocumentExtractor, Metadata metadata, XHTMLContentHandler xHTMLContentHandler) throws SAXException, IOException, TikaException {
        String entryName = archiveEntry.getName();
        if (this.detectCharsetsInEntryNames && archiveEntry instanceof ZipArchiveEntry) {
            // Decode the raw name bytes with the detected charset, when one is found.
            byte[] rawName = ((ZipArchiveEntry) archiveEntry).getRawName();
            Charset detectedCharset = getEncodingDetector().detect(new ByteArrayInputStream(rawName), metadata);
            if (detectedCharset != null) {
                entryName = new String(rawName, detectedCharset);
            }
        }

        if (!archiveInputStream.canReadEntryData(archiveEntry)) {
            String displayName = entryName == null ? "" : entryName;
            if (archiveEntry instanceof ZipArchiveEntry) {
                ZipArchiveEntry zipEntry = (ZipArchiveEntry) archiveEntry;
                if (zipEntry.getGeneralPurposeBit().usesEncryption()) {
                    EmbeddedDocumentUtil.recordEmbeddedStreamException(new EncryptedDocumentException("stream (" + displayName + ") is encrypted"), metadata);
                }
                boolean usesDataDescriptor = zipEntry.getGeneralPurposeBit().usesDataDescriptor();
                if (usesDataDescriptor && zipEntry.getMethod() == 0) {
                    // Thrown before anything is written for this entry.
                    throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, zipEntry);
                }
            } else {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(new TikaException("Can't read archive stream (" + displayName + ")"), metadata);
            }
            if (displayName.length() > 0) {
                xHTMLContentHandler.element("p", displayName);
            }
            return;
        }

        Metadata entryMetadata = handleEntryMetadata(entryName, null, archiveEntry.getLastModifiedDate(), Long.valueOf(archiveEntry.getSize()), xHTMLContentHandler);
        if (!embeddedDocumentExtractor.shouldParseEmbedded(entryMetadata)) {
            return;
        }
        TemporaryResources entryResources = new TemporaryResources();
        try {
            embeddedDocumentExtractor.parseEmbedded(TikaInputStream.get(archiveInputStream, entryResources), xHTMLContentHandler, entryMetadata, true);
        } finally {
            entryResources.dispose();
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the metadata object for one archive entry. When the entry has a
     * non-empty name, it also writes an empty {@code <div class="embedded" id="...">}
     * marker for it.
     *
     * @param str the entry name; may be {@code null} or empty, in which case no name
     *        fields are set and no marker is written
     * @param date creation date, or {@code null} to leave it unset
     * @param date2 modification date, or {@code null} to leave it unset
     * @param l entry size stored as {@code Content-Length}, or {@code null} to leave
     *        it unset
     * @param xHTMLContentHandler receives the marker element; only used when the name
     *        is non-empty
     * @return the new metadata object for the entry
     * @throws SAXException if writing the marker element fails
     */
    public static Metadata handleEntryMetadata(String str, Date date, Date date2, Long l, XHTMLContentHandler xHTMLContentHandler) throws SAXException, IOException, TikaException {
        Metadata entryMetadata = new Metadata();
        if (date != null) {
            entryMetadata.set(TikaCoreProperties.CREATED, date);
        }
        if (date2 != null) {
            entryMetadata.set(TikaCoreProperties.MODIFIED, date2);
        }
        if (l != null) {
            entryMetadata.set("Content-Length", Long.toString(l.longValue()));
        }
        if (str != null && str.length() > 0) {
            // Plain literals rather than constants borrowed from unrelated
            // libraries (Hutool StrPool.BACKSLASH, iText HtmlTags.DIV); the values
            // are the same.
            String normalizedName = str.replace("\\", "/");
            entryMetadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, normalizedName);
            AttributesImpl attributes = new AttributesImpl();
            attributes.addAttribute("", "class", "class", "CDATA", "embedded");
            attributes.addAttribute("", "id", "id", "CDATA", normalizedName);
            xHTMLContentHandler.startElement("div", attributes);
            xHTMLContentHandler.endElement("div");
            entryMetadata.set(TikaMetadataKeys.EMBEDDED_RELATIONSHIP_ID, normalizedName);
        }
        return entryMetadata;
    }

    /**
     * Switches charset detection for zip entry names on or off.
     *
     * @param z {@code true} to run the encoding detector over raw zip entry names,
     *        {@code false} to use the names as reported by the archive entry
     */
    @Field
    public void setDetectCharsetsInEntryNames(boolean z) {
        detectCharsetsInEntryNames = z;
    }
}
