package org.apache.pdfbox.tools;

import com.composum.sling.core.util.LinkUtil;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.Matrix;

/* loaded from: input_file:lib/slingcms.far:org/apache/tika/tika-bundle/1.28.4/tika-bundle-1.28.4.jar:pdfbox-tools-2.0.26.jar:org/apache/pdfbox/tools/ExtractText.class */
/**
 * Command-line tool that extracts the text of a PDF document, either as plain text or as HTML,
 * and writes it to a file or to the console. PDF files embedded in the document (as listed in
 * the top-level embedded-files name dictionary) are extracted as well.
 */
public final class ExtractText {
    private static final Log LOG = LogFactory.getLog(ExtractText.class);
    private static final String PASSWORD = "-password";
    private static final String ENCODING = "-encoding";
    private static final String CONSOLE = "-console";
    private static final String START_PAGE = "-startPage";
    private static final String END_PAGE = "-endPage";
    private static final String SORT = "-sort";
    private static final String IGNORE_BEADS = "-ignoreBeads";
    private static final String DEBUG = "-debug";
    private static final String HTML = "-html";
    private static final String ALWAYSNEXT = "-alwaysNext";
    private static final String ROTATION_MAGIC = "-rotationMagic";
    private static final String STD_ENCODING = "UTF-8";

    /** When set (via {@code -debug}), progress and timing messages are printed to stderr. */
    private boolean debugOutput = false;

    private ExtractText() {
    }

    /**
     * Command-line entry point.
     *
     * @param strArr the command-line arguments; see {@link #usage()} for the accepted options
     * @throws IOException if loading the document or writing the output fails
     */
    public static void main(String[] strArr) throws IOException {
        // Suppress the Dock icon on OS X.
        System.setProperty("apple.awt.UIElement", "true");
        new ExtractText().startExtraction(strArr);
    }

    /**
     * Parses the command-line arguments, loads the PDF and writes its text to the chosen output.
     *
     * <p>The first non-option argument is the input file; any later non-option argument is taken
     * as the output file (the last one wins). If no output file is given, it is derived from the
     * input name by replacing its last four characters with the output extension.
     *
     * @param strArr the command-line arguments
     * @throws IOException if the document cannot be loaded, text extraction is not permitted,
     *                     or writing fails
     */
    public void startExtraction(String[] strArr) throws IOException {
        boolean toConsole = false;
        boolean toHtml = false;
        boolean sort = false;
        boolean separateBeads = true;
        boolean alwaysNext = false;
        boolean rotationMagic = false;
        String password = "";
        String encoding = STD_ENCODING;
        String pdfFile = null;
        String outputFile = null;
        String ext = ".txt";
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        for (int idx = 0; idx < strArr.length; idx++) {
            String arg = strArr[idx];
            if (arg.equals(PASSWORD)) {
                password = requireValue(strArr, ++idx);
            } else if (arg.equals(ENCODING)) {
                encoding = requireValue(strArr, ++idx);
            } else if (arg.equals(START_PAGE)) {
                startPage = Integer.parseInt(requireValue(strArr, ++idx));
            } else if (arg.equals(END_PAGE)) {
                endPage = Integer.parseInt(requireValue(strArr, ++idx));
            } else if (arg.equals(HTML)) {
                toHtml = true;
                ext = ".html";
            } else if (arg.equals(SORT)) {
                sort = true;
            } else if (arg.equals(IGNORE_BEADS)) {
                separateBeads = false;
            } else if (arg.equals(ALWAYSNEXT)) {
                alwaysNext = true;
            } else if (arg.equals(ROTATION_MAGIC)) {
                rotationMagic = true;
            } else if (arg.equals(DEBUG)) {
                this.debugOutput = true;
            } else if (arg.equals(CONSOLE)) {
                toConsole = true;
            } else if (pdfFile == null) {
                pdfFile = arg;
            } else {
                outputFile = arg;
            }
        }
        if (pdfFile == null) {
            usage();
            return;
        }

        Writer output = null;
        PDDocument document = null;
        try {
            long loadStart = startProcessing("Loading PDF " + pdfFile);
            if (outputFile == null) {
                // Swap the (assumed four-character) extension for ours; names too short to carry
                // one simply get the extension appended so that a target file always exists.
                String base = pdfFile.length() > 4 ? pdfFile.substring(0, pdfFile.length() - 4) : pdfFile;
                outputFile = new File(base + ext).getAbsolutePath();
            }
            document = PDDocument.load(new File(pdfFile), password);
            if (!document.getCurrentAccessPermission().canExtractContent()) {
                throw new IOException("You do not have permission to extract text");
            }
            stopProcessing("Time for loading: ", loadStart);

            if (toConsole) {
                output = new OutputStreamWriter(System.out, encoding);
            } else {
                if (toHtml && !STD_ENCODING.equals(encoding)) {
                    encoding = STD_ENCODING;
                    System.out.println("The encoding parameter is ignored when writing html output.");
                }
                output = new OutputStreamWriter(new FileOutputStream(outputFile), encoding);
            }

            long extractStart = startProcessing("Starting text extraction");
            if (this.debugOutput) {
                System.err.println("Writing to " + outputFile);
            }

            PDFTextStripper stripper;
            if (toHtml) {
                stripper = new PDFText2HTML();
            } else if (rotationMagic) {
                stripper = new FilteredTextStripper();
            } else {
                stripper = new PDFTextStripper();
            }
            stripper.setSortByPosition(sort);
            stripper.setShouldSeparateByBeads(separateBeads);
            if (toHtml) {
                // The HTML stripper handles the whole page range in a single pass.
                stripper.setStartPage(startPage);
                stripper.setEndPage(endPage);
                stripper.writeText(document, output);
            } else {
                extractPages(startPage, Math.min(endPage, document.getNumberOfPages()), stripper, document, output,
                        rotationMagic, alwaysNext);
            }

            extractEmbeddedPdfs(document, stripper, output, toHtml, rotationMagic, alwaysNext);
            stopProcessing("Time for extraction: ", extractStart);
        } finally {
            IOUtils.closeQuietly(output);
            IOUtils.closeQuietly(document);
        }
    }

    /**
     * Returns the argument at {@code index}, printing the usage text and exiting when the
     * option that precedes it was the last argument.
     */
    private static String requireValue(String[] args, int index) {
        if (index >= args.length) {
            usage();
        }
        return args[index];
    }

    /**
     * Extracts the text of every embedded file whose subtype is {@code application/pdf}, using
     * the same stripper and writer as for the main document. Only the names held directly in
     * the embedded-files node returned by the document catalog are visited.
     */
    private void extractEmbeddedPdfs(PDDocument document, PDFTextStripper stripper, Writer output, boolean toHtml,
            boolean rotationMagic, boolean alwaysNext) throws IOException {
        PDDocumentNameDictionary catalogNames = document.getDocumentCatalog().getNames();
        if (catalogNames == null) {
            return;
        }
        PDEmbeddedFilesNameTreeNode embeddedFiles = catalogNames.getEmbeddedFiles();
        if (embeddedFiles == null) {
            return;
        }
        Map<String, PDComplexFileSpecification> fileSpecs = embeddedFiles.getNames();
        if (fileSpecs == null) {
            return;
        }
        for (Map.Entry<String, PDComplexFileSpecification> entry : fileSpecs.entrySet()) {
            if (this.debugOutput) {
                System.err.println("Processing embedded file " + entry.getKey() + ":");
            }
            PDEmbeddedFile embeddedFile = entry.getValue().getEmbeddedFile();
            if (embeddedFile == null || !"application/pdf".equals(embeddedFile.getSubtype())) {
                continue;
            }
            if (this.debugOutput) {
                System.err.println("  is PDF (size=" + embeddedFile.getSize() + ")");
            }
            COSInputStream embeddedStream = embeddedFile.createInputStream();
            PDDocument subDocument = null;
            try {
                subDocument = PDDocument.load(embeddedStream);
                if (toHtml) {
                    stripper.writeText(subDocument, output);
                } else {
                    extractPages(1, subDocument.getNumberOfPages(), stripper, subDocument, output, rotationMagic,
                            alwaysNext);
                }
            } finally {
                // Make sure the sub-document is released even if closing the stream fails.
                try {
                    embeddedStream.close();
                } finally {
                    IOUtils.closeQuietly(subDocument);
                }
            }
        }
    }

    /**
     * Extracts the text of pages {@code i} to {@code i2} (1-based, inclusive), one page at a time.
     *
     * @param i               first page to extract
     * @param i2              last page to extract
     * @param pDFTextStripper stripper used to produce the text
     * @param pDDocument      document to read from
     * @param writer          destination of the extracted text
     * @param z               rotation magic: collect the text angles found on each page and run
     *                        the stripper once per angle with a compensating rotation prepended
     * @param z2              always next: log an {@link IOException} on a page and continue with
     *                        the next page instead of aborting
     * @throws IOException if a page fails and {@code z2} is {@code false}
     */
    private void extractPages(int i, int i2, PDFTextStripper pDFTextStripper, PDDocument pDDocument, Writer writer, boolean z, boolean z2) throws IOException {
        for (int pageNumber = i; pageNumber <= i2; pageNumber++) {
            pDFTextStripper.setStartPage(pageNumber);
            pDFTextStripper.setEndPage(pageNumber);
            try {
                if (z) {
                    PDPage page = pDDocument.getPage(pageNumber - 1);
                    int rotation = page.getRotation();
                    page.setRotation(0);
                    AngleCollector angleCollector = new AngleCollector();
                    angleCollector.setStartPage(pageNumber);
                    angleCollector.setEndPage(pageNumber);
                    angleCollector.writeText(pDDocument, new NullWriter());
                    for (int angle : angleCollector.getAngles()) {
                        // Prepend a rotation by -angle to the page content.
                        PDPageContentStream contentStream = new PDPageContentStream(pDDocument, page,
                                PDPageContentStream.AppendMode.PREPEND, false);
                        try {
                            contentStream.transform(Matrix.getRotateInstance(-Math.toRadians(angle), 0.0f, 0.0f));
                        } finally {
                            contentStream.close();
                        }
                        pDFTextStripper.writeText(pDDocument, writer);
                        // Drop the prepended transformation again.
                        ((COSArray) page.getCOSObject().getItem(COSName.CONTENTS)).remove(0);
                    }
                    page.setRotation(rotation);
                } else {
                    pDFTextStripper.writeText(pDDocument, writer);
                }
            } catch (IOException e) {
                if (!z2) {
                    throw e;
                }
                LOG.error("Failed to process page " + pageNumber, e);
            }
        }
    }

    /**
     * Prints {@code str} to stderr when debug output is enabled.
     *
     * @return the current time in milliseconds, to be passed to {@link #stopProcessing}
     */
    private long startProcessing(String str) {
        if (this.debugOutput) {
            System.err.println(str);
        }
        return System.currentTimeMillis();
    }

    /**
     * Prints {@code str} followed by the seconds elapsed since {@code j} to stderr when debug
     * output is enabled.
     */
    private void stopProcessing(String str, long j) {
        if (this.debugOutput) {
            System.err.println(str + (((float) (System.currentTimeMillis() - j)) / 1000.0f) + " seconds");
        }
    }

    /**
     * Computes the angle of a piece of text, rounded to whole degrees, from its text matrix
     * concatenated with the font matrix of its font.
     *
     * @param textPosition the text position to inspect
     * @return {@code atan2(shearY, scaleY)} of the combined matrix, in degrees
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public static int getAngle(TextPosition textPosition) {
        // Work on a copy so the text position's own matrix is left untouched.
        Matrix matrix = textPosition.getTextMatrix().clone();
        matrix.concatenate(textPosition.getFont().getFontMatrix());
        return (int) Math.round(Math.toDegrees(Math.atan2(matrix.getShearY(), matrix.getScaleY())));
    }

    /** Prints the command-line help to stderr and terminates the JVM with exit code 1. */
    private static void usage() {
        System.err.println("Usage: java -jar pdfbox-app-x.y.z.jar ExtractText [options] <inputfile> [output-text-file]\n\nOptions:\n  -password <password>        : Password to decrypt document\n  -encoding <output encoding> : UTF-8 (default) or ISO-8859-1, UTF-16BE,\n                                UTF-16LE, etc.\n  -console                    : Send text to console instead of file\n  -html                       : Output in HTML format instead of raw text\n  -sort                       : Sort the text before writing\n  -ignoreBeads                : Disables the separation by beads\n  -debug                      : Enables debug output about the time consumption\n                                of every stage\n  -alwaysNext                 : Process next page (if applicable) despite\n                                IOException (ignored when -html)\n  -rotationMagic              : Analyze each page for rotated/skewed text,\n                                rotate to 0° and extract separately\n                                (slower, and ignored when -html)\n  -startPage <number>         : The first page to start extraction (1 based)\n  -endPage <number>           : The last page to extract (1 based, inclusive)\n  <inputfile>                 : The PDF document to use\n  [output-text-file]          : The file to write the text to");
        System.exit(1);
    }
}
