package org.apache.lenya.search.crawler;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import websphinx.Crawler;
import websphinx.EventLog;
import websphinx.Link;
import websphinx.Mirror;
import websphinx.Page;

/* loaded from: input_file:org/apache/lenya/search/crawler/DumpingCrawler.class */
/**
 * Crawler that hands every visited page to a websphinx {@link Mirror} rooted at a
 * local dump directory and records one line per stored page in a {@code .meta}
 * file inside that directory.
 *
 * <p>Each {@code .meta} line has the form
 * {@code <path relative to dump dir>,<mime type>[,<content encoding>]}.
 *
 * <p>Only links whose URL starts with one of the configured crawl scope URLs are
 * followed, and at most {@link #getMaxPages()} links are accepted per instance.
 * Call {@link #close()} once crawling is finished so the mirror and the
 * {@code .meta} file are closed.
 */
public class DumpingCrawler extends Crawler {
    /** Link types passed to {@code setLinkType} in the constructor. */
    private static final String[] LINK_TYPES = {"hyperlink", "image", "code", "header-link"};

    /** URL prefixes, each ending in "/", that a link must start with to be followed. */
    private String[] crawlScopeURLs;
    /** Directory the mirror writes into and where the {@code .meta} file lives. */
    private String dumpDir;
    /** Mirror that stores the visited pages. */
    private Mirror mirror;
    /** Number of links accepted so far by {@link #shouldVisit(Link)}. */
    private int nofPages;
    /** Upper bound for {@link #nofPages}; defaults to 100. */
    private int maxPages;
    /** Writer for the {@code .meta} file. */
    private PrintWriter meta;

    /**
     * Creates a crawler with a single crawl scope URL.
     *
     * @param str  URL at which crawling starts
     * @param str2 crawl scope URL; must be a prefix of {@code str}
     * @param str3 directory the pages are dumped into
     * @throws FileNotFoundException declared for compatibility with existing callers
     * @throws IllegalArgumentException if {@code str2} is not a prefix of {@code str}
     * @throws RuntimeException if the mirror or the {@code .meta} file cannot be created
     */
    public DumpingCrawler(String str, String str2, String str3) throws FileNotFoundException {
        this(str, makeArray(str2), str3);
    }

    /** Wraps a single string into a one-element array. */
    private static String[] makeArray(String str) {
        return new String[]{str};
    }

    /**
     * Creates a crawler with one or more crawl scope URLs.
     *
     * @param str    URL at which crawling starts
     * @param strArr crawl scope URLs; the first one must be a prefix of {@code str}.
     *               The array is copied, so the caller's array is left untouched.
     * @param str2   directory the pages are dumped into
     * @throws FileNotFoundException declared for compatibility with existing callers
     * @throws IllegalArgumentException if {@code strArr} is null or empty, or if its
     *         first element is not a prefix of {@code str}
     * @throws RuntimeException if the mirror or the {@code .meta} file cannot be created
     */
    public DumpingCrawler(String str, String[] strArr, String str2) throws FileNotFoundException {
        this.nofPages = 0;
        this.maxPages = 100;

        // Fail with a clear message instead of an index or null-pointer error below.
        if (strArr == null || strArr.length == 0) {
            throw new IllegalArgumentException("At least one crawlScopeURL is required");
        }

        try {
            setRoot(new Link(str));
        } catch (MalformedURLException e) {
            // NOTE(review): a malformed start URL is deliberately tolerated here and
            // leaves the crawler without a root; confirm callers rely on this.
            setRoot(null);
        }

        // The prefix check is made against the scope URL exactly as supplied,
        // before a trailing "/" is appended.
        if (!str.startsWith(strArr[0])) {
            throw new IllegalArgumentException("crawlScopeURL [" + strArr[0]
                    + "] must be a prefix of crawlStartURL [" + str + "]");
        }

        // Work on a private copy so normalising the prefixes does not modify the
        // array owned by the caller.
        String[] scopes = strArr.clone();
        for (int i = 0; i < scopes.length; i++) {
            if (!scopes[i].endsWith("/")) {
                scopes[i] = scopes[i] + "/";
            }
        }
        this.crawlScopeURLs = scopes;
        this.dumpDir = str2;

        setSynchronous(true);
        // More than one scope selects the WEB domain, a single scope the SERVER
        // domain. NOTE(review): presumably because several scopes may span hosts;
        // confirm against the websphinx documentation.
        if (this.crawlScopeURLs.length > 1) {
            setDomain(Crawler.WEB);
        } else {
            setDomain(Crawler.SERVER);
        }
        setLinkType(LINK_TYPES);

        try {
            this.mirror = new Mirror(this.dumpDir, this.crawlScopeURLs[0]);
            new File(str2).mkdirs();
            this.meta = new PrintWriter(
                    new FileOutputStream(this.dumpDir + File.separator + ".meta"));
        } catch (IOException e2) {
            throw new RuntimeException("Could not create mirror with directory: "
                    + this.dumpDir + ": " + e2, e2);
        }
    }

    /**
     * Writes the page to the mirror and, if the page has a local file and a mime
     * type, appends a line describing it to the {@code .meta} file.
     *
     * @param page the page that was just retrieved
     * @throws RuntimeException if the page cannot be saved
     */
    @Override // websphinx.Crawler
    public void visit(Page page) {
        try {
            this.mirror.writePage(page);
            File localFile = page.getLocalFile();
            // Nothing is recorded without a local file or a mime type, so skip the
            // canonical-path work in that case.
            if (localFile == null || page.getMimeType() == null) {
                return;
            }
            // Path of the stored file relative to the dump directory; the +1 skips
            // the separator that follows the directory name.
            int dumpDirPrefixLength = new File(this.dumpDir).getCanonicalPath().length() + 1;
            String line = localFile.getCanonicalPath().substring(dumpDirPrefixLength)
                    + "," + page.getMimeType();
            if (page.getContentEncoding() != null) {
                line = line + "," + page.getContentEncoding();
            }
            this.meta.println(line);
        } catch (IOException e) {
            throw new RuntimeException("Could not save page: url=" + page.getURL() + ": " + e, e);
        }
    }

    /**
     * Accepts a link only if the page budget is not exhausted, its URL starts with
     * one of the crawl scope URLs, and the superclass accepts it as well. Only
     * accepted links count against the page budget.
     *
     * @param link the link under consideration
     * @return {@code true} if the link should be followed
     */
    @Override // websphinx.Crawler
    public boolean shouldVisit(Link link) {
        if (this.nofPages >= this.maxPages) {
            return false;
        }
        String url = link.getURL().toString();
        for (String scope : this.crawlScopeURLs) {
            if (url.startsWith(scope)) {
                boolean accepted = super.shouldVisit(link);
                if (accepted) {
                    // Links rejected by the superclass must not use up the budget.
                    this.nofPages++;
                }
                return accepted;
            }
        }
        return false;
    }

    /**
     * Closes the mirror and the {@code .meta} file. The {@code .meta} file is
     * flushed and closed even if closing the mirror fails.
     *
     * @throws RuntimeException if the mirror cannot be closed
     */
    public void close() {
        try {
            this.mirror.close();
        } catch (IOException e) {
            throw new RuntimeException("Could not close mirror: " + e, e);
        } finally {
            this.meta.flush();
            this.meta.close();
        }
    }

    /**
     * Command line entry point.
     *
     * @param strArr {@code crawlStartURL}, comma-separated {@code crawlScopeURLs},
     *               {@code dumpDir}, {@code maxDepth}, {@code maxPages}
     * @throws Exception if the arguments are invalid or crawling fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 5) {
            throw new IllegalArgumentException("Usage: DumpingCrawler <crawlStartURL> "
                    + "<crawlScopeURL[,crawlScopeURL...]> <dumpDir> <maxDepth> <maxPages>");
        }
        String crawlStartURL = strArr[0];
        String[] scopeURLs = strArr[1].split(",");
        String dumpDirectory = strArr[2];
        int maxDepth = Integer.parseInt(strArr[3]);
        int pageLimit = Integer.parseInt(strArr[4]);

        DumpingCrawler dumpingCrawler = new DumpingCrawler(crawlStartURL, scopeURLs, dumpDirectory);
        try {
            dumpingCrawler.setMaxDepth(maxDepth);
            dumpingCrawler.setMaxPages(pageLimit);
            EventLog eventLog = new EventLog(System.out);
            dumpingCrawler.addCrawlListener(eventLog);
            dumpingCrawler.addLinkListener(eventLog);
            dumpingCrawler.run();
        } finally {
            // Release the mirror and the .meta file even when the crawl fails.
            dumpingCrawler.close();
        }
    }

    /**
     * Returns the maximum number of links this crawler accepts.
     *
     * @return the page limit
     */
    public int getMaxPages() {
        return this.maxPages;
    }

    /**
     * Sets the maximum number of links this crawler accepts.
     *
     * @param i the new page limit
     */
    public void setMaxPages(int i) {
        this.maxPages = i;
    }
}
