package org.apache.lenya.search.crawler;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import websphinx.Crawler;
import websphinx.EventLog;
import websphinx.Link;
import websphinx.Mirror;
import websphinx.Page;

/* loaded from: input_file:org/apache/lenya/search/crawler/DumpingCrawler.class */
/**
 * Crawler that writes every visited page through a {@link Mirror} rooted at a local
 * dump directory and records one index line per dumped page in a {@code .meta} file
 * inside that directory.
 *
 * <p>Each index line has the form {@code relativePath,mimeType[,contentEncoding]}, where
 * {@code relativePath} is the page's local file path relative to the dump directory.</p>
 *
 * <p>Only links whose URL starts with the crawl scope URL are accepted, and at most
 * {@link #getMaxPages()} links are accepted over the lifetime of one instance.
 * Call {@link #close()} when crawling is finished so the mirror and the index file
 * are closed.</p>
 */
public class DumpingCrawler extends Crawler {
    /** Link types handed to {@code setLinkType} in the constructor. */
    private static final String[] LINK_TYPES = {"hyperlink", "image", "code", "header-link"};

    /** Scope prefix (always ending in "/") that a link URL must start with to be accepted. */
    private final String crawlScopeURL;
    /** Directory the mirror and the {@code .meta} index are written to. */
    private final String dumpDir;
    /** Mirror used to write visited pages to disk. */
    private final Mirror mirror;
    /** Writer for the {@code .meta} index file. */
    private final PrintWriter meta;
    /** Number of links accepted so far by {@link #shouldVisit(Link)}. */
    private int nofPages = 0;
    /** Upper bound on the number of links accepted by {@link #shouldVisit(Link)}. */
    private int maxPages = 100;

    /**
     * Creates a crawler that starts at {@code crawlStartURL}, stays below
     * {@code crawlScopeURL} and dumps into {@code dumpDir}.
     *
     * @param crawlStartURL URL the crawl starts from; must start with {@code crawlScopeURL}
     * @param crawlScopeURL URL prefix links must match; a trailing "/" is appended if missing
     * @param dumpDir       directory receiving the mirrored pages and the {@code .meta} index
     * @throws IllegalArgumentException if {@code crawlScopeURL} is not a prefix of {@code crawlStartURL}
     * @throws RuntimeException         if the mirror or the {@code .meta} file cannot be created
     * @throws FileNotFoundException    declared for compatibility with existing callers
     */
    public DumpingCrawler(String crawlStartURL, String crawlScopeURL, String dumpDir) throws FileNotFoundException {
        try {
            setRoot(new Link(crawlStartURL));
        } catch (MalformedURLException e) {
            // NOTE(review): existing behaviour kept on purpose - a malformed start URL leaves
            // the crawler with a null root instead of failing. Confirm callers rely on this.
            setRoot(null);
        }
        // The prefix check uses the scope exactly as given, before the trailing "/" is added.
        if (!crawlStartURL.startsWith(crawlScopeURL)) {
            throw new IllegalArgumentException("crawlScopeURL [" + crawlScopeURL + "] must be a prefix of crawlStartURL [" + crawlStartURL + "]");
        }
        this.crawlScopeURL = crawlScopeURL.endsWith("/") ? crawlScopeURL : crawlScopeURL + "/";
        this.dumpDir = dumpDir;
        setSynchronous(true);
        setDomain(Crawler.SERVER);
        setLinkType(LINK_TYPES);
        try {
            this.mirror = new Mirror(this.dumpDir, this.crawlScopeURL);
            new File(dumpDir).mkdirs();
            this.meta = new PrintWriter(new FileOutputStream(this.dumpDir + File.separator + ".meta"));
        } catch (IOException e) {
            throw new RuntimeException("Could not create mirror with directory: " + this.dumpDir + ": " + e, e);
        }
    }

    /**
     * Writes the page through the mirror and, when the page has both a local file and a
     * mime type, appends its index line to the {@code .meta} file.
     *
     * @param page the page that was just retrieved
     * @throws RuntimeException if the page cannot be saved or a canonical path cannot be resolved
     */
    @Override // websphinx.Crawler
    public void visit(Page page) {
        try {
            this.mirror.writePage(page);
            File localFile = page.getLocalFile();
            // Pages without a local file or without a mime type are mirrored but not indexed;
            // checking up front avoids resolving paths and building a line that is never written.
            if (localFile == null || page.getMimeType() == null) {
                return;
            }
            String dumpRoot = new File(this.dumpDir).getCanonicalPath();
            // +1 skips the path separator that follows the dump directory prefix.
            String relativePath = localFile.getCanonicalPath().substring(dumpRoot.length() + 1);
            String entry = relativePath + "," + page.getMimeType();
            if (page.getContentEncoding() != null) {
                entry = entry + "," + page.getContentEncoding();
            }
            this.meta.println(entry);
        } catch (IOException e) {
            throw new RuntimeException("Could not save page: url=" + page.getURL() + ": " + e, e);
        }
    }

    /**
     * Accepts a link only if its URL lies inside the crawl scope, the page budget is not
     * exhausted and the superclass also accepts it. The page counter is advanced only for
     * links that are actually accepted.
     *
     * @param link candidate link
     * @return {@code true} if the link should be visited
     */
    @Override // websphinx.Crawler
    public boolean shouldVisit(Link link) {
        if (!link.getURL().toString().startsWith(this.crawlScopeURL)) {
            return false;
        }
        if (this.nofPages >= this.maxPages) {
            return false;
        }
        if (!super.shouldVisit(link)) {
            // Rejected by the superclass: do not charge it against the page budget.
            return false;
        }
        this.nofPages++;
        return true;
    }

    /**
     * Closes the mirror and the {@code .meta} index file. The index file is closed even
     * when closing the mirror fails.
     *
     * @throws RuntimeException if the mirror cannot be closed
     */
    public void close() {
        try {
            this.mirror.close();
        } catch (IOException e) {
            throw new RuntimeException("Could not close mirror: " + e, e);
        } finally {
            // PrintWriter.close() flushes buffered index lines before releasing the file.
            this.meta.close();
        }
    }

    /**
     * Command line entry point.
     *
     * @param args {@code crawlStartURL crawlScopeURL dumpDir maxDepth maxPages}
     * @throws IllegalArgumentException if fewer than five arguments are supplied
     * @throws Exception                any failure raised while parsing arguments or crawling
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 5) {
            throw new IllegalArgumentException("Usage: DumpingCrawler <crawlStartURL> <crawlScopeURL> <dumpDir> <maxDepth> <maxPages>");
        }
        String crawlStartURL = args[0];
        String crawlScopeURL = args[1];
        String dumpDir = args[2];
        // Parse the numbers before constructing the crawler so bad input creates no files.
        int maxDepth = Integer.parseInt(args[3]);
        int maxPages = Integer.parseInt(args[4]);
        DumpingCrawler crawler = new DumpingCrawler(crawlStartURL, crawlScopeURL, dumpDir);
        try {
            crawler.setMaxDepth(maxDepth);
            crawler.setMaxPages(maxPages);
            EventLog eventLog = new EventLog(System.out);
            crawler.addCrawlListener(eventLog);
            crawler.addLinkListener(eventLog);
            crawler.run();
        } finally {
            // Always release the mirror and the index file, even when the crawl fails.
            crawler.close();
        }
    }

    /**
     * @return the maximum number of links this crawler accepts
     */
    public int getMaxPages() {
        return this.maxPages;
    }

    /**
     * @param i the maximum number of links this crawler accepts
     */
    public void setMaxPages(int i) {
        this.maxPages = i;
    }
}
