package org.apache.lenya.search.crawler;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/* loaded from: input_file:org/apache/lenya/search/crawler/HTMLHandler.class */
/**
 * Parser callback that collects indexable data from an HTML document.
 *
 * <p>While the Swing HTML parser walks a document this handler records the
 * page title, the visible text, the {@code href} of every anchor, and the
 * values of a fixed set of {@code <meta name="..." content="...">} tags
 * (description, keywords, categories, published, href, author, robots).
 * Text inside {@code <script>} elements is discarded.
 *
 * <p>{@link #parse(InputStream)} clears all previously collected data before
 * parsing, so one instance can be reused for several documents in sequence.
 * Until {@code parse} has been called once, both robot flags hold the Java
 * default {@code false}; {@code parse} initialises them to {@code true}.
 */
public final class HTMLHandler extends HTMLEditorKit.ParserCallback implements ContentHandler {

    /** Which element the parser is currently inside, as far as text handling cares. */
    private enum State {
        /** Ordinary content: text is appended to the page contents. */
        NONE,
        /** Inside {@code <title>}: text becomes the page title. */
        TITLE,
        /** Inside an anchor that had an {@code href}: text is appended to the contents. */
        HREF,
        /** Inside {@code <script>}: text is ignored. */
        SCRIPT
    }

    /** Separator appended after every text chunk added to the contents. */
    private static final char SPACE = ' ';

    /** Parser front end used by every {@link #parse(InputStream)} call. */
    private static final ParserDelegator PARSER = new ParserDelegator();

    private String title;
    private String description;
    private String keywords;
    private String categories;
    private String href;
    private String author;
    private boolean robotIndex;
    private boolean robotFollow;
    private State state = State.NONE;
    private final StringBuilder contents = new StringBuilder();
    private List<String> links = new ArrayList<String>();
    private long published = -1;
    private final SimpleDateFormat dateFormatter =
            new SimpleDateFormat("yyyy.MM.dd HH:mm:ss z");

    /** @return content of the {@code author} meta tag, or {@code null} if none was seen */
    @Override
    public String getAuthor() {
        return author;
    }

    /** @return content of the {@code categories} meta tag, or {@code null} if none was seen */
    @Override
    public String getCategories() {
        return categories;
    }

    /** @return all collected text chunks, each followed by a single space */
    @Override
    public String getContents() {
        return contents.toString();
    }

    /** @return content of the {@code description} meta tag, or {@code null} if none was seen */
    @Override
    public String getDescription() {
        return description;
    }

    /** @return content of the {@code href} meta tag, or {@code null} if none was seen */
    @Override
    public String getHREF() {
        return href;
    }

    /** @return content of the {@code keywords} meta tag, or {@code null} if none was seen */
    @Override
    public String getKeywords() {
        return keywords;
    }

    /**
     * Returns the {@code href} values of the anchors seen so far, in document order.
     * The handler's own list is returned (no copy); a later {@link #parse(InputStream)}
     * replaces it with a new list instead of clearing it, so a list obtained earlier
     * keeps its entries.
     *
     * @return list of {@code String} link targets (raw type kept to match the existing signature)
     */
    @Override
    public List getLinks() {
        return links;
    }

    /**
     * @return the {@code published} meta date as milliseconds since the epoch,
     *         or {@code -1} if the tag was absent or could not be parsed
     */
    @Override
    public long getPublished() {
        return published;
    }

    /** @return {@code false} once a robots meta tag containing {@code nofollow} was seen */
    @Override
    public boolean getRobotFollow() {
        return robotFollow;
    }

    /** @return {@code false} once a robots meta tag containing {@code noindex} was seen */
    @Override
    public boolean getRobotIndex() {
        return robotIndex;
    }

    /** @return text of the last {@code <title>} text chunk, or {@code null} if none was seen */
    @Override
    public String getTitle() {
        return title;
    }

    /**
     * Records the target of an anchor start tag. Anchors without an {@code href}
     * attribute are ignored and leave the current state untouched.
     *
     * @param mutableAttributeSet attributes of the {@code <a>} tag
     */
    public void handleAnchor(MutableAttributeSet mutableAttributeSet) {
        String target = (String) mutableAttributeSet.getAttribute(HTML.Attribute.HREF);
        if (target != null) {
            links.add(target);
            state = State.HREF;
        }
    }

    /**
     * Leaves the current state when the end tag matching that state is seen;
     * any other end tag is ignored.
     *
     * @param tag the end tag reported by the parser
     * @param i   position reported by the parser (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int i) {
        boolean closesCurrentState =
                (state == State.TITLE && tag.equals(HTML.Tag.TITLE))
                || (state == State.HREF && tag.equals(HTML.Tag.A))
                || (state == State.SCRIPT && tag.equals(HTML.Tag.SCRIPT));
        if (closesCurrentState) {
            state = State.NONE;
        }
    }

    /**
     * Stores the content of a recognised {@code <meta>} tag. Tags lacking either
     * the {@code name} or the {@code content} attribute, and tags with an
     * unrecognised name, are ignored. Names are matched case-insensitively.
     *
     * @param mutableAttributeSet attributes of the {@code <meta>} tag
     */
    public void handleMeta(MutableAttributeSet mutableAttributeSet) {
        String name = (String) mutableAttributeSet.getAttribute(HTML.Attribute.NAME);
        String content = (String) mutableAttributeSet.getAttribute(HTML.Attribute.CONTENT);
        if (name == null || content == null) {
            return;
        }

        // Locale.ROOT keeps the comparison stable under locales with special
        // case mappings (e.g. Turkish dotted/dotless i).
        String key = name.toUpperCase(Locale.ROOT);
        if (key.equals("DESCRIPTION")) {
            description = content;
        } else if (key.equals("KEYWORDS")) {
            keywords = content;
        } else if (key.equals("CATEGORIES")) {
            categories = content;
        } else if (key.equals("PUBLISHED")) {
            parsePublished(content);
        } else if (key.equals("HREF")) {
            href = content;
        } else if (key.equals("AUTHOR")) {
            author = content;
        } else if (key.equals("ROBOTS")) {
            applyRobotsDirectives(content);
        }
    }

    /**
     * Parses a {@code published} meta value with the pattern
     * {@code yyyy.MM.dd HH:mm:ss z}. On failure the previous value is kept and
     * the error is printed; a malformed date must not abort the whole parse.
     */
    private void parsePublished(String content) {
        try {
            published = dateFormatter.parse(content).getTime();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Applies a robots meta value: clears the index flag if it contains
     * {@code noindex} and the follow flag if it contains {@code nofollow}.
     * Matching ignores case; no other field is modified.
     */
    private void applyRobotsDirectives(String content) {
        String directives = content.toLowerCase(Locale.ROOT);
        if (directives.indexOf("noindex") != -1) {
            robotIndex = false;
        }
        if (directives.indexOf("nofollow") != -1) {
            robotFollow = false;
        }
    }

    /**
     * Handles tags the parser reports as simple (no separate end tag); only
     * {@code <meta>} is of interest.
     *
     * @param tag                 the tag reported by the parser
     * @param mutableAttributeSet attributes of the tag
     * @param i                   position reported by the parser (unused)
     */
    @Override
    public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet mutableAttributeSet, int i) {
        if (tag.equals(HTML.Tag.META)) {
            handleMeta(mutableAttributeSet);
        }
    }

    /**
     * Switches state on {@code <title>}, {@code <a>} and {@code <script>} start
     * tags; every other start tag is ignored.
     *
     * @param tag                 the start tag reported by the parser
     * @param mutableAttributeSet attributes of the tag
     * @param i                   position reported by the parser (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet mutableAttributeSet, int i) {
        if (tag.equals(HTML.Tag.TITLE)) {
            state = State.TITLE;
        } else if (tag.equals(HTML.Tag.A)) {
            handleAnchor(mutableAttributeSet);
        } else if (tag.equals(HTML.Tag.SCRIPT)) {
            state = State.SCRIPT;
        }
    }

    /**
     * Routes a chunk of text according to the current state: title text
     * replaces the title, script text is dropped, everything else (including
     * anchor text) is appended to the contents followed by a space.
     *
     * @param cArr the text reported by the parser
     * @param i    position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] cArr, int i) {
        switch (state) {
            case TITLE:
                title = new String(cArr);
                break;
            case SCRIPT:
                break;
            default:
                // NONE and HREF are treated alike.
                contents.append(cArr).append(SPACE);
                break;
        }
    }

    /**
     * Discards all previously collected data and parses the given stream as HTML.
     * Any exception raised while parsing is printed and otherwise ignored, so the
     * getters then return whatever was collected up to that point. The stream is
     * not closed here.
     *
     * @param inputStream the HTML document; decoded with the platform default
     *                    charset, and charset declarations inside the document
     *                    are ignored by the parser
     */
    @Override
    public void parse(InputStream inputStream) {
        try {
            reset();
            PARSER.parse(new BufferedReader(new InputStreamReader(inputStream)), this, true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Restores the state expected at the start of a document. */
    private void reset() {
        title = null;
        description = null;
        keywords = null;
        categories = null;
        href = null;
        author = null;
        contents.setLength(0);
        // A fresh list (rather than clear()) so a list handed out earlier by
        // getLinks() is not emptied behind the caller's back.
        links = new ArrayList<String>();
        published = -1L;
        robotIndex = true;
        robotFollow = true;
        state = State.NONE;
    }
}
