/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.util.Date;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.feeds.TrecContentSource;
import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser;

/**
 * {@link TrecDocParser} variant that fills a {@link DocData} from a document
 * buffer containing {@code <TEXT>} and {@code <DATE>} markers.
 *
 * <p>The body is taken from the position just after the first {@code <TEXT>}
 * marker (or from the start of the buffer when the marker is absent). A date
 * is only looked for when a {@code <TEXT>} marker is present.
 */
public class TrecFR94Parser
extends TrecDocParser {
    /** Marker that opens the text section. */
    private static final String TEXT = "<TEXT>";
    /** Number of characters in {@link #TEXT}; added to its index to skip past it. */
    private static final int TEXT_LENGTH = TEXT.length();
    /** Marker that closes the text section. */
    private static final String TEXT_END = "</TEXT>";
    /** Marker that opens the date section. */
    private static final String DATE = "<DATE>";
    /** Prefixes handed to {@code extract} as "noise" for the date value. */
    private static final String[] DATE_NOISE_PREFIXES = {"DATE:", "date:", "t.c."};
    /** Marker that closes the date section. */
    private static final String DATE_END = "</DATE>";

    /**
     * Populates {@code docData} with the name, date and body of one document.
     *
     * @param docData  target object; it is cleared and then filled in
     * @param name     value stored as the document name
     * @param trecSrc  used to turn the extracted date text into a {@link Date}
     * @param docBuf   raw document content to scan
     * @param pathType not used by this parser
     * @return the same {@code docData} instance that was passed in
     * @throws IOException declared by the overridden method
     */
    @Override
    public DocData parse(DocData docData, String name, TrecContentSource trecSrc, StringBuilder docBuf, TrecDocParser.ParsePathType pathType) throws IOException {
        Date parsedDate = null;
        // Offset in docBuf from which the body is taken; stays 0 when no <TEXT> marker exists.
        int bodyStart = 0;

        final int textOpen = docBuf.indexOf(TEXT);
        if (textOpen >= 0) {
            // -1 when there is no closing marker; forwarded as-is to extract().
            final int textClose = docBuf.indexOf(TEXT_END, textOpen);
            bodyStart = textOpen + TEXT_LENGTH;

            // NOTE(review): the 4th argument presumably bounds where the date may end - confirm in TrecDocParser.extract.
            final String rawDate = extract(docBuf, DATE, DATE_END, textClose, DATE_NOISE_PREFIXES);
            if (rawDate != null) {
                final String cleanedDate = stripTags(rawDate, 0).trim();
                parsedDate = trecSrc.parseDate(cleanedDate);
            }
        }

        // Reset the target before writing the freshly parsed fields.
        docData.clear();
        docData.setName(name);
        docData.setDate(parsedDate);
        docData.setBody(stripTags(docBuf, bodyStart));
        return docData;
    }
}

