/*
 * Decompiled with CFR 0.152.
 */
package org.apache.jackrabbit.core.query;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import javax.jcr.RepositoryException;
import org.apache.jackrabbit.core.query.LazyReader;
import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.query.lucene.FieldNames;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.core.value.BLOBFileValue;
import org.apache.jackrabbit.core.value.InternalValue;
import org.textmining.text.extraction.WordExtractor;

public class MsWordTextFilter
implements TextFilter {
    public boolean canFilter(String mimeType) {
        return "application/vnd.ms-word".equalsIgnoreCase(mimeType) || "application/msword".equalsIgnoreCase(mimeType);
    }

    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
        InternalValue[] values = data.getValues();
        if (values.length > 0) {
            final BLOBFileValue blob = (BLOBFileValue)values[0].internalValue();
            LazyReader reader = new LazyReader(){

                protected void initializeReader() throws IOException {
                    InputStream in;
                    try {
                        in = blob.getStream();
                    }
                    catch (RepositoryException e) {
                        throw new IOException(e.getMessage());
                    }
                    try {
                        WordExtractor extractor = new WordExtractor();
                        String text = extractor.extractText(in);
                        this.delegate = new StringReader(text);
                    }
                    catch (Exception e) {
                        throw new IOException(e.getMessage());
                    }
                    finally {
                        in.close();
                    }
                }
            };
            HashMap<String, 1> result = new HashMap<String, 1>();
            result.put(FieldNames.FULLTEXT, reader);
            return result;
        }
        throw new RepositoryException("Multi-valued binary properties not supported.");
    }

    static {
        WordExtractor.class.getName();
    }
}

