Identifying file type in Java

I use Apache Tika which identifies the filetype using magic byte patterns and globbing hints (the file extension) to detect the MIME type. It also supports additional parsing of file contents (which I don’t really use).

Here is a quick and dirty example on how Tika can be used to detect the file type without performing any additional parsing on the file:

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.HashMap;

import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.helpers.DefaultHandler;

public class Detector {

    public static void main(String[] args) throws Exception {
        File file = new File("/pats/to/file.xls");

        AutoDetectParser parser = new AutoDetectParser();
        parser.setParsers(new HashMap<MediaType, Parser>());

        Metadata metadata = new Metadata();
        metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, file.getName());

        InputStream stream = new FileInputStream(file);
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
        stream.close();

        String mimeType = metadata.get(HttpHeaders.CONTENT_TYPE);
        System.out.println(mimeType);
    }

}

Leave a Comment