public class WordExtractor extends Object
| Modifier and Type | Class and Description |
|---|---|
| static class | WordExtractor.TagAndStyle |
| Constructor and Description |
|---|
| WordExtractor(ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
| static WordExtractor.TagAndStyle | buildParagraphTagAndStyle(String styleName, boolean isTable)<br>Given a style name, return what tag should be used, and what style should be applied to it. |
| protected Detector | getDetector() |
| protected MimeTypes | getMimeTypes() |
| protected String | getPassword()<br>Returns the password to be used for this file, or null if no / default password should be used |
| protected TikaConfig | getTikaConfig() |
| protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml)<br>Handle an office document that's embedded at the POIFS level |
| protected void | handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) |
| protected void | parse(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) |
| protected void | parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) |
| protected void | parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) |
| protected void | parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) |
public WordExtractor(ParseContext context)
public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable)
protected void parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
Throws: IOException, SAXException, TikaException
protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
Throws: IOException, SAXException, TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
Throws: IOException, SAXException, TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
Throws: IOException, SAXException, TikaException
protected TikaConfig getTikaConfig()
protected Detector getDetector()
protected MimeTypes getMimeTypes()
protected String getPassword()
protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
Throws: IOException, SAXException, TikaException
Copyright © 2007–2016 The Apache Software Foundation. All rights reserved.