|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
org.apache.poi.hwpf.converter.AbstractWordConverter
org.apache.poi.hwpf.converter.WordToTextConverter
@Beta public class WordToTextConverter
| Field Summary |
|---|
| Fields inherited from class org.apache.poi.hwpf.converter.AbstractWordConverter |
|---|
UNICODECHAR_NO_BREAK_SPACE, UNICODECHAR_NONBREAKING_HYPHEN, UNICODECHAR_ZERO_WIDTH_SPACE |
| Constructor Summary | |
|---|---|
WordToTextConverter()
Creates new instance of WordToTextConverter. |
|
WordToTextConverter(org.w3c.dom.Document document)
Creates new instance of WordToTextConverter. |
|
WordToTextConverter(TextDocumentFacade textDocumentFacade)
|
|
| Method Summary | |
|---|---|
protected void |
afterProcess()
Special actions that need to be called after processing complete, like updating stylesheets or building document notes list. |
org.w3c.dom.Document |
getDocument()
|
java.lang.String |
getText()
|
static java.lang.String |
getText(DirectoryNode root)
|
static java.lang.String |
getText(java.io.File docFile)
|
static java.lang.String |
getText(HWPFDocumentCore wordDocument)
|
boolean |
isOutputSummaryInformation()
|
static void |
main(java.lang.String[] args)
Java main() interface to interact with WordToTextConverter |
protected void |
outputCharacters(org.w3c.dom.Element block,
CharacterRun characterRun,
java.lang.String text)
|
protected void |
processBookmarks(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range range,
int currentTableLevel,
java.util.List<Bookmark> rangeBookmarks)
Wrap range into bookmark(s) and process it. |
protected void |
processDocumentInformation(SummaryInformation summaryInformation)
|
void |
processDocumentPart(HWPFDocumentCore wordDocument,
Range range)
|
protected void |
processDrawnObject(HWPFDocument doc,
CharacterRun characterRun,
OfficeDrawing officeDrawing,
java.lang.String path,
org.w3c.dom.Element block)
|
protected void |
processEndnoteAutonumbered(HWPFDocument wordDocument,
int noteIndex,
org.w3c.dom.Element block,
Range endnoteTextRange)
|
protected void |
processFootnoteAutonumbered(HWPFDocument wordDocument,
int noteIndex,
org.w3c.dom.Element block,
Range footnoteTextRange)
|
protected void |
processHyperlink(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range textRange,
int currentTableLevel,
java.lang.String hyperlink)
|
protected void |
processImage(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture)
|
protected void |
processImage(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture,
java.lang.String url)
|
protected void |
processImageWithoutPicturesManager(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture)
|
protected void |
processLineBreak(org.w3c.dom.Element block,
CharacterRun characterRun)
|
protected void |
processNote(HWPFDocument wordDocument,
org.w3c.dom.Element block,
Range noteTextRange)
|
protected boolean |
processOle2(HWPFDocument wordDocument,
org.w3c.dom.Element block,
Entry entry)
|
protected void |
processPageBreak(HWPFDocumentCore wordDocument,
org.w3c.dom.Element flow)
|
protected void |
processPageref(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range textRange,
int currentTableLevel,
java.lang.String pageref)
|
protected void |
processParagraph(HWPFDocumentCore wordDocument,
org.w3c.dom.Element parentElement,
int currentTableLevel,
Paragraph paragraph,
java.lang.String bulletText)
|
protected void |
processSection(HWPFDocumentCore wordDocument,
Section section,
int s)
|
protected void |
processTable(HWPFDocumentCore wordDocument,
org.w3c.dom.Element flow,
Table table)
|
void |
setOutputSummaryInformation(boolean outputDocumentInformation)
|
| Methods inherited from class org.apache.poi.hwpf.converter.AbstractWordConverter |
|---|
getCharacterRunTriplet, getFontReplacer, getNumberColumnsSpanned, getNumberRowsSpanned, getPicturesManager, processCharacters, processDeadField, processDocument, processDrawnObject, processDropDownList, processField, processNoteAnchor, processParagraphes, processSingleSection, processSymbol, setFontReplacer, setPicturesManager, tryDeadField |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public WordToTextConverter()
throws javax.xml.parsers.ParserConfigurationException
Creates new instance of WordToTextConverter. Can be used to
output several HWPFDocuments into a single text document.
javax.xml.parsers.ParserConfigurationException - if an internal DocumentBuilder cannot be created
public WordToTextConverter(org.w3c.dom.Document document)
Creates new instance of WordToTextConverter. Can be used to
output several HWPFDocuments into a single text document.
document - XML DOM Document used as storage for text pieces
public WordToTextConverter(TextDocumentFacade textDocumentFacade)
| Method Detail |
|---|
public static java.lang.String getText(DirectoryNode root)
throws java.lang.Exception
java.lang.Exception
public static java.lang.String getText(java.io.File docFile)
throws java.lang.Exception
java.lang.Exception
public static java.lang.String getText(HWPFDocumentCore wordDocument)
throws java.lang.Exception
java.lang.Exception
public static void main(java.lang.String[] args)
throws java.lang.Exception
Java main() interface to interact with WordToTextConverter
Usage: WordToTextConverter infile outfile
Where infile is an input .doc file (Word 95-2007) which will be rendered as plain text into outfile
java.lang.Exception
protected void afterProcess()
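Description copied from class: AbstractWordConverter
Special actions that need to be called after processing complete, like updating stylesheets or building document notes list.
afterProcess in class AbstractWordConverter
public org.w3c.dom.Document getDocument()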
getDocument in class AbstractWordConverter
public java.lang.String getText()
throws java.lang.Exception
java.lang.Exception
public boolean isOutputSummaryInformation()
protected void outputCharacters(org.w3c.dom.Element block,
CharacterRun characterRun,
java.lang.String text)
outputCharacters in class AbstractWordConverter
protected void processBookmarks(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range range,
int currentTableLevel,
java.util.List<Bookmark> rangeBookmarks)
Description copied from class: AbstractWordConverter
Wrap range into bookmark(s) and process it.
processBookmarks in class AbstractWordConverter
protected void processDocumentInformation(SummaryInformation summaryInformation)
processDocumentInformation in class AbstractWordConverter
public void processDocumentPart(HWPFDocumentCore wordDocument,
Range range)
processDocumentPart in class AbstractWordConverter
protected void processDrawnObject(HWPFDocument doc,
CharacterRun characterRun,
OfficeDrawing officeDrawing,
java.lang.String path,
org.w3c.dom.Element block)
processDrawnObject in class AbstractWordConverter
protected void processEndnoteAutonumbered(HWPFDocument wordDocument,
int noteIndex,
org.w3c.dom.Element block,
Range endnoteTextRange)
processEndnoteAutonumbered in class AbstractWordConverter
protected void processFootnoteAutonumbered(HWPFDocument wordDocument,
int noteIndex,
org.w3c.dom.Element block,
Range footnoteTextRange)
processFootnoteAutonumbered in class AbstractWordConverter
protected void processHyperlink(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range textRange,
int currentTableLevel,
java.lang.String hyperlink)
processHyperlink in class AbstractWordConverter
protected void processImage(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture)
processImage in class AbstractWordConverter
protected void processImage(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture,
java.lang.String url)
processImage in class AbstractWordConverter
protected void processImageWithoutPicturesManager(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture)
processImageWithoutPicturesManager in class AbstractWordConverter
protected void processLineBreak(org.w3c.dom.Element block,
CharacterRun characterRun)
processLineBreak in class AbstractWordConverter
protected void processNote(HWPFDocument wordDocument,
org.w3c.dom.Element block,
Range noteTextRange)
protected boolean processOle2(HWPFDocument wordDocument,
org.w3c.dom.Element block,
Entry entry)
throws java.lang.Exception
processOle2 in class AbstractWordConverter
java.lang.Exception
protected void processPageBreak(HWPFDocumentCore wordDocument,
org.w3c.dom.Element flow)
processPageBreak in class AbstractWordConverter
protected void processPageref(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range textRange,
int currentTableLevel,
java.lang.String pageref)
processPageref in class AbstractWordConverter
protected void processParagraph(HWPFDocumentCore wordDocument,
org.w3c.dom.Element parentElement,
int currentTableLevel,
Paragraph paragraph,
java.lang.String bulletText)
processParagraph in class AbstractWordConverter
protected void processSection(HWPFDocumentCore wordDocument,
Section section,
int s)
processSection in class AbstractWordConverter
protected void processTable(HWPFDocumentCore wordDocument,
org.w3c.dom.Element flow,
Table table)
processTable in class AbstractWordConverter
public void setOutputSummaryInformation(boolean outputDocumentInformation)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||