-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathwordwin.c
209 lines (191 loc) · 5.45 KB
/
wordwin.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/*
* wordwin.c
* Copyright (C) 2002-2005 A.J. van Os; Released under GPL
*
* Description:
* Deal with the WIN internals of a MS Word file
*/
#include "antiword.h"
/*
 * bGetDocumentText - make a list of the text blocks of a Word document
 *
 * pFile	file pointer of the document (handed on to vSplitBlockList)
 * aucHeader	the header block of the document
 *
 * The status word is read from header offset 0x0a. Fast saved (bit 2) and
 * encrypted (bit 8) documents are rejected with an error message. Otherwise
 * one text block is added that starts at the offset stored at 0x18 and whose
 * length is the sum of the five lengths stored at 0x34 .. 0x44; after that
 * vSplitBlockList is called with the individual lengths.
 *
 * Returns TRUE when successful, otherwise FALSE
 */
static BOOL
bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
{
	text_block_type	tTextBlock;
	ULONG	ulBeginOfText;
	ULONG	ulTextLen, ulFootnoteLen;
	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
	UINT	uiQuickSaves;
	USHORT	usDocStatus;
	BOOL	bTemplate, bFastSaved, bEncrypted, bSuccess;

	fail(pFile == NULL);
	fail(aucHeader == NULL);

	DBG_MSG("bGetDocumentText");

	/* Get the status flags from the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bTemplate = (usDocStatus & BIT(0)) != 0;
	DBG_MSG_C(bTemplate, "This document is a Template");
	bFastSaved = (usDocStatus & BIT(2)) != 0;
	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
	DBG_DEC_C(bFastSaved, uiQuickSaves);
	if (bFastSaved) {
		werr(0, "Word2: fast saved documents are not supported yet");
		return FALSE;
	}
	bEncrypted = (usDocStatus & BIT(8)) != 0;
	if (bEncrypted) {
		werr(0, "Encrypted documents are not supported");
		return FALSE;
	}

	/* Get length information */
	ulBeginOfText = ulGetLong(0x18, aucHeader);
	DBG_HEX(ulBeginOfText);
	ulTextLen = ulGetLong(0x34, aucHeader);
	ulFootnoteLen = ulGetLong(0x38, aucHeader);
	ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
	ulMacroLen = ulGetLong(0x40, aucHeader);
	ulAnnotationLen = ulGetLong(0x44, aucHeader);
	DBG_DEC(ulTextLen);
	DBG_DEC(ulFootnoteLen);
	DBG_DEC(ulHdrFtrLen);
	DBG_DEC(ulMacroLen);
	DBG_DEC(ulAnnotationLen);

	/*
	 * Fast saved documents have already been rejected above, so the
	 * document is always described by one single text block here.
	 */
	tTextBlock.ulFileOffset = ulBeginOfText;
	tTextBlock.ulCharPos = ulBeginOfText;
	tTextBlock.ulLength = ulTextLen +
				ulFootnoteLen +
				ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
	tTextBlock.bUsesUnicode = FALSE;
	tTextBlock.usPropMod = IGNORE_PROPMOD;
	bSuccess = bAdd2TextBlockList(&tTextBlock);
	/* Only dump the block when it could not be added */
	DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
	DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
	DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
	DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
	DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);

	if (bSuccess) {
		vSplitBlockList(pFile,
				ulTextLen,
				ulFootnoteLen,
				ulHdrFtrLen,
				ulMacroLen,
				ulAnnotationLen,
				0,
				0,
				0,
				FALSE);
	} else {
		vDestroyTextBlockList();
		werr(0, "I can't find the text of this document");
	}
	return bSuccess;
} /* end of bGetDocumentText */
/*
 * vGetDocumentData - make a list of the data blocks of a Word document
 *
 * pFile	file pointer of the document (not used by this function)
 * aucHeader	the header block of the document
 *
 * When the status word (header offset 0x0a) reports no images, or the
 * current options ask for text, formatted text or XML output, or for no
 * images at all, the data block list is destroyed and nothing else happens.
 * Otherwise one data block is added for the area between the offsets
 * stored at 0x1c and 0xa0. On failure the data block list is destroyed and
 * an error message is issued.
 */
static void
vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
{
	data_block_type	tDataBlock;
	options_type	tOptions;
	ULONG	ulEndOfText, ulBeginCharInfo;
	USHORT	usDocStatus;
	BOOL	bFastSaved, bHasImages, bWantData, bSuccess;

	/* Get the options */
	vGetOptions(&tOptions);

	/* Get the status flags from the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bFastSaved = (usDocStatus & BIT(2)) != 0;
	bHasImages = (usDocStatus & BIT(3)) != 0;

	/* Data blocks are only useful when images exist and are wanted */
	bWantData = bHasImages &&
		tOptions.eConversionType != conversion_text &&
		tOptions.eConversionType != conversion_fmt_text &&
		tOptions.eConversionType != conversion_xml &&
		tOptions.eImageLevel != level_no_images;
	if (!bWantData) {
		/*
		 * No images in the document or text-only output or
		 * no images wanted, so no data blocks will be needed
		 */
		vDestroyDataBlockList();
		return;
	}

	/* A fast saved document always counts as a failure */
	bSuccess = FALSE;
	if (!bFastSaved) {
		/* This datablock is too big, but it contains all images */
		ulEndOfText = ulGetLong(0x1c, aucHeader);
		DBG_HEX(ulEndOfText);
		ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
		DBG_HEX(ulBeginCharInfo);
		if (ulBeginCharInfo <= ulEndOfText) {
			/* An empty data area is fine, a negative one is not */
			bSuccess = ulBeginCharInfo == ulEndOfText;
		} else {
			tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
			tDataBlock.ulDataPos = ulEndOfText;
			tDataBlock.ulFileOffset = ulEndOfText;
			bSuccess = bAdd2DataBlockList(&tDataBlock);
			/* Only dump the block when it could not be added */
			DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
			DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
			DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
		}
	}

	if (bSuccess) {
		return;
	}
	vDestroyDataBlockList();
	werr(0, "I can't find the data of this document");
} /* end of vGetDocumentData */
/*
 * iInitDocumentWIN - initialize a WIN document
 *
 * pFile	file pointer of the document
 * lFilesize	the size of the document file in bytes
 *
 * Reads the header block (the first 384 bytes of the file), determines the
 * Word version with iGetVersionNumber and builds the text block list. When
 * that succeeds the data block list is built and vGetPropertyInfo,
 * vSetDefaultTabWidth and vGetNotesInfo are called with the header.
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocumentWIN(FILE *pFile, long lFilesize)
{
	/* Size of the header block, in bytes */
	enum { WIN_HEADER_SIZE = 384 };
	int	iWordVersion;
	BOOL	bSuccess;
	USHORT	usIdent;
	UCHAR	aucHeader[WIN_HEADER_SIZE];

	fail(pFile == NULL);

	/* A file that cannot hold a complete header is not a WIN document */
	if (lFilesize < WIN_HEADER_SIZE) {
		return -1;
	}

	/* Read the headerblock */
	if (!bReadBytes(aucHeader, WIN_HEADER_SIZE, 0x00, pFile)) {
		return -1;
	}
	/* Get the "magic number" from the header */
	usIdent = usGetWord(0x00, aucHeader);
	DBG_HEX(usIdent);
	/* The caller is expected to offer only these two kinds of file */
	fail(usIdent != 0xa59b &&	/* WinWord 1.x */
		usIdent != 0xa5db);	/* WinWord 2.0 */
	iWordVersion = iGetVersionNumber(aucHeader);
	if (iWordVersion != 1 && iWordVersion != 2) {
		werr(0, "This file is not from ''Win Word 1 or 2''.");
		return -1;
	}

	bSuccess = bGetDocumentText(pFile, aucHeader);
	if (bSuccess) {
		/* The remaining lists are only built once the text is found */
		vGetDocumentData(pFile, aucHeader);
		vGetPropertyInfo(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
		vSetDefaultTabWidth(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
		vGetNotesInfo(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
	}
	return bSuccess ? iWordVersion : -1;
} /* end of iInitDocumentWIN */