-
Notifications
You must be signed in to change notification settings - Fork 0
/
CodeLineCounter.cpp
168 lines (155 loc) · 5.81 KB
/
CodeLineCounter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#include "stdafx.h"
#include "CodeLineCounter.h"
namespace SOUI
{
/// Text encodings that an input buffer can be identified as.
/// The enumerator order (and therefore each numeric value) is significant
/// and must not be changed.
enum encoding
{
    /// UTF-8.
    encoding_utf8,
    /// UTF-16, little-endian byte order.
    encoding_utf16_le,
    /// UTF-16, big-endian byte order.
    encoding_utf16_be,
    /// UTF-16 in the platform's native byte order.
    encoding_utf16,
    /// UTF-32, little-endian byte order.
    encoding_utf32_le,
    /// UTF-32, big-endian byte order.
    encoding_utf32_be,
    /// UTF-32 in the platform's native byte order.
    encoding_utf32,
    /// Whatever encoding wchar_t uses (either UTF-16 or UTF-32).
    encoding_wchar,
    /// Single-byte text; also the fallback when no byte-order mark is found.
    encoding_latin1
};
/// Guesses the encoding of a buffer from its first four bytes by looking for
/// a byte-order mark (BOM).
///
/// @param d0 First byte of the buffer.
/// @param d1 Second byte of the buffer.
/// @param d2 Third byte of the buffer.
/// @param d3 Fourth byte of the buffer.
/// @return The encoding whose BOM matches, or encoding_latin1 when no known
///         BOM is present.
encoding guess_buffer_encoding(BYTE d0, BYTE d1, BYTE d2, BYTE d3)
{
    // Dispatch on the leading byte; each known BOM starts with a distinct one,
    // except FF FE which is shared by UTF-32 LE and UTF-16 LE.
    switch (d0)
    {
    case 0x00: // 00 00 FE FF -> UTF-32 big-endian
        if (d1 == 0x00 && d2 == 0xfe && d3 == 0xff)
            return encoding_utf32_be;
        break;

    case 0xff: // FF FE ..    -> UTF-32 LE when followed by 00 00, else UTF-16 LE
        if (d1 == 0xfe)
            return (d2 == 0x00 && d3 == 0x00) ? encoding_utf32_le : encoding_utf16_le;
        break;

    case 0xfe: // FE FF       -> UTF-16 big-endian
        if (d1 == 0xff)
            return encoding_utf16_be;
        break;

    case 0xef: // EF BB BF    -> UTF-8
        if (d1 == 0xbb && d2 == 0xbf)
            return encoding_utf8;
        break;

    default:
        break;
    }

    // No known BOM detected: fall back to the single-byte (ANSI) assumption.
    return encoding_latin1;
}
/// Result of classifying a single line, also used as the scanner state that
/// is carried from one line to the next.
enum remstate
{
    /// No classification; also the initial scanner state.
    none,
    /// The line is empty once trimmed.
    blank,
    /// The line is treated as code.
    normal,
    /// The line is a remark that is complete on this line.
    singlerem,
    /// A multi-line remark is open at the end of the line.
    multirem1,
    /// The open multi-line remark is closed on this line.
    multirem2,
    /// The open multi-line remark is closed on this line and the rest of the
    /// line opens another one.
    multirem21
};
/**
 * Classifies one line of source text as blank, code or remark (comment).
 *
 * @param str      The line text (the caller strips the line terminator).
 * @param config   Supplies the remark markers that are read here:
 *                 strSingleLineRemark, strMultiLinesRemarkBegin and
 *                 strMultiLinesRemarkEnd.
 * @param curState multirem1 when the line starts inside an open multi-line
 *                 remark; any other value means it starts outside of one.
 * @return blank when the line is empty once trimmed. Otherwise:
 *         - with curState == multirem1:
 *             none       the end marker is not on this line (remark stays open),
 *             multirem2  the remark is closed on this line,
 *             multirem21 the remark is closed and the rest of the line opens
 *                        a new multi-line remark;
 *         - with any other curState:
 *             singlerem  the line starts with the single-line marker, or it
 *                        starts with the multi-line begin marker and ends with
 *                        the multi-line end marker,
 *             multirem1  the line starts a multi-line remark that is still
 *                        open at the end of the line,
 *             normal     everything else (treated as code).
 */
remstate HandlerLine(const SStringW & str,const CCodeConfig & config,remstate curState)
{
    // NOTE(review): TrimBlank() is assumed to strip leading and trailing
    // whitespace - confirm against the SStringW implementation.
    SStringW strTrimmed = str;
    strTrimmed.TrimBlank();
    if(strTrimmed.IsEmpty())
        return blank;

    if(curState == multirem1)
    {
        // Inside a multi-line remark: look for the end marker.
        const int nEndPos = str.Find(config.strMultiLinesRemarkEnd);
        if(nEndPos == -1)
            return none;

        // Classify whatever follows the end marker to detect a remark that is
        // re-opened on the same line.
        const int nTailLen = str.GetLength() - nEndPos - config.strMultiLinesRemarkEnd.GetLength();
        const remstate stTail = HandlerLine(str.Right(nTailLen),config,normal);
        return stTail == multirem1 ? multirem21 : multirem2;
    }

    // Outside a remark: the markers are matched against the trimmed text so
    // that indentation does not hide them.
    const bool bSingleLineRemark = !config.strSingleLineRemark.IsEmpty()
        && strTrimmed.Left(config.strSingleLineRemark.GetLength()) == config.strSingleLineRemark;
    const bool bMultiLineRemark = !config.strMultiLinesRemarkBegin.IsEmpty()
        && strTrimmed.Left(config.strMultiLinesRemarkBegin.GetLength()) == config.strMultiLinesRemarkBegin;

    if(bMultiLineRemark)
    {
        // The begin marker was matched on the trimmed text, so it has to be
        // removed from the trimmed text as well. Cutting it from the raw line
        // would drop leading whitespace instead of the marker and leave the
        // marker in the remainder, where it can be mistaken for an end marker
        // (for example when begin and end markers are identical).
        const int nBodyLen = strTrimmed.GetLength() - config.strMultiLinesRemarkBegin.GetLength();
        const remstate stBody = HandlerLine(strTrimmed.Right(nBodyLen),config,multirem1);
        if(stBody != multirem2)
            return multirem1; // still open (or re-opened) at the end of the line

        // Opened and closed on the same line. Compare the end of the trimmed
        // text so that trailing whitespace after the end marker does not turn
        // a remark-only line into a code line.
        if(strTrimmed.Right(config.strMultiLinesRemarkEnd.GetLength()) == config.strMultiLinesRemarkEnd)
            return singlerem;

        // Something other than the end marker terminates the line: count it as code.
        return normal;
    }

    return bSingleLineRemark ? singlerem : normal;
}
/**
 * Counts the code, remark and blank lines of a text file.
 *
 * The encoding is guessed from the byte-order mark. Only UTF-16 LE, UTF-8
 * (with BOM) and files without a known BOM (converted with CP_ACP) are
 * processed; every other encoding makes the function return FALSE.
 *
 * Known limitation: the file is read in pieces of at most 1023 characters, so
 * a physical line longer than that is classified and counted once per piece.
 *
 * @param pszFileName  Path of the file to scan.
 * @param config       Remark markers, forwarded to HandlerLine.
 * @param nCodeLines   Receives the number of code lines.
 * @param nRemarkLines Receives the number of remark lines.
 * @param nBlankLines  Receives the number of blank lines.
 * @return TRUE when the file was opened and scanned; FALSE when it could not
 *         be opened, could not be positioned, or uses an unsupported encoding.
 *         The three counters are only written when the scan is performed.
 */
BOOL CountCodeLines( LPCTSTR pszFileName, const CCodeConfig & config,int & nCodeLines,int & nRemarkLines,int & nBlankLines )
{
    FILE *f = _tfopen(pszFileName,_T("rb"));
    if(!f) return FALSE;

    // Sniff the byte-order mark. Bytes that could not be read stay zero.
    BYTE bom[4]={0};
    const size_t nRead = fread(bom,1,4,f);
    encoding enc = guess_buffer_encoding(bom[0],bom[1],bom[2],bom[3]);
    // FF FE followed only by the zero padding above is a UTF-16 LE BOM in a
    // file that is too short to carry a UTF-32 BOM.
    if(enc == encoding_utf32_le && nRead < 4)
        enc = encoding_utf16_le;

    // Length of the BOM to skip, or -1 for encodings that are not supported.
    long nBomLen = -1;
    if(enc == encoding_utf16_le)
        nBomLen = 2;
    else if(enc == encoding_utf8)
        nBomLen = 3;
    else if(enc == encoding_latin1)
        nBomLen = 0;

    // Position right behind the BOM using an absolute offset. A relative seek
    // that assumes four bytes were read fails on files shorter than four
    // bytes and leaves the stream at end-of-file.
    const BOOL canHandle = (nBomLen >= 0 && fseek(f,nBomLen,SEEK_SET) == 0) ? TRUE : FALSE;
    if(canHandle)
    {
        nCodeLines = 0;
        nBlankLines = 0;
        nRemarkLines = 0;
        remstate stCur = none;
        for(;;)
        {
            wchar_t szLine[1024];
            szLine[0] = 0;
            if(enc == encoding_utf16_le)
            {
                // NOTE(review): relies on the CRT reading raw wchar_t units
                // from a binary-mode stream - confirm for the target runtime.
                if(!fgetws(szLine,1024,f))
                    break;
            }else
            {
                char szLine2[1024];
                if(!fgets(szLine2,1024,f))
                    break;
                // Never leave stale data in szLine if the conversion fails.
                if(MultiByteToWideChar(enc == encoding_utf8? CP_UTF8:CP_ACP,0,szLine2,-1,szLine,1024) == 0)
                    szLine[0] = 0;
            }
            SStringW strLine(szLine);
            strLine.TrimRight('\n');// strip the trailing line feed
            strLine.TrimRight('\r');// strip the trailing carriage return
            const remstate st = HandlerLine(strLine,config,stCur);
            if(stCur == multirem1)
            {
                // Inside a multi-line remark every non-blank line is a remark line.
                if(st != blank)
                    nRemarkLines ++;
                else
                    nBlankLines ++;
                if(st == multirem2)
                    stCur = none;       // remark closed on this line
                else if(st == multirem21)
                    stCur = multirem1;  // closed and re-opened on this line
            }else if(st == singlerem)
            {
                nRemarkLines ++;
            }else if(st == blank)
            {
                nBlankLines ++;
            }else if(st == multirem1)
            {
                // A multi-line remark starts here and continues on the next line.
                nRemarkLines ++;
                stCur = st;
            }else
            {
                nCodeLines ++;
            }
        }
    }
    fclose(f);
    return canHandle;
}
}