-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfileReader.cpp
304 lines (250 loc) · 8.58 KB
/
fileReader.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
//
// fileReader.cpp
//
#include "fileReader.h"
#include <unistd.h>
#include <string.h> // strerror()
#include <fcntl.h> // open()
#include <sys/stat.h> // fstat()
#include <sys/mman.h> // mmap()
#include <assert.h> // assert()
#include <iostream> // std::cout, std::cerr
//
// Log reader implementation
//
//
// Opens 'fileName' read-only and maps the byte range [readBeginOffset, readEndOffset)
// into memory so that it can be consumed by the read functions.
//
// Parameters:
//   fileName        - path of the file to open; must not be empty.
//   readBeginOffset - file offset of the first byte to read.
//   readEndOffset   - file offset one past the last byte to read, or -1 to read up to EOF.
//
// Returns true on success, including the "nothing to read" cases (empty range, empty file,
// begin offset exactly at EOF) where no mapping is created. Returns false and fills in
// mErrMsg on failure. The file descriptor is always closed before returning.
//
bool FileReader::OpenFile(const std::string& fileName,
                          off_t readBeginOffset,
                          off_t readEndOffset /* -1 for EOF */)
{
    CloseFile(); // Clean up first...just in case if FileReader is re-used

    if(fileName.empty())
    {
        mErrMsg = "Invalid (empty) log file name";
        return false;
    }

    mFileName = fileName;
    mReadBeginOffset = readBeginOffset;
    mReadEndOffset = readEndOffset;

    if(mReadBeginOffset == mReadEndOffset)
    {
        return true; // Nothing to read
    }
    else if(mReadEndOffset != -1 && mReadBeginOffset > mReadEndOffset)
    {
        mErrMsg = "Read begin offset " + std::to_string(mReadBeginOffset) + " is past read end offset "
                + std::to_string(mReadEndOffset) + " of the file '" + mFileName + "'";
        return false;
    }

    const int fd = open(mFileName.c_str(), O_RDONLY);
    if(fd < 0)
    {
        const int errNo = errno;
        mErrMsg = "Could not open '" + mFileName + "' because of: ";
        mErrMsg += strerror(errNo);
        return false;
    }

    // Get the file size and then mmap the file into memory.
    struct stat fileStats;
    if(fstat(fd, &fileStats) != 0)
    {
        const int errNo = errno;
        mErrMsg = "Could not fstat '" + mFileName + "' because of: ";
        mErrMsg += strerror(errNo);
        close(fd);
        return false;
    }

    // Remember file size & mode
    mFileSize = fileStats.st_size;
    mFileMode = fileStats.st_mode;

    // If the file is empty then we have nothing to map, just return
    if(mFileSize == 0)
    {
        close(fd); // Do not leak the descriptor on this early return
        // Collapse the read range so that later reads see "nothing to read" instead of
        // computing a size from an unresolved -1 end offset with no mapping in place.
        mReadEndOffset = mReadBeginOffset;
        return true;
    }

    // Make sure that read end offset is NOT greater than the file size
    if(mReadEndOffset > mFileSize)
    {
        mErrMsg = "Read end offset " + std::to_string(mReadEndOffset) + " is greater than file size "
                + std::to_string(mFileSize) + " of the file '" + mFileName + "'";
        close(fd);
        return false;
    }

    // Are we going to read to the EOF?
    if(mReadEndOffset < 0)
        mReadEndOffset = mFileSize;

    // Now that the end offset is resolved, the begin offset must lie within [0, end].
    // Otherwise the mapping length below would wrap around or the read range would
    // point outside of the file.
    if(mReadBeginOffset < 0 || mReadBeginOffset > mReadEndOffset)
    {
        mErrMsg = "Read begin offset " + std::to_string(mReadBeginOffset) + " is outside of the file '"
                + mFileName + "' of size " + std::to_string(mFileSize);
        close(fd);
        return false;
    }

    // Begin offset is exactly at the resolved end: nothing to read. A zero-length
    // mmap() would fail, so do not attempt to map.
    if(mReadBeginOffset == mReadEndOffset)
    {
        close(fd);
        return true;
    }

    // Offset for mmap() must be page aligned
    const long pageSize = sysconf(_SC_PAGE_SIZE);
    if(pageSize <= 0)
    {
        mErrMsg = "Could not get the system page size to map '" + mFileName + "'";
        close(fd);
        return false;
    }
    const off_t alignedOffset = mReadBeginOffset & ~(static_cast<off_t>(pageSize) - 1);

    // Get the length of mapping
    const size_t mapLength = static_cast<size_t>(mReadEndOffset - alignedOffset);

    // Map in the file.
    void* addr = mmap(nullptr, mapLength, PROT_READ, MAP_PRIVATE, fd, alignedOffset);
    const int mapErrNo = errno; // Capture before close() gets a chance to overwrite it
    close(fd);                  // Can close file now, no longer need it

    // Validate mmap() result
    if(addr == MAP_FAILED)
    {
        mErrMsg = "Could not map '" + mFileName + "' because of: ";
        mErrMsg += strerror(mapErrNo);
        return false;
    }

    mMapAddr = addr;
    mMapLength = mapLength;

    // Get the actual file read address (skip the bytes added by page alignment)
    mReadAddr = static_cast<unsigned char*>(addr) + (mReadBeginOffset - alignedOffset);
    return true;
}
void FileReader::CloseFile()
{
mErrMsg.clear();
mFileName.clear();
mFileMode = 0;
// Unmap the file.
if(mMapAddr && munmap(mMapAddr, mMapLength) != 0)
{
// Note: We should treat it as a warning, not an error
std::cerr << "Failed to unmap '" + mFileName + "' because of: " + strerror(errno) << std::endl;
}
mMapAddr = nullptr;
mMapLength = 0;
mReadAddr = nullptr;
mReadSize = 0;
mReadBeginOffset = 0;
mReadEndOffset = 0;
}
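//
// Note(review): an earlier note here said that only a single ReadFile() per OpenFile() is
// supported. The read functions below keep a running mReadSize, so consecutive bounded
// reads continue where the previous one stopped. TODO: confirm whether that restriction
// still applies.
//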
off_t FileReader::ReadRegularFile(/*out*/ std::string& buf,
ssize_t maxSize /* -1 for all */)
{
buf.clear();
size_t readMaxSize = (mReadEndOffset - mReadBeginOffset);
// End of last read - that is an offset to this new read
off_t readOffset = mReadBeginOffset + mReadSize;
if(mReadSize >= readMaxSize)
{
// Nothing left to read
}
else if(maxSize < 0)
{
// Read all at once
buf.assign((char*)mReadAddr, readMaxSize);
mReadSize = readMaxSize;
}
else
{
assert(mReadSize < readMaxSize);
ssize_t remainingSize = (readMaxSize - mReadSize);
size_t toRead = (remainingSize > maxSize ? maxSize : remainingSize);
buf.assign((char*)mReadAddr + mReadSize, toRead);
mReadSize += toRead;
}
return readOffset;
}
// Preserve sparseness support
off_t FileReader::ReadSparseFile(/*out*/ std::string& buf,
ssize_t maxSize /* -1 for all */)
{
buf.clear();
size_t readMaxSize = (mReadEndOffset - mReadBeginOffset);
if(mReadSize >= readMaxSize)
{
return (mReadBeginOffset + mReadSize);
}
if(maxSize < 0)
maxSize = readMaxSize;
// If maxSize is smaller than mMaxSparseBlockSize, then don't check
// for sparseness and just read normally
if((size_t)maxSize < mMaxSparseBlockSize)
{
return ReadFile(buf, maxSize);
}
size_t remainingSize = readMaxSize - mReadSize;
size_t sparseBlockSize = 0;
void* readAddr = nullptr;
// Skip sparse blocks
while(remainingSize > 0)
{
sparseBlockSize = (remainingSize > mMaxSparseBlockSize ? mMaxSparseBlockSize : remainingSize);
// std::cout << "01 sparseBlockSize=" << sparseBlockSize
// << ", mMaxSparseBlockSize=" << mMaxSparseBlockSize
// << ", remainingSize=" << remainingSize << std::endl;
readAddr = (unsigned char*)mReadAddr + mReadSize;
mReadSize += sparseBlockSize; // Consider this block read
if(!IsSparse(readAddr, sparseBlockSize))
break;
// Go to the next block
remainingSize -= sparseBlockSize;
}
if(remainingSize == 0)
{
// No data left
return (mReadBeginOffset + mReadSize);
}
// We found un-sparse block (valid data)
void* beginDataAddr = readAddr;
size_t dataSize = sparseBlockSize;
remainingSize -= sparseBlockSize;
assert(mReadSize < readMaxSize || remainingSize == 0);
// Keep reading until next sparse block
while(remainingSize > 0)
{
sparseBlockSize = (remainingSize > mMaxSparseBlockSize ? mMaxSparseBlockSize : remainingSize);
// std::cout << "02 sparseBlockSize=" << sparseBlockSize
// << ", mMaxSparseBlockSize=" << mMaxSparseBlockSize
// << ", remainingSize=" << remainingSize << std::endl;
if(dataSize + sparseBlockSize > (size_t)maxSize)
break; // No more room for data
readAddr = (unsigned char*)mReadAddr + mReadSize;
mReadSize += sparseBlockSize; // Consider this block read
if(IsSparse(readAddr, sparseBlockSize))
break;
// Go to the next block
dataSize += sparseBlockSize;
remainingSize -= sparseBlockSize;
}
// Get all the data
assert(dataSize <= (size_t)maxSize);
buf.assign((char*)beginDataAddr, dataSize);
// Get data offset
return ((unsigned char*)beginDataAddr - (unsigned char*)mReadAddr);
}
// Preserve sparseness support
//
// Checks whether the memory block [addr, addr + size) contains only zero bytes.
//
// Parameters:
//   addr - start of the block; it is not required to be aligned.
//   size - number of bytes to check.
//
// Returns true when every byte is zero (also for size == 0), false otherwise.
//
bool FileReader::IsSparse(void* addr, size_t size)
{
    const unsigned char* bytes = static_cast<const unsigned char*>(addr);

    // Compare a 'long' at a time for a faster performance. Each word is loaded with
    // memcpy() because 'addr' may not be suitably aligned for a direct 'long' access.
    const size_t wordCount = size / sizeof(long);
    for(size_t i = 0; i < wordCount; ++i)
    {
        long word;
        memcpy(&word, bytes + i * sizeof(long), sizeof(word));
        if(word != 0)
            return false;
    }

    // Check the remaining bytes (if we have any)
    for(size_t i = wordCount * sizeof(long); i < size; ++i)
    {
        if(bytes[i] != 0)
            return false;
    }

    return true;
}
size_t FileReader::Checksum(const std::string& fileName)
{
// Open to read entire file
FileReader reader;
if(!reader.OpenFile(fileName, 0, -1))
return 0; // Failed to open
if(reader.mFileSize == 0)
return 0; // The file is empty, nothing to checksum
assert(reader.mReadAddr != nullptr);
std::hash<std::string_view> hash;
return hash(std::string_view((char*)reader.mReadAddr, reader.mFileSize));
}