1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
|
// may tried RandomAccess/SequentialScan
MemoryMapped MemFile(FilterBase.BaseFileName, MemoryMapped::WholeFile, MemoryMapped::RandomAccess);
// point to start of memory file
char* start = (char*)MemFile.getData();
// dummy in my case
char* tmpBuffer = start;
// looping counter
uint64_t i = 0;
// pre-allocate result vector
MyVector.resize(300000);
// Line counter
int LnCnt = 0;
//no. of field
int NumOfField=43;
//delimiter count, num of field + 1 since the leading and trailing delimiter are virtual
int DelimCnt=NoOfField+1;
//Delimiter position. May use new to allocate at run time
// or even use vector of integer
// This is to store the delimiter position in each line
// since the position is relative to start of file. if file is extremely
// large, may need to change from int to unsigner, long or even unsigned long long
static int DelimPos[DelimCnt];
// Max number of field need to read usually equal to NumOfField, can be smaller, eg in my case, I only need 4 fields
// from first 15 field, in this case, can assign 15 to MaxFieldNeed
int MaxFieldNeed=NumOfField;
// keep track how many comma read each line
int DelimCounter=0;
// define field and line seperator
char FieldDelim=',';
char LineSep='\n';
// 1st field, "virtual Delimiter" position
DelimPos[CommaCounter]=-1
DelimCounter++;
// loop through the whole memory field, 1 and only once
for (i = 0; i < MemFile.size();i++)
{
// grab all position of delimiter in each line
if ((MemFile[i] == FieldDelim) && (DelimCounter<=MaxFieldNeed)){
DelimPos[DelimCounter] = i;
DelimCounter++;
};
// grab all values when end of line hit
if (MemFile[i] == LineSep) {
// no need to use if (DelimCounter==NumOfField) just assign anyway, waste a little bit
// memory in integer array but gain performance
DelimPos[DelimCounter] = i;
// I know exactly what the format is and what field(s) I want
// a more general approach (as a CSV reader) may put all fields
// into vector of vector of string
// With *EFFORT* one may modify this piece of code so that it can parse
// different format at run time eg similar to:
// fscanf(fstream,"%d,%f....
// also, this piece of code cannot handle complex CSV e.g.
// Peter,28,157CM
// John,26,167CM
// "Mary,Brown",25,150CM
MyVector.StrField = string(strat+DelimPos[0] + 1, strat+DelimPos[1] - 1);
MyVector.IntField = strtol(strat+DelimPos[3] + 1,&tmpBuffer,10);
MyVector.IntField2 = strtol(strat+DelimPos[8] + 1,&tmpBuffer,10);
MyVector.FloatField = strtof(start + DelimPos[14] + 1,&tmpBuffer);
// reset Delim counter each line
DelimCounter=0
// previous line seperator treat as first delimiter of next line
DelimPos[DelimCounter] = i;
DelimCounter++
LnCnt++;
}
}
MyVector.resize(LnCnt);
MyVector.shrink_to_fit();
MemFile.close();
};
| |