143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// This is the function that RECURSIVELY extracts blocks of name/value pairs.
//
// Input shape handled by the loop below:
//   - whitespace and comments between tokens are skipped,
//   - a '}' ends the current nested block (it is left in the stream for the caller),
//   - otherwise a name must follow, and after it either '{' ... '}' (a nested
//     block, which is appended to `blocks`) or a value (extracted and discarded).
//
// Throws parse_to_blocks_error (line, column, message) on malformed input.
//
void parse_to_blocks(
  std::istream & ins,       // text to parse
  block_list   & blocks,    // resulting list of blocks being created
  bool           first,     // true if this is the top-level block
  std::size_t  & line,      // line number
  std::size_t  & position ) // absolute position in text of first character in current line
{
  // Helper to build parsing errors with line and column information.
  // The column is the 1-based offset of the read position within the current line.
  auto error = [&]( auto message )
  {
    // tellg() reports failure while failbit/badbit is set, so the stream
    // state must be cleared before asking for the current offset:
    // https://stackoverflow.com/questions/13732338/
    ins.clear();
    const auto offset = static_cast<std::size_t>( ins.tellg() );
    return parse_to_blocks_error( line, offset - position + 1, message );
  };

  // Our loop has two sequential actions:
  //   first extract a name,
  //   then extract either a value or a nested block.
  // The two actions are fairly alike, but require different enough
  // behaviors that we just unroll the action into two similar pieces of code.
  while (true)
  {
    // . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    // (1) Extract the name

    // Leading whitespace?
    extract_whitespace( ins, line, position );

    // EOF? Only acceptable at the top level; a nested block still needs its '}'.
    if (ins.eof())
    {
      if (first) return;
      throw error( "unexpected EOF" );
    }

    // Comment? Start over so that any number of comments may precede a name.
    if (extract_comment( ins, line, position ))
      continue;

    // End of block? The '}' is only peeked at: the caller consumes it.
    if (ins.peek() == '}')
    {
      if (first) throw error( "unexpected }" );
      return;
    }

    // There must be a name here
    std::string name;
    if (!extract_name( ins, name ))
      throw error( "expected name" );

    // . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    // (2) Extract the associated block or value

    // Skip whitespace and ANY number of comments between the name and what
    // follows it, checking for EOF after each one (a name with nothing after
    // it is always an error, even at the top level).
    do
    {
      extract_whitespace( ins, line, position );
      if (ins.eof())
        throw error( "unexpected EOF" );
    }
    while (extract_comment( ins, line, position ));

    // End of block? A name must not be directly followed by '}'.
    if (ins.peek() == '}')
      throw error( "unexpected }" );

    // Begin block data?
    if (extract_character( ins, '{' ))
    {
      // Remember the new entry by index rather than by reference: the
      // recursive call appends to `blocks`, which could invalidate a
      // reference if block_list reallocates its storage.
      const auto index = blocks.size();
      blocks.push_back({ name, static_cast<std::size_t>( ins.tellg() ), 0 });

      parse_to_blocks( ins, blocks, false, line, position );

      if (!extract_character( ins, '}' ))
        throw error( "missing }" );

      // NOTE(review): if extract_character consumes the '}', tellg() is
      // already one past it; the extra + 1 is kept unchanged -- confirm
      // what `end` is meant to denote.
      blocks[index].end = static_cast<std::size_t>( ins.tellg() ) + 1;
      continue;
    }

    // Not block data, must be a value (only blocks are recorded; the value
    // itself is not kept).
    std::string value;
    if (!extract_value( ins, value ))
      throw error( "expected value" );
  }
}
} // namespace helper
//-------------------------------------------------------------------------------------------------
// Parse a string into a list of blocks
//
auto parse_to_blocks( const std::string & text )
{
block_list blocks;
std::size_t line = 1;
std::size_t position = 0;
std::istringstream iss( text, std::ios::binary );
helper::parse_to_blocks( iss, blocks, true, line, position );
return blocks;
}
//-------------------------------------------------------------------------------------------------
int main( int argc, char ** argv )
//-------------------------------------------------------------------------------------------------
// Loads the file named by argv[1] (or "example.txt" when no argument is given),
// indexes its blocks, and prints one table row per block.
// Any exception is reported on stderr and turned into exit code 1.
try
{
  // (This just helped me play around with it)
  const char * filename = (argc > 1)
    ? argv[1]
    : "example.txt";

  // As per your example, we wish to load the file into memory
  // (presumably for reference later)
  //
  // We load the file as TEXT (with newline conversions to '\n'),
  // but then parse it as binary.
  //
  // We could technically avoid the whole in-memory buffer and
  // just parse directly from stream, but you'd have to make sure
  // to open a binary stream for the error reporting to be accurate.
  //
  auto raw_data = load_file_as_text( filename );
  auto blocks = parse_to_blocks( raw_data );

  // Let's print our results.
  std::cout
    << "block begin end name\n"
    << "----- ----- ----- -------------\n";

  // Rows are numbered from 1. Iterate by const reference so that no block
  // (and its name string) is copied just to be printed.
  std::size_t n = 1;
  for (const auto & block : blocks)
    std::cout
      << std::setw(5) << (n++) << " "
      << std::setw(5) << block.begin << " "
      << std::setw(5) << block.end << " "
      << block.name << "\n";
}
// Errors that we got from indexing the blocks with our parse function
catch (const parse_to_blocks_error & e)
{
  std::cerr << "[line:" << e.line << ", column:" << e.column << "]: " << e.what() << "\n";
  return 1;
}
// Any other random exception, just to be complete
catch (const std::exception & e)
{
  std::cerr << e.what() << "\n";
  return 1;
}
// meh, if being complete, then exceptions that aren't std::exception-derived objects
// (like if you `throw 42;`)
catch (...)
{
  std::cerr << "something failed and I don't know what it is!\n";
  return 1;
}
| |