
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Recursively index the `name { ... }` blocks found in a stream of name/value pairs.
//
// Every loop iteration reads one name and then EITHER a nested `{ ... }` block
// OR a plain value. Plain values are extracted and discarded; for each nested
// block an entry is appended to `blocks` before recursing into it, so a parent
// block always precedes its children in the list.
//
// Parameters:
//   ins       text to parse
//   blocks    list of blocks being built (appended to, never cleared)
//   first     true for the top-level call, false for a nested `{ ... }` body
//   line      current line number; handed to the whitespace/comment helpers
//   position  absolute offset of the first character of the current line;
//             used to turn tellg() into a column number for error reports
//
// Returns:
//   - top level (first == true):  when EOF is reached where a name could start
//   - nested   (first == false):  when the next character is '}', which is left
//                                 in the stream for the caller to extract
//
// Throws parse_to_blocks_error (built from line, column, and message) for:
//   "unexpected EOF", "unexpected }", "expected name", "missing }", "expected value"
//
void parse_to_blocks(
  std::istream & ins,       // text to parse
  block_list   & blocks,    // resulting list of blocks being created
  bool           first,     // true if this is the top-level block
  std::size_t  & line,      // line number
  std::size_t  & position ) // absolute position in text of first character in current line
{
  // Helper to build a parsing error carrying line and column information.
  // The column is the current stream offset relative to the start of the line, 1-based.
  auto error = [&]( auto message )
  {
    // tellg() fails (returns -1) while failbit/eofbit-triggered failure is set,
    // so the stream state must be cleared before asking for the position:
    // https://stackoverflow.com/questions/13732338/
    ins.clear();
    return parse_to_blocks_error(
      line, static_cast<std::size_t>( ins.tellg() ) - position + 1, message );
  };

  // The loop has two sequential actions:
  //   first extract a name,
  //   then extract either a value or a nested block.
  // The two actions are alike, but differ enough in their EOF and '}' handling
  // that they are written out as two similar pieces of code.
  while (true)
  {
    // . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    // (1) Extract the name

    // Leading whitespace?
    extract_whitespace( ins, line, position );

    // EOF? Legal only at the top level; inside a block the closing '}' is still owed.
    if (ins.eof())
    {
      if (first) return;
      throw error( "unexpected EOF" );
    }

    // Comment? Start over so whitespace/EOF/further comments are re-examined.
    if (extract_comment( ins, line, position ))
      continue;

    // End of block? Only peek: the caller that consumed the '{' consumes the '}'.
    if (ins.peek() == '}')
    {
      if (first) throw error( "unexpected }" );
      return;
    }

    // There must be a name here
    std::string name;
    if (!extract_name( ins, name ))
      throw error( "expected name" );

    // . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    // (2) Extract the associated block or value

    // Skip leading whitespace and ANY number of comments between the name and
    // what follows it. EOF is re-checked after every comment, mirroring the
    // whitespace/EOF/comment cycle used in part (1): a name with nothing after
    // it is always reported as "unexpected EOF".
    do
    {
      extract_whitespace( ins, line, position );
      if (ins.eof())
        throw error( "unexpected EOF" );
    }
    while (extract_comment( ins, line, position ));

    // End of block? A name cannot be the last thing in a block.
    if (ins.peek() == '}')
      throw error( "unexpected }" );

    // Begin block data?
    if (extract_character( ins, '{' ))
    {
      // Remember the entry by INDEX, not by reference: the recursive call
      // appends to `blocks` as well.
      const auto index = blocks.size();
      blocks.push_back({ name, static_cast<std::size_t>( ins.tellg() ), 0 });

      parse_to_blocks( ins, blocks, false, line, position );

      if (!extract_character( ins, '}' ))
        throw error( "missing }" );

      // The end offset is only known once the whole nested body has been parsed
      blocks[index].end = static_cast<std::size_t>( ins.tellg() ) + 1;
      continue;
    }

    // Not block data, must be a value (extracted to advance the stream, then dropped)
    std::string value;
    if (!extract_value( ins, value ))
      throw error( "expected value" );
  }
}
} // namespace helper
//-------------------------------------------------------------------------------------------------
// Parse a string into a list of blocks
//
auto parse_to_blocks( const std::string & text )
{
block_list blocks;
std::size_t line = 1;
std::size_t position = 0;
std::istringstream iss( text, std::ios::binary );
helper::parse_to_blocks( iss, blocks, true, line, position );
return blocks;
}
//-------------------------------------------------------------------------------------------------
// Program entry point: load a text file, index its blocks, and print a table
// with one row per block (ordinal, begin offset, end offset, name).
//
// The file name is taken from the first command-line argument and defaults
// to "example.txt". The whole function body is a function-try-block, so any
// exception is reported on stderr and turned into exit code 1.
//
int main( int argc, char ** argv )
//-------------------------------------------------------------------------------------------------
try
{
  // Allow the input file to be chosen from the command line
  const char * filename = (argc > 1)
    ? argv[1]
    : "example.txt";

  // The file is loaded into memory in full (so the text stays available for
  // later reference alongside the block offsets).
  //
  // It is loaded as TEXT (with newline conversions to '\n'),
  // but then parsed as binary.
  //
  // Parsing straight from a file stream would avoid the in-memory buffer,
  // but that stream would have to be opened in binary mode for the
  // line/column error reporting to be accurate.
  //
  auto raw_data = load_file_as_text( filename );
  auto blocks   = parse_to_blocks( raw_data );

  // Print the results as a table
  std::cout
    << "block begin end name\n"
    << "----- ----- ----- -------------\n";

  std::size_t n = 1;
  // Bind by const reference: each block carries a name string that a by-value
  // loop variable would copy on every iteration.
  for (const auto & block : blocks)
    std::cout
      << std::setw(5) << (n++) << " "
      << std::setw(5) << block.begin << " "
      << std::setw(5) << block.end << " "
      << block.name << "\n";
}
// Errors raised by the parse function while indexing the blocks
catch (const parse_to_blocks_error & e)
{
  std::cerr << "[line:" << e.line << ", column:" << e.column << "]: " << e.what() << "\n";
  return 1;
}
// Any other standard exception
catch (const std::exception & e)
{
  std::cerr << e.what() << "\n";
  return 1;
}
// Anything thrown that is not derived from std::exception
// (for example `throw 42;`)
catch (...)
{
  std::cerr << "something failed and I don't know what it is!\n";
  return 1;
}
| |