From edeb6c67dde020876d95d364fcf1ea853b57654a Mon Sep 17 00:00:00 2001 From: Kajari Ghosh Date: Tue, 15 Nov 2016 22:03:08 -0800 Subject: [PATCH] Consolidate file reading through the new FileReader class/interface. --- include/extractor/raster_source.hpp | 15 +-- include/storage/io.hpp | 168 +++++---------------------- include/storage/serialization.hpp | 171 ++++++++++++++++++++++++++++ include/util/name_table.hpp | 2 +- include/util/range_table.hpp | 16 +++ src/benchmarks/static_rtree.cpp | 9 +- src/contractor/contractor.cpp | 163 ++++++++++++-------------- src/extractor/extractor.cpp | 1 + src/storage/storage.cpp | 54 ++++----- src/util/name_table.cpp | 22 +--- unit_tests/util/io.cpp | 109 +++++++++++++++++- 11 files changed, 439 insertions(+), 291 deletions(-) create mode 100644 include/storage/serialization.hpp diff --git a/include/extractor/raster_source.hpp b/include/extractor/raster_source.hpp index 60c7bcb69c2..05f2e4734a9 100644 --- a/include/extractor/raster_source.hpp +++ b/include/extractor/raster_source.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -43,20 +44,14 @@ class RasterGrid ydim = _ydim; _data.reserve(ydim * xdim); - boost::filesystem::ifstream stream(filepath, std::ios::binary); - if (!stream) - { - throw util::exception("Unable to open raster file."); - } + storage::io::FileReader file_reader(filepath, storage::io::FileReader::HasNoFingerprint); - stream.seekg(0, std::ios_base::end); std::string buffer; - buffer.resize(static_cast(stream.tellg())); - - stream.seekg(0, std::ios_base::beg); + buffer.resize(file_reader.Size()); BOOST_ASSERT(buffer.size() > 1); - stream.read(&buffer[0], static_cast(buffer.size())); + + file_reader.ReadInto(&buffer[0], buffer.size()); boost::algorithm::trim(buffer); diff --git a/include/storage/io.hpp b/include/storage/io.hpp index eb973121d02..4b4e66baaeb 100644 --- a/include/storage/io.hpp +++ b/include/storage/io.hpp @@ -1,14 +1,9 @@ #ifndef OSRM_STORAGE_IO_HPP_ #define 
OSRM_STORAGE_IO_HPP_ -#include "contractor/query_edge.hpp" -#include "extractor/extractor.hpp" -#include "extractor/original_edge_data.hpp" -#include "extractor/query_node.hpp" #include "util/exception.hpp" #include "util/fingerprint.hpp" #include "util/simple_logger.hpp" -#include "util/static_graph.hpp" #include #include @@ -32,6 +27,16 @@ class FileReader boost::filesystem::ifstream input_stream; public: + class LineWrapper : public std::string + { + friend std::istream &operator>>(std::istream &is, LineWrapper &line) + { + return std::getline(is, line); + } + }; + auto GetLineIteratorBegin() { return std::istream_iterator(input_stream); } + auto GetLineIteratorEnd() { return std::istream_iterator(); } + enum FingerprintFlag { VerifyFingerprint, @@ -67,6 +72,7 @@ class FileReader return; const auto &result = input_stream.read(reinterpret_cast(dest), count * sizeof(T)); + if (!result) { if (result.eof()) @@ -78,6 +84,11 @@ class FileReader } } + template void ReadInto(std::vector &target) + { + ReadInto(target.data(), target.size()); + } + template void ReadInto(T &target) { ReadInto(&target, 1); } template T ReadOne() @@ -142,144 +153,23 @@ class FileReader } return result; } -}; - -// To make function calls consistent, this function returns the fixed number of properties -inline std::size_t readPropertiesCount() { return 1; } - -struct HSGRHeader -{ - std::uint32_t checksum; - std::uint64_t number_of_nodes; - std::uint64_t number_of_edges; -}; - -// Reads the checksum, number of nodes and number of edges written in the header file of a `.hsgr` -// file and returns them in a HSGRHeader struct -inline HSGRHeader readHSGRHeader(io::FileReader &input_file) -{ - const util::FingerPrint fingerprint_valid = util::FingerPrint::GetValid(); - const auto fingerprint_loaded = input_file.ReadOne(); - if (!fingerprint_loaded.TestGraphUtil(fingerprint_valid)) - { - util::SimpleLogger().Write(logWARNING) << ".hsgr was prepared with different build.\n" - "Reprocess to get rid 
of this warning."; - } - - HSGRHeader header; - input_file.ReadInto(header.checksum); - input_file.ReadInto(header.number_of_nodes); - input_file.ReadInto(header.number_of_edges); - - BOOST_ASSERT_MSG(0 != header.number_of_nodes, "number of nodes is zero"); - // number of edges can be zero, this is the case in a few test fixtures - - return header; -} - -// Reads the graph data of a `.hsgr` file into memory -// Needs to be called after readHSGRHeader() to get the correct offset in the stream -using NodeT = typename util::StaticGraph::NodeArrayEntry; -using EdgeT = typename util::StaticGraph::EdgeArrayEntry; -inline void readHSGR(io::FileReader &input_file, - NodeT *node_buffer, - const std::uint64_t number_of_nodes, - EdgeT *edge_buffer, - const std::uint64_t number_of_edges) -{ - BOOST_ASSERT(node_buffer); - BOOST_ASSERT(edge_buffer); - input_file.ReadInto(node_buffer, number_of_nodes); - input_file.ReadInto(edge_buffer, number_of_edges); -} - -// Loads datasource_indexes from .datasource_indexes into memory -// Needs to be called after readElementCount() to get the correct offset in the stream -inline void readDatasourceIndexes(io::FileReader &datasource_indexes_file, - uint8_t *datasource_buffer, - const std::uint64_t number_of_datasource_indexes) -{ - BOOST_ASSERT(datasource_buffer); - datasource_indexes_file.ReadInto(datasource_buffer, number_of_datasource_indexes); -} - -// Loads edge data from .edge files into memory which includes its -// geometry, name ID, turn instruction, lane data ID, travel mode, entry class ID -// Needs to be called after readElementCount() to get the correct offset in the stream -inline void readEdges(io::FileReader &edges_input_file, - GeometryID *geometry_list, - NameID *name_id_list, - extractor::guidance::TurnInstruction *turn_instruction_list, - LaneDataID *lane_data_id_list, - extractor::TravelMode *travel_mode_list, - EntryClassID *entry_class_id_list, - util::guidance::TurnBearing *pre_turn_bearing_list, - 
util::guidance::TurnBearing *post_turn_bearing_list, - const std::uint64_t number_of_edges) -{ - BOOST_ASSERT(geometry_list); - BOOST_ASSERT(name_id_list); - BOOST_ASSERT(turn_instruction_list); - BOOST_ASSERT(lane_data_id_list); - BOOST_ASSERT(travel_mode_list); - BOOST_ASSERT(entry_class_id_list); - extractor::OriginalEdgeData current_edge_data; - for (std::uint64_t i = 0; i < number_of_edges; ++i) - { - edges_input_file.ReadInto(current_edge_data); - - geometry_list[i] = current_edge_data.via_geometry; - name_id_list[i] = current_edge_data.name_id; - turn_instruction_list[i] = current_edge_data.turn_instruction; - lane_data_id_list[i] = current_edge_data.lane_data_id; - travel_mode_list[i] = current_edge_data.travel_mode; - entry_class_id_list[i] = current_edge_data.entry_classid; - pre_turn_bearing_list[i] = current_edge_data.pre_turn_bearing; - post_turn_bearing_list[i] = current_edge_data.post_turn_bearing; - } -} -// Loads coordinates and OSM node IDs from .nodes files into memory -// Needs to be called after readElementCount() to get the correct offset in the stream -template -void readNodes(io::FileReader &nodes_file, - util::Coordinate *coordinate_list, - OSMNodeIDVectorT &osmnodeid_list, - const std::uint64_t number_of_coordinates) -{ - BOOST_ASSERT(coordinate_list); - extractor::QueryNode current_node; - for (std::uint64_t i = 0; i < number_of_coordinates; ++i) + std::string ReadLine() { - nodes_file.ReadInto(current_node); - coordinate_list[i] = util::Coordinate(current_node.lon, current_node.lat); - osmnodeid_list.push_back(current_node.node_id); - BOOST_ASSERT(coordinate_list[i].IsValid()); + std::string thisline; + try + { + std::getline(input_stream, thisline); + } + catch (const std::ios_base::failure &e) + { + // EOF is OK here, everything else, re-throw + if (!input_stream.eof()) + throw; + } + return thisline; } -} - -// Reads datasource names out of .datasource_names files and metadata such as -// the length and offset of each name -struct 
DatasourceNamesData -{ - std::vector names; - std::vector offsets; - std::vector lengths; }; -inline DatasourceNamesData readDatasourceNames(io::FileReader &datasource_names_file) -{ - DatasourceNamesData datasource_names_data; - std::vector lines = datasource_names_file.ReadLines(); - for (const auto &name : lines) - { - datasource_names_data.offsets.push_back(datasource_names_data.names.size()); - datasource_names_data.lengths.push_back(name.size()); - std::copy(name.c_str(), - name.c_str() + name.size(), - std::back_inserter(datasource_names_data.names)); - } - return datasource_names_data; -} } } } diff --git a/include/storage/serialization.hpp b/include/storage/serialization.hpp new file mode 100644 index 00000000000..a01d00342cb --- /dev/null +++ b/include/storage/serialization.hpp @@ -0,0 +1,171 @@ +#ifndef OSRM_STORAGE_SERIALIZATION_HPP_ +#define OSRM_STORAGE_SERIALIZATION_HPP_ + +#include "contractor/query_edge.hpp" +#include "extractor/extractor.hpp" +#include "extractor/original_edge_data.hpp" +#include "extractor/query_node.hpp" +#include "storage/io.hpp" +#include "util/exception.hpp" +#include "util/fingerprint.hpp" +#include "util/simple_logger.hpp" +#include "util/static_graph.hpp" + +#include +#include + +#include +#include +#include +#include + +namespace osrm +{ +namespace storage +{ +namespace serialization +{ + +// To make function calls consistent, this function returns the fixed number of properties +inline std::size_t readPropertiesCount() { return 1; } + +struct HSGRHeader +{ + std::uint32_t checksum; + std::uint64_t number_of_nodes; + std::uint64_t number_of_edges; +}; + +// Reads the checksum, number of nodes and number of edges written in the header file of a `.hsgr` +// file and returns them in a HSGRHeader struct +inline HSGRHeader readHSGRHeader(io::FileReader &input_file) +{ + const util::FingerPrint fingerprint_valid = util::FingerPrint::GetValid(); + const auto fingerprint_loaded = input_file.ReadOne(); + if 
(!fingerprint_loaded.TestGraphUtil(fingerprint_valid)) + { + util::SimpleLogger().Write(logWARNING) << ".hsgr was prepared with different build.\n" + "Reprocess to get rid of this warning."; + } + + HSGRHeader header; + input_file.ReadInto(header.checksum); + input_file.ReadInto(header.number_of_nodes); + input_file.ReadInto(header.number_of_edges); + + // If we have edges, then we must have nodes. + // However, there can be nodes with no edges (some test cases create this) + BOOST_ASSERT_MSG(header.number_of_edges == 0 || header.number_of_nodes > 0, + "edges exist, but there are no nodes"); + + return header; +} + +// Reads the graph data of a `.hsgr` file into memory +// Needs to be called after readHSGRHeader() to get the correct offset in the stream +using NodeT = typename util::StaticGraph::NodeArrayEntry; +using EdgeT = typename util::StaticGraph::EdgeArrayEntry; +inline void readHSGR(io::FileReader &input_file, + NodeT *node_buffer, + const std::uint64_t number_of_nodes, + EdgeT *edge_buffer, + const std::uint64_t number_of_edges) +{ + BOOST_ASSERT(node_buffer); + BOOST_ASSERT(edge_buffer); + input_file.ReadInto(node_buffer, number_of_nodes); + input_file.ReadInto(edge_buffer, number_of_edges); +} + +// Loads datasource_indexes from .datasource_indexes into memory +// Needs to be called after readElementCount() to get the correct offset in the stream +inline void readDatasourceIndexes(io::FileReader &datasource_indexes_file, + uint8_t *datasource_buffer, + const std::uint64_t number_of_datasource_indexes) +{ + BOOST_ASSERT(datasource_buffer); + datasource_indexes_file.ReadInto(datasource_buffer, number_of_datasource_indexes); +} + +// Loads edge data from .edge files into memory which includes its +// geometry, name ID, turn instruction, lane data ID, travel mode, entry class ID +// Needs to be called after readElementCount() to get the correct offset in the stream +inline void readEdges(io::FileReader &edges_input_file, + GeometryID *geometry_list, + NameID 
*name_id_list, + extractor::guidance::TurnInstruction *turn_instruction_list, + LaneDataID *lane_data_id_list, + extractor::TravelMode *travel_mode_list, + EntryClassID *entry_class_id_list, + util::guidance::TurnBearing *pre_turn_bearing_list, + util::guidance::TurnBearing *post_turn_bearing_list, + const std::uint64_t number_of_edges) +{ + BOOST_ASSERT(geometry_list); + BOOST_ASSERT(name_id_list); + BOOST_ASSERT(turn_instruction_list); + BOOST_ASSERT(lane_data_id_list); + BOOST_ASSERT(travel_mode_list); + BOOST_ASSERT(entry_class_id_list); + extractor::OriginalEdgeData current_edge_data; + for (std::uint64_t i = 0; i < number_of_edges; ++i) + { + edges_input_file.ReadInto(current_edge_data); + + geometry_list[i] = current_edge_data.via_geometry; + name_id_list[i] = current_edge_data.name_id; + turn_instruction_list[i] = current_edge_data.turn_instruction; + lane_data_id_list[i] = current_edge_data.lane_data_id; + travel_mode_list[i] = current_edge_data.travel_mode; + entry_class_id_list[i] = current_edge_data.entry_classid; + pre_turn_bearing_list[i] = current_edge_data.pre_turn_bearing; + post_turn_bearing_list[i] = current_edge_data.post_turn_bearing; + } +} + +// Loads coordinates and OSM node IDs from .nodes files into memory +// Needs to be called after readElementCount() to get the correct offset in the stream +template +void readNodes(io::FileReader &nodes_file, + util::Coordinate *coordinate_list, + OSMNodeIDVectorT &osmnodeid_list, + const std::uint64_t number_of_coordinates) +{ + BOOST_ASSERT(coordinate_list); + extractor::QueryNode current_node; + for (std::uint64_t i = 0; i < number_of_coordinates; ++i) + { + nodes_file.ReadInto(current_node); + coordinate_list[i] = util::Coordinate(current_node.lon, current_node.lat); + osmnodeid_list.push_back(current_node.node_id); + BOOST_ASSERT(coordinate_list[i].IsValid()); + } +} + +// Reads datasource names out of .datasource_names files and metadata such as +// the length and offset of each name +struct 
DatasourceNamesData +{ + std::vector names; + std::vector offsets; + std::vector lengths; +}; +inline DatasourceNamesData readDatasourceNames(io::FileReader &datasource_names_file) +{ + DatasourceNamesData datasource_names_data; + std::vector lines = datasource_names_file.ReadLines(); + for (const auto &name : lines) + { + datasource_names_data.offsets.push_back(datasource_names_data.names.size()); + datasource_names_data.lengths.push_back(name.size()); + std::copy(name.c_str(), + name.c_str() + name.size(), + std::back_inserter(datasource_names_data.names)); + } + return datasource_names_data; +} +} +} +} + +#endif diff --git a/include/util/name_table.hpp b/include/util/name_table.hpp index a86b1f321d4..e07e17fb22e 100644 --- a/include/util/name_table.hpp +++ b/include/util/name_table.hpp @@ -18,7 +18,7 @@ class NameTable { private: // FIXME should this use shared memory - RangeTable<16, false> m_name_table; + util::RangeTable<16, false> m_name_table; ShM::vector m_names_char_list; public: diff --git a/include/util/range_table.hpp b/include/util/range_table.hpp index c1a12c2ab0d..0d95bbbc174 100644 --- a/include/util/range_table.hpp +++ b/include/util/range_table.hpp @@ -1,6 +1,7 @@ #ifndef RANGE_TABLE_HPP #define RANGE_TABLE_HPP +#include "storage/io.hpp" #include "util/integer_range.hpp" #include "util/shared_memory_vector_wrapper.hpp" @@ -138,6 +139,21 @@ template class RangeTable sum_lengths = lengths_prefix_sum; } + void ReadARangeTable(osrm::storage::io::FileReader &filereader) + { + unsigned number_of_blocks = filereader.ReadElementCount32(); + // read total length + filereader.ReadInto(&sum_lengths, 1); + + block_offsets.resize(number_of_blocks); + diff_blocks.resize(number_of_blocks); + + // read block offsets + filereader.ReadInto(block_offsets.data(), number_of_blocks); + // read blocks + filereader.ReadInto(diff_blocks.data(), number_of_blocks); + } + inline RangeT GetRange(const unsigned id) const { BOOST_ASSERT(id < block_offsets.size() + 
diff_blocks.size() * BLOCK_SIZE); diff --git a/src/benchmarks/static_rtree.cpp b/src/benchmarks/static_rtree.cpp index 56d0a97ca5a..ea0a489652a 100644 --- a/src/benchmarks/static_rtree.cpp +++ b/src/benchmarks/static_rtree.cpp @@ -2,6 +2,7 @@ #include "extractor/edge_based_node.hpp" #include "extractor/query_node.hpp" #include "mocks/mock_datafacade.hpp" +#include "storage/io.hpp" #include "engine/geospatial_query.hpp" #include "util/coordinate.hpp" #include "util/timing_util.hpp" @@ -31,15 +32,15 @@ using BenchStaticRTree = std::vector loadCoordinates(const boost::filesystem::path &nodes_file) { - boost::filesystem::ifstream nodes_input_stream(nodes_file, std::ios::binary); + osrm::storage::io::FileReader nodes_path_file_reader( + nodes_file, osrm::storage::io::FileReader::HasNoFingerprint); extractor::QueryNode current_node; - std::uint64_t coordinate_count = 0; - nodes_input_stream.read((char *)&coordinate_count, sizeof(std::uint64_t)); + unsigned coordinate_count = nodes_path_file_reader.ReadElementCount32(); std::vector coords(coordinate_count); for (unsigned i = 0; i < coordinate_count; ++i) { - nodes_input_stream.read((char *)¤t_node, sizeof(extractor::QueryNode)); + nodes_path_file_reader.ReadInto(¤t_node, 1); coords[i] = util::Coordinate(current_node.lon, current_node.lat); } return coords; diff --git a/src/contractor/contractor.cpp b/src/contractor/contractor.cpp index fc768e81ad8..5ba2e0cbbb6 100644 --- a/src/contractor/contractor.cpp +++ b/src/contractor/contractor.cpp @@ -6,6 +6,7 @@ #include "extractor/edge_based_graph_factory.hpp" #include "extractor/node_based_edge.hpp" +#include "storage/io.hpp" #include "storage/io.hpp" #include "util/exception.hpp" #include "util/graph_loader.hpp" @@ -299,9 +300,8 @@ parse_segment_lookup_from_csv_files(const std::vector &segment_spee const auto file_id = idx + 1; // starts at one, zero means we assigned the weight const auto filename = segment_speed_filenames[idx]; - std::ifstream segment_speed_file{filename, 
std::ios::binary}; - if (!segment_speed_file) - throw util::exception{"Unable to open segment speed file " + filename}; + storage::io::FileReader segment_speed_file_reader( + filename, storage::io::FileReader::HasNoFingerprint); SegmentSpeedSourceFlatMap local; @@ -309,30 +309,33 @@ parse_segment_lookup_from_csv_files(const std::vector &segment_spee std::uint64_t to_node_id{}; unsigned speed{}; - for (std::string line; std::getline(segment_speed_file, line);) - { - using namespace boost::spirit::qi; + std::for_each( + segment_speed_file_reader.GetLineIteratorBegin(), + segment_speed_file_reader.GetLineIteratorEnd(), + [&](const std::string &line) { - auto it = begin(line); - const auto last = end(line); + using namespace boost::spirit::qi; - // The ulong_long -> uint64_t will likely break on 32bit platforms - const auto ok = - parse(it, - last, // - (ulong_long >> ',' >> ulong_long >> ',' >> uint_ >> *(',' >> *char_)), // - from_node_id, - to_node_id, - speed); // + auto it = begin(line); + const auto last = end(line); - if (!ok || it != last) - throw util::exception{"Segment speed file " + filename + " malformed"}; + // The ulong_long -> uint64_t will likely break on 32bit platforms + const auto ok = + parse(it, + last, // + (ulong_long >> ',' >> ulong_long >> ',' >> uint_ >> *(',' >> *char_)), // + from_node_id, + to_node_id, + speed); // - SegmentSpeedSource val{{OSMNodeID{from_node_id}, OSMNodeID{to_node_id}}, - {speed, static_cast(file_id)}}; + if (!ok || it != last) + throw util::exception{"Segment speed file " + filename + " malformed"}; - local.push_back(std::move(val)); - } + SegmentSpeedSource val{{OSMNodeID{from_node_id}, OSMNodeID{to_node_id}}, + {speed, static_cast(file_id)}}; + + local.push_back(std::move(val)); + }); util::SimpleLogger().Write() << "Loaded speed file " << filename << " with " << local.size() << " speeds"; @@ -387,10 +390,8 @@ parse_turn_penalty_lookup_from_csv_files(const std::vector &turn_pe const auto file_id = idx + 1; // starts 
at one, zero means we assigned the weight const auto filename = turn_penalty_filenames[idx]; - std::ifstream turn_penalty_file{filename, std::ios::binary}; - if (!turn_penalty_file) - throw util::exception{"Unable to open turn penalty file " + filename}; - + storage::io::FileReader turn_penalty_file_reader(filename, + storage::io::FileReader::HasNoFingerprint); TurnPenaltySourceFlatMap local; std::uint64_t from_node_id{}; @@ -398,31 +399,34 @@ parse_turn_penalty_lookup_from_csv_files(const std::vector &turn_pe std::uint64_t to_node_id{}; double penalty{}; - for (std::string line; std::getline(turn_penalty_file, line);) - { - using namespace boost::spirit::qi; - - auto it = begin(line); - const auto last = end(line); - - // The ulong_long -> uint64_t will likely break on 32bit platforms - const auto ok = parse(it, - last, // - (ulong_long >> ',' >> ulong_long >> ',' >> ulong_long >> ',' >> - double_ >> *(',' >> *char_)), // - from_node_id, - via_node_id, - to_node_id, - penalty); // - - if (!ok || it != last) - throw util::exception{"Turn penalty file " + filename + " malformed"}; - - TurnPenaltySource val{ - {OSMNodeID{from_node_id}, OSMNodeID{via_node_id}, OSMNodeID{to_node_id}}, - {penalty, static_cast(file_id)}}; - local.push_back(std::move(val)); - } + std::for_each( + turn_penalty_file_reader.GetLineIteratorBegin(), + turn_penalty_file_reader.GetLineIteratorEnd(), + [&](const std::string &line) { + + using namespace boost::spirit::qi; + + auto it = begin(line); + const auto last = end(line); + + // The ulong_long -> uint64_t will likely break on 32bit platforms + const auto ok = parse(it, + last, // + (ulong_long >> ',' >> ulong_long >> ',' >> ulong_long >> + ',' >> double_ >> *(',' >> *char_)), // + from_node_id, + via_node_id, + to_node_id, + penalty); // + + if (!ok || it != last) + throw util::exception{"Turn penalty file " + filename + " malformed"}; + + TurnPenaltySource val{ + {OSMNodeID{from_node_id}, OSMNodeID{via_node_id}, OSMNodeID{to_node_id}}, + 
{penalty, static_cast(file_id)}}; + local.push_back(std::move(val)); + }); util::SimpleLogger().Write() << "Loaded penalty file " << filename << " with " << local.size() << " turn penalties"; @@ -568,44 +572,24 @@ EdgeID Contractor::LoadEdgeExpandedGraph( if (!(update_edge_weights || update_turn_penalties)) return; - boost::filesystem::ifstream nodes_input_stream(nodes_filename, std::ios::binary); + storage::io::FileReader nodes_file(nodes_filename, + storage::io::FileReader::HasNoFingerprint); - if (!nodes_input_stream) - { - throw util::exception("Failed to open " + nodes_filename); - } - - std::uint64_t number_of_nodes = 0; - nodes_input_stream.read((char *)&number_of_nodes, sizeof(std::uint64_t)); - internal_to_external_node_map.resize(number_of_nodes); + nodes_file.DeserializeVector(internal_to_external_node_map); - // Load all the query nodes into a vector - nodes_input_stream.read(reinterpret_cast(&(internal_to_external_node_map[0])), - number_of_nodes * sizeof(extractor::QueryNode)); }; const auto maybe_load_geometries = [&] { if (!(update_edge_weights || update_turn_penalties)) return; - std::ifstream geometry_stream(geometry_filename, std::ios::binary); - if (!geometry_stream) - { - throw util::exception("Failed to open " + geometry_filename); - } - unsigned number_of_indices = 0; - unsigned number_of_compressed_geometries = 0; - - geometry_stream.read((char *)&number_of_indices, sizeof(unsigned)); - + storage::io::FileReader geometry_file(geometry_filename, + storage::io::FileReader::HasNoFingerprint); + const auto number_of_indices = geometry_file.ReadElementCount32(); m_geometry_indices.resize(number_of_indices); - if (number_of_indices > 0) - { - geometry_stream.read((char *)&(m_geometry_indices[0]), - number_of_indices * sizeof(unsigned)); - } + geometry_file.ReadInto(m_geometry_indices.data(), number_of_indices); - geometry_stream.read((char *)&number_of_compressed_geometries, sizeof(unsigned)); + const auto number_of_compressed_geometries = 
geometry_file.ReadElementCount32(); BOOST_ASSERT(m_geometry_indices.back() == number_of_compressed_geometries); m_geometry_node_list.resize(number_of_compressed_geometries); @@ -614,14 +598,11 @@ EdgeID Contractor::LoadEdgeExpandedGraph( if (number_of_compressed_geometries > 0) { - geometry_stream.read((char *)&(m_geometry_node_list[0]), - number_of_compressed_geometries * sizeof(NodeID)); - - geometry_stream.read((char *)&(m_geometry_fwd_weight_list[0]), - number_of_compressed_geometries * sizeof(EdgeWeight)); - - geometry_stream.read((char *)&(m_geometry_rev_weight_list[0]), - number_of_compressed_geometries * sizeof(EdgeWeight)); + geometry_file.ReadInto(m_geometry_node_list.data(), number_of_compressed_geometries); + geometry_file.ReadInto(m_geometry_fwd_weight_list.data(), + number_of_compressed_geometries); + geometry_file.ReadInto(m_geometry_rev_weight_list.data(), + number_of_compressed_geometries); } }; @@ -940,12 +921,12 @@ EdgeID Contractor::LoadEdgeExpandedGraph( void Contractor::ReadNodeLevels(std::vector &node_levels) const { - boost::filesystem::ifstream order_input_stream(config.level_output_path, std::ios::binary); + storage::io::FileReader order_file(config.level_output_path, + storage::io::FileReader::HasNoFingerprint); - unsigned level_size; - order_input_stream.read((char *)&level_size, sizeof(unsigned)); + const auto level_size = order_file.ReadElementCount32(); node_levels.resize(level_size); - order_input_stream.read((char *)node_levels.data(), sizeof(float) * node_levels.size()); + order_file.ReadInto(node_levels); } void Contractor::WriteNodeLevels(std::vector &&in_node_levels) const diff --git a/src/extractor/extractor.cpp b/src/extractor/extractor.cpp index 41bd769caee..ac884a801ae 100644 --- a/src/extractor/extractor.cpp +++ b/src/extractor/extractor.cpp @@ -10,6 +10,7 @@ #include "extractor/raster_source.hpp" #include "storage/io.hpp" +#include "storage/io.hpp" #include "util/graph_loader.hpp" #include "util/io.hpp" #include 
"util/name_table.hpp" diff --git a/src/storage/storage.cpp b/src/storage/storage.cpp index e6b33123c09..e270aaff52f 100644 --- a/src/storage/storage.cpp +++ b/src/storage/storage.cpp @@ -7,6 +7,7 @@ #include "extractor/query_node.hpp" #include "extractor/travel_mode.hpp" #include "storage/io.hpp" +#include "storage/serialization.hpp" #include "storage/shared_barriers.hpp" #include "storage/shared_datatype.hpp" #include "storage/shared_memory.hpp" @@ -308,7 +309,7 @@ void Storage::PopulateLayout(DataLayout &layout) { io::FileReader hsgr_file(config.hsgr_data_path, io::FileReader::HasNoFingerprint); - const auto hsgr_header = io::readHSGRHeader(hsgr_file); + const auto hsgr_header = serialization::readHSGRHeader(hsgr_file); layout.SetBlockSize(DataLayout::HSGR_CHECKSUM, 1); layout.SetBlockSize(DataLayout::GRAPH_NODE_LIST, hsgr_header.number_of_nodes); @@ -326,7 +327,7 @@ void Storage::PopulateLayout(DataLayout &layout) { // allocate space in shared memory for profile properties - const auto properties_size = io::readPropertiesCount(); + const auto properties_size = serialization::readPropertiesCount(); layout.SetBlockSize(DataLayout::PROPERTIES, properties_size); } @@ -390,8 +391,8 @@ void Storage::PopulateLayout(DataLayout &layout) io::FileReader datasource_names_file(config.datasource_names_path, io::FileReader::HasNoFingerprint); - const io::DatasourceNamesData datasource_names_data = - io::readDatasourceNames(datasource_names_file); + const serialization::DatasourceNamesData datasource_names_data = + serialization::readDatasourceNames(datasource_names_file); layout.SetBlockSize(DataLayout::DATASOURCE_NAME_DATA, datasource_names_data.names.size()); @@ -454,7 +455,7 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) // Load the HSGR file { io::FileReader hsgr_file(config.hsgr_data_path, io::FileReader::HasNoFingerprint); - auto hsgr_header = io::readHSGRHeader(hsgr_file); + auto hsgr_header = serialization::readHSGRHeader(hsgr_file); unsigned 
*checksum_ptr = layout.GetBlockPtr(memory_ptr, DataLayout::HSGR_CHECKSUM); *checksum_ptr = hsgr_header.checksum; @@ -469,11 +470,11 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) layout.GetBlockPtr(memory_ptr, DataLayout::GRAPH_EDGE_LIST); - io::readHSGR(hsgr_file, - graph_node_list_ptr, - hsgr_header.number_of_nodes, - graph_edge_list_ptr, - hsgr_header.number_of_edges); + serialization::readHSGR(hsgr_file, + graph_node_list_ptr, + hsgr_header.number_of_nodes, + graph_edge_list_ptr, + hsgr_header.number_of_edges); } // store the filename of the on-disk portion of the RTree @@ -606,16 +607,16 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) const auto entry_class_id_ptr = layout.GetBlockPtr(memory_ptr, DataLayout::ENTRY_CLASSID); - io::readEdges(edges_input_file, - via_geometry_ptr, - name_id_ptr, - turn_instructions_ptr, - lane_data_id_ptr, - travel_mode_ptr, - entry_class_id_ptr, - pre_turn_bearing_ptr, - post_turn_bearing_ptr, - number_of_original_edges); + serialization::readEdges(edges_input_file, + via_geometry_ptr, + name_id_ptr, + turn_instructions_ptr, + lane_data_id_ptr, + travel_mode_ptr, + entry_class_id_ptr, + pre_turn_bearing_ptr, + post_turn_bearing_ptr, + number_of_original_edges); } // load compressed geometry @@ -659,7 +660,7 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) layout.GetBlockPtr(memory_ptr, DataLayout::DATASOURCES_LIST); if (number_of_compressed_datasources > 0) { - io::readDatasourceIndexes( + serialization::readDatasourceIndexes( geometry_datasource_file, datasources_list_ptr, number_of_compressed_datasources); } } @@ -669,7 +670,8 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) io::FileReader datasource_names_file(config.datasource_names_path, io::FileReader::HasNoFingerprint); - const auto datasource_names_data = io::readDatasourceNames(datasource_names_file); + const auto datasource_names_data = + 
serialization::readDatasourceNames(datasource_names_file); // load datasource name information (if it exists) const auto datasource_name_data_ptr = @@ -724,10 +726,10 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) osmnodeid_list.reset(osmnodeid_ptr, layout.num_entries[DataLayout::OSM_NODE_ID_LIST]); - io::readNodes(nodes_file, - coordinates_ptr, - osmnodeid_list, - layout.num_entries[DataLayout::COORDINATE_LIST]); + serialization::readNodes(nodes_file, + coordinates_ptr, + osmnodeid_list, + layout.num_entries[DataLayout::COORDINATE_LIST]); } // store timestamp diff --git a/src/util/name_table.cpp b/src/util/name_table.cpp index 1f034f140dd..42437cc644e 100644 --- a/src/util/name_table.cpp +++ b/src/util/name_table.cpp @@ -15,27 +15,20 @@ namespace util NameTable::NameTable(const std::string &filename) { - boost::filesystem::ifstream name_stream(filename, std::ios::binary); + storage::io::FileReader name_stream_file_reader(filename, + storage::io::FileReader::HasNoFingerprint); - if (!name_stream) - throw exception("Failed to open " + filename + " for reading."); + // name_stream >> m_name_table; - name_stream >> m_name_table; + m_name_table.ReadARangeTable(name_stream_file_reader); - if (!name_stream) - throw exception("Unable to deserialize RangeTable for NameTable"); - - unsigned number_of_chars = 0; - name_stream.read(reinterpret_cast(&number_of_chars), sizeof(number_of_chars)); - if (!name_stream) - throw exception("Encountered invalid file, failed to read number of contained chars"); + unsigned number_of_chars = name_stream_file_reader.ReadElementCount32(); m_names_char_list.resize(number_of_chars + 1); //+1 gives sentinel element m_names_char_list.back() = 0; if (number_of_chars > 0) { - name_stream.read(reinterpret_cast(&m_names_char_list[0]), - number_of_chars * sizeof(m_names_char_list[0])); + name_stream_file_reader.ReadInto(&m_names_char_list[0], number_of_chars); } else { @@ -43,9 +36,6 @@ NameTable::NameTable(const 
std::string &filename) << "list of street names is empty in construction of name table from: \"" << filename << "\""; } - if (!name_stream) - throw exception("Failed to read " + std::to_string(number_of_chars) + " characters from " + - filename); } std::string NameTable::GetNameForID(const unsigned name_id) const diff --git a/unit_tests/util/io.cpp b/unit_tests/util/io.cpp index 416bb1e8342..6073970bdf4 100644 --- a/unit_tests/util/io.cpp +++ b/unit_tests/util/io.cpp @@ -1,22 +1,27 @@ #include "util/io.hpp" #include "storage/io.hpp" +#include "util/exception.hpp" #include "util/typedefs.hpp" #include #include +#include +#include #include const static std::string IO_TMP_FILE = "test_io.tmp"; +const static std::string IO_NONEXISTENT_FILE = "non_existent_test_io.tmp"; +const static std::string IO_TOO_SMALL_FILE = "file_too_small_test_io.tmp"; +const static std::string IO_CORRUPT_FINGERPRINT_FILE = "corrupt_fingerprint_file_test_io.tmp"; +const static std::string IO_TEXT_FILE = "plain_text_file.tmp"; BOOST_AUTO_TEST_SUITE(osrm_io) BOOST_AUTO_TEST_CASE(io_data) { - std::vector data_in, data_out; - data_in.resize(53); - for (std::size_t i = 0; i < data_in.size(); ++i) - data_in[i] = i; + std::vector data_in(53), data_out; + std::iota(begin(data_in), end(data_in), 0); osrm::util::serializeVector(IO_TMP_FILE, data_in); @@ -28,4 +33,100 @@ BOOST_AUTO_TEST_CASE(io_data) BOOST_CHECK_EQUAL_COLLECTIONS(data_out.begin(), data_out.end(), data_in.begin(), data_in.end()); } +BOOST_AUTO_TEST_CASE(io_nonexistent_file) +{ + try + { + osrm::storage::io::FileReader infile(IO_NONEXISTENT_FILE, + osrm::storage::io::FileReader::VerifyFingerprint); + BOOST_REQUIRE_MESSAGE(false, "Should not get here"); + } + catch (const osrm::util::exception &e) + { + std::cout << e.what() << std::endl; + BOOST_REQUIRE(std::string(e.what()) == + "Error opening non_existent_test_io.tmp:No such file or directory"); + } +} + +BOOST_AUTO_TEST_CASE(file_too_small) +{ + { + std::vector v(53); + 
std::iota(begin(v), end(v), 0); + + osrm::util::serializeVector(IO_TOO_SMALL_FILE, v); + + std::ofstream f(IO_TOO_SMALL_FILE); + f.seekp(0, std::ios_base::beg); + std::uint64_t garbage = 0xDEADBEEFCAFEFACE; + f.write(reinterpret_cast<char *>(&garbage), sizeof(garbage)); + } + + try + { + osrm::storage::io::FileReader infile(IO_TOO_SMALL_FILE, + osrm::storage::io::FileReader::VerifyFingerprint); + BOOST_REQUIRE_MESSAGE(false, "Should not get here"); + } + catch (const osrm::util::exception &e) + { + std::cout << e.what() << std::endl; + BOOST_REQUIRE(std::string(e.what()) == + "Error reading from file_too_small_test_io.tmp: Unexpected end of file"); + } +} + +BOOST_AUTO_TEST_CASE(io_corrupt_fingerprint) +{ + { + std::vector<int> v(153); + std::iota(begin(v), end(v), 0); + osrm::util::serializeVector(IO_CORRUPT_FINGERPRINT_FILE, v); + + std::fstream f(IO_CORRUPT_FINGERPRINT_FILE); + f.seekp(0, std::ios_base::beg); + std::uint64_t garbage = 0xDEADBEEFCAFEFACE; + f.write(reinterpret_cast<char *>(&garbage), sizeof(garbage)); + } + + try + { + osrm::storage::io::FileReader infile(IO_CORRUPT_FINGERPRINT_FILE, + osrm::storage::io::FileReader::VerifyFingerprint); + BOOST_REQUIRE_MESSAGE(false, "Should not get here"); + } + catch (const osrm::util::exception &e) + { + std::cout << e.what() << std::endl; + BOOST_REQUIRE(std::string(e.what()) == + "Fingerprint mismatch in corrupt_fingerprint_file_test_io.tmp"); + } +} + +BOOST_AUTO_TEST_CASE(io_read_lines) +{ + { + std::ofstream f(IO_TEXT_FILE); + char str[] = "A\nB\nC\nD"; + f.write(str, strlen(str)); + } + { + osrm::storage::io::FileReader infile(IO_TEXT_FILE, + osrm::storage::io::FileReader::HasNoFingerprint); + auto startiter = infile.GetLineIteratorBegin(); + auto enditer = infile.GetLineIteratorEnd(); + std::vector<std::string> resultlines; + while (startiter != enditer) { + resultlines.push_back(*startiter); + ++startiter; + } + BOOST_REQUIRE_MESSAGE(resultlines.size() == 4, "Expected 4 lines of text"); + BOOST_REQUIRE_MESSAGE(resultlines[0] == "A",
"Expected the first line to be A"); + BOOST_REQUIRE_MESSAGE(resultlines[1] == "B", "Expected the second line to be B"); + BOOST_REQUIRE_MESSAGE(resultlines[2] == "C", "Expected the third line to be C"); + BOOST_REQUIRE_MESSAGE(resultlines[3] == "D", "Expected the fourth line to be D"); + } +} + BOOST_AUTO_TEST_SUITE_END()