Skip to content

Commit

Permalink
Address reviewer comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
emilyfertig committed Oct 2, 2024
1 parent 8649774 commit ccb2b73
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 17 deletions.
25 changes: 12 additions & 13 deletions cxx/gendb.hh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

class GenDB {
public:

GenDB(std::mt19937* prng, const PCleanSchema& schema,
bool _only_final_emissions = false, bool _record_class_is_clean = true);

Expand Down Expand Up @@ -43,21 +42,21 @@ class GenDB {
void sample_and_incorporate_reference(

std::mt19937* prng, const std::string& class_name,
const std::pair<std::string, int>& ref_key,
const std::string& ref_class, bool new_rows_have_unique_entities);
const std::pair<std::string, int>& ref_key, const std::string& ref_class,
bool new_rows_have_unique_entities);

// Samples a set of entities in the domains of the relation corresponding to
// class_path.
T_items sample_entities_relation(
std::mt19937* prng, const std::string& class_name,
std::vector<std::string>::const_iterator class_path_start,
std::vector<std::string>::const_iterator class_path_end,
int class_item, bool new_rows_have_unique_entities);
std::vector<std::string>::const_iterator class_path_end, int class_item,
bool new_rows_have_unique_entities);

// Sample items from a class' ancestors (recursive reference fields).
T_items sample_class_ancestors(
std::mt19937* prng, const std::string& class_name, int class_item,
bool new_rows_have_unique_entities);
T_items sample_class_ancestors(std::mt19937* prng,
const std::string& class_name, int class_item,
bool new_rows_have_unique_entities);

// Populates "items" with entities by walking the DAG of reference indices,
// starting with "ind".
Expand Down Expand Up @@ -216,15 +215,16 @@ class GenDB {

// Make the relations associated with QueryField f and put them into
// schema.
void make_relations_for_queryfield(
const QueryField& f, const PCleanClass& record_class, T_schema* schema);
void make_relations_for_queryfield(const QueryField& f,
const PCleanClass& record_class,
T_schema* schema);

// Member variables
const PCleanSchema& schema;

// This data structure contains entity sets and linkages. Semantics are
// map<tuple<class_name, reference_field_name, class_primary_key> ref_val>>,
// where primary_key and ref_val are (integer) entity IDs.
// map<class_name, map<pair<reference_field_name, class_primary_key>,
// ref_val>>, where class_primary_key and ref_val are (integer) entity IDs.
std::map<std::string, std::map<std::pair<std::string, int>, int>>
reference_values;

Expand All @@ -239,7 +239,6 @@ class GenDB {
bool record_class_is_clean;
std::map<std::string, std::vector<std::string>> domains;


// Maps class names to relations corresponding to attributes of the class.
std::map<std::string, std::vector<std::string>> class_to_relations;

Expand Down
18 changes: 18 additions & 0 deletions cxx/gendb_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,25 @@ BOOST_AUTO_TEST_CASE(test_transition_reference_class) {
std::mt19937 prng;
GenDB gendb(&prng, schema);
setup_gendb(&prng, gendb, 20);
auto init_phys_assignments = gendb.domain_crps.at("Physician").assignments;
gendb.transition_reference_class_and_ancestors(&prng, "Record");
auto final_phys_assignments = gendb.domain_crps.at("Physician").assignments;
// Check that at least some tables were updated. The two assignment maps
// count as "the same" only when they have equal size and every entry of the
// initial map is present in the final map with an equal value.
bool is_same = false;
if (init_phys_assignments.size() == final_phys_assignments.size()) {
  is_same = true;
  for (const auto& [i, t] : init_phys_assignments) {
    // A missing key or a changed value means the transition modified the
    // assignments, so the maps differ. (The previous check was inverted: it
    // cleared is_same when an entry was *unchanged*.)
    if (!final_phys_assignments.contains(i) ||
        final_phys_assignments.at(i) != t) {
      is_same = false;
      break;
    }
  }
}
BOOST_TEST(!is_same);
}

BOOST_AUTO_TEST_SUITE_END()
18 changes: 14 additions & 4 deletions cxx/inference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,19 @@ void inference_hirm(std::mt19937* prng, HIRM* hirm, int iters, int timeout,
}
}

// Runs inference for a GenDB. In the looped form below, each of the `iters`
// outer iterations prints the current model score, runs inference_hirm on
// gendb->hirm for hirm_iters_per_entity_iter iterations, and then calls
// transition_reference_class_and_ancestors on the query's record class.
// NOTE(review): this span is taken from a rendered diff and shows both the
// previous signature/body lines and their replacements; confirm which lines
// are live before relying on this description.
void inference_gendb(std::mt19937* prng, GenDB* gendb, int iters, int timeout,
void inference_gendb(std::mt19937* prng, GenDB* gendb, int iters,
int hirm_iters_per_entity_iter, int timeout,
bool verbose) {
inference_hirm(prng, gendb->hirm, iters, timeout, verbose);
gendb->transition_reference_class_and_ancestors(
prng, gendb->schema.query.record_class);
for (int i = 0; i < iters; ++i) {
// TRANSITION HIRM
// Progress line uses a 1-based iteration number.
printf("Starting iteration %d, model score = %f\n", i + 1,
gendb->logp_score());
// NOTE(review): the same `timeout` value is forwarded to every inner
// inference_hirm call; presumably each call applies it independently --
// confirm this is the intended budget.
inference_hirm(prng, gendb->hirm, hirm_iters_per_entity_iter, timeout,
verbose);

// TRANSITION ENTITIES
gendb->transition_reference_class_and_ancestors(
prng, gendb->schema.query.record_class);
// NOTE(review): t_begin is not declared anywhere in the visible body;
// confirm it is defined before this macro is expanded.
CHECK_TIMEOUT(timeout, t_begin);
}
}

0 comments on commit ccb2b73

Please sign in to comment.