Skip to content

Commit f35a2a1

Browse files
jeffhostetler authored and dscho committed
survey: add pathname of blob or tree to large_item_vec
Include the pathname of each blob or tree in the large_item_vec to help identify the file or directory associated with the OID and size information. This pathname is computed during the path walk, so it reflects the first pathname observed for that OID during the traversal over all of the refs. Since the file or directory could have moved (without being modified), there may be multiple "correct" pathnames for a particular OID. Since we do not control the ref traversal order, the reported value should be treated only as a "suggested pathname" for the OID. Signed-off-by: Jeff Hostetler <jeffhostetler@github.com>
1 parent 73e6cd5 commit f35a2a1

File tree

1 file changed

+32
-11
lines changed

1 file changed

+32
-11
lines changed

Diff for: builtin/survey.c

+32-11
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
156156
struct large_item {
157157
uint64_t size;
158158
struct object_id oid;
159+
struct strbuf name;
159160
};
160161

161162
struct large_item_vec {
@@ -171,6 +172,7 @@ static struct large_item_vec *alloc_large_item_vec(const char *dimension_label,
171172
{
172173
struct large_item_vec *vec;
173174
size_t flex_len = nr_items * sizeof(struct large_item);
175+
size_t k;
174176

175177
if (!nr_items)
176178
return NULL;
@@ -180,6 +182,9 @@ static struct large_item_vec *alloc_large_item_vec(const char *dimension_label,
180182
vec->item_label = strdup(item_label);
181183
vec->nr_items = nr_items;
182184

185+
for (k = 0; k < nr_items; k++)
186+
strbuf_init(&vec->items[k].name, 0);
187+
183188
return vec;
184189
}
185190

@@ -188,14 +193,18 @@ static void free_large_item_vec(struct large_item_vec *vec)
188193
if (!vec)
189194
return;
190195

196+
for (size_t k = 0; k < vec->nr_items; k++)
197+
strbuf_release(&vec->items[k].name);
198+
191199
free(vec->dimension_label);
192200
free(vec->item_label);
193201
free(vec);
194202
}
195203

196204
static void maybe_insert_large_item(struct large_item_vec *vec,
197205
uint64_t size,
198-
struct object_id *oid)
206+
struct object_id *oid,
207+
const char *name)
199208
{
200209
size_t rest_len;
201210
size_t k;
@@ -215,7 +224,14 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
215224
if (size < vec->items[k].size)
216225
continue;
217226

218-
/* push items[k..] down one and insert it here */
227+
/*
228+
* The last large_item in the vector is about to be
229+
* overwritten by the previous one during the shift.
230+
* Release its strbuf first so that its buffer is not leaked.
231+
*/
232+
strbuf_release(&vec->items[vec->nr_items - 1].name);
233+
234+
/* push items[k..] down one and insert data for this item here */
219235

220236
rest_len = (vec->nr_items - k - 1) * sizeof(struct large_item);
221237
if (rest_len)
@@ -224,6 +240,10 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
224240
memset(&vec->items[k], 0, sizeof(struct large_item));
225241
vec->items[k].size = size;
226242
oidcpy(&vec->items[k].oid, oid);
243+
strbuf_init(&vec->items[k].name, 0);
244+
if (name && *name)
245+
strbuf_addstr(&vec->items[k].name, name);
246+
227247
return;
228248
}
229249
}
@@ -728,15 +748,15 @@ static void survey_report_largest_vec(struct large_item_vec *vec)
728748
return;
729749

730750
table.table_name = vec->dimension_label;
731-
strvec_pushl(&table.header, "Size", "OID", NULL);
751+
strvec_pushl(&table.header, "Size", "OID", "Name", NULL);
732752

733753
for (size_t k = 0; k < vec->nr_items; k++) {
734754
struct large_item *pk = &vec->items[k];
735755
if (!is_null_oid(&pk->oid)) {
736756
strbuf_reset(&size);
737757
strbuf_addf(&size, "%"PRIuMAX, (uintmax_t)pk->size);
738758

739-
insert_table_rowv(&table, size.buf, oid_to_hex(&pk->oid), NULL);
759+
insert_table_rowv(&table, size.buf, oid_to_hex(&pk->oid), pk->name.buf, NULL);
740760
}
741761
}
742762
strbuf_release(&size);
@@ -1197,7 +1217,8 @@ static void increment_object_counts(
11971217

11981218
static void increment_totals(struct survey_context *ctx,
11991219
struct oid_array *oids,
1200-
struct survey_report_object_size_summary *summary)
1220+
struct survey_report_object_size_summary *summary,
1221+
const char *path)
12011222
{
12021223
for (size_t i = 0; i < oids->nr; i++) {
12031224
struct object_info oi = OBJECT_INFO_INIT;
@@ -1233,8 +1254,8 @@ static void increment_totals(struct survey_context *ctx,
12331254
ctx->report.reachable_objects.commits.parent_cnt_pbin[k]++;
12341255
base = &ctx->report.reachable_objects.commits.base;
12351256

1236-
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents, k, &commit->object.oid);
1237-
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes, object_length, &commit->object.oid);
1257+
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents, k, &commit->object.oid, NULL);
1258+
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes, object_length, &commit->object.oid, NULL);
12381259
break;
12391260
}
12401261
case OBJ_TREE: {
@@ -1254,8 +1275,8 @@ static void increment_totals(struct survey_context *ctx,
12541275

12551276
pst->sum_entries += nr_entries;
12561277

1257-
maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &tree->object.oid);
1258-
maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &tree->object.oid);
1278+
maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &tree->object.oid, path);
1279+
maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &tree->object.oid, path);
12591280

12601281
qb = qbin(nr_entries);
12611282
incr_obj_hist_bin(&pst->entry_qbin[qb], object_length, disk_sizep);
@@ -1266,7 +1287,7 @@ static void increment_totals(struct survey_context *ctx,
12661287
case OBJ_BLOB:
12671288
base = &ctx->report.reachable_objects.blobs.base;
12681289

1269-
maybe_insert_large_item(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes, object_length, &oids->oid[i]);
1290+
maybe_insert_large_item(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes, object_length, &oids->oid[i], path);
12701291
break;
12711292
default:
12721293
continue;
@@ -1306,7 +1327,7 @@ static void increment_object_totals(struct survey_context *ctx,
13061327
struct survey_report_object_size_summary *total;
13071328
struct survey_report_object_size_summary summary = { 0 };
13081329

1309-
increment_totals(ctx, oids, &summary);
1330+
increment_totals(ctx, oids, &summary, path);
13101331

13111332
switch (type) {
13121333
case OBJ_COMMIT:

0 commit comments

Comments
 (0)