Skip to content

Commit

Permalink
Merge pull request #2 from dataesr/patentAggregation
Browse files Browse the repository at this point in the history
fix: change map, add export, filters
  • Loading branch information
Mihoub2 authored Mar 11, 2024
2 parents a313fd4 + 4ad24b1 commit 95c2cf6
Show file tree
Hide file tree
Showing 25 changed files with 923 additions and 728 deletions.
219 changes: 76 additions & 143 deletions client/src/api/patents/aggregate/index.ts
Original file line number Diff line number Diff line change
@@ -1,145 +1,78 @@
// import { patentsIndex, postHeaders } from "../../../config/api";
// import { AggregationArgs } from "../../../types/commons";
// import { PatentAggregations } from "../../../types/patent";
// import { DEFAULT_FILTERS, FIELDS } from "../_utils/constants";
import { postHeaders, patentsIndex } from "../../../config/api";
import { AggregationArgs } from "../../../types/commons";
import { PatentAggregations } from "../../../types/patent";
import { fillWithMissingYears } from "../../utils/years";
import { FIELDS } from "../_utils/constants";

// export async function aggregatePatents({
// query,
// filters = [],
// }: AggregationArgs): Promise<PatentAggregations> {
// const body: any = {
// size: 0,
// query: {
// bool: {
// must: [
// {
// query_string: {
// query: query || "*",
// fields: FIELDS,
// },
// },
// ],
// },
// },
// aggs: {
// byNature: {
// terms: {
// field: "nature.keyword",
// size: 50,
// },
// },
// byLevel: {
// terms: {
// field: "level.keyword",
// size: 50,
// },
// },
// byKind: {
// terms: {
// field: "kind.keyword",
// },
// },
// byLocalization: {
// terms: {
// field: "address.urbanUnitLabel.keyword",
// size: 10,
// },
// },
// byGPS: {
// terms: {
// field: "address.gps",
// size: 5000,
// },
// },
// byFundings: {
// terms: {
// field: "projects.type.keyword",
// size: 100,
// },
// },
// byTags: {
// terms: {
// field: "badges.label.fr.keyword",
// size: 100,
// },
// },
// },
// };
// if (filters.length > 0) {
// body.query.bool.filter = [...filters, ...DEFAULT_FILTERS];
// } else {
// body.query.bool.filter = DEFAULT_FILTERS;
// }
// const res = await fetch(`${patentsIndex}/_search`, {
// method: "POST",
// body: JSON.stringify(body),
// headers: postHeaders,
// });
// const result = await res.json();
// const { aggregations: data } = result;
/**
 * Fetches patent aggregations from Elasticsearch.
 *
 * Sends a `_search` request with `size: 0` (no hits, aggregations only) to
 * `patentsIndex`, with two terms aggregations: `byType` on `type.keyword`
 * (up to 500 buckets) and `byYear` on `year` (up to 25 buckets).
 *
 * @param query - Query-string expression matched against `FIELDS`; an empty
 *   or missing value falls back to `"*"`.
 * @param filters - Elasticsearch filter clauses; only attached to the request
 *   when the array is non-empty.
 * @returns `byYear`: one entry per year bucket, sorted by ascending year and
 *   folded through `fillWithMissingYears`, where `normalizedCount` is the
 *   bucket count expressed as a percentage of the largest year bucket;
 *   `byType`: one entry per type bucket. Both are empty arrays when the
 *   response carries no matching aggregation.
 * @throws Error when Elasticsearch answers with a status other than 200
 *   (same convention as `exportPatents`).
 */
export async function aggregatePatents({
  query,
  filters = [],
}: AggregationArgs): Promise<PatentAggregations> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped Elasticsearch request body
  const body: any = {
    size: 0,
    query: {
      bool: {
        must: [
          {
            query_string: {
              query: query || "*",
              fields: FIELDS,
            },
          },
        ],
      },
    },
    aggs: {
      byType: {
        terms: {
          field: "type.keyword",
          size: 500,
        },
      },
      byYear: {
        terms: {
          field: "year",
          size: 25,
        },
      },
    },
  };
  if (filters.length > 0) {
    body.query.bool.filter = filters;
  }

  const res = await fetch(`${patentsIndex}/_search`, {
    method: "POST",
    body: JSON.stringify(body),
    headers: postHeaders,
  });
  // Surface HTTP failures instead of silently returning empty aggregations.
  if (res.status !== 200) {
    throw new Error(`Elasticsearch error: ${res.status}`);
  }
  const result = await res.json();
  const data = result?.aggregations;

  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw Elasticsearch buckets
  const yearBuckets: any[] = data?.byYear?.buckets ?? [];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw Elasticsearch buckets
  const typeBuckets: any[] = data?.byType?.buckets ?? [];

  // Largest yearly count: the reference value that maps to 100 in `normalizedCount`.
  const maxYearCount = Math.max(...yearBuckets.map((bucket) => bucket.doc_count));

  const byYear = yearBuckets
    .map((bucket) => ({
      value: bucket.key,
      label: bucket.key,
      count: bucket.doc_count,
      // Guard the divisor so a zero maximum cannot produce NaN.
      normalizedCount:
        maxYearCount > 0 ? (bucket.doc_count * 100) / maxYearCount : 0,
    }))
    .sort((a, b) => a.label - b.label)
    .reduce(fillWithMissingYears, []);

  const byType = typeBuckets.map((bucket) => ({
    value: bucket.key,
    label: bucket.key,
    count: bucket.doc_count,
  }));

  return { byYear, byType };
}
94 changes: 94 additions & 0 deletions client/src/api/patents/export/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { postHeaders, patentsIndex } from "../../../config/api";
import { ExportArgs } from "../../../types/commons";
import { ExportPatent } from "../../../types/patent";
import csvify from "../../utils/csvify";
import { FIELDS } from "../_utils/constants";

// Document fields requested from Elasticsearch (passed as `_source`) for each
// exported patent; fields not listed here are not returned in the export hits.
const EXPORT_SOURCE = [
"id",
"inpadocFamilyId",
"patentType",
"publicationDate",
"authors",
"domains",
"patents",
"title",
"summary",
"submissionDate",
"grantedDate",
"fullName",
"isInternational",
"affiliations",
];

/**
 * Builds a CSV Blob (`text/csv`) from exported patents.
 *
 * Each patent becomes one row: identifier, publication date, submission date,
 * international flag, the number of authors / domains / patents (0 when the
 * list is missing), the title (French first, then default, then English, then
 * an empty string), and finally `ctx`, repeated on every row.
 *
 * @param data - Patents to export.
 * @param ctx - Context value appended as the last column of every row.
 * @returns A Blob holding the output of `csvify(rows, cols)`.
 */
const CSVFormatter = (data: ExportPatent[], ctx: string) => {
  const cols = [
    "Identifiant",
    "Date de publication",
    "submissionDate",
    "International",
    "Nombre d'auteurs",
    "Nombre de domaines",
    "Nombre de brevets",
    "Titre",
    // Header for the trailing `ctx` value: rows carry 9 values, so the header
    // row must carry 9 labels as well.
    // NOTE(review): label chosen here; confirm the wording expected by users.
    "Contexte",
  ];

  const rows = data.map((item) => [
    item.id,
    item.publicationDate,
    item.submissionDate,
    item.isInternational,
    item.authors ? item.authors.length : 0,
    item.domains ? item.domains.length : 0,
    item.patents ? item.patents.length : 0,
    // A hit without `title` must not abort the whole export.
    item.title?.fr || item.title?.default || item.title?.en || "",
    ctx,
  ]);
  return new Blob([csvify(rows, cols)], { type: "text/csv" });
};

/**
 * Wraps the exported patents in a JSON Blob.
 *
 * @param data - Patents to serialize.
 * @returns A Blob of type `application/json` containing `data` serialized
 *   with a two-space indent.
 */
const JSONFormatter = (data: ExportPatent[]) => {
  const serialized = JSON.stringify(data, null, 2);
  const blobOptions = { type: "application/json" };
  return new Blob([serialized], blobOptions);
};

// Selects the Blob formatter for the requested format: the CSV formatter for
// "csv", the JSON formatter for any other value.
const exporter = (format) => {
  if (format === "csv") {
    return CSVFormatter;
  }
  return JSONFormatter;
};

/**
 * Exports the patents matching a search as a downloadable Blob.
 *
 * Sends a `_search` request to `patentsIndex` asking for at most 1000 hits,
 * restricted to the `EXPORT_SOURCE` fields, then formats the returned
 * `_source` documents with the formatter selected by `format`.
 *
 * @param query - Query-string expression matched against `FIELDS`; an empty
 *   or missing value falls back to `"*"`.
 * @param filters - Elasticsearch filter clauses, attached when provided.
 * @param format - `"csv"` (default) selects the CSV formatter; any other
 *   value selects the JSON formatter.
 * @param ctx - Context value forwarded to the formatter.
 * @returns The formatted export as a Blob.
 * @throws Error when Elasticsearch answers with a status other than 200.
 */
export async function exportPatents({
  query,
  filters,
  format = "csv",
  ctx,
}: ExportArgs): Promise<Blob> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped Elasticsearch request body
  const body: any = {
    _source: EXPORT_SOURCE,
    size: 1000,
    query: {
      bool: {
        must: [
          {
            query_string: {
              query: query || "*",
              fields: FIELDS,
            },
          },
        ],
      },
    },
  };
  if (filters) body.query.bool.filter = filters;

  const res = await fetch(`${patentsIndex}/_search`, {
    method: "POST",
    body: JSON.stringify(body),
    headers: postHeaders,
  });
  if (res.status !== 200) {
    throw new Error(`Elasticsearch error: ${res.status}`);
  }
  const json = await res.json();

  // Fall back to an empty list BEFORE mapping: a response without `hits.hits`
  // must yield an empty export rather than a TypeError on `.map`.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw Elasticsearch hits
  const hits: any[] = json?.hits?.hits ?? [];
  const data: ExportPatent[] = hits.map((hit) => hit._source);

  return exporter(format)(data, ctx);
}
Loading

0 comments on commit 95c2cf6

Please sign in to comment.