-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from dataesr/patentAggregation
fix: change map, add export, filters
- Loading branch information
Showing
25 changed files
with
923 additions
and
728 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,145 +1,78 @@ | ||
// import { patentsIndex, postHeaders } from "../../../config/api"; | ||
// import { AggregationArgs } from "../../../types/commons"; | ||
// import { PatentAggregations } from "../../../types/patent"; | ||
// import { DEFAULT_FILTERS, FIELDS } from "../_utils/constants"; | ||
import { postHeaders, patentsIndex } from "../../../config/api"; | ||
import { AggregationArgs } from "../../../types/commons"; | ||
import { PatentAggregations } from "../../../types/patent"; | ||
import { fillWithMissingYears } from "../../utils/years"; | ||
import { FIELDS } from "../_utils/constants"; | ||
|
||
// export async function aggregatePatents({ | ||
// query, | ||
// filters = [], | ||
// }: AggregationArgs): Promise<PatentAggregations> { | ||
// const body: any = { | ||
// size: 0, | ||
// query: { | ||
// bool: { | ||
// must: [ | ||
// { | ||
// query_string: { | ||
// query: query || "*", | ||
// fields: FIELDS, | ||
// }, | ||
// }, | ||
// ], | ||
// }, | ||
// }, | ||
// aggs: { | ||
// byNature: { | ||
// terms: { | ||
// field: "nature.keyword", | ||
// size: 50, | ||
// }, | ||
// }, | ||
// byLevel: { | ||
// terms: { | ||
// field: "level.keyword", | ||
// size: 50, | ||
// }, | ||
// }, | ||
// byKind: { | ||
// terms: { | ||
// field: "kind.keyword", | ||
// }, | ||
// }, | ||
// byLocalization: { | ||
// terms: { | ||
// field: "address.urbanUnitLabel.keyword", | ||
// size: 10, | ||
// }, | ||
// }, | ||
// byGPS: { | ||
// terms: { | ||
// field: "address.gps", | ||
// size: 5000, | ||
// }, | ||
// }, | ||
// byFundings: { | ||
// terms: { | ||
// field: "projects.type.keyword", | ||
// size: 100, | ||
// }, | ||
// }, | ||
// byTags: { | ||
// terms: { | ||
// field: "badges.label.fr.keyword", | ||
// size: 100, | ||
// }, | ||
// }, | ||
// }, | ||
// }; | ||
// if (filters.length > 0) { | ||
// body.query.bool.filter = [...filters, ...DEFAULT_FILTERS]; | ||
// } else { | ||
// body.query.bool.filter = DEFAULT_FILTERS; | ||
// } | ||
// const res = await fetch(`${patentsIndex}/_search`, { | ||
// method: "POST", | ||
// body: JSON.stringify(body), | ||
// headers: postHeaders, | ||
// }); | ||
// const result = await res.json(); | ||
// const { aggregations: data } = result; | ||
/**
 * Runs a zero-hit search against the patents index and returns the
 * aggregations used by the patent charts and filters.
 *
 * @param query - Free-text query; falls back to `"*"` when empty.
 * @param filters - Clauses placed in `bool.filter`; omitted from the request
 *   when the array is empty.
 * @returns `byYear` (sorted ascending by year, passed through
 *   `fillWithMissingYears`, with `normalizedCount` expressed as a percentage
 *   of the largest yearly count) and `byType`.
 * @throws Error when the search endpoint does not answer with HTTP 200.
 */
export async function aggregatePatents({
  query,
  filters = [],
}: AggregationArgs): Promise<PatentAggregations> {
  // Minimal shape of the `terms` aggregation buckets read below.
  type Bucket<K> = { key: K; doc_count: number };
  type YearEntry = {
    value: number;
    label: number;
    count: number;
    normalizedCount: number;
  };

  const body = {
    size: 0,
    query: {
      bool: {
        must: [
          {
            query_string: {
              query: query || "*",
              fields: FIELDS,
            },
          },
        ],
        // Only send a `filter` clause when there is something to filter on.
        ...(filters.length > 0 ? { filter: filters } : {}),
      },
    },
    aggs: {
      byType: {
        terms: {
          field: "type.keyword",
          size: 500,
        },
      },
      byYear: {
        terms: {
          field: "year",
          size: 25,
        },
      },
    },
  };

  const res = await fetch(`${patentsIndex}/_search`, {
    method: "POST",
    body: JSON.stringify(body),
    headers: postHeaders,
  });
  // Surface backend failures instead of silently returning empty aggregations.
  if (res.status !== 200) {
    throw new Error(`Elasticsearch error: ${res.status}`);
  }
  const { aggregations: data } = await res.json();

  const yearBuckets = data?.byYear?.buckets ?? [];
  // Largest yearly count is the 100% reference; the leading 0 keeps
  // Math.max finite when there are no buckets.
  const maxYearCount = Math.max(
    0,
    ...yearBuckets.map((bucket: Bucket<number>) => bucket.doc_count)
  );
  const byYear =
    yearBuckets
      .map(
        (bucket: Bucket<number>): YearEntry => ({
          value: bucket.key,
          label: bucket.key,
          count: bucket.doc_count,
          normalizedCount:
            maxYearCount > 0 ? (bucket.doc_count * 100) / maxYearCount : 0,
        })
      )
      .sort((a: YearEntry, b: YearEntry) => a.label - b.label)
      .reduce(fillWithMissingYears, []) || [];

  const byType =
    data?.byType?.buckets?.map((bucket: Bucket<string>) => ({
      value: bucket.key,
      label: bucket.key,
      count: bucket.doc_count,
    })) || [];

  return { byYear, byType };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import { postHeaders, patentsIndex } from "../../../config/api"; | ||
import { ExportArgs } from "../../../types/commons"; | ||
import { ExportPatent } from "../../../types/patent"; | ||
import csvify from "../../utils/csvify"; | ||
import { FIELDS } from "../_utils/constants"; | ||
|
||
/**
 * Document fields requested from the search backend (sent as `_source`)
 * when exporting patents.
 */
const EXPORT_SOURCE = [
  // Identification
  "id", "inpadocFamilyId", "patentType",
  // Content and relations
  "publicationDate", "authors", "domains", "patents", "title", "summary",
  // Lifecycle dates
  "submissionDate", "grantedDate",
  // Miscellaneous
  "fullName", "isInternational", "affiliations",
];
|
||
/**
 * Builds a CSV Blob (`text/csv`) from exported patents.
 *
 * @param data - Patents to export, one CSV row per item.
 * @param ctx - Search context string appended as the last cell of every row.
 * @returns A Blob wrapping the output of `csvify(rows, cols)`.
 */
const CSVFormatter = (data: ExportPatent[], ctx: string) => {
  // One header per cell produced below; keep both lists in the same order.
  const cols = [
    "Identifiant",
    "Date de publication",
    // NOTE(review): the only non-French header; left unchanged in case
    // consumers of the CSV rely on it.
    "submissionDate",
    "International",
    "Nombre d'auteurs",
    "Nombre de domaines",
    "Nombre de brevets",
    "Titre",
    // Header for the trailing `ctx` cell, which previously had none.
    "Contexte de recherche",
  ];

  const rows = data.map((item) => [
    item.id,
    item.publicationDate,
    item.submissionDate,
    item.isInternational,
    item.authors?.length ?? 0,
    item.domains?.length ?? 0,
    item.patents?.length ?? 0,
    // Tolerate hits without a title instead of throwing on `undefined.fr`.
    item.title?.fr || item.title?.default || item.title?.en || "",
    ctx,
  ]);
  return new Blob([csvify(rows, cols)], { type: "text/csv" });
};
|
||
/**
 * Serialises the exported patents as JSON indented with two spaces and wraps
 * the text in a Blob typed `application/json`.
 */
const JSONFormatter = (data: ExportPatent[]) => {
  const serialized = JSON.stringify(data, null, 2);
  const options = { type: "application/json" };
  return new Blob([serialized], options);
};
|
||
/**
 * Picks the formatter for an export format: `"csv"` selects `CSVFormatter`,
 * any other value selects `JSONFormatter`.
 */
const exporter = (format) => {
  if (format === "csv") {
    return CSVFormatter;
  }
  return JSONFormatter;
};
|
||
/**
 * Searches the patents index and returns the matching documents as a
 * downloadable Blob.
 *
 * @param query - Free-text query; falls back to `"*"` when empty.
 * @param filters - Optional clauses placed in `bool.filter`.
 * @param format - `"csv"` (default) for CSV output; any other value yields JSON.
 * @param ctx - Context string forwarded to the formatter.
 * @returns A Blob holding at most 1000 exported patents (the request `size`).
 * @throws Error when the search endpoint does not answer with HTTP 200.
 */
export async function exportPatents({
  query,
  filters,
  format = "csv",
  ctx,
}: ExportArgs): Promise<Blob> {
  const body = {
    _source: EXPORT_SOURCE,
    size: 1000,
    query: {
      bool: {
        must: [
          {
            query_string: {
              query: query || "*",
              fields: FIELDS,
            },
          },
        ],
        // Only send a `filter` clause when filters were provided.
        ...(filters ? { filter: filters } : {}),
      },
    },
  };

  const res = await fetch(`${patentsIndex}/_search`, {
    method: "POST",
    body: JSON.stringify(body),
    headers: postHeaders,
  });
  if (res.status !== 200) {
    throw new Error(`Elasticsearch error: ${res.status}`);
  }

  const json = await res.json();
  // Every level is optional so a response without `hits.hits` exports an
  // empty list instead of throwing.
  const data: ExportPatent[] =
    json?.hits?.hits?.map((hit: { _source: ExportPatent }) => hit._source) ||
    [];

  return exporter(format)(data, ctx);
}
Oops, something went wrong.