Skip to content

Commit

Permalink
Create and use tsv instead of csv map #1058
Browse files (browse the repository at this point in the history)
  • Loading branch information
TobiasNx committed Jun 6, 2024
1 parent c0e2966 commit 7dc8230
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ public void run() {
fixVariables.put("classification.tsv", "./maps/classification.tsv");
fixVariables.put("formangabe.tsv", "./maps/formangabe.tsv");
fixVariables.put("almaMmsId2rpbId", "../../../../../../lookup-tables/data/almaMmsId2rpbId.tsv");
fixVariables.put("rvk.csv", "../../../../../../lookup-tables/data/rvk.csv");
fixVariables.put("rvk.tsv", "../../../../../../lookup-tables/data/rvk.tsv");
fixVariables.put("lobidOrganisationsMapping.tsv", "./maps/lobidOrganisationsMapping.tsv");
fixVariables.put("hbzowner2sigel.tsv", "./maps/hbzowner2sigel.tsv");
fixVariables.put("rpb2.ttl", "../../../../../../vocabs/rpb/rpb2.ttl");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* @author Tobias Bülte (TobiasNx)
**/
public final class CulturegraphXmlFilterHbzRvkToCsv {
private static String OUTPUT_FILE="cg-concordance.csv";
private static String OUTPUT_FILE="cg-concordance.tsv";

public static void main(String... args) {
String XML_INPUT_FILE = new File(args[0]).getAbsolutePath();
Expand All @@ -32,12 +32,16 @@ public static void main(String... args) {
final FileOpener opener = new FileOpener();
JsonDecoder jsonDecoder = new JsonDecoder();
jsonDecoder.setRecordPath("records");
CsvEncoder csvEncoder = new CsvEncoder();
csvEncoder.setSeparator("\t");
csvEncoder.setNoQuotes(true);

try {
opener.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler())
.setReceiver(new Metafix("src/main/resources/rvk/cg-to-rvk-csv.fix"))
.setReceiver(new JsonEncoder())
.setReceiver(jsonDecoder)
.setReceiver(new CsvEncoder())
.setReceiver(csvEncoder)
.setReceiver(new ObjectWriter<>(OUTPUT_FILE));
} catch (IOException e) {
e.printStackTrace();
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/alma/fix/maps.fix
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ put_filemap("$[classification.tsv]","classificationCode2Label", sep_char:"\t",ke
put_filemap("$[classification.tsv]","classificationCode2Uri", sep_char:"\t",key_column:"0",value_column:"2",expected_columns:"-1")

# RVK via Culturegraph
put_filemap("$[rvk.csv]","rvk", sep_char:",")
put_filemap("$[rvk.tsv]","rvk", sep_char:"\t")

# RPB SKOS Maps
put_rdfmap("$[rpb2.ttl]", "rpb2", target: "skos:prefLabel", select_language: "de") # LBZ-Notationen aka rpb2 fka RPB-Sachgruppen und Zusätze
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/alma/fix/subjects.fix
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ end

copy_field("almaMmsId","@rvkNotations")
lookup("@rvkNotations","rvk",delete:"true")
split_field("@rvkNotations", ", ")
split_field("@rvkNotations", ",")
do list(path: "@rvkNotations","var":"$i")
copy_field("$i","subject[].$append.notation")
set_array("subject[].$last.type[]","Concept")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public void setup() {
fixVariables.put("picaCreatorId2Isil.tsv", "src/main/resources/alma/maps/picaCreatorId2Isil.tsv");
fixVariables.put("nwbibWikidataLabelTypeCoords.tsv", "src/main/resources/alma/maps/nwbibWikidataLabelTypeCoords.tsv");
fixVariables.put("almaMmsId2rpbId", "src/test/resources/alma/maps/almaMmsId2rpbId.tsv");
fixVariables.put("rvk.csv", "src/test/resources/cg/rvk.csv");
fixVariables.put("rvk.tsv", "src/test/resources/cg/rvk.tsv");
fixVariables.put("lobidOrganisationsMapping.tsv", "src/test/resources/alma/maps/lobidOrganisationsMapping.tsv");
fixVariables.put("hbzowner2sigel.tsv", "src/main/resources/alma/maps/hbzowner2sigel.tsv");
fixVariables.put("rpb2.ttl", "src/test/resources/alma/maps/rpb2.ttl");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public final class CulturegraphXmlFilterHbzRvkToCsvTest {

private static final String PATH_TO_TEST = "src/test/resources/";
public static final String OUTPUT_FILE =
PATH_TO_TEST + "cg/rvk.csv";
PATH_TO_TEST + "cg/rvk.tsv";

private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml";

Expand Down
21 changes: 21 additions & 0 deletions src/test/resources/alma-fix/990367731740206441.json
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,27 @@
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"notation" : "NQ 2360",
"type" : [ "Concept" ],
"source" : {
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"notation" : "NY 4760",
"type" : [ "Concept" ],
"source" : {
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"notation" : "BD 7100",
"type" : [ "Concept" ],
"source" : {
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"type" : [ "ComplexSubject" ],
"label" : "Pandemie | Gesellschaft | Geschichte",
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/cg/rvk.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
990067531130206441	MG 11380,MF 3390,MC 7200
990019247190206441	NQ 2360,NY 4760,BD 7100
990367731740206441	NQ 2360,NY 4760,BD 7100
990063668050206441	NQ 2360,NY 4760,BD 7100

0 comments on commit 7dc8230

Please sign in to comment.