Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ODRE data post-processing: Replace super-csv by apache commons-csv #3075

Merged
merged 3 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions iidm/iidm-geodata/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@
</plugins>
</build>

<properties>
<supercsv.version>2.4.0</supercsv.version>
</properties>

<dependencies>
<!-- compile dependencies -->
<dependency>
Expand All @@ -61,9 +57,9 @@
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>net.sf.supercsv</groupId>
<artifactId>super-csv</artifactId>
<version>${supercsv.version}</version>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>${commons-csv.version}</version>
</dependency>

<!-- test dependencies -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,18 @@
package com.powsybl.iidm.geodata.odre;

import com.powsybl.iidm.geodata.utils.InputUtils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.supercsv.io.CsvMapReader;
import org.supercsv.prefs.CsvPreference;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;

Expand All @@ -39,20 +35,41 @@ private FileValidator() {
private static final String HEADERS_OF_FILE_HAS_CHANGED = "Invalid file, Headers of file {} has changed, header(s) not found: {}";
private static final Logger LOGGER = LoggerFactory.getLogger(FileValidator.class);
static final String COUNTRY_FR = "FR";
static final CsvPreference CSV_PREFERENCE = new CsvPreference.Builder('"', ';', System.lineSeparator()).build();

/**
 * Creates the builder shared by the CSV formats declared in this class.
 *
 * <p>Starting from {@link CSVFormat#DEFAULT}, the builder is configured with a double quote
 * as quote character, a semicolon as field delimiter and the platform line separator as
 * record separator. Callers add their own header handling before calling {@code build()}.
 *
 * @return a new, pre-configured {@link CSVFormat.Builder}
 */
private static CSVFormat.Builder createCsvFormatBuilder() {
    CSVFormat.Builder builder = CSVFormat.DEFAULT.builder();
    builder.setQuote('"');
    builder.setDelimiter(";");
    builder.setRecordSeparator(System.lineSeparator());
    return builder;
}

/**
 * Format built from {@link #createCsvFormatBuilder()} with {@code setHeader()} called without
 * arguments and {@code setSkipHeaderRecord(true)}.
 * NOTE(review): per the commons-csv documentation an empty {@code setHeader()} makes the parser
 * take the column names from the first record, which allows access by column name - confirm
 * against the library version in use.
 */
static final CSVFormat CSV_FORMAT = createCsvFormatBuilder()
.setHeader()
.setSkipHeaderRecord(true)
.build();
/**
 * Format built from {@link #createCsvFormatBuilder()} without any header configuration, so the
 * first record is returned like any other record; the validation methods read it as the header row.
 */
static final CSVFormat CSV_FORMAT_FOR_HEADER = createCsvFormatBuilder().build();
public static final String SUBSTATIONS = "substations";
public static final String AERIAL_LINES = "aerial-lines";
public static final String UNDERGROUND_LINES = "underground-lines";

public static boolean validateSubstations(Path path, OdreConfig odreConfig) {
try (BufferedReader fileReader = new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path)), StandardCharsets.UTF_8));
CsvMapReader mapReader = new CsvMapReader(fileReader, CSV_PREFERENCE)) {
final List<String> headers = List.of(mapReader.getHeader(true));
try (Reader reader = new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path)), StandardCharsets.UTF_8))) {
Iterator<CSVRecord> records = CSVParser.parse(reader, FileValidator.CSV_FORMAT_FOR_HEADER).iterator();

List<String> headers;
if (records.hasNext()) {
CSVRecord headersRecord = records.next();
headers = headersRecord.toList();
} else {
LOGGER.error("The file {} is empty", path.getFileName());
return false;
}

if (new HashSet<>(headers).containsAll(odreConfig.substationsExpectedHeaders())) {
return true;
} else {
List<String> notFoundHeaders = odreConfig.substationsExpectedHeaders().stream().filter(isChangedHeaders(headers)).collect(Collectors.toList());
LOGGER.error(HEADERS_OF_FILE_HAS_CHANGED, path.getFileName(), notFoundHeaders);
logHeaderError(path, notFoundHeaders);
}
} catch (IOException e) {
LOGGER.error(e.getMessage());
Expand All @@ -61,14 +78,29 @@ public static boolean validateSubstations(Path path, OdreConfig odreConfig) {
return false;
}

public static Map<String, BufferedReader> validateLines(List<Path> paths, OdreConfig odreConfig) {
Map<String, BufferedReader> mapResult = new HashMap<>();
public static Map<String, Reader> validateLines(List<Path> paths, OdreConfig odreConfig) {
Map<String, Reader> mapResult = new HashMap<>();
paths.forEach(path -> {
try (CsvMapReader mapReader = new CsvMapReader(new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path)), StandardCharsets.UTF_8)), CSV_PREFERENCE)) {
final String[] headersString = mapReader.getHeader(true);
final List<String> headers = List.of(headersString);
Map<String, String> row = mapReader.read(headersString);
String equipmentType = row.get(odreConfig.equipmentTypeColumn());
try (Reader reader = new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path)), StandardCharsets.UTF_8))) {
Iterator<CSVRecord> records = CSVParser.parse(reader, FileValidator.CSV_FORMAT_FOR_HEADER).iterator();

final List<String> headers;
if (records.hasNext()) {
CSVRecord headersRecord = records.next();
headers = headersRecord.toList();
} else {
headers = null;
LOGGER.error("The file {} is empty", path.getFileName());
}

String equipmentType = null;
if (headers != null && records.hasNext()) {
CSVRecord firstRow = records.next();
equipmentType = readEquipmentType(firstRow, headers, odreConfig);
} else {
LOGGER.error("The file {} has no data", path.getFileName());
}

String type = equipmentType != null ? equipmentType : odreConfig.nullEquipmentType();
if (type.equals(odreConfig.nullEquipmentType())) {
getIfSubstationsOrLogError(mapResult, path, headers, equipmentType, odreConfig);
Expand All @@ -80,20 +112,30 @@ public static Map<String, BufferedReader> validateLines(List<Path> paths, OdreCo
LOGGER.error("The file {} has no known equipment type : {}", path.getFileName(), equipmentType);
}
} catch (IOException e) {
mapResult.values().forEach(IOUtils::closeQuietly);
throw new UncheckedIOException(e);
}
});
return mapResult;
}

private static void getIfSubstationsOrLogError(Map<String, BufferedReader> mapResult, Path path, List<String> headers, String typeOuvrage, OdreConfig odreConfig) throws IOException {
/**
 * Reads the equipment type from the first data row of a CSV file.
 *
 * <p>The column is located by looking up {@code odreConfig.equipmentTypeColumn()} in the
 * header list; the value at the same position in {@code firstRow} is returned.
 *
 * @param firstRow   first data record of the file (the record following the header record)
 * @param headers    column names read from the header record
 * @param odreConfig configuration providing the name of the equipment type column
 * @return the equipment type, or {@code null} when the column is not among the headers or the
 *         row is too short to contain it
 */
private static String readEquipmentType(CSVRecord firstRow, List<String> headers, OdreConfig odreConfig) {
    int index = headers.indexOf(odreConfig.equipmentTypeColumn());
    // CSVRecord.get(int) throws ArrayIndexOutOfBoundsException when the row has fewer fields
    // than the header; treat such a malformed row as "no equipment type" instead of crashing.
    if (index < 0 || index >= firstRow.size()) {
        return null;
    }
    return firstRow.get(index);
}

/**
 * Handles a file for which no equipment type could be determined: registers it as the
 * substations file when its headers match, otherwise logs why it was rejected.
 *
 * <ul>
 * <li>If {@code headers} contain every header of {@code odreConfig.substationsExpectedHeaders()},
 * a new reader on {@code path} is stored in {@code mapResult} under {@code SUBSTATIONS}, unless
 * that key is already present.</li>
 * <li>Otherwise, if {@code isAerialOrUnderground} accepts the headers, an error is logged saying
 * the file has no equipment type.</li>
 * <li>Otherwise the expected substation headers selected by {@code isChangedHeaders(headers)}
 * are logged as not found.</li>
 * </ul>
 *
 * @param mapResult   map of validated readers, keyed by file kind; may be modified
 * @param path        file being validated
 * @param headers     column names read from the file; must not be null, since
 *                    {@code new HashSet<>(headers)} throws NullPointerException on null
 * @param typeOuvrage equipment type read from the file, used only in the log message
 * @param odreConfig  configuration providing the expected headers
 * @throws IOException if the file cannot be opened
 */
private static void getIfSubstationsOrLogError(Map<String, Reader> mapResult, Path path, List<String> headers, String typeOuvrage, OdreConfig odreConfig) throws IOException {
if (new HashSet<>(headers).containsAll(odreConfig.substationsExpectedHeaders())) {
// NOTE(review): the reader argument is evaluated before putIfAbsent runs, so the file is
// opened even when SUBSTATIONS is already mapped; that extra reader is never stored or closed here.
mapResult.putIfAbsent(SUBSTATIONS, new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path)), StandardCharsets.UTF_8)));
} else if (isAerialOrUnderground(headers, odreConfig)) {
LOGGER.error("The file {} has no equipment type : {}", path.getFileName(), typeOuvrage);
} else {
// Collect the expected substation headers selected by the isChangedHeaders predicate for reporting.
List<String> notFoundHeaders = odreConfig.substationsExpectedHeaders().stream().filter(isChangedHeaders(headers)).collect(Collectors.toList());
LOGGER.error(HEADERS_OF_FILE_HAS_CHANGED, path.getFileName(), notFoundHeaders);
logHeaderError(path, notFoundHeaders);
}
}

Expand All @@ -106,13 +148,17 @@ private static boolean isAerialOrUnderground(List<String> headers, OdreConfig od
new HashSet<>(headers).containsAll(odreConfig.undergroundLinesExpectedHeaders());
}

private static void getResultOrLogError(List<String> headers, List<String> expectedHeaders, Map<String, BufferedReader> mapResult, String fileType, Path path) throws IOException {
/**
 * Registers a reader for a file whose headers match the expected ones, or logs the headers
 * that were not found.
 *
 * @param headers         column names read from the file
 * @param expectedHeaders headers that must all be present for the file to be accepted
 * @param mapResult       map of validated readers, keyed by file kind; a new reader on
 *                        {@code path} is stored under {@code fileType} unless that key is
 *                        already present
 * @param fileType        key under which the reader is stored
 * @param path            file being validated
 * @throws IOException if the file cannot be opened
 */
private static void getResultOrLogError(List<String> headers, List<String> expectedHeaders, Map<String, Reader> mapResult, String fileType, Path path) throws IOException {
if (new HashSet<>(headers).containsAll(expectedHeaders)) {
// NOTE(review): the reader argument is evaluated before putIfAbsent runs, so the file is
// opened even when fileType is already mapped; that extra reader is never stored or closed here.
mapResult.putIfAbsent(fileType, new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path)), StandardCharsets.UTF_8)));
} else {
// Collect the expected headers selected by the isChangedHeaders predicate for reporting.
List<String> notFoundHeaders = expectedHeaders.stream().filter(isChangedHeaders(headers)).collect(Collectors.toList());
LOGGER.error(HEADERS_OF_FILE_HAS_CHANGED, path.getFileName(), notFoundHeaders);
logHeaderError(path, notFoundHeaders);
}
}

/**
 * Logs, at error level, that the headers of a file have changed.
 *
 * @param path            file whose headers were checked; only its file name is logged
 * @param notFoundHeaders expected headers that were not found in the file
 */
private static void logHeaderError(Path path, List<String> notFoundHeaders) {
    final Path fileName = path.getFileName();
    LOGGER.error(HEADERS_OF_FILE_HAS_CHANGED, fileName, notFoundHeaders);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import com.powsybl.iidm.geodata.utils.GeoShapeDeserializer;
import com.powsybl.iidm.geodata.utils.LineGraph;
import com.powsybl.iidm.network.extensions.Coordinate;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.lang3.tuple.Pair;
import org.jgrapht.Graph;
Expand All @@ -23,13 +25,11 @@
import org.jgrapht.traverse.BreadthFirstIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.supercsv.io.CsvMapReader;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Collections.min;
Expand All @@ -47,16 +47,14 @@ private GeographicDataParser() {
private static final Logger LOGGER = LoggerFactory.getLogger(GeographicDataParser.class);
private static final int THRESHOLD = 5;

public static Map<String, SubstationGeoData> parseSubstations(BufferedReader bufferedReader, OdreConfig odreConfig) {
public static Map<String, SubstationGeoData> parseSubstations(Reader reader, OdreConfig odreConfig) {
Map<String, SubstationGeoData> substations = new HashMap<>();
StopWatch stopWatch = new StopWatch();
stopWatch.start();
int substationCount = 0;

try (CsvMapReader mapReader = new CsvMapReader(bufferedReader, FileValidator.CSV_PREFERENCE)) {
final String[] headers = mapReader.getHeader(true);
Map<String, String> row;
while ((row = mapReader.read(headers)) != null) {
try {
Iterable<CSVRecord> records = CSVParser.parse(reader, FileValidator.CSV_FORMAT);
for (CSVRecord row : records) {
String id = row.get(odreConfig.substationIdColumn());
double lon = Double.parseDouble(row.get(odreConfig.substationLongitudeColumn()));
double lat = Double.parseDouble(row.get(odreConfig.substationLatitudeColumn()));
Expand All @@ -70,6 +68,7 @@ public static Map<String, SubstationGeoData> parseSubstations(BufferedReader buf
} catch (IOException e) {
throw new UncheckedIOException(e);
}

LOGGER.info("{} substations read in {} ms", substationCount, stopWatch.getTime());
return substations;
}
Expand Down Expand Up @@ -97,15 +96,15 @@ public static Pair<String, String> substationOrder(Map<String, SubstationGeoData
}
}

public static Map<String, LineGeoData> parseLines(BufferedReader aerialLinesBr, BufferedReader undergroundLinesBr,
public static Map<String, LineGeoData> parseLines(Reader aerialLinesReader, Reader undergroundLinesReader,
Map<String, SubstationGeoData> stringSubstationGeoDataMap, OdreConfig odreConfig) {
StopWatch stopWatch = new StopWatch();
stopWatch.start();

Map<String, Graph<Coordinate, Object>> graphByLine = new HashMap<>();

parseLine(graphByLine, aerialLinesBr, odreConfig);
parseLine(graphByLine, undergroundLinesBr, odreConfig);
parseLine(graphByLine, aerialLinesReader, odreConfig);
parseLine(graphByLine, undergroundLinesReader, odreConfig);

Map<String, LineGeoData> lines = new HashMap<>();

Expand Down Expand Up @@ -159,19 +158,18 @@ public static Map<String, LineGeoData> parseLines(BufferedReader aerialLinesBr,
return lines;
}

private static void parseLine(Map<String, Graph<Coordinate, Object>> graphByLine, BufferedReader br, OdreConfig odreConfig) {

try (CsvMapReader mapReader = new CsvMapReader(br, FileValidator.CSV_PREFERENCE)) {
final String[] headers = mapReader.getHeader(true);
Map<String, String> row;
while ((row = mapReader.read(headers)) != null) {
Map<String, String> idsColumnNames = odreConfig.idsColumnNames();
private static void parseLine(Map<String, Graph<Coordinate, Object>> graphByLine, Reader reader, OdreConfig odreConfig) {
try {
Iterable<CSVRecord> records = CSVParser.parse(reader, FileValidator.CSV_FORMAT);
Map<String, String> idsColumnNames = odreConfig.idsColumnNames();
for (CSVRecord row : records) {
List<String> ids = Stream.of(row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_1)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_2)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_3)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_4)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_5))).filter(Objects::nonNull).collect(Collectors.toList());
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_5))).filter(Objects::nonNull).toList();
GeoShape geoShape = GeoShapeDeserializer.read(row.get(odreConfig.geoShapeColumn()));

if (ids.isEmpty() || geoShape.coordinates().isEmpty()) {
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@
import com.powsybl.iidm.geodata.elements.LineGeoData;
import com.powsybl.iidm.geodata.elements.SubstationGeoData;
import com.powsybl.iidm.geodata.utils.InputUtils;
import org.apache.commons.io.IOUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
Expand All @@ -31,9 +29,9 @@ protected OdreGeoDataCsvLoader() {
}

public static List<SubstationGeoData> getSubstationsGeoData(Path path, OdreConfig odreConfig) {
try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path))))) {
try (Reader reader = new BufferedReader(new InputStreamReader(InputUtils.toBomInputStream(Files.newInputStream(path))))) {
if (FileValidator.validateSubstations(path, odreConfig)) {
return new ArrayList<>(GeographicDataParser.parseSubstations(bufferedReader, odreConfig).values());
return new ArrayList<>(GeographicDataParser.parseSubstations(reader, odreConfig).values());
} else {
return Collections.emptyList();
}
Expand All @@ -44,14 +42,18 @@ public static List<SubstationGeoData> getSubstationsGeoData(Path path, OdreConfi

public static List<LineGeoData> getLinesGeoData(Path aerialLinesFilePath, Path undergroundLinesFilePath,
Path substationPath, OdreConfig odreConfig) {
Map<String, BufferedReader> mapValidation = FileValidator.validateLines(List.of(substationPath,
Map<String, Reader> mapValidation = FileValidator.validateLines(List.of(substationPath,
aerialLinesFilePath, undergroundLinesFilePath), odreConfig);
if (mapValidation.size() == 3) {
return new ArrayList<>(GeographicDataParser.parseLines(mapValidation.get(FileValidator.AERIAL_LINES),
mapValidation.get(FileValidator.UNDERGROUND_LINES),
GeographicDataParser.parseSubstations(mapValidation.get(FileValidator.SUBSTATIONS), odreConfig), odreConfig).values());
} else {
return Collections.emptyList();
List<LineGeoData> result = Collections.emptyList();
try {
if (mapValidation.size() == 3) {
result = new ArrayList<>(GeographicDataParser.parseLines(mapValidation.get(FileValidator.AERIAL_LINES),
mapValidation.get(FileValidator.UNDERGROUND_LINES),
GeographicDataParser.parseSubstations(mapValidation.get(FileValidator.SUBSTATIONS), odreConfig), odreConfig).values());
}
} finally {
mapValidation.values().forEach(IOUtils::closeQuietly);
}
return result;
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
code_poste;nom_poste;fonction;etat;tension;longitude_poste;geo_point_poste
V.SEP;VILLERS-SAINT-SEPULCRE;POSTE DE TRANSFORMATION;E;225kV;2.2014874715868253;49.35864164277698,2.2014874715868253
V.POR;VIEUX-PORT;POSTE DE TRANSFORMATION;E;225kV;5.369362995699234;43.28964383589903,5.369362995699234
MONDI;MONDION;POSTE DE TRANSFORMATION;E;225kV;0.45310103783373573;46.92751912107706,0.45310103783373573
B.THO;BARRE-THOMAS (LA);POSTE DE TRANSFORMATION;E;90kV;-1.72574736112968;48.11054982058195,-1.72574736112968
ZV.BE;PIQUAGE A VILLIERS LE BEL;POINT DE PIQUAGE;E;225kV;2.408444606752255;49.010164497815914,2.408444606752255
1AVAL;AVALATS(LES);POSTE DE TRANSFORMATION;E;<45kV;2.239481110152893;43.94243916390505,2.239481110152893
1LART;LARTIGE /1ZVLE 30KV;POSTE DE TRANSFORMATION;E;<45kV;1.5316152230911364;45.80749288269255,1.5316152230911364
1ONER;ONERA /AVRIEUX 10KV;POSTE DE TRANSFORMATION;E;<45kV;6.712988386882246;45.21385121248377,6.712988386882246
1SSFO;SAINT-FONS /BELLE-ETOILE;POSTE DE TRANSFORMATION;E;<45kV;4.8510704590363405;45.697346813171535,4.8510704590363405
A.ADO;AIRE-SUR-ADOUR;POSTE DE TRANSFORMATION;E;63kV;-0.2621029649557296;43.70687313781712,-0.2621029649557296
SUB.1;SUBSTATION 1;POSTE DE TRANSFORMATION;E;225kV;2.2;49.3,2.2
SUB.2;SUBSTATION 2;POSTE DE TRANSFORMATION;E;225kV;5.3;43.2,5.3
SUB.3;SUBSTATION 3;POSTE DE TRANSFORMATION;E;225kV;0.4;46.9,0.4
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
<commonscli.version>1.8.0</commonscli.version>
<commonscompress.version>1.26.2</commonscompress.version>
<commons-configuration2.version>2.10.1</commons-configuration2.version>
<commons-csv.version>1.11.0</commons-csv.version>
<commonsio.version>2.16.1</commonsio.version>
<commonslang3.version>3.14.0</commonslang3.version>
<commonsmath3.version>3.6.1</commonsmath3.version>
Expand Down