[feat] Functionnal extractor
This commit is contained in:
parent
93d0df7759
commit
d4e04eaef6
2 changed files with 275 additions and 1 deletions
5
pom.xml
5
pom.xml
|
@ -28,6 +28,11 @@
|
|||
<artifactId>opencsv</artifactId>
|
||||
<version>5.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.json</groupId>
|
||||
<artifactId>json</artifactId>
|
||||
<version>20230618</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -11,10 +11,28 @@ import java.io.InputStreamReader;
|
|||
import java.io.Reader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.Format;
|
||||
import java.text.MessageFormat;
|
||||
import java.text.NumberFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.opencsv.CSVParserBuilder;
|
||||
import com.opencsv.CSVReader;
|
||||
import com.opencsv.CSVReaderBuilder;
|
||||
|
@ -23,6 +41,9 @@ import com.opencsv.ICSVParser;
|
|||
import com.opencsv.ICSVWriter;
|
||||
import com.opencsv.exceptions.CsvValidationException;
|
||||
|
||||
import fr.u_paris.gla.project.io.NetworkFormat;
|
||||
import fr.u_paris.gla.project.utils.GPS;
|
||||
|
||||
/** Code of an extractor for the data from IDF mobilite.
|
||||
*
|
||||
* @author Emmanuel Bigeon */
|
||||
|
@ -35,6 +56,76 @@ public class IDFMNetworkExtractor {
|
|||
private static final String TRACE_FILE_URL = "https://data.iledefrance-mobilites.fr/api/explore/v2.1/catalog/datasets/traces-des-lignes-de-transport-en-commun-idfm/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B";
|
||||
private static final String STOPS_FILE_URL = "https://data.iledefrance-mobilites.fr/api/explore/v2.1/catalog/datasets/arrets-lignes/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B";
|
||||
|
||||
// IDF mobilite csv formats
|
||||
private static final int IDFM_TRACE_ID_INDEX = 0;
|
||||
private static final int IDFM_TRACE_SNAME_INDEX = 1;
|
||||
private static final int IDFM_TRACE_SHAPE_INDEX = 6;
|
||||
|
||||
private static final int IDFM_STOPS_RID_INDEX = 0;
|
||||
private static final int IDFM_STOPS_NAME_INDEX = 5;
|
||||
private static final int IDFM_STOPS_LON_INDEX = 6;
|
||||
private static final int IDFM_STOPS_LAT_INDEX = 7;
|
||||
|
||||
// Magically chosen values
|
||||
/** A number of stops on each line */
|
||||
private static final int GUESS_STOPS_BY_LINE = 5;
|
||||
/** Maximal speed in km/h */
|
||||
private static final double MAX_SPEED = 5;
|
||||
/** Distance to reach maximal speed in km */
|
||||
private static final double ACCELERATION_DISTANCE = 0.1;
|
||||
|
||||
// Well named constants
|
||||
private static final double _250_METERS = .25;
|
||||
private static final long SECONDS_IN_HOURS = 3_600;
|
||||
|
||||
private static final Format GPS_FORMATTER = NetworkFormat.getGPSFormatter();
|
||||
|
||||
public static class StopEntry {
|
||||
private String lname;
|
||||
public final double longitude;
|
||||
public final double latitude;
|
||||
|
||||
/** Create the stop
|
||||
*
|
||||
* @param lname
|
||||
* @param longitude
|
||||
* @param latitude */
|
||||
public StopEntry(String lname, double longitude, double latitude) {
|
||||
super();
|
||||
this.lname = lname;
|
||||
this.longitude = longitude;
|
||||
this.latitude = latitude;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MessageFormat.format("{0} [{1}, {2}]", this.lname, this.longitude,
|
||||
this.latitude);
|
||||
}
|
||||
}
|
||||
|
||||
public static final class UnidentifiedStopEntry extends StopEntry {
|
||||
/** Create the stop
|
||||
*
|
||||
* @param longitude
|
||||
* @param latitude */
|
||||
public UnidentifiedStopEntry(double longitude, double latitude) {
|
||||
super("Unidentified", longitude, latitude);
|
||||
}
|
||||
|
||||
List<StopEntry> candidates = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "UnidentifiedStop [candidates=" + this.candidates + "]";
|
||||
}
|
||||
}
|
||||
|
||||
private static final class TraceEntry {
|
||||
String lname;
|
||||
List<List<StopEntry>> stops = new ArrayList<>();
|
||||
}
|
||||
|
||||
public static void readCSVFromURL(String url, Consumer<String[]> contentLineConsumer)
|
||||
throws IOException {
|
||||
ICSVParser parser = new CSVParserBuilder().withSeparator(';').build();
|
||||
|
@ -66,14 +157,192 @@ public class IDFMNetworkExtractor {
|
|||
return;
|
||||
}
|
||||
|
||||
Map<String, TraceEntry> traces = new HashMap<>();
|
||||
try {
|
||||
readCSVFromURL(TRACE_FILE_URL, (String[] line) -> {
|
||||
TraceEntry entry = new TraceEntry();
|
||||
entry.lname = line[IDFM_TRACE_SNAME_INDEX];
|
||||
List<List<StopEntry>> buildPaths = buildPaths(
|
||||
line[IDFM_TRACE_SHAPE_INDEX]);
|
||||
entry.stops.addAll(buildPaths);
|
||||
if (buildPaths.isEmpty()) {
|
||||
LOGGER.severe(() -> MessageFormat.format(
|
||||
"Line {0} has no provided itinerary and was ignored",
|
||||
entry.lname));
|
||||
} else {
|
||||
traces.put(line[IDFM_TRACE_ID_INDEX], entry);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
LOGGER.log(Level.SEVERE, "Error while reading the line paths", e);
|
||||
}
|
||||
|
||||
List<StopEntry> stops = new ArrayList<>(traces.size() * GUESS_STOPS_BY_LINE);
|
||||
try {
|
||||
readCSVFromURL(STOPS_FILE_URL, (String[] line) -> {
|
||||
StopEntry entry = new StopEntry(line[IDFM_STOPS_NAME_INDEX],
|
||||
Double.parseDouble(line[IDFM_STOPS_LON_INDEX]),
|
||||
Double.parseDouble(line[IDFM_STOPS_LAT_INDEX]));
|
||||
String rid = line[IDFM_STOPS_RID_INDEX];
|
||||
BiFunction<? super String, ? super TraceEntry, ? extends TraceEntry> func = (
|
||||
k, trace) -> {
|
||||
for (List<StopEntry> path : trace.stops) {
|
||||
for (StopEntry stopEntry : path) {
|
||||
if (stopEntry instanceof UnidentifiedStopEntry
|
||||
&& GPS.distance(entry.latitude, entry.longitude,
|
||||
stopEntry.latitude,
|
||||
stopEntry.longitude) < _250_METERS) {
|
||||
((UnidentifiedStopEntry) stopEntry).candidates.add(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
return trace;
|
||||
};
|
||||
traces.computeIfPresent(rid, func);
|
||||
stops.add(entry);
|
||||
});
|
||||
} catch (IOException e) {
|
||||
LOGGER.log(Level.SEVERE, "Error while reading the stops", e);
|
||||
}
|
||||
|
||||
Set<String> toRemove = new HashSet<>();
|
||||
for (Entry<String, TraceEntry> traceEntry : traces.entrySet()) {
|
||||
TraceEntry trace = traceEntry.getValue();
|
||||
for (List<StopEntry> path : trace.stops) {
|
||||
for (int i = 0; i < path.size(); i++) {
|
||||
StopEntry stop = path.get(i);
|
||||
if (stop instanceof UnidentifiedStopEntry)
|
||||
stop = resolve((UnidentifiedStopEntry) stop);
|
||||
if (stop instanceof UnidentifiedStopEntry
|
||||
&& ((UnidentifiedStopEntry) stop).candidates.isEmpty()) {
|
||||
LOGGER.severe("Missing stop for line " + trace.lname
|
||||
+ ". Line will be removed");
|
||||
toRemove.add(traceEntry.getKey());
|
||||
continue;
|
||||
}
|
||||
path.set(i, stop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (String string : toRemove) {
|
||||
traces.remove(string);
|
||||
}
|
||||
|
||||
// Export content in required format
|
||||
try (FileWriter writer = new FileWriter(args[0], StandardCharsets.UTF_8)) {
|
||||
CSVWriterBuilder wBuilder = new CSVWriterBuilder(writer).withSeparator(';');
|
||||
try (ICSVWriter csv = wBuilder.build()) {
|
||||
// TODO write the content
|
||||
for (Entry<String, TraceEntry> traceEntry : traces.entrySet()) {
|
||||
Map<StopEntry, Set<StopEntry>> lineSegments = new HashMap<>();
|
||||
String[] nextLine = new String[NetworkFormat.NUMBER_COLUMNS];
|
||||
nextLine[NetworkFormat.LINE_INDEX] = traceEntry.getValue().lname;
|
||||
for (List<StopEntry> path : traceEntry.getValue().stops) {
|
||||
for (int i = 0; i < path.size() - 1; i++) {
|
||||
StopEntry stop1 = path.get(i);
|
||||
lineSegments.putIfAbsent(stop1, new HashSet<>());
|
||||
StopEntry stop2 = path.get(i + 1);
|
||||
if (!lineSegments.get(stop1).contains(stop2)) {
|
||||
fillStation(stop1, nextLine, NetworkFormat.START_INDEX);
|
||||
fillStation(stop2, nextLine, NetworkFormat.STOP_INDEX);
|
||||
double distance = GPS.distance(stop1.latitude,
|
||||
stop1.longitude, stop2.latitude, stop2.longitude);
|
||||
nextLine[NetworkFormat.DISTANCE_INDEX] = NumberFormat
|
||||
.getInstance(Locale.ENGLISH).format(distance);
|
||||
nextLine[NetworkFormat.DURATION_INDEX] = formatTime(
|
||||
(long) Math.ceil(distanceToTime(distance)
|
||||
* SECONDS_IN_HOURS));
|
||||
nextLine[NetworkFormat.VARIANT_INDEX] = Integer
|
||||
.toString(lineSegments.get(stop1).size());
|
||||
csv.writeNext(nextLine);
|
||||
lineSegments.get(stop1).add(stop2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOGGER.log(Level.SEVERE, e, () -> "Could not write in file " + args[1]);
|
||||
}
|
||||
}
|
||||
|
||||
/** @param distanceToTime
|
||||
* @return */
|
||||
private static String formatTime(long time) {
|
||||
NumberFormat format = NumberFormat.getInstance(Locale.ENGLISH);
|
||||
format.setMinimumIntegerDigits(2);
|
||||
return MessageFormat.format("{0}:{1}", format.format(time / 60),
|
||||
format.format(time % 60));
|
||||
}
|
||||
|
||||
/** A tool method to give a delay to go through a certain distance.
|
||||
* <p>
|
||||
* This is a model with an linear acceleration and deceleration periods and a
|
||||
* constant speed in between.
|
||||
*
|
||||
* @param distance the distance (in km)
|
||||
* @return the duration of the trip (in hours) */
|
||||
private static double distanceToTime(double distance) {
|
||||
return Math.max(0, distance - 2 * ACCELERATION_DISTANCE) / MAX_SPEED
|
||||
+ Math.pow(Math.min(distance, 2 * ACCELERATION_DISTANCE) / MAX_SPEED, 2);
|
||||
}
|
||||
|
||||
/** @param stop1
|
||||
* @param nextLine
|
||||
* @param i */
|
||||
private static void fillStation(StopEntry stop, String[] nextLine, int index) {
|
||||
nextLine[index] = stop.lname;
|
||||
nextLine[index + 1] = MessageFormat.format("{0}, {1}",
|
||||
GPS_FORMATTER.format(stop.latitude),
|
||||
GPS_FORMATTER.format(stop.longitude));
|
||||
|
||||
}
|
||||
|
||||
/** @param stop
|
||||
* @return */
|
||||
private static StopEntry resolve(UnidentifiedStopEntry stop) {
|
||||
|
||||
if (stop.candidates.isEmpty()) {
|
||||
LOGGER.severe("Unable to find stop name, will use a placeholder");
|
||||
return stop;
|
||||
}
|
||||
if (stop.candidates.size() == 1) {
|
||||
return stop.candidates.get(0);
|
||||
}
|
||||
Collections.sort(stop.candidates,
|
||||
(Comparator<? super StopEntry>) (StopEntry s1,
|
||||
StopEntry s2) -> (int) Math.signum((GPS.distance(stop.latitude,
|
||||
stop.longitude, s1.latitude, s1.longitude)
|
||||
- GPS.distance(stop.latitude, stop.longitude, s2.latitude,
|
||||
s2.longitude))));
|
||||
|
||||
return stop.candidates.get(0);
|
||||
}
|
||||
|
||||
private static List<List<StopEntry>> buildPaths(String pathsJSON) {
|
||||
List<List<StopEntry>> all = new ArrayList<>();
|
||||
try {
|
||||
JSONObject json = new JSONObject(pathsJSON);
|
||||
JSONArray paths = json.getJSONArray("coordinates");
|
||||
for (int i = 0; i < paths.length(); i++) {
|
||||
JSONArray path = paths.getJSONArray(i);
|
||||
List<StopEntry> stopsPath = new ArrayList<>();
|
||||
for (int j = 0; j < path.length(); j++) {
|
||||
JSONArray coordinates = path.getJSONArray(j);
|
||||
|
||||
StopEntry entry = new UnidentifiedStopEntry(coordinates.getDouble(0),
|
||||
coordinates.getDouble(1));
|
||||
|
||||
stopsPath.add(entry);
|
||||
}
|
||||
|
||||
all.add(stopsPath);
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
// Ignoring invalid element!
|
||||
LOGGER.log(Level.FINE, "Invalid json element " + pathsJSON, e); //$NON-NLS-1$
|
||||
}
|
||||
return all;
|
||||
}
|
||||
}
|
||||
|
|
Reference in a new issue