Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Some other syntax/semantics errors to bypass or correct:

placemark coordinates elements without content
placemarks with no content
references to gx tags without an XML namespace declaration
XML declares UTF-8 encoding but the content is actually windows-1252
Document tag nested inside another Document tag
  • Loading branch information
patrickdalla committed May 16, 2024
1 parent 05a096e commit 4ecdbb1
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 46 deletions.
94 changes: 52 additions & 42 deletions iped-geo/src/main/java/iped/geo/parsers/GeofileParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,23 +75,26 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class, new ParsingEmbeddedDocumentExtractor(context));
List<Object> featureList = FeatureListFactoryRegister.getFeatureList(mimeType).parseFeatureList(file);

int cont = 1;
int virtualId = 0;
for (Iterator<Object> iterator = featureList.iterator(); iterator.hasNext();) {
Object o = iterator.next();
if (o instanceof SimpleFeature) {
SimpleFeature feature = (SimpleFeature) o;
String name = feature.getName().getLocalPart();
if (name == null)
name = "marcador";
featureParser(feature, -1, name + cont, handler, metadata, extractor);
cont++;
}
if (o instanceof Folder) {
Folder folder = (Folder) o;
if (featureList != null && !featureList.isEmpty()) {// no supported features found in KML
int cont = 1;
int virtualId = 0;
for (Iterator<Object> iterator = featureList.iterator(); iterator.hasNext();) {
Object o = iterator.next();
if (o instanceof SimpleFeature) {
SimpleFeature feature = (SimpleFeature) o;
String name = feature.getName().getLocalPart();
if (name == null)
name = "marcador";
featureParser(feature, -1, name + cont, handler, metadata, extractor);
cont++;
}
if (o instanceof Folder) {
Folder folder = (Folder) o;

virtualId = folderParser(folder, -1, handler, metadata, extractor, virtualId);
virtualId = recursiveFolderParse(virtualId, folder, handler, metadata, extractor, context, virtualId);
virtualId = folderParser(folder, -1, handler, metadata, extractor, virtualId);
virtualId = recursiveFolderParse(virtualId, folder, handler, metadata, extractor, context,
virtualId);
}
}
}
} catch (Throwable e) {
Expand Down Expand Up @@ -201,31 +204,36 @@ private void featureParser(SimpleFeature feature, int parentId, String name, Con
Double lat = null;
Double alt = null;
Geometry g = (Geometry) feature.getDefaultGeometry();
Point p = null;
if (g instanceof Point) {
p = (Point) g;
} else {
p = g.getCentroid();
FeatureJSON fjson = new FeatureJSON();
StringWriter writer = new StringWriter();

feature.setAttribute("description", StringEscapeUtils.escapeJavaScript(feature.getAttribute("description").toString()));
fjson.writeFeature(feature, writer);

String str = writer.toString();
kmeta.set(FEATURE_STRING, str);
}
Coordinate[] coords = p.getCoordinates();
lon = coords[0].x;
lat = coords[0].y;
alt = coords[0].z;
if (g != null) {// empty placemark tags returns null
Point p = null;
if (g instanceof Point) {
p = (Point) g;
} else {
p = g.getCentroid();
FeatureJSON fjson = new FeatureJSON();
StringWriter writer = new StringWriter();

if (lat != null && lat != 0.0 && lon != null && lon != 0.0) {
kmeta.set(ExtraProperties.LOCATIONS, lat + ";" + lon);
}
feature.setAttribute("description",
StringEscapeUtils.escapeJavaScript(feature.getAttribute("description").toString()));
fjson.writeFeature(feature, writer);

if (alt != null) {
kmeta.set(Metadata.ALTITUDE, alt);
String str = writer.toString();
kmeta.set(FEATURE_STRING, str);
}
Coordinate[] coords = p.getCoordinates();
if (coords != null && coords.length > 0) {
lon = coords[0].x;
lat = coords[0].y;
alt = coords[0].z;
}

if (lat != null && lat != 0.0 && lon != null && lon != 0.0) {
kmeta.set(ExtraProperties.LOCATIONS, lat + ";" + lon);
}

if (alt != null) {
kmeta.set(Metadata.ALTITUDE, alt);
}
}

extractor.parseEmbedded(featureStream, handler, kmeta, false);
Expand All @@ -239,9 +247,11 @@ private byte[] generateFeatureHtml(SimpleFeature feat) throws UnsupportedEncodin
ByteArrayOutputStream bout = new ByteArrayOutputStream();
PrintWriter out = new PrintWriter(new OutputStreamWriter(bout, "UTF-8"));
Object o = feat.getDefaultGeometryProperty().getValue();
out.println(o.toString());
out.println(feat.toString());
out.flush();
if (o != null) {
out.println(o.toString());
out.println(feat.toString());
out.flush();
}
return bout.toByteArray();
}
}
40 changes: 36 additions & 4 deletions iped-geo/src/main/java/iped/geo/parsers/kmlstore/KMLParser.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package iped.geo.parsers.kmlstore;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
Expand All @@ -27,6 +30,34 @@ public class KMLParser {
private static org.slf4j.Logger LOGGER = LoggerFactory.getLogger(KMLParser.class);

/**
 * Parses a KML file into a list of features, retrying once with a patched
 * copy of the content when the first attempt fails with one of two known,
 * recoverable errors:
 * <ul>
 * <li>the {@code gx} prefix is used without a namespace declaration: the
 * declaration is injected into the {@code <kml ...>} start tag;</li>
 * <li>the content is not valid UTF-8 although UTF-8 is declared: the declared
 * encoding is switched to windows-1252.</li>
 * </ul>
 * Any other failure, and any failure of the retry itself, is propagated.
 *
 * @param file the KML file to parse
 * @return the features produced by {@link #parse(InputStream)}
 * @throws SchemaException propagated from {@link #parse(InputStream)}
 * @throws IOException     if the file cannot be read or parsed
 * @throws JDOMException   propagated from {@link #parse(InputStream)}
 */
public static List<Object> parse(File file) throws SchemaException, IOException, JDOMException {
    try (FileInputStream fis = new FileInputStream(file)) {
        return parse(fis);
    } catch (Exception e) {
        // getMessage() may be null; guard it so the original failure is
        // rethrown instead of being masked by a NullPointerException.
        String message = e.getMessage();
        if (message != null) {
            if (message.contains("The prefix \"gx\" for element \"gx:altitudeMode\" is not bound")) {
                return reparseWithPatchedText(file, "<kml ",
                        "<kml xmlns:gx=\"http://www.google.com/kml/ext/2.2\" ");
            }
            if (message.contains("byte UTF-8 sequence")) {
                return reparseWithPatchedText(file, "encoding=\"UTF-8\"", "encoding=\"windows-1252\"");
            }
        }
        throw e;
    }
}

/**
 * Re-reads the file, replaces every occurrence of {@code target} with
 * {@code replacement} and parses the patched content.
 */
private static List<Object> reparseWithPatchedText(File file, String target, String replacement)
        throws SchemaException, IOException, JDOMException {
    byte[] bytes;
    try (FileInputStream fis = new FileInputStream(file)) {
        bytes = fis.readAllBytes();
    }
    // ISO-8859-1 maps every byte to exactly one char and back, so the ASCII
    // replacement is applied without altering any other byte of the file,
    // regardless of the platform default charset or the file's real encoding.
    String text = new String(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
    text = text.replace(target, replacement);
    return parse(new ByteArrayInputStream(text.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1)));
}

public static List<Object> parse(InputStream is) throws SchemaException, IOException, JDOMException {
/*
* A list to collect features as we create them.
*/
Expand All @@ -49,10 +80,11 @@ public static List<Object> parse(File file) throws SchemaException, IOException,
SimpleFeatureBuilder featureBuilder = new SimpleFeatureBuilder(placemarkFeatureType);

SAXBuilder saxBuilder = new SAXBuilder();
Document document = saxBuilder.build(file);
Document document = saxBuilder.build(is);

Element kml = document.getRootElement();
Element placemarks = kml.getChildren().get(0);

List<Element> pms = placemarks.getChildren();

ArrayList<Exception> exList = new ArrayList<Exception>();
Expand All @@ -67,7 +99,7 @@ public static List<Object> parse(File file) throws SchemaException, IOException,
features.add(parsePlacemark(ele, featureBuilder, exList));
}
}
if (ele.getName().toLowerCase().equals("folder")) {
if (ele.getName().toLowerCase().equals("folder") || ele.getName().toLowerCase().equals("document")) {
Folder f = parseFolder(ele, featureBuilder, exList);
features.add(f);
}
Expand Down Expand Up @@ -246,7 +278,7 @@ public static Geometry parseGeometry(Element ele, GeometryFactory geometryFactor
i++;
}

if (coords.length < 3) {
if (coords.length < 4) {
geo = geometryFactory.createLineString(coords);
} else {
if (ele.getName().toLowerCase().equals("linestring")) {
Expand Down Expand Up @@ -317,7 +349,7 @@ public static Folder parseFolder(Element pm, SimpleFeatureBuilder featureBuilder
if (ele.getName().toLowerCase().equals("placemark")) {
features.add(parsePlacemark(ele, featureBuilder, exList));
}
if (ele.getName().toLowerCase().equals("folder")) {
if (ele.getName().toLowerCase().equals("folder") || ele.getName().toLowerCase().equals("document")) {
features.add(parseFolder(ele, featureBuilder, exList));
}
if (ele.getName().toLowerCase().equals("name")) {
Expand Down

0 comments on commit 4ecdbb1

Please sign in to comment.