commit ca9ee470f4174e301c1d2f2e61c13dd149cc99f2
parent 2f5345071c0eb8d8a91ecec66fbbc7abaf8cda65
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Thu, 9 Apr 2026 18:48:36 +0200
fix(parser): validate xyz rows and continue batch imports
Diffstat:
4 files changed, 254 insertions(+), 139 deletions(-)
diff --git a/README.md b/README.md
@@ -30,6 +30,7 @@ Each file gets its own layer group. Layers are styled with pre-built QML styles
3. Select one or more `.xyz` inversion files.
4. If the file metadata declares an EPSG code, imported layers use that CRS; otherwise the loader falls back to the project CRS, then to EPSG:4326.
5. Three CSV files (`.points.csv`, `.doi.csv`, `.layers.csv`) are written beside each source file, and the corresponding layers are added to the project with `points` above `doi` above `layers`.
+6. Invalid or malformed `.xyz` inputs are rejected with explicit validation errors, and when multiple files are selected the loader continues with the remaining files while reporting the filenames of the failing inputs.
## XYZ File Format
diff --git a/tem_loader/core.py b/tem_loader/core.py
@@ -47,17 +47,6 @@ def is_header_line(line):
return detect_format(normalize_header_tokens(line)) is not None
-def count_header_lines(path, comment_char='/'):
- with open(path, 'r') as f:
- for i, line in enumerate(f):
- stripped = line.lstrip()
- if is_header_line(stripped):
- return i
- if not stripped.startswith(comment_char):
- return i
- return 0
-
-
def detect_source_epsg(path, comment_char='/'):
with open(path, 'r') as f:
for line in f:
@@ -108,135 +97,145 @@ def count_valid_layers(row, res_cols, thick_cols):
def process_xyz(path):
- skiprows = count_header_lines(path)
- with open(path, 'r') as f:
- lines = f.readlines()
-
- data_lines = lines[skiprows:]
- headers = normalize_header_tokens(data_lines[0])
- source_format = detect_format(headers)
- if source_format is None:
- raise ValueError('Unsupported XYZ header format')
- data_rows = []
- for line in data_lines[1:]:
- line = line.strip()
- if not line:
- continue
- values = line.split()
- # When the data has one more column than the header, pandas would
- # treat the first column as the row index. Replicate that here.
- if len(values) == len(headers) + 1:
- values = values[1:]
- data_rows.append(dict(zip(headers, values)))
-
- if source_format == 'temimage':
- res_cols = get_numbered_columns(headers, 'Res_')
- thick_cols = get_numbered_columns(headers, 'Thick_')
- else:
- res_cols = get_numbered_columns(headers, 'RHO_')
- thick_cols = get_numbered_columns(headers, 'THK_')
- dep_top_cols = get_numbered_columns(headers, 'DEP_TOP_')
- dep_bot_cols = get_numbered_columns(headers, 'DEP_BOT_')
-
points = []
doi_points = []
layers = []
+ headers = None
- for row in data_rows:
- x = float(row['X'])
- y = float(row['Y'])
-
- if source_format == 'temimage':
- z = float(row['Z'])
- doi = float(row['DOI'])
- data_residual = float(row['DataResidual'])
- n_layers = parse_num_layers(row['NumLayers'])
- line = row['Line']
- station_no = row['StationNo']
- else:
- z = float(row['ELEVATION'])
- doi = float(row['DOI_STANDARD'])
- data_residual = float(row['RESDATA'])
- line = str(int(float(row['LINE_NO'])))
- record = int(float(row['RECORD']))
- station_no = f'{line}_{record:05d}'
- n_layers = count_valid_layers(row, res_cols, thick_cols)
-
- z_doi = z - doi
- point_wkt = f'POINT Z ({x} {y} {z})'
- doi_wkt = f'POINT Z ({x} {y} {z_doi})'
-
- points.append({
- 'X': x,
- 'Y': y,
- 'Z': z,
- 'Line': line,
- 'StationNo': station_no,
- 'DataResidual': data_residual,
- 'NumLayers': n_layers,
- 'Geometry': point_wkt,
- })
- doi_points.append({
- 'X': x,
- 'Y': y,
- 'Z': z_doi,
- 'DOI': doi,
- 'ZDOI': z_doi,
- 'Geometry': doi_wkt,
- })
-
- max_layers = min(len(res_cols), len(thick_cols))
- if n_layers is not None:
- max_layers = min(max_layers, n_layers)
-
- cum_depth = 0.0
- for i in range(max_layers):
- res_col = res_cols[i]
- thick_col = thick_cols[i]
- res_val = row.get(res_col, '')
- thick_val = row.get(thick_col, '')
- try:
- res = float(res_val)
- thick = float(thick_val)
- except (ValueError, TypeError):
- break
- if math.isnan(res) or math.isnan(thick):
- break
+ with open(path, 'r') as f:
+ for line_number, raw_line in enumerate(f, start=1):
+ stripped = raw_line.strip()
+ left_stripped = raw_line.lstrip()
+
+ if headers is None:
+ if is_header_line(left_stripped):
+ headers = normalize_header_tokens(left_stripped)
+ source_format = detect_format(headers)
+ if source_format == 'temimage':
+ res_cols = get_numbered_columns(headers, 'Res_')
+ thick_cols = get_numbered_columns(headers, 'Thick_')
+ else:
+ res_cols = get_numbered_columns(headers, 'RHO_')
+ thick_cols = get_numbered_columns(headers, 'THK_')
+ dep_top_cols = get_numbered_columns(headers, 'DEP_TOP_')
+ dep_bot_cols = get_numbered_columns(headers, 'DEP_BOT_')
+ continue
+ if not left_stripped.startswith('/'):
+ raise ValueError('XYZ file does not contain a supported header row')
+ continue
+
+ if not stripped:
+ continue
+
+ values = stripped.split()
+ # When the data has one more column than the header, pandas would
+ # treat the first column as the row index. Replicate that here.
+ if len(values) == len(headers) + 1:
+ values = values[1:]
+ if len(values) != len(headers):
+ raise ValueError(
+ f'Row {line_number} has {len(values)} columns, '
+ f'expected {len(headers)}'
+ )
+
+ row = dict(zip(headers, values))
+ x = float(row['X'])
+ y = float(row['Y'])
+
+ if source_format == 'temimage':
+ z = float(row['Z'])
+ doi = float(row['DOI'])
+ data_residual = float(row['DataResidual'])
+ n_layers = parse_num_layers(row['NumLayers'])
+ line = row['Line']
+ station_no = row['StationNo']
+ else:
+ z = float(row['ELEVATION'])
+ doi = float(row['DOI_STANDARD'])
+ data_residual = float(row['RESDATA'])
+ line = str(int(float(row['LINE_NO'])))
+ record = int(float(row['RECORD']))
+ station_no = f'{line}_{record:05d}'
+ n_layers = count_valid_layers(row, res_cols, thick_cols)
+
+ z_doi = z - doi
+ point_wkt = f'POINT Z ({x} {y} {z})'
+ doi_wkt = f'POINT Z ({x} {y} {z_doi})'
+
+ points.append({
+ 'X': x,
+ 'Y': y,
+ 'Z': z,
+ 'Line': line,
+ 'StationNo': station_no,
+ 'DataResidual': data_residual,
+ 'NumLayers': n_layers,
+ 'Geometry': point_wkt,
+ })
+ doi_points.append({
+ 'X': x,
+ 'Y': y,
+ 'Z': z_doi,
+ 'DOI': doi,
+ 'ZDOI': z_doi,
+ 'Geometry': doi_wkt,
+ })
+
+ max_layers = min(len(res_cols), len(thick_cols))
+ if n_layers is not None:
+ max_layers = min(max_layers, n_layers)
- dep_top_col = dep_top_cols[i] if i < len(dep_top_cols) else None
- dep_bot_col = dep_bot_cols[i] if i < len(dep_bot_cols) else None
- if dep_top_col and dep_bot_col:
+ cum_depth = 0.0
+ for i in range(max_layers):
+ res_col = res_cols[i]
+ thick_col = thick_cols[i]
+ res_val = row.get(res_col, '')
+ thick_val = row.get(thick_col, '')
try:
- depth_top = float(row[dep_top_col])
- depth_bottom = float(row[dep_bot_col])
- if math.isnan(depth_top) or math.isnan(depth_bottom):
- raise ValueError
+ res = float(res_val)
+ thick = float(thick_val)
except (ValueError, TypeError):
+ break
+ if math.isnan(res) or math.isnan(thick):
+ break
+
+ dep_top_col = dep_top_cols[i] if i < len(dep_top_cols) else None
+ dep_bot_col = dep_bot_cols[i] if i < len(dep_bot_cols) else None
+ if dep_top_col and dep_bot_col:
+ try:
+ depth_top = float(row[dep_top_col])
+ depth_bottom = float(row[dep_bot_col])
+ if math.isnan(depth_top) or math.isnan(depth_bottom):
+ raise ValueError
+ except (ValueError, TypeError):
+ depth_top = cum_depth
+ depth_bottom = cum_depth + thick
+ else:
depth_top = cum_depth
depth_bottom = cum_depth + thick
- else:
- depth_top = cum_depth
- depth_bottom = cum_depth + thick
-
- z_top = z - depth_top
- z_bot = z - depth_bottom
- z_mid = (z_top + z_bot) / 2
- cum_depth = depth_bottom
- layer_wkt = f'LINESTRING Z ({x} {y} {z_top}, {x} {y} {z_bot})'
- layers.append({
- 'X': x,
- 'Y': y,
- 'Z': z,
- 'ZTop': z_top,
- 'ZMid': z_mid,
- 'ZBottom': z_bot,
- 'DepthTop': depth_top,
- 'DepthBottom': depth_bottom,
- 'Resistivity': res,
- 'Layer': i + 1,
- 'Geometry': layer_wkt,
- })
+ z_top = z - depth_top
+ z_bot = z - depth_bottom
+ z_mid = (z_top + z_bot) / 2
+ cum_depth = depth_bottom
+
+ layer_wkt = f'LINESTRING Z ({x} {y} {z_top}, {x} {y} {z_bot})'
+ layers.append({
+ 'X': x,
+ 'Y': y,
+ 'Z': z,
+ 'ZTop': z_top,
+ 'ZMid': z_mid,
+ 'ZBottom': z_bot,
+ 'DepthTop': depth_top,
+ 'DepthBottom': depth_bottom,
+ 'Resistivity': res,
+ 'Layer': i + 1,
+ 'Geometry': layer_wkt,
+ })
+
+ if headers is None:
+ raise ValueError('XYZ file does not contain a supported header row')
return points, doi_points, layers
diff --git a/tem_loader/tem_loader.py b/tem_loader/tem_loader.py
@@ -5,7 +5,6 @@ from qgis.core import (
QgsProject,
QgsVectorLayer,
QgsCoordinateReferenceSystem,
- QgsLayerTreeGroup,
)
from . import core
@@ -35,8 +34,19 @@ class TEMLoaderPlugin:
'',
'XYZ files (*.xyz);;All files (*)',
)
+ failed = []
for path in paths:
- self._load_xyz(Path(path))
+ filepath = Path(path)
+ try:
+ self._load_xyz(filepath)
+ except Exception as exc:
+ failed.append(f'{filepath.name}: {exc}')
+ if failed:
+ QMessageBox.warning(
+ self.iface.mainWindow(),
+ 'TEM Loader',
+ '\n'.join(failed),
+ )
def _load_xyz(self, filepath):
points, doi_points, layers = core.process_xyz(filepath)
@@ -59,11 +69,6 @@ class TEMLoaderPlugin:
crs.createFromString('EPSG:4326')
crs_str = crs.authid()
- group_name = filepath.stem
- root = project.layerTreeRoot()
- group = root.insertGroup(0, group_name)
-
- failed = []
loaded_layers = {}
source_layers = [
('layers', lyr_csv, 'LineString'),
@@ -81,7 +86,6 @@ class TEMLoaderPlugin:
)
layer = QgsVectorLayer(uri, name, 'delimitedtext')
if not layer.isValid():
- failed.append(name)
continue
qml = STYLES_DIR / f'{name}.qml'
@@ -91,6 +95,12 @@ class TEMLoaderPlugin:
project.addMapLayer(layer, False)
loaded_layers[name] = layer
+ if not loaded_layers:
+ raise ValueError('failed to load any layers')
+
+ group_name = filepath.stem
+ root = project.layerTreeRoot()
+ group = root.insertGroup(0, group_name)
insert_index = 0
for name in ('points', 'doi', 'layers'):
layer = loaded_layers.get(name)
@@ -99,9 +109,10 @@ class TEMLoaderPlugin:
group.insertLayer(insert_index, layer)
insert_index += 1
+ failed = [name for name, _, _ in source_layers if name not in loaded_layers]
if failed:
QMessageBox.warning(
self.iface.mainWindow(),
'TEM Loader',
- f'Failed to load layers: {", ".join(failed)}',
+ f'{filepath.name}: failed to load layers: {", ".join(failed)}',
)
diff --git a/test/test_core.py b/test/test_core.py
@@ -1,7 +1,11 @@
+import importlib
from pathlib import Path
from tempfile import TemporaryDirectory
import shutil
+import sys
+import types
import unittest
+from unittest.mock import Mock, patch
import xml.etree.ElementTree as ET
from tem_loader.core import detect_source_epsg, process_xyz, write_csv
@@ -130,6 +134,39 @@ class ProcessXYZTests(unittest.TestCase):
self.assertTrue(out_path.exists())
self.assertIn("StationNo", out_path.read_text().splitlines()[0])
+ def test_process_xyz_rejects_metadata_only_file(self):
+ with TemporaryDirectory() as tmp:
+ path = Path(tmp) / "metadata_only.xyz"
+ path.write_text("/ epsg:32632\n/ no header here\n")
+
+ with self.assertRaisesRegex(
+ ValueError, "supported header row"
+ ):
+ process_xyz(path)
+
+ def test_process_xyz_rejects_unsupported_header(self):
+ with TemporaryDirectory() as tmp:
+ path = Path(tmp) / "unsupported.xyz"
+ path.write_text("A B C\n1 2 3\n")
+
+ with self.assertRaisesRegex(
+ ValueError, "supported header row"
+ ):
+ process_xyz(path)
+
+ def test_process_xyz_rejects_mismatched_row_length(self):
+ with TemporaryDirectory() as tmp:
+ path = Path(tmp) / "broken.xyz"
+ path.write_text(
+ "/ X Y Z DOI DataResidual NumLayers Line StationNo\n"
+ "1 2 3 4 5 6 7\n"
+ )
+
+ with self.assertRaisesRegex(
+ ValueError, r"Row 2 has 7 columns, expected 8"
+ ):
+ process_xyz(path)
+
def test_fixture_doi_values_fit_fixed_scale(self):
for path in sorted(FIXTURE_DIR.glob("*.xyz")):
_, doi_points, _ = process_xyz(path)
@@ -167,3 +204,70 @@ class ProcessXYZTests(unittest.TestCase):
method = renderer.find("./classificationMethod")
self.assertIsNotNone(method)
self.assertEqual(method.attrib["id"], "EqualInterval")
+
+
+class PluginTests(unittest.TestCase):
+ def _import_plugin_module(self):
+ class FakeSignal:
+ def connect(self, _callback):
+ pass
+
+ class FakeAction:
+ def __init__(self, *_args, **_kwargs):
+ self.triggered = FakeSignal()
+
+ class FakeFileDialog:
+ paths = []
+
+ @staticmethod
+ def getOpenFileNames(*_args, **_kwargs):
+ return FakeFileDialog.paths, ""
+
+ class FakeMessageBox:
+ warnings = []
+
+ @staticmethod
+ def warning(*args):
+ FakeMessageBox.warnings.append(args)
+
+ qtwidgets = types.ModuleType("qgis.PyQt.QtWidgets")
+ qtwidgets.QAction = FakeAction
+ qtwidgets.QFileDialog = FakeFileDialog
+ qtwidgets.QMessageBox = FakeMessageBox
+
+ qgis_core = types.ModuleType("qgis.core")
+ qgis_core.QgsProject = type("QgsProject", (), {})
+ qgis_core.QgsVectorLayer = type("QgsVectorLayer", (), {})
+ qgis_core.QgsCoordinateReferenceSystem = type(
+ "QgsCoordinateReferenceSystem", (), {}
+ )
+
+ module_map = {
+ "qgis": types.ModuleType("qgis"),
+ "qgis.PyQt": types.ModuleType("qgis.PyQt"),
+ "qgis.PyQt.QtWidgets": qtwidgets,
+ "qgis.core": qgis_core,
+ }
+
+ with patch.dict(sys.modules, module_map):
+ sys.modules.pop("tem_loader.tem_loader", None)
+ module = importlib.import_module("tem_loader.tem_loader")
+
+ return module, FakeFileDialog, FakeMessageBox
+
+ def test_run_continues_after_failed_file_and_shows_filename(self):
+ module, file_dialog, message_box = self._import_plugin_module()
+ file_dialog.paths = ["/tmp/bad.xyz", "/tmp/good.xyz"]
+ iface = Mock()
+ iface.mainWindow.return_value = object()
+ plugin = module.TEMLoaderPlugin(iface)
+ plugin._load_xyz = Mock(
+ side_effect=[ValueError("Row 3 has 4 columns, expected 6"), None]
+ )
+
+ plugin.run()
+
+ self.assertEqual(plugin._load_xyz.call_count, 2)
+ self.assertEqual(len(message_box.warnings), 1)
+ self.assertIn("bad.xyz", message_box.warnings[0][2])
+ self.assertIn("Row 3 has 4 columns, expected 6", message_box.warnings[0][2])