commit ca9ee470f4174e301c1d2f2e61c13dd149cc99f2
parent 2f5345071c0eb8d8a91ecec66fbbc7abaf8cda65
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Thu, 9 Apr 2026 18:48:36 +0200
fix(parser): validate xyz rows and continue batch imports
Diffstat:
4 files changed, 254 insertions(+), 139 deletions(-)
diff --git a/README.md b/README.md
@@ -30,6 +30,7 @@ Each file gets its own layer group. Layers are styled with pre-built QML styles
3. Select one or more `.xyz` inversion files.
4. If the file metadata declares an EPSG code, imported layers use that CRS; otherwise the loader falls back to the project CRS, then to EPSG:4326.
5. Three CSV files (`.points.csv`, `.doi.csv`, `.layers.csv`) are written beside each source file, and the corresponding layers are added to the project with `points` above `doi` above `layers`.
+6. Invalid or malformed `.xyz` inputs are rejected with explicit validation errors, and when multiple files are selected the loader continues with the remaining files while reporting the filenames of the failing inputs.
## XYZ File Format
diff --git a/tem_loader/core.py b/tem_loader/core.py
@@ -47,17 +47,6 @@ def is_header_line(line):
return detect_format(normalize_header_tokens(line)) is not None
-def count_header_lines(path, comment_char='/'):
- with open(path, 'r') as f:
- for i, line in enumerate(f):
- stripped = line.lstrip()
- if is_header_line(stripped):
- return i
- if not stripped.startswith(comment_char):
- return i
- return 0
-
-
def detect_source_epsg(path, comment_char='/'):
with open(path, 'r') as f:
for line in f:
@@ -108,135 +97,145 @@ def count_valid_layers(row, res_cols, thick_cols):
def process_xyz(path):
- skiprows = count_header_lines(path)
- with open(path, 'r') as f:
- lines = f.readlines()
-
- data_lines = lines[skiprows:]
- headers = normalize_header_tokens(data_lines[0])
- source_format = detect_format(headers)
- if source_format is None:
- raise ValueError('Unsupported XYZ header format')
- data_rows = []
- for line in data_lines[1:]:
- line = line.strip()
- if not line:
- continue
- values = line.split()
- # When the data has one more column than the header, pandas would
- # treat the first column as the row index. Replicate that here.
- if len(values) == len(headers) + 1:
- values = values[1:]
- data_rows.append(dict(zip(headers, values)))
-
- if source_format == 'temimage':
- res_cols = get_numbered_columns(headers, 'Res_')
- thick_cols = get_numbered_columns(headers, 'Thick_')
- else:
- res_cols = get_numbered_columns(headers, 'RHO_')
- thick_cols = get_numbered_columns(headers, 'THK_')
- dep_top_cols = get_numbered_columns(headers, 'DEP_TOP_')
- dep_bot_cols = get_numbered_columns(headers, 'DEP_BOT_')
-
points = []
doi_points = []
layers = []
+ headers = None
- for row in data_rows:
- x = float(row['X'])
- y = float(row['Y'])
-
- if source_format == 'temimage':
- z = float(row['Z'])
- doi = float(row['DOI'])
- data_residual = float(row['DataResidual'])
- n_layers = parse_num_layers(row['NumLayers'])
- line = row['Line']
- station_no = row['StationNo']
- else:
- z = float(row['ELEVATION'])
- doi = float(row['DOI_STANDARD'])
- data_residual = float(row['RESDATA'])
- line = str(int(float(row['LINE_NO'])))
- record = int(float(row['RECORD']))
- station_no = f'{line}_{record:05d}'
- n_layers = count_valid_layers(row, res_cols, thick_cols)
-
- z_doi = z - doi
- point_wkt = f'POINT Z ({x} {y} {z})'
- doi_wkt = f'POINT Z ({x} {y} {z_doi})'
-
- points.append({
- 'X': x,
- 'Y': y,
- 'Z': z,
- 'Line': line,
- 'StationNo': station_no,
- 'DataResidual': data_residual,
- 'NumLayers': n_layers,
- 'Geometry': point_wkt,
- })
- doi_points.append({
- 'X': x,
- 'Y': y,
- 'Z': z_doi,
- 'DOI': doi,
- 'ZDOI': z_doi,
- 'Geometry': doi_wkt,
- })
-
- max_layers = min(len(res_cols), len(thick_cols))
- if n_layers is not None:
- max_layers = min(max_layers, n_layers)
-
- cum_depth = 0.0
- for i in range(max_layers):
- res_col = res_cols[i]
- thick_col = thick_cols[i]
- res_val = row.get(res_col, '')
- thick_val = row.get(thick_col, '')
- try:
- res = float(res_val)
- thick = float(thick_val)
- except (ValueError, TypeError):
- break
- if math.isnan(res) or math.isnan(thick):
- break
+ with open(path, 'r') as f:
+ for line_number, raw_line in enumerate(f, start=1):
+ stripped = raw_line.strip()
+ left_stripped = raw_line.lstrip()
+
+ if headers is None:
+ if is_header_line(left_stripped):
+ headers = normalize_header_tokens(left_stripped)
+ source_format = detect_format(headers)
+ if source_format == 'temimage':
+ res_cols = get_numbered_columns(headers, 'Res_')
+ thick_cols = get_numbered_columns(headers, 'Thick_')
+ else:
+ res_cols = get_numbered_columns(headers, 'RHO_')
+ thick_cols = get_numbered_columns(headers, 'THK_')
+ dep_top_cols = get_numbered_columns(headers, 'DEP_TOP_')
+ dep_bot_cols = get_numbered_columns(headers, 'DEP_BOT_')
+ continue
+ if not left_stripped.startswith('/'):
+ raise ValueError('XYZ file does not contain a supported header row')
+ continue
+
+ if not stripped:
+ continue
+
+ values = stripped.split()
+ # When the data has one more column than the header, pandas would
+ # treat the first column as the row index. Replicate that here.
+ if len(values) == len(headers) + 1:
+ values = values[1:]
+ if len(values) != len(headers):
+ raise ValueError(
+ f'Row {line_number} has {len(values)} columns, '
+ f'expected {len(headers)}'
+ )
+
+ row = dict(zip(headers, values))
+ x = float(row['X'])
+ y = float(row['Y'])
+
+ if source_format == 'temimage':
+ z = float(row['Z'])
+ doi = float(row['DOI'])
+ data_residual = float(row['DataResidual'])
+ n_layers = parse_num_layers(row['NumLayers'])
+ line = row['Line']
+ station_no = row['StationNo']
+ else:
+ z = float(row['ELEVATION'])
+ doi = float(row['DOI_STANDARD'])
+ data_residual = float(row['RESDATA'])
+ line = str(int(float(row['LINE_NO'])))
+ record = int(float(row['RECORD']))
+ station_no = f'{line}_{record:05d}'
+ n_layers = count_valid_layers(row, res_cols, thick_cols)
+
+ z_doi = z - doi
+ point_wkt = f'POINT Z ({x} {y} {z})'
+ doi_wkt = f'POINT Z ({x} {y} {z_doi})'
+
+ points.append({
+ 'X': x,
+ 'Y': y,
+ 'Z': z,
+ 'Line': line,
+ 'StationNo': station_no,
+ 'DataResidual': data_residual,
+ 'NumLayers': n_layers,
+ 'Geometry': point_wkt,
+ })
+ doi_points.append({
+ 'X': x,
+ 'Y': y,
+ 'Z': z_doi,
+ 'DOI': doi,
+ 'ZDOI': z_doi,
+ 'Geometry': doi_wkt,
+ })
+
+ max_layers = min(len(res_cols), len(thick_cols))
+ if n_layers is not None:
+ max_layers = min(max_layers, n_layers)
- dep_top_col = dep_top_cols[i] if i < len(dep_top_cols) else None
- dep_bot_col = dep_bot_cols[i] if i < len(dep_bot_cols) else None
- if dep_top_col and dep_bot_col:
+ cum_depth = 0.0
+ for i in range(max_layers):
+ res_col = res_cols[i]
+ thick_col = thick_cols[i]
+ res_val = row.get(res_col, '')
+ thick_val = row.get(thick_col, '')
try:
- depth_top = float(row[dep_top_col])
- depth_bottom = float(row[dep_bot_col])
- if math.isnan(depth_top) or math.isnan(depth_bottom):
- raise ValueError
+ res = float(res_val)
+ thick = float(thick_val)
except (ValueError, TypeError):
+ break
+ if math.isnan(res) or math.isnan(thick):
+ break
+
+ dep_top_col = dep_top_cols[i] if i < len(dep_top_cols) else None
+ dep_bot_col = dep_bot_cols[i] if i < len(dep_bot_cols) else None
+ if dep_top_col and dep_bot_col:
+ try:
+ depth_top = float(row[dep_top_col])
+ depth_bottom = float(row[dep_bot_col])
+ if math.isnan(depth_top) or math.isnan(depth_bottom):
+ raise ValueError
+ except (ValueError, TypeError):
+ depth_top = cum_depth
+ depth_bottom = cum_depth + thick
+ else:
depth_top = cum_depth
depth_bottom = cum_depth + thick
- else:
- depth_top = cum_depth
- depth_bottom = cum_depth + thick
-
- z_top = z - depth_top
- z_bot = z - depth_bottom
- z_mid = (z_top + z_bot) / 2
- cum_depth = depth_bottom
- layer_wkt = f'LINESTRING Z ({x} {y} {z_top}, {x} {y} {z_bot})'
- layers.append({
- 'X': x,
- 'Y': y,
- 'Z': z,
- 'ZTop': z_top,
- 'ZMid': z_mid,
- 'ZBottom': z_bot,
- 'DepthTop': depth_top,
- 'DepthBottom': depth_bottom,
- 'Resistivity': res,
- 'Layer': i + 1,
- 'Geometry': layer_wkt,
- })
+ z_top = z - depth_top
+ z_bot = z - depth_bottom
+ z_mid = (z_top + z_bot) / 2
+ cum_depth = depth_bottom
+
+ layer_wkt = f'LINESTRING Z ({x} {y} {z_top}, {x} {y} {z_bot})'
+ layers.append({
+ 'X': x,
+ 'Y': y,
+ 'Z': z,
+ 'ZTop': z_top,
+ 'ZMid': z_mid,
+ 'ZBottom': z_bot,
+ 'DepthTop': depth_top,
+ 'DepthBottom': depth_bottom,
+ 'Resistivity': res,
+ 'Layer': i + 1,
+ 'Geometry': layer_wkt,
+ })
+
+ if headers is None:
+ raise ValueError('XYZ file does not contain a supported header row')
return points, doi_points, layers
diff --git a/tem_loader/tem_loader.py b/tem_loader/tem_loader.py
@@ -5,7 +5,6 @@ from qgis.core import (
QgsProject,
QgsVectorLayer,
QgsCoordinateReferenceSystem,
- QgsLayerTreeGroup,
)
from . import core
@@ -35,8 +34,19 @@ class TEMLoaderPlugin:
'',
'XYZ files (*.xyz);;All files (*)',
)
+ failed = []
for path in paths:
- self._load_xyz(Path(path))
+ filepath = Path(path)
+ try:
+ self._load_xyz(filepath)
+ except Exception as exc:
+ failed.append(f'{filepath.name}: {exc}')
+ if failed:
+ QMessageBox.warning(
+ self.iface.mainWindow(),
+ 'TEM Loader',
+ '\n'.join(failed),
+ )
def _load_xyz(self, filepath):
points, doi_points, layers = core.process_xyz(filepath)
@@ -59,11 +69,6 @@ class TEMLoaderPlugin:
crs.createFromString('EPSG:4326')
crs_str = crs.authid()
- group_name = filepath.stem
- root = project.layerTreeRoot()
- group = root.insertGroup(0, group_name)
-
- failed = []
loaded_layers = {}
source_layers = [
('layers', lyr_csv, 'LineString'),
@@ -81,7 +86,6 @@ class TEMLoaderPlugin:
)
layer = QgsVectorLayer(uri, name, 'delimitedtext')
if not layer.isValid():
- failed.append(name)
continue
qml = STYLES_DIR / f'{name}.qml'
@@ -91,6 +95,12 @@ class TEMLoaderPlugin:
project.addMapLayer(layer, False)
loaded_layers[name] = layer
+ if not loaded_layers:
+ raise ValueError('failed to load any layers')
+
+ group_name = filepath.stem
+ root = project.layerTreeRoot()
+ group = root.insertGroup(0, group_name)
insert_index = 0
for name in ('points', 'doi', 'layers'):
layer = loaded_layers.get(name)
@@ -99,9 +109,10 @@ class TEMLoaderPlugin:
group.insertLayer(insert_index, layer)
insert_index += 1
+ failed = [name for name, _, _ in source_layers if name not in loaded_layers]
if failed:
QMessageBox.warning(
self.iface.mainWindow(),
'TEM Loader',
- f'Failed to load layers: {", ".join(failed)}',
+ f'{filepath.name}: failed to load layers: {", ".join(failed)}',
)
diff --git a/test/test_core.py b/test/test_core.py
@@ -1,7 +1,11 @@
+import importlib
from pathlib import Path
from tempfile import TemporaryDirectory
import shutil
+import sys
+import types
import unittest
+from unittest.mock import Mock, patch
import xml.etree.ElementTree as ET
from tem_loader.core import detect_source_epsg, process_xyz, write_csv
@@ -130,6 +134,39 @@ class ProcessXYZTests(unittest.TestCase):
self.assertTrue(out_path.exists())
self.assertIn("StationNo", out_path.read_text().splitlines()[0])
+ def test_process_xyz_rejects_metadata_only_file(self):
+ with TemporaryDirectory() as tmp:
+ path = Path(tmp) / "metadata_only.xyz"
+ path.write_text("/ epsg:32632\n/ no header here\n")
+
+ with self.assertRaisesRegex(
+ ValueError, "supported header row"
+ ):
+ process_xyz(path)
+
+ def test_process_xyz_rejects_unsupported_header(self):
+ with TemporaryDirectory() as tmp:
+ path = Path(tmp) / "unsupported.xyz"
+ path.write_text("A B C\n1 2 3\n")
+
+ with self.assertRaisesRegex(
+ ValueError, "supported header row"
+ ):
+ process_xyz(path)
+
+ def test_process_xyz_rejects_mismatched_row_length(self):
+ with TemporaryDirectory() as tmp:
+ path = Path(tmp) / "broken.xyz"
+ path.write_text(
+ "/ X Y Z DOI DataResidual NumLayers Line StationNo\n"
+ "1 2 3 4 5 6 7\n"
+ )
+
+ with self.assertRaisesRegex(
+ ValueError, r"Row 2 has 7 columns, expected 8"
+ ):
+ process_xyz(path)
+
def test_fixture_doi_values_fit_fixed_scale(self):
for path in sorted(FIXTURE_DIR.glob("*.xyz")):
_, doi_points, _ = process_xyz(path)
@@ -167,3 +204,70 @@ class ProcessXYZTests(unittest.TestCase):
method = renderer.find("./classificationMethod")
self.assertIsNotNone(method)
self.assertEqual(method.attrib["id"], "EqualInterval")
+
+
+class PluginTests(unittest.TestCase):
+ def _import_plugin_module(self):
+ class FakeSignal:
+ def connect(self, _callback):
+ pass
+
+ class FakeAction:
+ def __init__(self, *_args, **_kwargs):
+ self.triggered = FakeSignal()
+
+ class FakeFileDialog:
+ paths = []
+
+ @staticmethod
+ def getOpenFileNames(*_args, **_kwargs):
+ return FakeFileDialog.paths, ""
+
+ class FakeMessageBox:
+ warnings = []
+
+ @staticmethod
+ def warning(*args):
+ FakeMessageBox.warnings.append(args)
+
+ qtwidgets = types.ModuleType("qgis.PyQt.QtWidgets")
+ qtwidgets.QAction = FakeAction
+ qtwidgets.QFileDialog = FakeFileDialog
+ qtwidgets.QMessageBox = FakeMessageBox
+
+ qgis_core = types.ModuleType("qgis.core")
+ qgis_core.QgsProject = type("QgsProject", (), {})
+ qgis_core.QgsVectorLayer = type("QgsVectorLayer", (), {})
+ qgis_core.QgsCoordinateReferenceSystem = type(
+ "QgsCoordinateReferenceSystem", (), {}
+ )
+
+ module_map = {
+ "qgis": types.ModuleType("qgis"),
+ "qgis.PyQt": types.ModuleType("qgis.PyQt"),
+ "qgis.PyQt.QtWidgets": qtwidgets,
+ "qgis.core": qgis_core,
+ }
+
+ with patch.dict(sys.modules, module_map):
+ sys.modules.pop("tem_loader.tem_loader", None)
+ module = importlib.import_module("tem_loader.tem_loader")
+
+ return module, FakeFileDialog, FakeMessageBox
+
+ def test_run_continues_after_failed_file_and_shows_filename(self):
+ module, file_dialog, message_box = self._import_plugin_module()
+ file_dialog.paths = ["/tmp/bad.xyz", "/tmp/good.xyz"]
+ iface = Mock()
+ iface.mainWindow.return_value = object()
+ plugin = module.TEMLoaderPlugin(iface)
+ plugin._load_xyz = Mock(
+ side_effect=[ValueError("Row 3 has 4 columns, expected 6"), None]
+ )
+
+ plugin.run()
+
+ self.assertEqual(plugin._load_xyz.call_count, 2)
+ self.assertEqual(len(message_box.warnings), 1)
+ self.assertIn("bad.xyz", message_box.warnings[0][2])
+ self.assertIn("Row 3 has 4 columns, expected 6", message_box.warnings[0][2])