qgis-tem-loader

QGIS plugin for loading TEM geophysical inversion XYZ files as 3D objects
git clone git://src.adamsgaard.dk/qgis-tem-loader # fast
git clone https://src.adamsgaard.dk/qgis-tem-loader.git # slow
Log | Files | Refs | README | LICENSE Back to index

commit ca9ee470f4174e301c1d2f2e61c13dd149cc99f2
parent 2f5345071c0eb8d8a91ecec66fbbc7abaf8cda65
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date:   Thu,  9 Apr 2026 18:48:36 +0200

fix(parser): validate xyz rows and continue batch imports

Diffstat:
MREADME.md | 1+
Mtem_loader/core.py | 259+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mtem_loader/tem_loader.py | 29++++++++++++++++++++---------
Mtest/test_core.py | 104+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 254 insertions(+), 139 deletions(-)

diff --git a/README.md b/README.md @@ -30,6 +30,7 @@ Each file gets its own layer group. Layers are styled with pre-built QML styles 3. Select one or more `.xyz` inversion files. 4. If the file metadata declares an EPSG code, imported layers use that CRS; otherwise the loader falls back to the project CRS, then to EPSG:4326. 5. Three CSV files (`.points.csv`, `.doi.csv`, `.layers.csv`) are written beside each source file, and the corresponding layers are added to the project with `points` above `doi` above `layers`. +6. Invalid or malformed `.xyz` inputs are rejected with explicit validation errors, and when multiple files are selected the loader continues with the remaining files while reporting the failing input filename. ## XYZ File Format diff --git a/tem_loader/core.py b/tem_loader/core.py @@ -47,17 +47,6 @@ def is_header_line(line): return detect_format(normalize_header_tokens(line)) is not None -def count_header_lines(path, comment_char='/'): - with open(path, 'r') as f: - for i, line in enumerate(f): - stripped = line.lstrip() - if is_header_line(stripped): - return i - if not stripped.startswith(comment_char): - return i - return 0 - - def detect_source_epsg(path, comment_char='/'): with open(path, 'r') as f: for line in f: @@ -108,135 +97,145 @@ def count_valid_layers(row, res_cols, thick_cols): def process_xyz(path): - skiprows = count_header_lines(path) - with open(path, 'r') as f: - lines = f.readlines() - - data_lines = lines[skiprows:] - headers = normalize_header_tokens(data_lines[0]) - source_format = detect_format(headers) - if source_format is None: - raise ValueError('Unsupported XYZ header format') - data_rows = [] - for line in data_lines[1:]: - line = line.strip() - if not line: - continue - values = line.split() - # When the data has one more column than the header, pandas would - # treat the first column as the row index. Replicate that here. 
- if len(values) == len(headers) + 1: - values = values[1:] - data_rows.append(dict(zip(headers, values))) - - if source_format == 'temimage': - res_cols = get_numbered_columns(headers, 'Res_') - thick_cols = get_numbered_columns(headers, 'Thick_') - else: - res_cols = get_numbered_columns(headers, 'RHO_') - thick_cols = get_numbered_columns(headers, 'THK_') - dep_top_cols = get_numbered_columns(headers, 'DEP_TOP_') - dep_bot_cols = get_numbered_columns(headers, 'DEP_BOT_') - points = [] doi_points = [] layers = [] + headers = None - for row in data_rows: - x = float(row['X']) - y = float(row['Y']) - - if source_format == 'temimage': - z = float(row['Z']) - doi = float(row['DOI']) - data_residual = float(row['DataResidual']) - n_layers = parse_num_layers(row['NumLayers']) - line = row['Line'] - station_no = row['StationNo'] - else: - z = float(row['ELEVATION']) - doi = float(row['DOI_STANDARD']) - data_residual = float(row['RESDATA']) - line = str(int(float(row['LINE_NO']))) - record = int(float(row['RECORD'])) - station_no = f'{line}_{record:05d}' - n_layers = count_valid_layers(row, res_cols, thick_cols) - - z_doi = z - doi - point_wkt = f'POINT Z ({x} {y} {z})' - doi_wkt = f'POINT Z ({x} {y} {z_doi})' - - points.append({ - 'X': x, - 'Y': y, - 'Z': z, - 'Line': line, - 'StationNo': station_no, - 'DataResidual': data_residual, - 'NumLayers': n_layers, - 'Geometry': point_wkt, - }) - doi_points.append({ - 'X': x, - 'Y': y, - 'Z': z_doi, - 'DOI': doi, - 'ZDOI': z_doi, - 'Geometry': doi_wkt, - }) - - max_layers = min(len(res_cols), len(thick_cols)) - if n_layers is not None: - max_layers = min(max_layers, n_layers) - - cum_depth = 0.0 - for i in range(max_layers): - res_col = res_cols[i] - thick_col = thick_cols[i] - res_val = row.get(res_col, '') - thick_val = row.get(thick_col, '') - try: - res = float(res_val) - thick = float(thick_val) - except (ValueError, TypeError): - break - if math.isnan(res) or math.isnan(thick): - break + with open(path, 'r') as f: + for 
line_number, raw_line in enumerate(f, start=1): + stripped = raw_line.strip() + left_stripped = raw_line.lstrip() + + if headers is None: + if is_header_line(left_stripped): + headers = normalize_header_tokens(left_stripped) + source_format = detect_format(headers) + if source_format == 'temimage': + res_cols = get_numbered_columns(headers, 'Res_') + thick_cols = get_numbered_columns(headers, 'Thick_') + else: + res_cols = get_numbered_columns(headers, 'RHO_') + thick_cols = get_numbered_columns(headers, 'THK_') + dep_top_cols = get_numbered_columns(headers, 'DEP_TOP_') + dep_bot_cols = get_numbered_columns(headers, 'DEP_BOT_') + continue + if not left_stripped.startswith('/'): + raise ValueError('XYZ file does not contain a supported header row') + continue + + if not stripped: + continue + + values = stripped.split() + # When the data has one more column than the header, pandas would + # treat the first column as the row index. Replicate that here. + if len(values) == len(headers) + 1: + values = values[1:] + if len(values) != len(headers): + raise ValueError( + f'Row {line_number} has {len(values)} columns, ' + f'expected {len(headers)}' + ) + + row = dict(zip(headers, values)) + x = float(row['X']) + y = float(row['Y']) + + if source_format == 'temimage': + z = float(row['Z']) + doi = float(row['DOI']) + data_residual = float(row['DataResidual']) + n_layers = parse_num_layers(row['NumLayers']) + line = row['Line'] + station_no = row['StationNo'] + else: + z = float(row['ELEVATION']) + doi = float(row['DOI_STANDARD']) + data_residual = float(row['RESDATA']) + line = str(int(float(row['LINE_NO']))) + record = int(float(row['RECORD'])) + station_no = f'{line}_{record:05d}' + n_layers = count_valid_layers(row, res_cols, thick_cols) + + z_doi = z - doi + point_wkt = f'POINT Z ({x} {y} {z})' + doi_wkt = f'POINT Z ({x} {y} {z_doi})' + + points.append({ + 'X': x, + 'Y': y, + 'Z': z, + 'Line': line, + 'StationNo': station_no, + 'DataResidual': data_residual, + 
'NumLayers': n_layers, + 'Geometry': point_wkt, + }) + doi_points.append({ + 'X': x, + 'Y': y, + 'Z': z_doi, + 'DOI': doi, + 'ZDOI': z_doi, + 'Geometry': doi_wkt, + }) + + max_layers = min(len(res_cols), len(thick_cols)) + if n_layers is not None: + max_layers = min(max_layers, n_layers) - dep_top_col = dep_top_cols[i] if i < len(dep_top_cols) else None - dep_bot_col = dep_bot_cols[i] if i < len(dep_bot_cols) else None - if dep_top_col and dep_bot_col: + cum_depth = 0.0 + for i in range(max_layers): + res_col = res_cols[i] + thick_col = thick_cols[i] + res_val = row.get(res_col, '') + thick_val = row.get(thick_col, '') try: - depth_top = float(row[dep_top_col]) - depth_bottom = float(row[dep_bot_col]) - if math.isnan(depth_top) or math.isnan(depth_bottom): - raise ValueError + res = float(res_val) + thick = float(thick_val) except (ValueError, TypeError): + break + if math.isnan(res) or math.isnan(thick): + break + + dep_top_col = dep_top_cols[i] if i < len(dep_top_cols) else None + dep_bot_col = dep_bot_cols[i] if i < len(dep_bot_cols) else None + if dep_top_col and dep_bot_col: + try: + depth_top = float(row[dep_top_col]) + depth_bottom = float(row[dep_bot_col]) + if math.isnan(depth_top) or math.isnan(depth_bottom): + raise ValueError + except (ValueError, TypeError): + depth_top = cum_depth + depth_bottom = cum_depth + thick + else: depth_top = cum_depth depth_bottom = cum_depth + thick - else: - depth_top = cum_depth - depth_bottom = cum_depth + thick - - z_top = z - depth_top - z_bot = z - depth_bottom - z_mid = (z_top + z_bot) / 2 - cum_depth = depth_bottom - layer_wkt = f'LINESTRING Z ({x} {y} {z_top}, {x} {y} {z_bot})' - layers.append({ - 'X': x, - 'Y': y, - 'Z': z, - 'ZTop': z_top, - 'ZMid': z_mid, - 'ZBottom': z_bot, - 'DepthTop': depth_top, - 'DepthBottom': depth_bottom, - 'Resistivity': res, - 'Layer': i + 1, - 'Geometry': layer_wkt, - }) + z_top = z - depth_top + z_bot = z - depth_bottom + z_mid = (z_top + z_bot) / 2 + cum_depth = depth_bottom + + 
layer_wkt = f'LINESTRING Z ({x} {y} {z_top}, {x} {y} {z_bot})' + layers.append({ + 'X': x, + 'Y': y, + 'Z': z, + 'ZTop': z_top, + 'ZMid': z_mid, + 'ZBottom': z_bot, + 'DepthTop': depth_top, + 'DepthBottom': depth_bottom, + 'Resistivity': res, + 'Layer': i + 1, + 'Geometry': layer_wkt, + }) + + if headers is None: + raise ValueError('XYZ file does not contain a supported header row') return points, doi_points, layers diff --git a/tem_loader/tem_loader.py b/tem_loader/tem_loader.py @@ -5,7 +5,6 @@ from qgis.core import ( QgsProject, QgsVectorLayer, QgsCoordinateReferenceSystem, - QgsLayerTreeGroup, ) from . import core @@ -35,8 +34,19 @@ class TEMLoaderPlugin: '', 'XYZ files (*.xyz);;All files (*)', ) + failed = [] for path in paths: - self._load_xyz(Path(path)) + filepath = Path(path) + try: + self._load_xyz(filepath) + except Exception as exc: + failed.append(f'{filepath.name}: {exc}') + if failed: + QMessageBox.warning( + self.iface.mainWindow(), + 'TEM Loader', + '\n'.join(failed), + ) def _load_xyz(self, filepath): points, doi_points, layers = core.process_xyz(filepath) @@ -59,11 +69,6 @@ class TEMLoaderPlugin: crs.createFromString('EPSG:4326') crs_str = crs.authid() - group_name = filepath.stem - root = project.layerTreeRoot() - group = root.insertGroup(0, group_name) - - failed = [] loaded_layers = {} source_layers = [ ('layers', lyr_csv, 'LineString'), @@ -81,7 +86,6 @@ class TEMLoaderPlugin: ) layer = QgsVectorLayer(uri, name, 'delimitedtext') if not layer.isValid(): - failed.append(name) continue qml = STYLES_DIR / f'{name}.qml' @@ -91,6 +95,12 @@ class TEMLoaderPlugin: project.addMapLayer(layer, False) loaded_layers[name] = layer + if not loaded_layers: + raise ValueError('failed to load any layers') + + group_name = filepath.stem + root = project.layerTreeRoot() + group = root.insertGroup(0, group_name) insert_index = 0 for name in ('points', 'doi', 'layers'): layer = loaded_layers.get(name) @@ -99,9 +109,10 @@ class TEMLoaderPlugin: 
group.insertLayer(insert_index, layer) insert_index += 1 + failed = [name for name, _, _ in source_layers if name not in loaded_layers] if failed: QMessageBox.warning( self.iface.mainWindow(), 'TEM Loader', - f'Failed to load layers: {", ".join(failed)}', + f'{filepath.name}: failed to load layers: {", ".join(failed)}', ) diff --git a/test/test_core.py b/test/test_core.py @@ -1,7 +1,11 @@ +import importlib from pathlib import Path from tempfile import TemporaryDirectory import shutil +import sys +import types import unittest +from unittest.mock import Mock, patch import xml.etree.ElementTree as ET from tem_loader.core import detect_source_epsg, process_xyz, write_csv @@ -130,6 +134,39 @@ class ProcessXYZTests(unittest.TestCase): self.assertTrue(out_path.exists()) self.assertIn("StationNo", out_path.read_text().splitlines()[0]) + def test_process_xyz_rejects_metadata_only_file(self): + with TemporaryDirectory() as tmp: + path = Path(tmp) / "metadata_only.xyz" + path.write_text("/ epsg:32632\n/ no header here\n") + + with self.assertRaisesRegex( + ValueError, "supported header row" + ): + process_xyz(path) + + def test_process_xyz_rejects_unsupported_header(self): + with TemporaryDirectory() as tmp: + path = Path(tmp) / "unsupported.xyz" + path.write_text("A B C\n1 2 3\n") + + with self.assertRaisesRegex( + ValueError, "supported header row" + ): + process_xyz(path) + + def test_process_xyz_rejects_mismatched_row_length(self): + with TemporaryDirectory() as tmp: + path = Path(tmp) / "broken.xyz" + path.write_text( + "/ X Y Z DOI DataResidual NumLayers Line StationNo\n" + "1 2 3 4 5 6 7\n" + ) + + with self.assertRaisesRegex( + ValueError, r"Row 2 has 7 columns, expected 8" + ): + process_xyz(path) + def test_fixture_doi_values_fit_fixed_scale(self): for path in sorted(FIXTURE_DIR.glob("*.xyz")): _, doi_points, _ = process_xyz(path) @@ -167,3 +204,70 @@ class ProcessXYZTests(unittest.TestCase): method = renderer.find("./classificationMethod") 
self.assertIsNotNone(method) self.assertEqual(method.attrib["id"], "EqualInterval") + + +class PluginTests(unittest.TestCase): + def _import_plugin_module(self): + class FakeSignal: + def connect(self, _callback): + pass + + class FakeAction: + def __init__(self, *_args, **_kwargs): + self.triggered = FakeSignal() + + class FakeFileDialog: + paths = [] + + @staticmethod + def getOpenFileNames(*_args, **_kwargs): + return FakeFileDialog.paths, "" + + class FakeMessageBox: + warnings = [] + + @staticmethod + def warning(*args): + FakeMessageBox.warnings.append(args) + + qtwidgets = types.ModuleType("qgis.PyQt.QtWidgets") + qtwidgets.QAction = FakeAction + qtwidgets.QFileDialog = FakeFileDialog + qtwidgets.QMessageBox = FakeMessageBox + + qgis_core = types.ModuleType("qgis.core") + qgis_core.QgsProject = type("QgsProject", (), {}) + qgis_core.QgsVectorLayer = type("QgsVectorLayer", (), {}) + qgis_core.QgsCoordinateReferenceSystem = type( + "QgsCoordinateReferenceSystem", (), {} + ) + + module_map = { + "qgis": types.ModuleType("qgis"), + "qgis.PyQt": types.ModuleType("qgis.PyQt"), + "qgis.PyQt.QtWidgets": qtwidgets, + "qgis.core": qgis_core, + } + + with patch.dict(sys.modules, module_map): + sys.modules.pop("tem_loader.tem_loader", None) + module = importlib.import_module("tem_loader.tem_loader") + + return module, FakeFileDialog, FakeMessageBox + + def test_run_continues_after_failed_file_and_shows_filename(self): + module, file_dialog, message_box = self._import_plugin_module() + file_dialog.paths = ["/tmp/bad.xyz", "/tmp/good.xyz"] + iface = Mock() + iface.mainWindow.return_value = object() + plugin = module.TEMLoaderPlugin(iface) + plugin._load_xyz = Mock( + side_effect=[ValueError("Row 3 has 4 columns, expected 6"), None] + ) + + plugin.run() + + self.assertEqual(plugin._load_xyz.call_count, 2) + self.assertEqual(len(message_box.warnings), 1) + self.assertIn("bad.xyz", message_box.warnings[0][2]) + self.assertIn("Row 3 has 4 columns, expected 6", 
message_box.warnings[0][2])