WIP: address non-determinism in `make test/offline`

author Shane Jaroch <chown_tee@proton.me>

Thu, 22 Jan 2026 23:05:35 +0000 (18:05 -0500)

committer Shane Jaroch <chown_tee@proton.me>

Thu, 22 Jan 2026 23:09:24 +0000 (18:09 -0500)
author Shane Jaroch <chown_tee@proton.me>
Thu, 22 Jan 2026 23:05:35 +0000 (18:05 -0500)
committer Shane Jaroch <chown_tee@proton.me>
Thu, 22 Jan 2026 23:09:24 +0000 (18:09 -0500)
diff --git a/getmyancestors/mergemyanc.py b/getmyancestors/mergemyanc.py

index 0d5b7c25d4f21d8a565cb51457169d01f68ab62d..0db172480505ab7f794570e4b032b17691631c66 100755 (executable)
--- a/getmyancestors/mergemyanc.py
+++ b/getmyancestors/mergemyanc.py
@@ -109,13 +109,28 @@ def main(
  
              ged = Gedcom(file, tree)
  
-            # Deduplicate names by string representation
+            # Deduplicate names by string representation (deterministic: among names with the same string, the one sorting first on the tie-breaker fields wins)
              def merge_names(target_set, source_set):
-                existing_names = {str(n) for n in target_set}
-                for n in source_set:
-                    if str(n) not in existing_names:
+                # Combine all names and sort deterministically
+                all_names = list(target_set) + list(source_set)
+                all_names.sort(key=lambda x: (
+                    str(x),
+                    x.given or "",
+                    x.surname or "",
+                    x.prefix or "",
+                    x.suffix or "",
+                    x.kind or "",
+                    str(x.alternative) if hasattr(x, 'alternative') else "",
+                    x.note.text if hasattr(x, 'note') and x.note else "",
+                ))
+                # Rebuild target_set keeping first occurrence by string
+                target_set.clear()
+                seen = set()
+                for n in all_names:
+                    s = str(n)
+                    if s not in seen:
                          target_set.add(n)
-                        existing_names.add(str(n))
+                        seen.add(s)
  
              # Helper for whitespace normalization in quotes
              def norm_space(s):
diff --git a/tests/fixtures.env b/tests/fixtures.env

index a1d4c9b706c46ea3922e8a41227689fba2e30270..26b281323c4775b5d6cdcc6635bbc69f92d804d3 100644 (file)
--- a/tests/fixtures.env
+++ b/tests/fixtures.env
@@ -4,4 +4,4 @@
  # and should be updated whenever fixtures are regenerated.
  export EXPECTED_ADA_LINES=11587
  export EXPECTED_MARIE_LINES=3698
-export EXPECTED_MERGED_LINES=14481
+export EXPECTED_MERGED_LINES=14483
diff --git a/tests/offline_test.py b/tests/offline_test.py

index f97151b4be06f5f22c54d8bdf94f0da85cf5f978..34e268d38950c7490fa062dd0486f2b541c22b68 100644 (file)
--- a/tests/offline_test.py
+++ b/tests/offline_test.py
@@ -301,11 +301,15 @@ def test_offline():
      else:
          print(f"✓ Marie Curie (Part 2) lines verified ({l_part2}).")
  
-    if l_merged != exp_merged:
-        print(f"❌ Assertion Failed: Merged line count {l_merged} != {exp_merged}")
+    # Check merged file with exact diff (no line count tolerance)
+    diff_result = subprocess.run(
+        ["git", "diff", "--no-index", "--exit-code", "--color=always", str(merged), str(ARTIFACTS_DIR / "merged_scientists.ged")],
+    )
+    if diff_result.returncode != 0:
+        print(f"❌ Merged file differs from artifact (see diff above)")
          failed = True
      else:
-        print(f"✓ Merged lines verified ({l_merged}).")
+        print(f"✓ Merged file matches artifact exactly ({l_merged} lines).")
  
      if failed:
          sys.exit(1)
author	Shane Jaroch <chown_tee@proton.me>
	Thu, 22 Jan 2026 23:05:35 +0000 (18:05 -0500)
committer	Shane Jaroch <chown_tee@proton.me>
	Thu, 22 Jan 2026 23:09:24 +0000 (18:09 -0500)
getmyancestors/mergemyanc.py		patch | blob | history
tests/fixtures.env		patch | blob | history
tests/offline_test.py		patch | blob | history