# Default to stdin
input_handles.append(sys.stdin)
+ # Helper for whitespace normalization in quotes
+ # Collapses every run of whitespace in `s` to a single space and drops
+ # leading/trailing whitespace; any falsy `s` (e.g. None or "") yields "".
+ def norm_space(s):
+ return " ".join(s.split()) if s else ""
+
+ # Deduplicate names by string representation (deterministic: first alphabetically wins)
+ # Mutates target_set in place: afterwards it holds one name object per
+ # distinct str(name) found in target_set or source_set. source_set is only
+ # iterated. Returns None.
+ def merge_names(target_set, source_set):
+ # Combine all names and sort deterministically
+ all_names = list(target_set) + list(source_set)
+ # str(x) is the primary key; the remaining fields only break ties between
+ # names that share the same string. Names equal on every field keep their
+ # relative list order because list.sort is stable.
+ # NOTE(review): if note.text can be None, that key element is None and
+ # comparing it with a str would raise TypeError - confirm it is always a str.
+ all_names.sort(
+ key=lambda x: (
+ str(x),
+ x.given or "",
+ x.surname or "",
+ x.prefix or "",
+ x.suffix or "",
+ x.kind or "",
+ str(x.alternative) if hasattr(x, "alternative") else "",
+ x.note.text if hasattr(x, "note") and x.note else "",
+ )
+ )
+ # Rebuild target_set keeping first occurrence by string
+ # (all_names is a separate list, so clearing target_set loses nothing)
+ target_set.clear()
+ seen = set()
+ for n in all_names:
+ s = str(n)
+ if s not in seen:
+ target_set.add(n)
+ seen.add(s)
+
try:
# read the GEDCOM data
for file in input_handles:
ged = Gedcom(file, tree)
- # Deduplicate names by string representation (deterministic: first alphabetically wins)
- def merge_names(target_set, source_set):
- # Combine all names and sort deterministically
- all_names = list(target_set) + list(source_set)
- all_names.sort(key=lambda x: (
- str(x),
- x.given or "",
- x.surname or "",
- x.prefix or "",
- x.suffix or "",
- x.kind or "",
- str(x.alternative) if hasattr(x, 'alternative') else "",
- x.note.text if hasattr(x, 'note') and x.note else "",
- ))
- # Rebuild target_set keeping first occurrence by string
- target_set.clear()
- seen = set()
- for n in all_names:
- s = str(n)
- if s not in seen:
- target_set.add(n)
- seen.add(s)
-
- # Helper for whitespace normalization in quotes
- def norm_space(s):
- return " ".join(s.split()) if s else ""
-
# add information about individuals
new_indi = 0
merged_indi = 0
expectations = load_expectations()
exp_ada = expectations.get("EXPECTED_ADA_LINES", 0)
exp_marie = expectations.get("EXPECTED_MARIE_LINES", 0)
- exp_merged = expectations.get("EXPECTED_MERGED_LINES", 0)
# 2. Setup Cache
setup_cache()
# Check merged file with exact diff (no line count tolerance)
diff_result = subprocess.run(
- ["git", "diff", "--no-index", "--exit-code", "--color=always", str(merged), str(ARTIFACTS_DIR / "merged_scientists.ged")],
+ [
+ "git",
+ "diff",
+ "--no-index",
+ "--exit-code",
+ "--color=always",
+ str(merged),
+ str(ARTIFACTS_DIR / "merged_scientists.ged"),
+ ],
+ # A non-zero exit status must not raise here: the return code is
+ # inspected right after the call to report a mismatch.
+ check=False,
)
if diff_result.returncode != 0:
- print(f"❌ Merged file differs from artifact (see diff above)")
+ print("❌ Merged file differs from artifact (see diff above)")
failed = True
else:
print(f"✓ Merged file matches artifact exactly ({l_merged} lines).")