name: ci
"on":
- push:
- branches: [master, main]
- pull_request:
- branches: [master, main]
+ push: {}
permissions:
contents: read
run: make format
if: runner.os == 'Linux'
- - name: Check Clean After Format
- if: runner.os == 'Linux'
- run: |
- git diff --exit-code || (echo "❌ Code not formatted. Run 'make format' locally." && exit 1)
-
- name: Lint
run: make lint
if: runner.os == 'Linux'
!.geminiignore
/test_debug.py
-
-# Local tmp dir
.tmp/
+.tmp
tests/cache/
.PHONY: test/unit
test/unit: ##H@@ Run Unit tests only
- $(PYTHON) -m coverage run -p -m pytest -svv getmyancestors/tests
+ $(PYTHON) -m coverage run -p -m pytest getmyancestors/tests
# Installation
.PHONY: deps
# Installation tests
.PHONY: test/install
test/install: ##H@@ Run installation tests
- $(PYTHON) -m coverage run -p -m pytest -svv tests/test_installation.py
+ $(PYTHON) -m coverage run -p -m pytest tests/test_installation.py
.PHONY: test/offline
test/offline: ##H@@ Run offline verification (requires fixtures)
- $(PYTHON) -m pytest -svv tests/offline_test.py
+ $(PYTHON) -m pytest tests/offline_test.py
# Generate targets for all test files (enables autocomplete)
.PHONY: $(TEST_TARGETS)
$(TEST_TARGETS): test/unit/%:
- $(PYTHON) -m pytest -svv getmyancestors/tests/$*.py
+	$(PYTHON) -m pytest getmyancestors/tests/$*.py -v
.PHONY: test/
test/: ##H@@ Run unit & E2E tests
REMOTE_HEAD ?= origin/master
-REMOTE_HEAD_SUBMODULE ?= origin/master
-
-PY_CHANGED_FILES_BASE ?= $(shell git diff --name-only --diff-filter=MACRU $(REMOTE_HEAD) '*.py')
-PY_CHANGED_FILES_SUBMODULE ?= $(shell cd res/testdata && git diff --name-only --diff-filter=MACRU HEAD $(REMOTE_HEAD_SUBMODULE)'*.py')
-PY_CHANGED_FILES ?= $(sort $(PY_CHANGED_FILES_BASE) $(PY_CHANGED_FILES_SUBMODULE))
-PY_CHANGED_FILES_FLAG ?= $(if $(strip $(PY_CHANGED_FILES)),1,)
-
+# Python files that differ from REMOTE_HEAD. The filter keeps R (renamed) so a
+# moved-and-edited file is still reported under its new name.
+PY_CHANGED_FILES ?= $(shell git diff --name-only --diff-filter=MACRU $(REMOTE_HEAD) '*.py')
+# "1" when at least one changed file exists; strip guards against a
+# whitespace-only value being treated as non-empty by $(if).
+PY_CHANGED_FILES_FLAG ?= $(if $(strip $(PY_CHANGED_FILES)),1,)
SH_ALL_FILES ?= $(shell git ls-files '*.sh')
PRETTIER_ALL_FILES ?= $(shell git ls-files '*.js' '*.css' '*.html' '*.md' '*.yaml' '*.yml')
+import os
+import sys
from typing import Optional
from getmyancestors.classes.constants import FACT_TYPES, ORDINANCES
Ordinance,
Source,
)
-from getmyancestors.classes.tree.utils import warn
+
+
+def _warn(msg: str):
+    """Emit ``msg`` on stderr, followed by a newline.
+
+    The message is wrapped in the ANSI yellow escape sequence when stderr is
+    a TTY or when the FORCE_COLOR environment variable holds a non-empty
+    value; otherwise it is written as plain text.
+    """
+    colorize = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "")
+    line = f"\033[33m{msg}\033[0m\n" if colorize else f"{msg}\n"
+    sys.stderr.write(line)
class Gedcom:
for num, indi in self.indi.items():
if indi.fid is None:
name_str = str(indi.name) if indi.name else "Unknown"
- warn(
+ _warn(
f"Warning: Individual @I{num}@ ({name_str}) missing _FSFTID tag, "
f"using GEDCOM pointer as fallback."
)
w = self.indi[fam.wife_num]
wife_name = str(w.name) if w.name else "Unknown"
- warn(
+ _warn(
f"Warning: Family @F{num}@ ({husb_name} & {wife_name}) missing _FSFTID tag, "
f"using GEDCOM pointer as fallback."
)
================================================================================
This program is free software: you can redistribute it and/or modify it under
-the terms of the GNU Affero General Public License as published by the Free Software
+the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
By using this software to access FamilySearch, you also agree to:
5. Respect the privacy of living individuals in any downloaded data
6. Accept that FamilySearch may revoke API access for violations
-DO NOT USE THE TOOL EXCESSIVELY! DOWNLOAD YOUR FAMILY'S GEDCOM AND USE IT OFFLINE.
+DO NOT USE THE TOOL EXCESSIVELY!
+DOWNLOAD YOUR FAMILY'S GEDCOM AND USE IT OFFLINE.
BE RESPECTFUL OF FAMILYSEARCH'S SERVERS AND RESPECT THEIR TERMS OF USE.
-ONLY DEVELOP THE TOOL INSOFAR AS IS NECESSARY TO EITHER:
- 1. REDUCE OVERALL LOAD ON FAMILYSEARCH'S SERVERS.
- 2. IMPROVE THE USER EXPERIENCE/RESOLVE BUGS.
-DO NOT DEVELOP THE TOOL IN A WAY THAT AIDS THOSE SEEKING TO SCRAPE OR BULK EXTRACT DATA.
-
================================================================================
"""
)
os.makedirs(cache_dir, exist_ok=True)
self.db_path = os.path.join(cache_dir, "session.sqlite")
- # Cookie file is now stored in cache directory
+ # Cookie file is now stored in cache directory too
self.cookie_file = os.path.join(cache_dir, "cookies.json")
self._init_db()
self.check_license()
except Exception as e:
self.write_log("Error loading session from JSON: " + str(e))
+ # 2. Legacy Migration: checking old cookie file if it exists
+ if os.path.exists(self.cookie_file):
+ try:
+ with open(self.cookie_file, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ self._apply_session_data(data)
+ # We do NOT auto-save to new JSON here to respect read-only/security.
+ # It will save to new JSON only on next login/save_cookies call.
+ if self.verbose:
+ self.write_log(
+ "Session loaded (migrated) from legacy JSON: "
+ + self.cookie_file
+ )
+ return True
+ except Exception as e:
+ self.write_log("Error loading legacy cookie file: " + str(e))
+
return False
def _apply_session_data(self, data):
"""Core classes: Indi, Fam, Tree"""
+# pylint: disable=too-many-lines
+
import asyncio
import hashlib
import os
import time
import xml.etree.ElementTree as ET
from datetime import datetime
-from enum import Enum
from typing import Any, BinaryIO, Dict, Iterable, List, Optional, Set, Tuple, Union
# global imports
from getmyancestors import __version__
from getmyancestors.classes.constants import MAX_PERSONS
from getmyancestors.classes.session import GMASession
-from getmyancestors.classes.tree.utils import warn
from .elements import Citation, Name, Ordinance, Place
from .records import Fact, Memorie, Note, Source
from .utils import GEONAME_FEATURE_MAP, cont
-# pylint: disable=too-many-lines
-
-
-class ParentRelType(Enum):
- """Parent-child relationship type for GEDCOM PEDI tag"""
-
- BIRTH = "birth"
- ADOPTED = "adopted"
- STEP = "step"
- FOSTER = "foster"
-
- @classmethod
- def from_fs_type(
- cls, facts: Optional[List[Dict[str, Any]]]
- ) -> Optional["ParentRelType"]:
- """Convert FamilySearch relationship facts to ParentRelType"""
- if not facts:
- return None
-
- mapping = {
- "http://gedcomx.org/BiologicalParent": cls.BIRTH,
- "http://gedcomx.org/AdoptiveParent": cls.ADOPTED,
- "http://gedcomx.org/StepParent": cls.STEP,
- "http://gedcomx.org/FosterParent": cls.FOSTER,
- }
-
- for fact in facts:
- f_type = fact.get("type")
- if f_type in mapping:
- return mapping[f_type]
-
- # Failed to find a match, return unknown type
- return None
-
class Indi:
"""GEDCOM individual class
- :param fid: FamilySearch id
+ :param fid: FamilySearch id
:param tree: a tree object
:param num: the GEDCOM identifier
"""
self.tree = tree
self.num_prefix = "I"
self.origin_file: Optional[str] = None
- self.famc: Set[Tuple["Fam", Optional[ParentRelType]]] = set()
+ self.famc: Set["Fam"] = set()
self.fams: Set["Fam"] = set()
self.famc_fid: Set[str] = set()
self.fams_fid: Set[str] = set()
self.name: Optional[Name] = None
self.gender: Optional[str] = None
self.living: Optional[bool] = None
- # (father_id, mother_id, father_rel_type, mother_rel_type)
- self.parents: Set[
- Tuple[
- Optional[str],
- Optional[str],
- Optional[ParentRelType],
- Optional[ParentRelType],
- ]
- ] = set()
+ self.parents: Set[Tuple[Optional[str], Optional[str]]] = (
+ set()
+ ) # (father_id, mother_id)
self.spouses: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = (
set()
) # (person1, person2, relfid)
self.facts.add(
Fact(x, self.tree, num_prefix=f"INDI_{self.fid}")
)
- if "sources" in data and self.tree and self.tree.fs:
- sources = self.tree.fs.get_url(
- "/platform/tree/persons/%s/sources" % self.fid
- )
- if sources:
- for quote in sources["persons"][0]["sources"]:
- source_id = quote["descriptionId"]
- source_data = next(
- (
- s
- for s in sources["sourceDescriptions"]
- if s["id"] == source_id
- ),
- None,
- )
- if self.tree:
- if source_data:
- source = self.tree.ensure_source(source_data)
- else:
- existing_source = self.tree.sources.get(source_id)
- if existing_source:
- source = existing_source
- else:
- source = self.tree.ensure_source({"id": source_id})
- else:
- source = None
+ if "sources" in data and self.tree and self.tree.fs:
+ sources = self.tree.fs.get_url(
+ "/platform/tree/persons/%s/sources" % self.fid
+ )
+ if sources:
+ for quote in sources["persons"][0]["sources"]:
+ source_id = quote["descriptionId"]
+ source_data = next(
+ (
+ s
+ for s in sources["sourceDescriptions"]
+ if s["id"] == source_id
+ ),
+ None,
+ )
+ source = (
+ self.tree.ensure_source(source_data)
+ if self.tree and source_data
+ else None
+ )
if source and self.tree:
citation = self.tree.ensure_citation(quote, source)
self.citations.add(citation)
"""add family fid (for spouse or parent)"""
self.fams.add(fam)
- def add_famc(self, fam: "Fam", rel_type: Optional[ParentRelType] = None):
- """add family fid (for child) with optional relationship type"""
- self.famc.add((fam, rel_type))
+ def add_famc(self, fam: "Fam"):
+ """add family fid (for child)"""
+ self.famc.add(fam)
def get_notes(self):
"""retrieve individual notes"""
ET.SubElement(person, "parentin", hlink=fam.handle)
if self.famc:
- for fam, _rel_type in self.famc:
+ for fam in self.famc:
ET.SubElement(person, "childof", hlink=fam.handle)
for fact in self.facts:
self.sealing_child.print(file)
for fam in sorted(self.fams, key=lambda x: x.id or ""):
file.write("1 FAMS @F%s@\n" % fam.id)
- for fam, rel_type in sorted(self.famc, key=lambda x: x[0].id or ""):
+ for fam in sorted(self.famc, key=lambda x: x.id or ""):
file.write("1 FAMC @F%s@\n" % fam.id)
- # Output PEDI tag for explicit relationship type
- if rel_type:
- file.write("2 PEDI %s\n" % rel_type.value)
- else:
- warn(f"Missing PEDI type for {self.fid} in family {fam.id}")
# print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr)
# for num in self.fams_ids:
# print(f'Famc Ids: {self.famc_ids}', file=sys.stderr)
father: str | None = rel.get("parent1", {}).get("resourceId")
mother: str | None = rel.get("parent2", {}).get("resourceId")
child: str | None = rel.get("child", {}).get("resourceId")
-
- # Extract relationship types from fatherFacts/motherFacts
- father_rel = None
- mother_rel = None
- for fact in rel.get("fatherFacts", []):
- if "type" in fact:
- father_rel = ParentRelType.from_fs_type(fact["type"])
- break
- for fact in rel.get("motherFacts", []):
- if "type" in fact:
- mother_rel = ParentRelType.from_fs_type(fact["type"])
- break
-
- # Store parent relationship with types
if child in self.indi:
- self.indi[child].parents.add(
- (father, mother, father_rel, mother_rel)
- )
+ self.indi[child].parents.add((father, mother))
if father in self.indi:
self.indi[father].children.add((father, mother, child))
if mother in self.indi:
# if (father, mother) not in self.fam:
# self.fam[(father, mother)] = Fam(father, mother, self)
- def add_trio(
- self,
- father: Indi | None,
- mother: Indi | None,
- child: Indi | None,
- father_rel: Optional[ParentRelType] = None,
- mother_rel: Optional[ParentRelType] = None,
- ):
+ def add_trio(self, father: Indi | None, mother: Indi | None, child: Indi | None):
"""add a children relationship to the family tree
- :param father: the father Indi or None
- :param mother: the mother Indi or None
- :param child: the child Indi or None
- :param father_rel: relationship type to father (birth, step, adopted, foster)
- :param mother_rel: relationship type to mother (birth, step, adopted, foster)
+ :param father: the father Indi or None
+ :param mother: the mother Indi or None
+ :param child: the child Indi or None
"""
fam = self.ensure_family(father, mother)
if child is not None:
fam.add_child(child)
- # Use the more specific relationship type (default to birth if both are the same)
- rel_type = father_rel or mother_rel
- child.add_famc(fam, rel_type)
+ child.add_famc(fam)
if father is not None:
father.add_fams(fam)
fids_list = [f for f in fids if f in self.indi]
parents = set()
for fid in fids_list:
- for father, mother, _, _ in self.indi[fid].parents:
- if father:
- parents.add(father)
- if mother:
- parents.add(mother)
+ for couple in self.indi[fid].parents:
+ parents |= set(couple)
if parents:
parents -= set(self.exclude)
self.add_indis(set(filter(None, parents)))
for fid in fids_list:
- for father, mother, father_rel, mother_rel in self.indi[fid].parents:
+ for father, mother in self.indi[fid].parents:
self.add_trio(
self.indi.get(father) if father else None,
self.indi.get(mother) if mother else None,
self.indi.get(fid) if fid else None,
- father_rel,
- mother_rel,
)
- return parents
+ return set(filter(None, parents))
def add_spouses(self, fids: Iterable[str]):
"""add spouse relationships
"""Utility constants and functions for tree package"""
-import os
import re
-import sys
-
-
-def warn(msg: str):
- """Write a warning message to stderr with optional color (if TTY)."""
- use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "")
- if use_color:
- sys.stderr.write(f"\033[1;33m{msg}\033[0m\n") # Bold yellow
- else:
- sys.stderr.write(f"{msg}\n")
-
# Constants
COUNTY = "County"
# Default to stdin
input_handles.append(sys.stdin)
- # Helper for whitespace normalization in quotes
- def norm_space(s):
- return " ".join(s.split()) if s else ""
-
- # Deduplicate names by string representation (deterministic: first alphabetically wins)
- def merge_names(target_set, source_set):
- # Combine all names and sort deterministically
- all_names = list(target_set) + list(source_set)
- all_names.sort(
- key=lambda x: (
- str(x),
- x.given or "",
- x.surname or "",
- x.prefix or "",
- x.suffix or "",
- x.kind or "",
- str(x.alternative) if hasattr(x, "alternative") else "",
- x.note.text if hasattr(x, "note") and x.note else "",
- )
- )
- # Rebuild target_set keeping first occurrence by string
- target_set.clear()
- seen = set()
- for n in all_names:
- s = str(n)
- if s not in seen:
- target_set.add(n)
- seen.add(s)
-
try:
# read the GEDCOM data
for file in input_handles:
ged = Gedcom(file, tree)
+ # Deduplicate names by string representation (deterministic: first alphabetically wins)
+ def merge_names(target_set, source_set):
+ # Combine all names and sort deterministically
+ all_names = list(target_set) + list(source_set)
+ all_names.sort(key=lambda x: (
+ str(x),
+ x.given or "",
+ x.surname or "",
+ x.prefix or "",
+ x.suffix or "",
+ x.kind or "",
+ str(x.alternative) if hasattr(x, 'alternative') else "",
+ x.note.text if hasattr(x, 'note') and x.note else "",
+ ))
+ # Rebuild target_set keeping first occurrence by string
+ target_set.clear()
+ seen = set()
+ for n in all_names:
+ s = str(n)
+ if s not in seen:
+ target_set.add(n)
+ seen.add(s)
+
+ # Helper for whitespace normalization in quotes
+ def norm_space(s):
+ return " ".join(s.split()) if s else ""
+
# add information about individuals
new_indi = 0
merged_indi = 0
for chil_fid in fam.chil_fid:
if chil_fid in tree.indi:
fam.children.add(tree.indi[chil_fid])
- tree.indi[chil_fid].famc.add((fam, None))
+ tree.indi[chil_fid].famc.add(fam)
# compute number for family relationships and print GEDCOM file
tree.reset_num()
i1 = Indi("I1", self.tree)
self.tree.indi["I1"] = i1
- # Manually populate parents list for I1 (father, mother, father_rel, mother_rel)
- i1.parents = {("I2", "I3", None, None)} # Father, Mother, no rel types
+ # Manually populate parents list for I1
+ i1.parents = {("I2", "I3")} # Father, Mother
# Case 1: No exclude
self.tree.exclude = []
name = "getmyancestors"
description = "Retrieve GEDCOM data from FamilySearch Tree"
requires-python = ">=3.7"
-license = {text = "GNU Affero General Public License v3 (AGPLv3)"}
+license = {text = "GNU"}
keywords = [
"getmyancestors",
"familysearch",
]
classifiers = [
"Environment :: Console",
- "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
+ "License :: OSI Approved :: GNU General Public License (GPL)",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3 :: Only",
]
data_file = ".tmp/.coverage"
[tool.coverage.report]
-fail_under = 68.27
+fail_under = 45.00
precision = 2
show_missing = true
-Subproject commit 82518a63992b6b465da6081a6d2a54d2f54708db
+Subproject commit cefbd8dbd42cbb85209bae8e242e57add0c0e520
# Shared expectations for test fixtures
# These values are baselined from "Live" fixture generation
# and should be updated whenever fixtures are regenerated.
-export EXPECTED_ADA_LINES=10902
-export EXPECTED_MARIE_LINES=3444
-export EXPECTED_MERGED_LINES=13536
+export EXPECTED_ADA_LINES=11587
+export EXPECTED_MARIE_LINES=3698
+export EXPECTED_MERGED_LINES=14480
print(f"✓ {label} matches artifact exactly.")
return True
- print(f"⚠️ {label} differs from artifact. Showing diff (first 100 lines):")
- subprocess.run(
- f"diff --color=always {generated_path} {artifact_path} | head -100",
- shell=True,
- check=False,
- )
- print("...")
+ print(f"⚠️ {label} differs from artifact. Showing diff (first 10 lines):")
print("Diff Stat:")
subprocess.run(
[
],
check=False,
)
+ print("...")
+ subprocess.run(
+ ["diff", "--color=always", str(generated_path), str(artifact_path)], check=False
+ )
print(f"❌ Verified failed for {label}")
return False
expectations = load_expectations()
exp_ada = expectations.get("EXPECTED_ADA_LINES", 0)
exp_marie = expectations.get("EXPECTED_MARIE_LINES", 0)
+ exp_merged = expectations.get("EXPECTED_MERGED_LINES", 0)
# 2. Setup Cache
setup_cache()
env["FAMILYSEARCH_PASS"] = env.get("FAMILYSEARCH_PASS", "dummy_password")
env["GMA_OFFLINE_MODE"] = "1"
env["GMA_DEBUG"] = "1"
- env["PYTHONHASHSEED"] = "0"
if "NO_CACHE" in env:
del env["NO_CACHE"]
# Check merged file with exact diff (no line count tolerance)
diff_result = subprocess.run(
- [
- "git",
- "diff",
- "--no-index",
- "--exit-code",
- "--color=always",
- str(merged),
- str(ARTIFACTS_DIR / "merged_scientists.ged"),
- ],
- check=False,
- )
- print("Diff Stat:")
- subprocess.run(
- [
- "git",
- "diff",
- "--no-index",
- "--stat",
- str(merged),
- str(ARTIFACTS_DIR / "merged_scientists.ged"),
- ],
- check=False,
+ ["git", "diff", "--no-index", "--exit-code", "--color=always", str(merged), str(ARTIFACTS_DIR / "merged_scientists.ged")],
)
if diff_result.returncode != 0:
- print("❌ Merged file differs from artifact (see diff above)")
+ print(f"❌ Merged file differs from artifact (see diff above)")
failed = True
else:
print(f"✓ Merged file matches artifact exactly ({l_merged} lines).")
)
if not all_matched:
- print(
- "❌ Offline Test Failed due to artifact mismatch (One of Part1, Part2, or Merged)"
- )
+ print("❌ Offline Test Failed due to artifact mismatch")
sys.exit(1)
print("✅ Offline Test Complete!")
text=True,
cwd=self.project_root,
check=True,
- timeout=300, # 5 minute timeout
)
# Test that the package can be imported