--- /dev/null
+if [ -f .venv/bin/activate ]; then
+ source .venv/bin/activate
+fi
+unset PS1
+if [ -f .env ]; then
+ dotenv .env
+fi
+export PYTHONPATH=.
+
--- /dev/null
+!.gemini/
+!test_debug.py
+
+.venv/
+build
+*.egg-info
+!.envrc
+!.env
+
+.coverage
+http_cache
+.pytest_cache
+.tmp
--- /dev/null
+# SQLite files use git-sqlite-filter for readable diffs
+*.sqlite filter=sqlite diff=sqlite
Expected behavior:
-Corresponding Familysearch link (optional):
\ No newline at end of file
+Corresponding Familysearch link (optional):
--- /dev/null
+---
+name: ci
+
+"on":
+ push: {}
+
+permissions:
+ contents: read
+
+jobs:
+ test-core:
+ strategy:
+ matrix:
+ os: [ubuntu-latest, macos-latest, windows-latest]
+ runs-on: ${{ matrix.os }}
+ env:
+ SKIP_VENV: 1
+ steps:
+ - name: Install git-sqlite-filter
+ run: |
+ pip install pipx
+ pipx install git+https://github.com/gamesguru/git-sqlite-filter.git@c2a4089
+ pipx ensurepath
+ echo "$HOME/.local/bin" >> $GITHUB_PATH
+ git config --global filter.sqlite.clean "git-sqlite-clean %f"
+ git config --global filter.sqlite.smudge "git-sqlite-smudge %f"
+ git config --global filter.sqlite.required true
+
+ - name: Checkout
+ uses: actions/checkout@v4
+ with:
+ submodules: recursive
+
+ - name: Fetch master
+ run: git fetch origin master
+
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: 3
+ cache: "pip"
+
+ - name: Install dependencies
+ run: pip install ".[dev]"
+
+ - name: Format Check
+ run: make format
+ if: runner.os == 'Linux'
+
+ - name: Lint
+ run: make lint
+ if: runner.os == 'Linux'
+
+ - name: Test [Unit]
+ run: make test/unit
+
+ - name: Test [Offline Verification]
+ shell: bash
+ # Fails due to bash script not being cross-platform out of the box
+ continue-on-error: ${{ matrix.os == 'windows-latest' }}
+ run: make test/offline
+
+ - name: Combine Coverage
+ run: make test/cov
+
+ - name: Coveralls
+ uses: coverallsapp/github-action@v2
+ with:
+ parallel: true
+ flag-name: run-${{ matrix.os }}
+
+ finish:
+ needs: test-core
+ if: ${{ always() }}
+ runs-on: ubuntu-latest
+ steps:
+ - name: Coveralls Finished
+ uses: coverallsapp/github-action@v2
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ parallel-finished: true
# Redis
dump.rdb
-# Dotfiles
-.*
-!.gitignore
-!.readthedocs.yml
-
# vscode
.vscode/
# getmyancestors stuff
*.log
+*.txt
*.settings
-*.ged
\ No newline at end of file
+*.ged
+*.db
+*.sqlite
+*.sqlite3
+
+!.geminiignore
+/test_debug.py
+
+.tmp/
+.tmp
+tests/cache/
--- /dev/null
+[submodule "tests/data"]
+ path = res/testdata
+ url = https://gitlab.com/gg-io/getmyancestors-tests-data.git
--- /dev/null
+# Benoît Fontaine
+Linekio <benoitfontaine.ba@gmail.com> Benoît Fontaine <benoitfontaine.ba@gmail.com>
+Linekio <benoitfontaine.ba@gmail.com> Linekio <benoitfontaine.ba@gmail.com>
+Linekio <benoitfontaine.ba@gmail.com> <benoit.fontaine@acft.fr>
+Linekio <benoitfontaine.ba@gmail.com> <benoit.fontaine@siqual.fr>
+Linekio <benoitfontaine.ba@gmail.com> <benoitfontaine@phytocontrol.com>
+Linekio <benoitfontaine.ba@gmail.com> linek <benoitfontaine.ba@gmail.com>
+Linekio <benoitfontaine.ba@gmail.com> Benoît <benoitfontaine.ba@gmail.com>
+Linekio <benoitfontaine.ba@gmail.com> benoit-phytocontrol <benoitfontaine@phytocontrol.com>
+
+# Adriaan Joubert
+adriaanjoubert <adriaan@joubert.xyz> <45142747+adriaanjoubert@users.noreply.github.com>
+adriaanjoubert <adriaan@joubert.xyz> Adriaan Joubert <adriaan@joubert.xyz>
+
+# Fred Wright
+fhgwright <fw@fwright.net> Fred Wright <fw@fwright.net>
+
+# Melroy van den Berg
+melroy89 <melroy@melroy.org> Melroy van den Berg <melroy@melroy.org>
+
+# Giulio Genovese
+a2800276 <giulio.genovese@gmail.com> Giulio Genovese <giulio.genovese@gmail.com>
+
+# Tim Becker
+freeseek <tim@presseverykey.com> Tim Becker <tim@presseverykey.com>
+
+# Jadson Matos
+jadsongmatos <jadson.g-matos@outlook.com> jadsongmatos <jadson.g-matos@outlook.com>
+
+# Clean IDs (easy mappings)
+changeling <cklarson@gmail.com>
+bsudy <barnabas.sudy@gmail.com>
+gamesguru <chown_tee@proton.me>
+josemando <josemando@gmail.com>
+sebdu66 <52951441+sebdu66@users.noreply.github.com>
--- /dev/null
+SHELL:=/bin/bash
+PYTHON ?= python3
+.DEFAULT_GOAL=_help
+
+.PHONY: _help
+_help:
+ @printf "\nUsage: make <command>, valid commands:\n\n"
+ @grep -h "##H@@" $(MAKEFILE_LIST) | grep -v IGNORE_ME | sed -e 's/##H@@//' | column -t -s $$'\t'
+
+# help: ## Show this help
+# @grep -Eh '\s##\s' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
+
+
+# -include .env
+
+
+.PHONY: test/unit
+test/unit: ##H@@ Run Unit tests only
+ $(PYTHON) -m coverage run -p -m pytest getmyancestors/tests
+
+# Installation
+.PHONY: deps
+deps: ##H@@ Install dependencies
+ $(PYTHON) -m pip install --no-user ".[dev]"
+
+# Installation tests
+.PHONY: test/install
+test/install: ##H@@ Run installation tests
+ $(PYTHON) -m coverage run -p -m pytest tests/test_installation.py
+
+.PHONY: test/offline
+test/offline: ##H@@ Run offline verification (requires fixtures)
+ $(PYTHON) -m pytest tests/offline_test.py
+
+
+# Generate targets for all test files (enables autocomplete)
+TEST_FILES := $(wildcard getmyancestors/tests/test_*.py)
+TEST_TARGETS := $(patsubst getmyancestors/tests/%.py,test/unit/%,$(TEST_FILES))
+
+.PHONY: $(TEST_TARGETS)
+$(TEST_TARGETS): test/unit/%:
+ pytest getmyancestors/tests/$*.py -v
+
+.PHONY: test/
+test/: ##H@@ Run unit & E2E tests
+test/: test/unit test/offline test/cov
+
+.PHONY: test/cov
+test/cov: ##H@@ Combine all coverage data and show report
+ -$(PYTHON) -m coverage combine
+ $(PYTHON) -m coverage report
+
+
+REMOTE_HEAD ?= origin/master
+PY_CHANGED_FILES ?= $(shell git diff --name-only --diff-filter=MACU $(REMOTE_HEAD) '*.py')
+PY_CHANGED_FILES_FLAG ?= $(if $(PY_CHANGED_FILES),1,)
+SH_ALL_FILES ?= $(shell git ls-files '*.sh')
+PRETTIER_ALL_FILES ?= $(shell git ls-files '*.js' '*.css' '*.html' '*.md' '*.yaml' '*.yml')
+
+.PHONY: format
+format: ##H@@ Format with black & isort
+ # ==================================================
+ # formatting
+ # ==================================================
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # prettier (optional)
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ -prettier --write ${PRETTIER_ALL_FILES}
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # shfmt (optional)
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ -shfmt -w ${SH_ALL_FILES}
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # isort
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \
+ isort ${PY_CHANGED_FILES}; \
+ fi
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # black
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \
+ black ${PY_CHANGED_FILES}; \
+ fi
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # ruff (format)
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \
+ ruff check --fix --exit-zero $(ARGS) ${PY_CHANGED_FILES}; \
+ fi
+
+.PHONY: lint/
+lint/: lint/ruff lint/pylint lint/mypy
+lint/: ##H@@ Lint with ruff, pylint, and mypy
+
+.PHONY: lint
+lint: lint/
+
+.PHONY: lint/ruff
+lint/ruff:
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # ruff (lint)
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \
+ ruff check ${PY_CHANGED_FILES}; \
+ fi
+
+.PHONY: lint/pylint
+lint/pylint:
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # pylint
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \
+ pylint -j 0 ${PY_CHANGED_FILES}; \
+ fi
+
+.PHONY: lint/mypy
+lint/mypy:
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # mypy
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ -if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \
+ mypy ${PY_CHANGED_FILES}; \
+ fi
+
+.PHONY: pylint
+pylint: lint/pylint
+
+.PHONY: mypy
+mypy: lint/mypy
+
+
+.PHONY: clean
+clean: ##H@@ Clean up build files/cache
+ rm -rf *.egg-info build dist .coverage .coverage.*
+ rm -rf .tmp .pytest_cache .ruff_cache .mypy_cache
+ # One unified find command to clean python artifacts while ignoring .venv
+ find . -type d -name ".venv" -prune -o \
+ \( \( -name "__pycache__" -o -name "http_cache" \) -type d -o \
+ \( -name "*.pyc" -o -name "*.pyo" -o -name "*.pyd" -o -name "*.so" \) -type f \) \
+ -exec rm -rf {} +
+ @echo "✓ Cleaned build files, caches, and test artifacts"
-getmyancestors
-==============
+# getmyancestors
_getmyancestors_ is a python3 package that downloads family trees in GEDCOM format from FamilySearch.
This script requires python3 and the modules indicated in the requirements.txt file. To install the modules, run in your terminal:
-
-Installation
-============
+# Installation
The easiest way to install _getmyancestors_ is to use pip:
`pip install .`
-How to use
-==========
-
-With graphical user interface:
-
-```
-fstogedcom
-```
-
-Command line examples:
-
-Download four generations of ancestors for the main individual in your tree and output gedcom on stdout (will prompt for username and password):
-
-```
-getmyancestors
-```
-
-Download four generations of ancestors and output gedcom to a file while generating a verbode stderr (will prompt for username and password):
-
-```
-getmyancestors -o out.ged -v
-```
-
-Download four generations of ancestors for individual LF7T-Y4C and generate a verbose log file:
-
-```
-getmyancestors -u username -p password -i LF7T-Y4C -o out.ged -l out.log -v
-```
+For development with linting and testing tools:
-Download six generations of ancestors for individual LF7T-Y4C and generate a verbose log file:
+`pip install ".[dev]"`
-```
-getmyancestors -a 6 -u username -p password -i LF7T-Y4C -o out.ged -l out.log -v
-```
+### GUI Installation (optional)
-Download four generations of ancestors for individual LF7T-Y4C including all their children and their children spouses:
+For the graphical interface (`fstogedcom`), you may need to install Tkinter:
-```
-getmyancestors -d 1 -m -u username -p password -i LF7T-Y4C -o out.ged
-```
+- **Ubuntu/Debian**: `sudo apt install python3-tk`
+- **Fedora/RHEL**: `sudo dnf install python3-tkinter`
+- **macOS**: `brew install python-tk` or use the official Python installer
+- **Windows**: Usually included with Python installation
-Download six generations of ancestors for individuals L4S5-9X4 and LHWG-18F including all their children, grandchildren and their spouses:
+# How to use
-```
-getmyancestors -a 6 -d 2 -m -u username -p password -i L4S5-9X4 LHWG-18F -o out.ged
-```
-
-Download four generations of ancestors for individual LF7T-Y4C including LDS ordinances (need LDS account)
-
-```
-getmyancestors -c -u username -p password -i LF7T-Y4C -o out.ged
-```
-
-Merge two Gedcom files
-
-```
-mergemyancestors -i file1.ged file2.ged -o out.ged
-```
-
-
-Support
-=======
-
-Submit questions or suggestions, or feature requests by opening an Issue at https://github.com/Linekio/getmyancestors/issues
-
-Donation
-========
-
-If this project help you, you can give me a tip :)
-
-[](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=98X3CY93XTAYJ)
+With graphical user interface:
# coding: utf-8
-from . import getmyancestors
-from . import mergemyancestors
-__version__ = "1.0.6"
+__version__ = "1.1.2"
-from getmyancestors import getmyancestors
+from .getmyanc import app
-getmyancestors.main()
+if __name__ == "__main__":
+ app()
MAX_PERSONS = 200
FACT_TAG_EVENT_TYPE = {
- 'BIRT': 'Birth',
- 'DEAT': 'Death',
- 'BURI': 'Burial',
- 'CREM': 'Cremation',
- 'NATU': 'Naturalization',
+ "BIRT": "Birth",
+ "DEAT": "Death",
+ "BURI": "Burial",
+ "CREM": "Cremation",
+ "NATU": "Naturalization",
}
FACT_TAGS = {
"http://gedcomx.org/Cremation": "CREM",
"http://gedcomx.org/Caste": "CAST",
"http://gedcomx.org/Nationality": "NATI",
+ "http://gedcomx.org/Immigration": "IMMI",
}
FACT_EVEN = {
"NotNeeded": "INFANT",
}
+
# mergemyancestors constants and functions
-def reversed_dict(d):
+def reversed_dict(d: dict) -> dict:
return {val: key for key, val in d.items()}
-# mergemyancestors classes
+import os
+import sys
+from typing import Optional
+
+from getmyancestors.classes.constants import FACT_TYPES, ORDINANCES
from getmyancestors.classes.tree import (
- Indi,
Fact,
Fam,
+ Indi,
Memorie,
Name,
Note,
Ordinance,
Source,
)
-from getmyancestors.classes.constants import FACT_TYPES, ORDINANCES
+
+
+def _warn(msg: str):
+ """Write a warning message to stderr with optional color (if TTY)."""
+ use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "")
+ if use_color:
+ sys.stderr.write(f"\033[33m{msg}\033[0m\n")
+ else:
+ sys.stderr.write(f"{msg}\n")
class Gedcom:
self.tree = tree
self.level = 0
self.pointer = None
- self.tag = None
- self.data = None
+ self.tag: Optional[str] = None
+ self.data: Optional[str] = None
self.flag = False
self.indi = dict()
self.fam = dict()
def __parse(self):
"""Parse the GEDCOM file into self.tree"""
while self.__get_line():
- if self.tag == "INDI":
- self.num = int(self.pointer[2 : len(self.pointer) - 1])
+ if self.tag == "INDI" and self.pointer:
+ self.num = self.pointer[2 : len(self.pointer) - 1]
self.indi[self.num] = Indi(tree=self.tree, num=self.num)
self.__get_indi()
- elif self.tag == "FAM":
- self.num = int(self.pointer[2 : len(self.pointer) - 1])
+ elif self.tag == "FAM" and self.pointer:
+ self.num = self.pointer[2 : len(self.pointer) - 1]
if self.num not in self.fam:
self.fam[self.num] = Fam(tree=self.tree, num=self.num)
self.__get_fam()
- elif self.tag == "NOTE":
- self.num = int(self.pointer[2 : len(self.pointer) - 1])
+ elif self.tag == "NOTE" and self.pointer:
+ self.num = self.pointer[2 : len(self.pointer) - 1]
if self.num not in self.note:
self.note[self.num] = Note(tree=self.tree, num=self.num)
self.__get_note()
elif self.tag == "SOUR" and self.pointer:
- self.num = int(self.pointer[2 : len(self.pointer) - 1])
+ self.num = self.pointer[2 : len(self.pointer) - 1]
if self.num not in self.sour:
self.sour[self.num] = Source(num=self.num)
self.__get_source()
elif self.tag == "SLGC":
self.indi[self.num].sealing_child = self.__get_ordinance()
elif self.tag == "FAMS":
- self.indi[self.num].fams_num.add(int(self.data[2 : len(self.data) - 1]))
+ if self.data:
+ self.indi[self.num].fams_num.add(self.data[2 : len(self.data) - 1])
elif self.tag == "FAMC":
- self.indi[self.num].famc_num.add(int(self.data[2 : len(self.data) - 1]))
+ if self.data:
+ self.indi[self.num].famc_num.add(self.data[2 : len(self.data) - 1])
elif self.tag == "_FSFTID":
self.indi[self.num].fid = self.data
elif self.tag == "NOTE":
- num = int(self.data[2 : len(self.data) - 1])
- if num not in self.note:
- self.note[num] = Note(tree=self.tree, num=num)
- self.indi[self.num].notes.add(self.note[num])
+ if self.data:
+ num = self.data[2 : len(self.data) - 1]
+ if num not in self.note:
+ self.note[num] = Note(tree=self.tree, num=num)
+ self.indi[self.num].notes.add(self.note[num])
elif self.tag == "SOUR":
self.indi[self.num].sources.add(self.__get_link_source())
elif self.tag == "OBJE":
"""Parse a family"""
while self.__get_line() and self.level > 0:
if self.tag == "HUSB":
- self.fam[self.num].husb_num = int(self.data[2 : len(self.data) - 1])
+ if self.data:
+ self.fam[self.num].husb_num = self.data[2 : len(self.data) - 1]
elif self.tag == "WIFE":
- self.fam[self.num].wife_num = int(self.data[2 : len(self.data) - 1])
+ if self.data:
+ self.fam[self.num].wife_num = self.data[2 : len(self.data) - 1]
elif self.tag == "CHIL":
- self.fam[self.num].chil_num.add(int(self.data[2 : len(self.data) - 1]))
+ if self.data:
+ self.fam[self.num].chil_num.add(self.data[2 : len(self.data) - 1])
elif self.tag in FACT_TYPES:
self.fam[self.num].facts.add(self.__get_fact())
elif self.tag == "SLGS":
elif self.tag == "_FSFTID":
self.fam[self.num].fid = self.data
elif self.tag == "NOTE":
- num = int(self.data[2 : len(self.data) - 1])
- if num not in self.note:
- self.note[num] = Note(tree=self.tree, num=num)
- self.fam[self.num].notes.add(self.note[num])
+ if self.data:
+ num = self.data[2 : len(self.data) - 1]
+ if num not in self.note:
+ self.note[num] = Note(tree=self.tree, num=num)
+ self.fam[self.num].notes.add(self.note[num])
elif self.tag == "SOUR":
self.fam[self.num].sources.add(self.__get_link_source())
self.flag = True
added = True
elif self.tag == "NICK":
nick = Name()
- nick.given = self.data
+ nick.given = self.data or ""
self.indi[self.num].nicknames.add(nick)
elif self.tag == "NOTE":
- num = int(self.data[2 : len(self.data) - 1])
- if num not in self.note:
- self.note[num] = Note(tree=self.tree, num=num)
- name.note = self.note[num]
+ if self.data:
+ num = self.data[2 : len(self.data) - 1]
+ if num not in self.note:
+ self.note[num] = Note(tree=self.tree, num=num)
+ name.note = self.note[num]
if not added:
self.indi[self.num].birthnames.add(name)
self.flag = True
if self.tag == "DATE":
fact.date = self.__get_text()
elif self.tag == "PLAC":
- fact.place = self.__get_text()
+ fact.place = self.tree.ensure_place(self.__get_text())
elif self.tag == "MAP":
fact.map = self.__get_map()
elif self.tag == "NOTE":
- if self.data[:12] == "Description:":
+ if self.data and self.data[:12] == "Description:":
fact.value = self.data[13:]
continue
- num = int(self.data[2 : len(self.data) - 1])
- if num not in self.note:
- self.note[num] = Note(tree=self.tree, num=num)
- fact.note = self.note[num]
+ if self.data:
+ num = self.data[2 : len(self.data) - 1]
+ if num not in self.note:
+ self.note[num] = Note(tree=self.tree, num=num)
+ fact.note = self.note[num]
elif self.tag == "CONT":
- fact.value += "\n" + self.data
+ fact.value = (fact.value or "") + "\n" + (self.data or "")
elif self.tag == "CONC":
- fact.value += self.data
+ fact.value = (fact.value or "") + (self.data or "")
self.flag = True
return fact
def __get_text(self):
"""Parse a multiline text"""
- text = self.data
+ text = self.data or ""
while self.__get_line():
if self.tag == "CONT":
- text += "\n" + self.data
+ text += "\n" + (self.data if self.data else "")
elif self.tag == "CONC":
- text += self.data
+ text += self.data if self.data else ""
else:
break
self.flag = True
else:
self.tree.sources[self.data] = self.sour[self.num]
elif self.tag == "NOTE":
- num = int(self.data[2 : len(self.data) - 1])
- if num not in self.note:
- self.note[num] = Note(tree=self.tree, num=num)
- self.sour[self.num].notes.add(self.note[num])
+ if self.data:
+ num = self.data[2 : len(self.data) - 1]
+ if num not in self.note:
+ self.note[num] = Note(tree=self.tree, num=num)
+ self.sour[self.num].notes.add(self.note[num])
self.flag = True
def __get_link_source(self):
"""Parse a link to a source"""
- num = int(self.data[2 : len(self.data) - 1])
+ num = "0"
+ if self.data:
+ num = self.data[2 : len(self.data) - 1]
+
if num not in self.sour:
self.sour[num] = Source(num=num)
page = None
elif self.tag == "STAT":
ordinance.status = ORDINANCES[self.data]
elif self.tag == "FAMC":
- num = int(self.data[2 : len(self.data) - 1])
- if num not in self.fam:
- self.fam[num] = Fam(tree=self.tree, num=num)
- ordinance.famc = self.fam[num]
+ if self.data:
+ num = self.data[2 : len(self.data) - 1]
+ if num not in self.fam:
+ self.fam[num] = Fam(tree=self.tree, num=num)
+ ordinance.famc = self.fam[num]
self.flag = True
return ordinance
def __add_id(self):
"""Reset GEDCOM identifiers"""
- for num in self.fam:
- if self.fam[num].husb_num:
- self.fam[num].husb_fid = self.indi[self.fam[num].husb_num].fid
- if self.fam[num].wife_num:
- self.fam[num].wife_fid = self.indi[self.fam[num].wife_num].fid
- for chil in self.fam[num].chil_num:
- self.fam[num].chil_fid.add(self.indi[chil].fid)
- for num in self.indi:
- for famc in self.indi[num].famc_num:
- self.indi[num].famc_fid.add(
- (self.fam[famc].husb_fid, self.fam[famc].wife_fid)
+ # Set fallback fid from GEDCOM pointer if _FSFTID was not present
+ for num, indi in self.indi.items():
+ if indi.fid is None:
+ name_str = str(indi.name) if indi.name else "Unknown"
+ _warn(
+ f"Warning: Individual @I{num}@ ({name_str}) missing _FSFTID tag, "
+ f"using GEDCOM pointer as fallback."
)
- for fams in self.indi[num].fams_num:
- self.indi[num].fams_fid.add(
- (self.fam[fams].husb_fid, self.fam[fams].wife_fid)
+ indi.fid = num # Use GEDCOM pointer ID as fallback
+
+ for num, fam in self.fam.items():
+ if fam.fid is None:
+ husb_name = "Unknown"
+ if fam.husb_num and fam.husb_num in self.indi:
+ h = self.indi[fam.husb_num]
+ husb_name = str(h.name) if h.name else "Unknown"
+
+ wife_name = "Unknown"
+ if fam.wife_num and fam.wife_num in self.indi:
+ w = self.indi[fam.wife_num]
+ wife_name = str(w.name) if w.name else "Unknown"
+
+ _warn(
+ f"Warning: Family @F{num}@ ({husb_name} & {wife_name}) missing _FSFTID tag, "
+ f"using GEDCOM pointer as fallback."
)
+ fam.fid = num # Use GEDCOM pointer ID as fallback
+
+ for _num, fam in self.fam.items():
+ if fam.husb_num:
+ fam.husb_fid = self.indi[fam.husb_num].fid
+ if fam.wife_num:
+ fam.wife_fid = self.indi[fam.wife_num].fid
+ for chil in fam.chil_num:
+ fam.chil_fid.add(self.indi[chil].fid)
+ for _num, indi in self.indi.items():
+ for famc in indi.famc_num:
+ # Store fam.fid instead of (husb, wife) tuple for consistent keying
+ indi.famc_fid.add(self.fam[famc].fid)
+ for fams in indi.fams_num:
+ indi.fams_fid.add(self.fam[fams].fid)
# fstogedcom classes and functions
+import asyncio
import os
import re
-import time
-import asyncio
import tempfile
+import time
from threading import Thread
+from tkinter import IntVar, Menu, StringVar, TclError, filedialog, messagebox
+from tkinter.ttk import Button, Checkbutton, Entry, Frame, Label, Notebook, Treeview
+from typing import Literal, cast
+
from diskcache import Cache
-from tkinter import (
- StringVar,
- IntVar,
- filedialog,
- messagebox,
- Menu,
- TclError,
-)
-from tkinter.ttk import Frame, Label, Entry, Button, Checkbutton, Treeview, Notebook
-
-from getmyancestors.classes.tree import Indi, Fam, Tree
from getmyancestors.classes.gedcom import Gedcom
from getmyancestors.classes.session import Session
from getmyancestors.classes.translation import translations
+from getmyancestors.classes.tree import Fam, Indi, Tree
tmp_dir = os.path.join(tempfile.gettempdir(), "fstogedcom")
cache = Cache(tmp_dir)
state = "normal"
except TclError:
state = "disabled"
- menu.add_command(label=_("Copy"), command=self.copy, state=state)
- menu.add_command(label=_("Cut"), command=self.cut, state=state)
+
+ # Cast to Literal for mypy
+ state_lit = cast(Literal["normal", "disabled"], state)
+ menu.add_command(label=_("Copy"), command=self.copy, state=state_lit)
+ menu.add_command(label=_("Cut"), command=self.cut, state=state_lit)
menu.add_command(label=_("Paste"), command=self.paste)
menu.post(event.x_root, event.y_root)
_("Error"), message=_("File not found: ") + os.path.basename(filename)
)
return
- file = open(filename, "r", encoding="utf-8")
- new_id = self.insert("", 0, text=os.path.basename(filename))
- self.files[new_id] = file
+ try:
+ # pylint: disable=consider-using-with
+ file = open(filename, "r", encoding="utf-8")
+ except OSError as e:
+ messagebox.showinfo(_("Error"), message=_("Error opening file: ") + str(e))
+ return
+
+ try:
+ new_id = self.insert("", 0, text=os.path.basename(filename))
+ self.files[new_id] = file
+ except TclError:
+ file.close()
+ messagebox.showinfo(_("Error"), message=_("Error adding file to list"))
def popup(self, event):
"""open menu to remove item"""
ged = Gedcom(file, tree)
# add informations about individuals
- for num in ged.indi:
- fid = ged.indi[num].fid
+ for _num, indi in ged.indi.items():
+ fid = indi.fid
if fid not in tree.indi:
indi_counter += 1
tree.indi[fid] = Indi(tree=tree, num=indi_counter)
tree.indi[fid].tree = tree
- tree.indi[fid].fid = ged.indi[num].fid
- tree.indi[fid].fams_fid |= ged.indi[num].fams_fid
- tree.indi[fid].famc_fid |= ged.indi[num].famc_fid
- tree.indi[fid].name = ged.indi[num].name
- tree.indi[fid].birthnames = ged.indi[num].birthnames
- tree.indi[fid].nicknames = ged.indi[num].nicknames
- tree.indi[fid].aka = ged.indi[num].aka
- tree.indi[fid].married = ged.indi[num].married
- tree.indi[fid].gender = ged.indi[num].gender
- tree.indi[fid].facts = ged.indi[num].facts
- tree.indi[fid].notes = ged.indi[num].notes
- tree.indi[fid].sources = ged.indi[num].sources
- tree.indi[fid].memories = ged.indi[num].memories
- tree.indi[fid].baptism = ged.indi[num].baptism
- tree.indi[fid].confirmation = ged.indi[num].confirmation
- tree.indi[fid].endowment = ged.indi[num].endowment
+ tree.indi[fid].fid = indi.fid
+ tree.indi[fid].fams_fid |= indi.fams_fid
+ tree.indi[fid].famc_fid |= indi.famc_fid
+ tree.indi[fid].name = indi.name
+ tree.indi[fid].birthnames |= indi.birthnames
+ tree.indi[fid].nicknames |= indi.nicknames
+ tree.indi[fid].aka |= indi.aka
+ tree.indi[fid].married |= indi.married
+ tree.indi[fid].gender = indi.gender
+ tree.indi[fid].facts |= indi.facts
+ tree.indi[fid].notes |= indi.notes
+ tree.indi[fid].sources |= indi.sources
+ tree.indi[fid].memories |= indi.memories
+ tree.indi[fid].baptism = indi.baptism
+ tree.indi[fid].confirmation = indi.confirmation
+ tree.indi[fid].endowment = indi.endowment
if not (
tree.indi[fid].sealing_child and tree.indi[fid].sealing_child.famc
):
- tree.indi[fid].sealing_child = ged.indi[num].sealing_child
+ tree.indi[fid].sealing_child = indi.sealing_child
# add informations about families
- for num in ged.fam:
- husb, wife = (ged.fam[num].husb_fid, ged.fam[num].wife_fid)
+ for _num, fam in ged.fam.items():
+ husb, wife = (fam.husb_fid, fam.wife_fid)
if (husb, wife) not in tree.fam:
fam_counter += 1
tree.fam[(husb, wife)] = Fam(husb, wife, tree, fam_counter)
tree.fam[(husb, wife)].tree = tree
- tree.fam[(husb, wife)].chil_fid |= ged.fam[num].chil_fid
- tree.fam[(husb, wife)].fid = ged.fam[num].fid
- tree.fam[(husb, wife)].facts = ged.fam[num].facts
- tree.fam[(husb, wife)].notes = ged.fam[num].notes
- tree.fam[(husb, wife)].sources = ged.fam[num].sources
- tree.fam[(husb, wife)].sealing_spouse = ged.fam[num].sealing_spouse
+ tree.fam[(husb, wife)].chil_fid |= fam.chil_fid
+ tree.fam[(husb, wife)].fid = fam.fid
+ tree.fam[(husb, wife)].facts |= fam.facts
+ tree.fam[(husb, wife)].notes |= fam.notes
+ tree.fam[(husb, wife)].sources |= fam.sources
+ tree.fam[(husb, wife)].sealing_spouse = fam.sealing_spouse
# merge notes by text
tree.notes = sorted(tree.notes, key=lambda x: x.text)
self.save_password = IntVar()
self.save_password.set(cache.get("save_password") or 0)
- check_save_password = Checkbutton(self, text=_("Save Password"), variable=self.save_password, onvalue=1, offvalue=0)
+ check_save_password = Checkbutton(
+ self,
+ text=_("Save Password"),
+ variable=self.save_password,
+ onvalue=1,
+ offvalue=0,
+ )
label_username.grid(row=0, column=0, pady=15, padx=(0, 5))
entry_username.grid(row=0, column=1)
def login(self):
"""log in FamilySearch"""
- global _
+ global _ # pylint: disable=global-statement
username = self.sign_in.username.get()
password = self.sign_in.password.get()
if not (username and password):
return
self.btn_valid.config(state="disabled")
self.info(_("Login to FamilySearch..."))
+ # pylint: disable=consider-using-with
self.logfile = open("download.log", "w", encoding="utf-8")
self.fs = Session(
self.sign_in.username.get(),
cache.delete("save_password")
cache.add("save_password", save_pass)
- url = "/service/tree/tree-data/reservations/person/%s/ordinances" % self.fs.fid
- lds_account = self.fs.get_url(url, {}).get("status") == "OK"
+ url = (
+ "https://www.familysearch.org/service/tree/tree-data/reservations/person/%s/ordinances"
+ % self.fs.fid
+ )
+ # Restore no_api=True to query main site service instead of API
+ response = self.fs.get_url(url, {}, no_api=True)
+ lds_account = response and response.get("status") == "OK"
self.options = Options(self.form, lds_account)
self.info("")
self.sign_in.destroy()
-# global imports
+import contextlib
+import getpass
+import hashlib
+import json
+import logging
+import os
+import re
+import sqlite3
import sys
+import threading
import time
-from urllib.parse import urlparse, parse_qs
+import traceback
+import webbrowser
+from urllib.parse import parse_qs, urlencode, urlparse
import requests
from requests_cache import CachedSession as CSession
-from fake_useragent import UserAgent
-
from requests_ratelimiter import LimiterAdapter
-# local imports
from getmyancestors.classes.translation import translations
+DEFAULT_CLIENT_ID = "a02j000000KTRjpAAH"
+DEFAULT_REDIRECT_URI = "https://misbach.github.io/fs-auth/index_raw.html"
+
+
+class SecureLogFilter(logging.Filter):
+ """Filter to censor sensitive data in logs"""
+
+ SENSITIVE_RE = re.compile(
+ r"(Authorization: Bearer |Cookie: |XSRF-TOKEN=|SESSION=|password=|_csrf=|username=)[^ \r\n&]+"
+ )
+
+ def filter(self, record):
+ if isinstance(record.msg, (str, bytes)):
+ msg = (
+ record.msg
+ if isinstance(record.msg, str)
+ else record.msg.decode("utf-8", "ignore")
+ )
+ record.msg = self.SENSITIVE_RE.sub(r"\1***", msg)
+ return True
+
+
+LICENSE_AGREEMENT = """
+================================================================================
+ getmyancestors - License & Terms of Use
+================================================================================
+
+This program is free software: you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+By using this software to access FamilySearch, you also agree to:
-# class Session(requests.Session):
-class GMASession:
+1. Comply with FamilySearch's Terms of Use (https://www.familysearch.org/terms)
+2. Not abuse the API through excessive requests or automated scraping
+3. If you experience a bug or a network loop, close the program and file a bug!
+4. Only use the tool for personal, non-commercial purposes.
+5. Respect the privacy of living individuals in any downloaded data
+6. Accept that FamilySearch may revoke API access for violations
+
+DO NOT USE THE TOOL EXCESSIVELY!
+DOWNLOAD YOUR FAMILY'S GEDCOM AND USE IT OFFLINE.
+BE RESPECTFUL OF FAMILYSEARCH'S SERVERS AND RESPECT THEIR TERMS OF USE.
+
+================================================================================
+"""
+
+
+class GMASession(requests.Session):
"""Create a FamilySearch session
:param username and password: valid FamilySearch credentials
:param verbose: True to active verbose mode
:param timeout: time before retry a request
"""
- def __init__(self, username, password, verbose=False, logfile=False, timeout=60):
- # super().__init__('http_cache', backend='filesystem', expire_after=86400)
- # super().__init__()
+ DEFAULT_HEADERS = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ "Accept-Language": "en-US,en;q=0.9",
+ }
+
+ def __init__(
+ self,
+ username,
+ password,
+ client_id=None,
+ redirect_uri=None,
+ verbose=False,
+ logfile=None,
+ timeout=60,
+ requests_per_second=5,
+ ):
+ requests.Session.__init__(self)
self.username = username
self.password = password
+ self.lock = threading.Lock()
+ self.client_id = client_id or DEFAULT_CLIENT_ID
+ if redirect_uri:
+ self.redirect_uri = redirect_uri
+ else:
+ self.redirect_uri = DEFAULT_REDIRECT_URI
+ # Warn about using fallback redirect URI - check TTY before coloring
+ # Suppress in offline mode as we don't login
+ if not os.environ.get("GMA_OFFLINE_MODE"):
+ use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "")
+ msg = (
+ "⚠ WARNING: Using fallback redirect URI (misbach.github.io)\n"
+ " This is a third-party OAuth callback. Consider registering your own.\n"
+ " See: https://www.familysearch.org/developers/\n"
+ )
+ if use_color:
+ sys.stderr.write(f"\033[33m{msg}\033[0m")
+ else:
+ sys.stderr.write(msg)
self.verbose = verbose
self.logfile = logfile
self.timeout = timeout
- self.fid = self.lang = self.display_name = None
+ self.fid = None
+ self.lang = None
+ self.display_name = None
self.counter = 0
- self.headers = {"User-Agent": UserAgent().firefox}
- # Apply a rate-limit (5 requests per second) to all requests
- adapter = LimiterAdapter(per_second=5)
- self.mount('http://', adapter)
- self.mount('https://', adapter)
+ # Persistence setup - use ~/.cache/getmyancestors/ by default
+ cache_dir = os.environ.get(
+ "GMA_CACHE_DIR", os.path.expanduser("~/.cache/getmyancestors")
+ )
+ os.makedirs(cache_dir, exist_ok=True)
+ self.db_path = os.path.join(cache_dir, "session.sqlite")
+ # Cookie file is now stored in cache directory too
+ self.cookie_file = os.path.join(cache_dir, "cookies.json")
+ self._init_db()
+ self.check_license()
- self.login()
+ # Debug logging toggle
+ # Debug logging toggle
+ if os.environ.get("GMA_DEBUG"):
+ logger = logging.getLogger()
+ logger.setLevel(logging.DEBUG)
+ # Add secure filter
+ secure_filter = SecureLogFilter()
+ for handler in logger.handlers:
+ handler.addFilter(secure_filter)
+ if not logger.handlers:
+ handler = logging.StreamHandler(sys.stderr)
+ handler.addFilter(secure_filter)
+ logger.addHandler(handler)
+
+ # Optional: Enable full HTTP level logging if GMA_TRACE is set
+ if os.environ.get("GMA_TRACE"):
+ import http.client as http_client # pylint: disable=import-outside-toplevel
+
+ http_client.HTTPConnection.debuglevel = 1
+ self.write_log(
+ "🐞 TRACE MODE ENABLED - WARNING: Logs will contain sensitive data unless filtered by SecureLogFilter."
+ )
+
+ self.write_log("🐞 DEBUG MODE ENABLED - Censored logging active.")
+
+ # Hardcode robust User-Agent to avoid bot detection
+ with self.lock:
+ self.headers.update(self.DEFAULT_HEADERS)
+
+ # Apply a rate-limit (default 5 requests per second) to all requests
+ # Credit: Josemando Sobral
+ adapter = LimiterAdapter(per_second=requests_per_second)
+ self.mount("https://", adapter)
+
+ # Defer login to subclasses to ensure initialization is complete
+ # self.login()
+
+ def _init_db(self):
+ """Initialize SQLite database for session storage"""
+ with sqlite3.connect(self.db_path) as conn:
+ conn.execute(
+ "CREATE TABLE IF NOT EXISTS session (key TEXT PRIMARY KEY, value TEXT)"
+ )
+ conn.commit()
+
+ def check_license(self):
+ """Check if user has accepted the current license agreement"""
+ # Allow tests/CI to bypass this check explicitly
+ if os.environ.get("GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT"):
+ return
+
+ # Hash combines license text AND username so acceptance is per-user
+ current_hash = hashlib.sha256(
+ (LICENSE_AGREEMENT + self.username).encode("utf-8")
+ ).hexdigest()
+ accepted_hash = None
+
+ # 1. Check external license file
+ # We store license acceptance in a separate JSON file so it survives cache clearing
+ license_file = os.path.join(
+ os.path.dirname(self.db_path), "..", "license-agreement.json"
+ )
+ license_file = os.path.abspath(license_file)
+
+ if os.path.exists(license_file):
+ try:
+ with open(license_file, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ if data.get("license_hash") == current_hash:
+ accepted_hash = data["license_hash"]
+ except Exception:
+ pass # Ignore file errors
+
+ if accepted_hash == current_hash:
+ return
+
+ # 2. Prompt user if mismatch (NO lock held)
+ if not sys.stdin.isatty():
+ sys.stderr.write(
+ "ERROR: License agreement has changed or not yet been accepted.\n"
+ "Please run this tool interactively to accept the license.\n"
+ )
+ sys.exit(1)
+
+ print(LICENSE_AGREEMENT)
+ try:
+ response = (
+ input("Do you agree to the terms above? (yes/no): ").strip().lower()
+ )
+ if response != "yes":
+ print("License not accepted. Exiting.")
+ sys.exit(1)
+
+ # 3. Write new hash to JSON file
+ try:
+ data = {"license_hash": current_hash}
+ with open(license_file, "w", encoding="utf-8") as f:
+ json.dump(data, f)
+ except Exception as e:
+ # Fallback or just print warning if we can't save
+ if self.verbose:
+ print(
+ f"Warning: Could not save license agreement to {license_file}: {e}"
+ )
+
+ print("License accepted.\n")
+
+ except (EOFError, KeyboardInterrupt):
+ print("\nLicense acceptance cancelled. Exiting.")
+ sys.exit(1)
@property
def logged(self):
- return bool(self.cookies.get("fssessionid"))
+ with self.lock:
+ return bool(
+ self.cookies.get("fssessionid") or self.headers.get("Authorization")
+ )
+
+ def save_cookies(self):
+ """save cookies and authorization header to JSON (explicitly NOT sqlite for security)"""
+ try:
+ with self.lock:
+ cookies_export = requests.utils.dict_from_cookiejar(self.cookies)
+ auth_header = self.headers.get("Authorization")
+
+ data = {
+ "cookies": cookies_export,
+ "auth": auth_header,
+ }
+ # Save to separate JSON file
+ cookie_file = os.path.join(
+ os.path.dirname(self.db_path), "..", "cookies.json"
+ )
+ cookie_file = os.path.abspath(cookie_file)
+
+ with open(cookie_file, "w", encoding="utf-8") as f:
+ json.dump(data, f)
+
+ if self.verbose:
+ self.write_log("Session saved to JSON: " + cookie_file)
+ except Exception as e:
+ self.write_log("Error saving session: " + str(e))
+
+ def load_cookies(self):
+ """load cookies and authorization header from JSON"""
+ cookie_file = os.path.join(os.path.dirname(self.db_path), "..", "cookies.json")
+ cookie_file = os.path.abspath(cookie_file)
+
+ if os.path.exists(cookie_file):
+ try:
+ with open(cookie_file, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ self._apply_session_data(data)
+ if self.verbose:
+ self.write_log("Session loaded from JSON: " + cookie_file)
+ return True
+ except Exception as e:
+ self.write_log("Error loading session from JSON: " + str(e))
+
+ # 2. Legacy Migration: checking old cookie file if it exists
+ if os.path.exists(self.cookie_file):
+ try:
+ with open(self.cookie_file, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ self._apply_session_data(data)
+ # We do NOT auto-save to new JSON here to respect read-only/security.
+ # It will save to new JSON only on next login/save_cookies call.
+ if self.verbose:
+ self.write_log(
+ "Session loaded (migrated) from legacy JSON: "
+ + self.cookie_file
+ )
+ return True
+ except Exception as e:
+ self.write_log("Error loading legacy cookie file: " + str(e))
+
+ return False
+
+ def _apply_session_data(self, data):
+ """Internal helper to apply session dict to current session"""
+ if isinstance(data, dict) and ("cookies" in data or "auth" in data):
+ cookies_dict = data.get("cookies", {})
+ auth_header = data.get("auth")
+ else:
+ cookies_dict = data
+ auth_header = None
+
+ with self.lock:
+ self.cookies.update(requests.utils.cookiejar_from_dict(cookies_dict))
+ if auth_header:
+ self.headers.update({"Authorization": auth_header})
+
+ # ANSI color codes for terminal output
+ COLOR_RESET = "\033[0m"
+ COLOR_RED = "\033[91m"
+ COLOR_YELLOW = "\033[93m"
+
+ def write_log(self, text, level="info"):
+ """write text in the log file with optional color"""
+ timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+ log = f"[{timestamp}]: {text}\n"
- def write_log(self, text):
- """write text in the log file"""
- log = "[%s]: %s\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), text)
if self.verbose:
- sys.stderr.write(log)
+ # Apply color if TTY or FORCE_COLOR is set (for piped output like tee)
+ use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "")
+ if level == "error" and use_color:
+ sys.stderr.write(f"{self.COLOR_RED}{log}{self.COLOR_RESET}")
+ elif level == "warning" and use_color:
+ sys.stderr.write(f"{self.COLOR_YELLOW}{log}{self.COLOR_RESET}")
+ else:
+ sys.stderr.write(log)
+
if self.logfile:
- self.logfile.write(log)
+ self.logfile.write(log) # No color in log files
+ # pylint: disable=inconsistent-return-statements
def login(self):
"""retrieve FamilySearch session ID
(https://familysearch.org/developers/docs/guides/oauth2)
"""
- while True:
+ if self.load_cookies():
+ if self.verbose:
+ self.write_log("Attempting to reuse cached session...")
+ # Use auto_login=False to prevent recursion if session is invalid
+ # Force network verification to prevent infinite loops with stale cache
+ context = (
+ self.cache_disabled()
+ if hasattr(self, "cache_disabled")
+ else contextlib.nullcontext()
+ )
+ with context:
+ self.set_current(auto_login=False)
+ if self.logged and self.fid:
+ if self.verbose:
+ self.write_log("Successfully reused cached session.")
+ return True
+ if self.verbose:
+ self.write_log("Cached session invalid or expired.")
+
+ # Define context manager for disabling cache
+ if hasattr(self, "cache_disabled"):
+ cache_context = self.cache_disabled()
+ else:
+ cache_context = contextlib.nullcontext()
+
+ with cache_context:
try:
+ if not self.username or not self.password:
+ return self.manual_login()
+
+ # Clear cookies to ensure fresh start for new login
+ with self.lock:
+ self.cookies.clear()
+
url = "https://www.familysearch.org/auth/familysearch/login"
self.write_log("Downloading: " + url)
- self.get(url, headers=self.headers)
- xsrf = self.cookies["XSRF-TOKEN"]
+
+ # Use the temp session for requests
+ self.get(url, headers=self.headers, timeout=self.timeout)
+ xsrf = self.cookies.get("XSRF-TOKEN")
+ if not xsrf:
+ self.write_log("No XSRF token found. Switching to manual login.")
+ return self.manual_login()
+
url = "https://ident.familysearch.org/login"
self.write_log("Downloading: " + url)
res = self.post(
"password": self.password,
},
headers=self.headers,
+ timeout=self.timeout,
)
+
try:
data = res.json()
except ValueError:
- self.write_log("Invalid auth request")
- self.write_log(res.headers)
- self.write_log(res.text)
-
- raise "Invalid auth request"
- # continue
- if "loginError" in data:
- self.write_log(data["loginError"])
- return
+ self.write_log(f"Headless Login Failed. Status: {res.status_code}")
+ self.write_log(f"Response Preview: {res.text[:200]}")
+ self.write_log("Switching to manual login.")
+ return self.manual_login()
+
if "redirectUrl" not in data:
- self.write_log(res.text)
- continue
+ self.write_log("Redirect URL not found in response.")
+ return self.manual_login()
url = data["redirectUrl"]
self.write_log("Downloading: " + url)
- res = self.get(url, headers=self.headers)
- res.raise_for_status()
+ self.get(url, headers=self.headers, timeout=self.timeout)
- url = f"https://ident.familysearch.org/cis-web/oauth2/v3/authorization?response_type=code&scope=openid profile email qualifies_for_affiliate_account country&client_id=a02j000000KTRjpAAH&redirect_uri=https://misbach.github.io/fs-auth/index_raw.html&username={self.username}"
+ params = urlencode(
+ {
+ "response_type": "code",
+ "scope": "openid profile email qualifies_for_affiliate_account country",
+ "client_id": self.client_id,
+ "redirect_uri": self.redirect_uri,
+ "username": self.username,
+ }
+ )
+ url = f"https://ident.familysearch.org/cis-web/oauth2/v3/authorization?{params}"
self.write_log("Downloading: " + url)
- response = self.get(url, allow_redirects=False, headers=self.headers)
- location = response.headers["location"]
- code = parse_qs(urlparse(location).query).get("code")
+
+ # Allow redirects so we follow the chain to the callback URI
+ response = self.get(
+ url,
+ allow_redirects=True,
+ headers=self.headers,
+ timeout=self.timeout,
+ )
+
+ # Check if we landed on the redirect URI (or have the code in the URL)
+ final_url = response.url
+ code = None
+
+ if "code=" in final_url:
+ code = parse_qs(urlparse(final_url).query).get("code")
+
+ # If not in final URL, check history (in case of a meta refresh or stop)
+ if not code and response.history:
+ for resp in response.history:
+ if "code=" in resp.headers.get("Location", ""):
+ code = parse_qs(
+ urlparse(resp.headers["Location"]).query
+ ).get("code")
+ if code:
+ break
+
+ if not code:
+ self.write_log(f"Code not found in URL: {final_url}")
+ return self.manual_login(response.url)
+
+ if isinstance(code, list):
+ code_str = code[0]
+ else:
+ code_str = code
+
+ # Use raw requests to avoid cache interference just in case
url = "https://ident.familysearch.org/cis-web/oauth2/v3/token"
self.write_log("Downloading: " + url)
- res = self.post(
+ res = requests.post(
url,
data={
"grant_type": "authorization_code",
- "client_id": "a02j000000KTRjpAAH",
- "code": code,
- "redirect_uri": "https://misbach.github.io/fs-auth/index_raw.html",
+ "client_id": self.client_id,
+ "code": code_str,
+ "redirect_uri": self.redirect_uri,
},
headers=self.headers,
+ timeout=self.timeout,
)
- try:
- data = res.json()
- except ValueError:
- self.write_log("Invalid auth request")
- continue
+ data = res.json()
+ if "access_token" in data:
+ with self.lock:
+ self.headers.update(
+ {"Authorization": f"Bearer {data['access_token']}"}
+ )
+ self.set_current(auto_login=False)
+ if self.logged:
+ self.save_cookies()
+ return True
+ except Exception as e:
+ self.write_log("Headless login error: " + str(e))
+ self.write_log(traceback.format_exc())
+ return self.manual_login()
+
+ # pylint: disable=inconsistent-return-statements
+ def manual_login(self, auth_url=None):
+ """Perform manual login"""
+ if not auth_url:
+ auth_url = f"https://ident.familysearch.org/cis-web/oauth2/v3/authorization?response_type=code&scope=openid profile email qualifies_for_affiliate_account country&client_id={self.client_id}&redirect_uri={self.redirect_uri}&username={self.username}"
- if "access_token" not in data:
- self.write_log(res.text)
+ print("\n" + "=" * 60)
+ print("Headless login failed. Manual login required.")
+ print("=" * 60)
+ print(f"Opening browser to login: {auth_url}")
+
+ # Only open browser if we really are in a terminal context, but user asked to stop?
+ # We will open it because otherwise they can't login.
+ try:
+ webbrowser.open(auth_url)
+ except Exception: # Catch specific exception
+ pass
+
+ print("\n" + "-" * 30)
+ print("MANUAL FALLBACK:")
+ print("1. Log in to FamilySearch in the opened window.")
+ print("2. Once logged in, you will be redirected.")
+ print(
+ "3. Copy the 'code' from the URL or simply copy the FULL destination URL."
+ )
+ print(
+ " (If it says 'code already used', assume you need to re-login or check for Access Token)"
+ )
+ print("-" * 30)
+
+ while True:
+ try:
+ user_input = getpass.getpass(
+ "Paste the code, token, or full redirect URL here: "
+ ).strip()
+ if not user_input:
+ sys.exit(2)
+
+ code = None
+ session_id = None
+
+ # Check for Access Token first
+ if "access_token=" in user_input:
+ try:
+ parsed = urlparse(user_input)
+ if parsed.fragment:
+ qs = parse_qs(parsed.fragment)
+ if "access_token" in qs:
+ session_id = qs["access_token"][0]
+ if not session_id and parsed.query:
+ qs = parse_qs(parsed.query)
+ if "access_token" in qs:
+ session_id = qs["access_token"][0]
+ except Exception: # Catch specific exception
+ pass
+
+ if (
+ not session_id
+ and len(user_input) > 50
+ and "=" not in user_input
+ and "http" not in user_input
+ ):
+ session_id = user_input
+
+ if session_id:
+ with self.lock:
+ self.headers.update({"Authorization": f"Bearer {session_id}"})
+ self.cookies.set(
+ "fssessionid", session_id, domain=".familysearch.org"
+ )
+ self.set_current(auto_login=False)
+ if self.logged and self.fid:
+ self.save_cookies()
+ print("\nSuccess! Session established via Token.")
+ return True
+
+ print("\nToken appeared invalid. Try again.")
continue
- access_token = data["access_token"]
- self.headers.update({"Authorization": f"Bearer {access_token}"})
- except requests.exceptions.ReadTimeout:
- self.write_log("Read timed out")
- continue
- except requests.exceptions.ConnectionError:
- self.write_log("Connection aborted")
- time.sleep(self.timeout)
- continue
- except requests.exceptions.HTTPError:
- self.write_log("HTTPError")
- time.sleep(self.timeout)
- continue
- except KeyError:
- self.write_log("KeyError")
- time.sleep(self.timeout)
- continue
- except ValueError:
- self.write_log("ValueError")
- time.sleep(self.timeout)
- continue
- if self.logged:
- self.set_current()
- break
+ # Check for Code
+ if "code=" in user_input:
+ try:
+ parsed = urlparse(user_input)
+ qs = parse_qs(parsed.query)
+ if "code" in qs:
+ code = qs["code"][0]
+ except Exception: # Catch specific exception
+ pass
+ elif len(user_input) < 50:
+ code = user_input
+
+ if code:
+ url = "https://ident.familysearch.org/cis-web/oauth2/v3/token"
+ try:
+ # Raw request to avoid cache
+ res = requests.post(
+ url,
+ data={
+ "grant_type": "authorization_code",
+ "client_id": self.client_id,
+ "code": code,
+ "redirect_uri": self.redirect_uri,
+ },
+ headers=self.headers,
+ timeout=self.timeout,
+ )
+
+ data = res.json()
+ if "access_token" in data:
+ session_id = data["access_token"]
+ with self.lock:
+ self.headers.update(
+ {"Authorization": f"Bearer {session_id}"}
+ )
+ self.cookies.set(
+ "fssessionid",
+ session_id,
+ domain=".familysearch.org",
+ )
+ self.set_current(auto_login=False)
+ if self.logged and self.fid:
+ self.save_cookies()
+ print("\nSuccess! Session established via Code.")
+ return True
+
+ error_desc = data.get(
+ "error_description", data.get("error", "Unknown error")
+ )
+ print(f"\nToken exchange failed: {error_desc}")
+
+ except Exception as e:
+ print(f"\nError during token exchange: {e}")
+
+ print("Invalid input or failed login. Please try again.")
- def get_url(self, url, headers=None):
+ except (EOFError, KeyboardInterrupt):
+ print("\nLogin cancelled.")
+ sys.exit(2)
+
+ def get_url(self, url, headers=None, auto_login=True, no_api=False):
"""retrieve JSON structure from a FamilySearch URL"""
self.counter += 1
if headers is None:
headers = {"Accept": "application/x-gedcomx-v1+json"}
- headers.update(self.headers)
+ # headers.update(self.headers) - redundant, requests merges session headers automatically
while True:
try:
self.write_log("Downloading: " + url)
+ # Used HEAD logic here (explicit API URL)
+ full_url = url if no_api else "https://api.familysearch.org" + url
r = self.get(
- "https://api.familysearch.org" + url,
+ full_url,
timeout=self.timeout,
headers=headers,
)
except requests.exceptions.ReadTimeout:
- self.write_log("Read timed out")
+ self.write_log("Read timed out", level="warning")
continue
except requests.exceptions.ConnectionError:
- self.write_log("Connection aborted")
+ self.write_log("Connection aborted", level="warning")
time.sleep(self.timeout)
continue
- self.write_log("Status code: %s" % r.status_code)
+ except sqlite3.InterfaceError as e:
+ # Cache corruption from threading - log and retry without cache
+ self.write_log(
+ "Cache error (sqlite3.InterfaceError): %s - Retrying without cache"
+ % e,
+ level="warning",
+ )
+ with self.cache_disabled(): # type: ignore[attr-defined]
+ try:
+ r = self.get(
+ full_url,
+ timeout=self.timeout,
+ headers=headers,
+ )
+ except requests.exceptions.RequestException as retry_err:
+ self.write_log(
+ "Retry blocked by network error: %s" % retry_err,
+ level="warning",
+ )
+ # Let the outer loop retry or fail gracefully
+ continue
+ # Color status codes based on severity
+ if r.status_code >= 500:
+ self.write_log("Status code: %s" % r.status_code, level="error")
+ elif r.status_code >= 400:
+ self.write_log("Status code: %s" % r.status_code, level="warning")
+ else:
+ self.write_log("Status code: %s" % r.status_code)
+ if self.verbose and hasattr(r, "from_cache") and r.from_cache:
+ self.write_log("CACHE HIT: " + url)
if r.status_code == 204:
return None
- if r.status_code in {404, 405, 410, 500}:
- self.write_log("WARNING: " + url)
+ if r.status_code in {404, 405, 410, 500, 503, 504}:
+ self.write_log("WARNING: " + url, level="warning")
return None
if r.status_code == 401:
- self.login()
- continue
+ if auto_login:
+ self.login()
+ continue
+
+ return None
try:
r.raise_for_status()
except requests.exceptions.HTTPError:
- self.write_log("HTTPError")
+ self.write_log("HTTPError", level="error")
+ # Log full request/response details for all HTTP errors
+ self.write_log(
+ " Request: GET https://api.familysearch.org%s" % url,
+ level="warning",
+ )
+ self.write_log(
+ (
+ " Response: %s" % r.text[:500]
+ if len(r.text) > 500
+ else " Response: %s" % r.text
+ ),
+ level="warning",
+ )
if r.status_code == 403:
- if (
- "message" in r.json()["errors"][0]
- and r.json()["errors"][0]["message"]
- == "Unable to get ordinances."
- ):
+ try:
+ error_data = r.json()
+ if (
+ "errors" in error_data
+ and error_data["errors"]
+ and error_data["errors"][0].get("message")
+ == "Unable to get ordinances."
+ ):
+ self.write_log(
+ "Unable to get ordinances. "
+ "Try with an LDS account or without option -c.",
+ level="error",
+ )
+ return "error"
+ error_msg = error_data["errors"][0].get("message", "")
self.write_log(
- "Unable to get ordinances. "
- "Try with an LDS account or without option -c."
+ "WARNING: code 403 from %s %s" % (url, error_msg),
+ level="warning",
+ )
+ except (ValueError, KeyError, IndexError):
+ self.write_log(
+ "WARNING: code 403 from %s (no error details)" % url,
+ level="warning",
)
- return "error"
- self.write_log(
- "WARNING: code 403 from %s %s"
- % (url, r.json()["errors"][0]["message"] or "")
- )
return None
time.sleep(self.timeout)
continue
try:
return r.json()
except Exception as e:
- self.write_log("WARNING: corrupted file from %s, error: %s" % (url, e))
+ self.write_log(
+ "WARNING: corrupted file from %s, error: %s" % (url, e),
+ level="warning",
+ )
+
return None
- def set_current(self):
+ def set_current(self, auto_login=True):
"""retrieve FamilySearch current user ID, name and language"""
url = "/platform/users/current"
- data = self.get_url(url)
+ data = self.get_url(url, auto_login=auto_login)
if data:
self.fid = data["users"][0]["personId"]
self.lang = data["users"][0]["preferredLanguage"]
self.display_name = data["users"][0]["displayName"]
def _(self, string):
- """translate a string into user's language
- TODO replace translation file for gettext format
- """
- if string in translations and self.lang in translations[string]:
+ """translate a string into user's language"""
+ if self.lang and string in translations and self.lang in translations[string]:
return translations[string][self.lang]
return string
class CachedSession(GMASession, CSession):
+ # pylint: disable=abstract-method
+ def __init__(
+ self,
+ username,
+ password,
+ client_id=None,
+ redirect_uri=None,
+ verbose=False,
+ logfile=False,
+ timeout=60,
+ cache_control=True,
+ requests_per_second=5,
+ ):
+ # Cache setup - use ~/.cache/getmyancestors/ by default
+ cache_dir = os.environ.get(
+ "GMA_CACHE_DIR", os.path.expanduser("~/.cache/getmyancestors")
+ )
+ os.makedirs(cache_dir, exist_ok=True)
+ cache_path = os.path.join(cache_dir, "requests")
- def __init__(self, username, password, verbose=False, logfile=False, timeout=60):
- CSession.__init__(self, 'http_cache', backend='filesystem', expire_after=86400)
- GMASession.__init__(self, username, password, verbose=verbose, logfile=logfile, timeout=timeout)
-class Session(GMASession, requests.Session):
+ GMASession.__init__(
+ self,
+ username,
+ password,
+ client_id,
+ redirect_uri,
+ verbose=verbose,
+ logfile=logfile,
+ timeout=timeout,
+ requests_per_second=requests_per_second,
+ )
- def __init__(self, username, password, verbose=False, logfile=False, timeout=60):
- requests.Session.__init__(self)
- GMASession.__init__(self, username, password, verbose=verbose, logfile=logfile, timeout=timeout)
+ # Offline mode adjustments
+ offline_mode = bool(os.environ.get("GMA_OFFLINE_MODE"))
+ expire_after = -1 if offline_mode else 86400
+
+ # Use Filesystem backend as per requirement
+ CSession.__init__(
+ self,
+ cache_path,
+ backend="filesystem",
+ expire_after=expire_after,
+ allowable_codes=(200, 204),
+ cache_control=cache_control, # Enable HTTP conditional requests (ETag/Last-Modified)
+ allow_to_fetch_missing=(not offline_mode), # prevent fetch on miss
+ )
+ # Re-apply default headers as CSession.__init__ might have wiped them
+ with self.lock:
+ self.headers.update(self.DEFAULT_HEADERS)
+ # Check for offline mode via environment variable
+ if os.environ.get("GMA_OFFLINE_MODE"):
+ self.write_log(
+ "🔧 OFFLINE MODE ENABLED - skipping login and using cached data only."
+ )
+ # In offline mode, skip login - all requests must come from cache
+ # Satisfaction for self.logged property
+ with self.lock:
+ self.headers.update({"Authorization": "Bearer OFFLINE"})
+ self.fid = "OFFLINE"
+ self.lang = "en"
+ self.display_name = "Offline Mode"
+ else:
+ self.login()
+
+ def request(self, *args, **kwargs):
+ """Override request to block network in offline mode"""
+ if os.environ.get("GMA_OFFLINE_MODE"):
+ # Set only_if_cached to True for requests-cache
+ kwargs["only_if_cached"] = True
+ return super().request(*args, **kwargs)
+
+
+class Session(GMASession):
+ def __init__(
+ self,
+ username,
+ password,
+ client_id=None,
+ redirect_uri=None,
+ verbose=False,
+ logfile=False,
+ timeout=60,
+ # pylint: disable=unused-argument
+ cache_control=True, # Ignored for non-cached sessions
+ requests_per_second=5,
+ ):
+ GMASession.__init__(
+ self,
+ username,
+ password,
+ client_id,
+ redirect_uri,
+ verbose=verbose,
+ logfile=logfile,
+ timeout=timeout,
+ requests_per_second=requests_per_second,
+ )
+ self.login()
"Cut": {"fr": "Couper"},
"Paste": {"fr": "Coller"},
"Username:": {
- "fr": "Nom d'utilisateur :",
- "de": "Benutzername:",
+ "fr": "Nom d'utilisateur :",
+ "de": "Benutzername:",
},
"Password:": {
- "fr": "Mot de passe :",
- "de": "Passwort:",
+ "fr": "Mot de passe :",
+ "de": "Passwort:",
},
"Save Password": {
- "fr": "Enregistrer le mot de passe",
- "de": "Passwort speichern",
+ "fr": "Enregistrer le mot de passe",
+ "de": "Passwort speichern",
},
"ID already exist": {"fr": "Cet identifiant existe déjà"},
"Invalid FamilySearch ID: ": {"fr": "Identifiant FamilySearch invalide : "},
+++ /dev/null
-import sys
-import re
-import time
-import asyncio
-import os
-from urllib.parse import unquote, unquote_plus
-from datetime import datetime
-from typing import Set, Dict, List, Tuple, Union, Optional, BinaryIO, Any
-# global imports
-import babelfish
-import geocoder
-import requests
-import xml.etree.cElementTree as ET
-from xml.etree.cElementTree import Element
-from requests_cache import CachedSession
-
-# local imports
-import getmyancestors
-from getmyancestors.classes.constants import (
- MAX_PERSONS,
- FACT_EVEN,
- FACT_TAGS,
- ORDINANCES_STATUS,
-)
-
-
-COUNTY = 'County'
-COUNTRY = 'Country'
-CITY = 'City'
-
-GEONAME_FEATURE_MAP = {
- 'ADM1': COUNTY, # first-order administrative division a primary administrative division of a country, such as a state in the United States
- 'ADM1H': COUNTY, # historical first-order administrative division a former first-order administrative division
- 'ADM2': COUNTY, # second-order administrative division a subdivision of a first-order administrative division
- 'ADM2H': COUNTY, # historical second-order administrative division a former second-order administrative division
- 'ADM3': COUNTY, # third-order administrative division a subdivision of a second-order administrative division
- 'ADM3H': COUNTY, # historical third-order administrative division a former third-order administrative division
- 'ADM4': COUNTY, # fourth-order administrative division a subdivision of a third-order administrative division
- 'ADM4H': COUNTY, # historical fourth-order administrative division a former fourth-order administrative division
- 'ADM5': COUNTY, # fifth-order administrative division a subdivision of a fourth-order administrative division
- 'ADM5H': COUNTY, # historical fifth-order administrative division a former fifth-order administrative division
- 'ADMD': COUNTY, # administrative division an administrative division of a country, undifferentiated as to administrative level
- 'ADMDH': COUNTY, # historical administrative division a former administrative division of a political entity, undifferentiated as to administrative level
- # 'LTER': leased area a tract of land leased to another country, usually for military installations
- 'PCL': COUNTRY, # political entity
- 'PCLD': COUNTRY, # dependent political entity
- 'PCLF': COUNTRY, # freely associated state
- 'PCLH': COUNTRY, # historical political entity a former political entity
- 'PCLI': COUNTRY, # independent political entity
- 'PCLIX': COUNTRY, # section of independent political entity
- 'PCLS': COUNTRY, # semi-independent political entity
-
- 'PPL': CITY, # populated place a city, town, village, or other agglomeration of buildings where people live and work
- 'PPLA': CITY, # seat of a first-order administrative division seat of a first-order administrative division (PPLC takes precedence over PPLA)
- 'PPLA2': CITY, # seat of a second-order administrative division
- 'PPLA3': CITY, # seat of a third-order administrative division
- 'PPLA4': CITY, # seat of a fourth-order administrative division
- 'PPLA5': CITY, # seat of a fifth-order administrative division
- 'PPLC': CITY, # capital of a political entity
- 'PPLCH': CITY, # historical capital of a political entity a former capital of a political entity
- 'PPLF': CITY, # farm village a populated place where the population is largely engaged in agricultural activities
- 'PPLG': CITY, # seat of government of a political entity
- 'PPLH': CITY, # historical populated place a populated place that no longer exists
- 'PPLL': CITY, # populated locality an area similar to a locality but with a small group of dwellings or other buildings
- 'PPLQ': CITY, # abandoned populated place
- 'PPLR': CITY, # religious populated place a populated place whose population is largely engaged in religious occupations
- 'PPLS': CITY, # populated places cities, towns, villages, or other agglomerations of buildings where people live and work
- 'PPLW': CITY, # destroyed populated place a village, town or city destroyed by a natural disaster, or by war
- 'PPLX': CITY, # section of populated place
-
-}
-
-# getmyancestors classes and functions
-def cont(string):
- """parse a GEDCOM line adding CONT and CONT tags if necessary"""
- level = int(string[:1]) + 1
- lines = string.splitlines()
- res = list()
- max_len = 255
- for line in lines:
- c_line = line
- to_conc = list()
- while len(c_line.encode("utf-8")) > max_len:
- index = min(max_len, len(c_line) - 2)
- while (
- len(c_line[:index].encode("utf-8")) > max_len
- or re.search(r"[ \t\v]", c_line[index - 1 : index + 1])
- ) and index > 1:
- index -= 1
- to_conc.append(c_line[:index])
- c_line = c_line[index:]
- max_len = 248
- to_conc.append(c_line)
- res.append(("\n%s CONC " % level).join(to_conc))
- max_len = 248
- return ("\n%s CONT " % level).join(res) + "\n"
-
-class Note:
- """GEDCOM Note class
- :param text: the Note content
- :param tree: a Tree object
- :param num: the GEDCOM identifier
- """
-
- counter = {}
-
- def __init__(self, text="", tree=None, num=None, num_prefix=None, note_type=None):
- self._handle = None
- self.note_type = note_type or 'Source Note'
- self.num_prefix = num_prefix
- if num:
- self.num = num
- else:
- Note.counter[num_prefix or 'None'] = Note.counter.get(num_prefix or 'None', 0) + 1
- self.num = Note.counter[num_prefix or 'None']
- print(f'##### Creating Note: {num_prefix}, {self.num}', file=sys.stderr)
- self.text = text.strip()
-
- if tree:
- tree.notes.append(self)
-
- @property
- def id(self):
- return f'{self.num_prefix}_{self.num}' if self.num_prefix != None else self.num
-
- def print(self, file=sys.stdout):
- """print Note in GEDCOM format"""
- print(f'Note: {self.text}', file=sys.stderr)
- file.write(cont("0 @N%s@ NOTE %s" % (self.id, self.text)))
-
- def link(self, file=sys.stdout, level=1):
- """print the reference in GEDCOM format"""
- print(f'Linking Note: {self.id}', file=sys.stderr)
- file.write("%s NOTE @N%s@\n" % (level, self.id))
-
-
- @property
- def handle(self):
- if not self._handle:
- self._handle = '_' + os.urandom(10).hex()
-
- return self._handle
-
- def printxml(self, parent_element: Element) -> None:
- note_element = ET.SubElement(
- parent_element,
- 'note',
- handle=self.handle,
- # change='1720382308',
- id=self.id,
- type='Source Note'
- )
- ET.SubElement(note_element, 'text').text = self.text
-
-class Source:
- """GEDCOM Source class
- :param data: FS Source data
- :param tree: a Tree object
- :param num: the GEDCOM identifier
- """
-
- counter = 0
-
- def __init__(self, data=None, tree=None, num=None):
- if num:
- self.num = num
- else:
- Source.counter += 1
- self.num = Source.counter
-
- self._handle = None
-
- self.tree = tree
- self.url = self.citation = self.title = self.fid = None
- self.notes = set()
- if data:
- self.fid = data["id"]
- if "about" in data:
- self.url = data["about"].replace(
- "familysearch.org/platform/memories/memories",
- "www.familysearch.org/photos/artifacts",
- )
- if "citations" in data:
- self.citation = data["citations"][0]["value"]
- if "titles" in data:
- self.title = data["titles"][0]["value"]
- if "notes" in data:
- notes = [ n['text'] for n in data["notes"] if n["text"] ]
- for idx, n in enumerate(notes):
- self.notes.add(Note(
- n,
- self.tree,
- num="S%s-%s" % (self.id, idx),
- note_type='Source Note'
- ))
- self.modified = data['attribution']['modified']
-
- @property
- def id(self):
- return 'S' + str(self.fid or self.num)
-
-
- @property
- def handle(self):
- if not self._handle:
- self._handle = '_' + os.urandom(10).hex()
-
- return self._handle
-
- def print(self, file=sys.stdout):
- """print Source in GEDCOM format"""
- file.write("0 @S%s@ SOUR \n" % self.id)
- if self.title:
- file.write(cont("1 TITL " + self.title))
- if self.citation:
- file.write(cont("1 AUTH " + self.citation))
- if self.url:
- file.write(cont("1 PUBL " + self.url))
- for n in self.notes:
- n.link(file, 1)
- file.write("1 REFN %s\n" % self.fid)
-
- def link(self, file=sys.stdout, level=1):
- """print the reference in GEDCOM format"""
- file.write("%s SOUR @S%s@\n" % (level, self.id))
-
- def printxml(self, parent_element: Element) -> None:
-
- # <source handle="_fa593c277b471380bbcc5282e8f" change="1720382301" id="SQ8M5-NSP">
- # <stitle>Palkovics Cser József, "Hungary Civil Registration, 1895-1980"</stitle>
- # <sauthor>"Hungary Civil Registration, 1895-1980", , <i>FamilySearch</i> (https://www.familysearch.org/ark:/61903/1:1:6JBQ-NKWD : Thu Mar 07 10:23:43 UTC 2024), Entry for Palkovics Cser József and Palkovics Cser István, 27 Aug 1928.</sauthor>
- # <spubinfo>https://familysearch.org/ark:/61903/1:1:6JBQ-NKWD</spubinfo>
- # <srcattribute type="REFN" value="Q8M5-NSP"/>
- # </source>
- source_element = ET.SubElement(
- parent_element,
- 'source',
- handle=self.handle,
- change=str(int(self.modified / 1000)),
- id=self.id
- )
- if self.title:
- ET.SubElement(source_element, 'stitle').text = self.title
- if self.citation:
- ET.SubElement(source_element, 'sauthor').text = self.citation
- if self.url:
- ET.SubElement(source_element, 'spubinfo').text = self.url
- if self.fid:
- ET.SubElement(source_element, 'srcattribute', type='REFN', value=self.fid)
-
-
-class Fact:
- """GEDCOM Fact class
- :param data: FS Fact data
- :param tree: a tree object
- """
-
- counter = {}
-
- def __init__(self, data=None, tree: Optional['Tree']=None, num_prefix=None):
- self.value = self.type = self.date = None
- self.date_type = None
- self.place: Optional[Place] = None
- self.note = None
- self._handle: Optional[str] = None
- if data:
- if "value" in data:
- self.value = data["value"]
- if "type" in data:
- self.type = data["type"]
- self.fs_type = self.type
- if self.type in FACT_EVEN:
- self.type = tree.fs._(FACT_EVEN[self.type])
- elif self.type[:6] == "data:,":
- self.type = unquote(self.type[6:])
- elif self.type not in FACT_TAGS:
- self.type = None
-
-
- self.num_prefix = f'{num_prefix}_{FACT_TAGS[self.type]}' if num_prefix and self.type in FACT_TAGS else num_prefix
- Fact.counter[self.num_prefix or 'None'] = Fact.counter.get(self.num_prefix or 'None', 0) + 1
- self.num = Fact.counter[self.num_prefix or 'None']
- if data:
- if "date" in data:
- if 'formal' in data['date']:
- self.date = data['date']['formal'].split('+')[-1].split('/')[0]
- if data['date']['formal'].startswith('A+'):
- self.date_type = 'about'
- if data['date']['formal'].startswith('/+'):
- self.date_type = 'before'
- if data['date']['formal'].endswith('/'):
- self.date_type = 'after'
- else:
- self.date = data["date"]["original"]
- if "place" in data:
- place = data["place"]
- place_name = place["original"]
- place_id = place["description"][1:] if "description" in place and place["description"][1:] in tree.places else None
- self.place = tree.ensure_place(place_name, place_id)
- if "changeMessage" in data["attribution"]:
- self.note = Note(
- data["attribution"]["changeMessage"],
- tree,
- num_prefix='E' + self.num_prefix if self.num_prefix else None,
- note_type='Event Note',
- )
- if self.type == "http://gedcomx.org/Death" and not (
- self.date or self.place
- ):
- self.value = "Y"
-
- if tree:
- tree.facts.add(self)
-
-
- @property
- def id(self):
- return f'{self.num_prefix}_{self.num}' if self.num_prefix != None else self.num
-
-
- @property
- def handle(self):
- if not self._handle:
- self._handle = '_' + os.urandom(10).hex()
-
- return self._handle
-
- def printxml(self, parent_element):
-
- event_element = ET.SubElement(
- parent_element,
- 'event',
- handle=self.handle,
- # change='1720382301',
- id=self.id
- )
-
- ET.SubElement(event_element, 'type').text = (
- unquote_plus(self.type[len('http://gedcomx.org/'):])
- if self.type.startswith('http://gedcomx.org/')
- else self.type
- )
- # FACT_TAGS.get(self.type, self.type)
- if self.date:
- params={
- 'val': self.date,
- }
- if self.date_type is not None:
- params['type'] = self.date_type
- ET.SubElement(event_element, 'datestr', **params)
- if self.place:
- ET.SubElement(event_element, 'place', hlink=self.place.handle)
- if self.note:
- ET.SubElement(event_element, 'noteref', hlink=self.note.handle)
-
- def print(self, file=sys.stdout):
- """print Fact in GEDCOM format
- the GEDCOM TAG depends on the type, defined in FACT_TAGS
- """
- if self.type in FACT_TAGS:
- tmp = "1 " + FACT_TAGS[self.type]
- if self.value:
- tmp += " " + self.value
- file.write(cont(tmp))
- elif self.type:
- file.write("1 EVEN\n2 TYPE %s\n" % self.type)
- if self.value:
- file.write(cont("2 NOTE Description: " + self.value))
- else:
- return
- if self.date:
- file.write(cont("2 DATE " + self.date))
- if self.place:
- self.place.print(file, 2)
- if self.map:
- latitude, longitude = self.map
- file.write("3 MAP\n4 LATI %s\n4 LONG %s\n" % (latitude, longitude))
- if self.note:
- self.note.link(file, 2)
-
-
-class Memorie:
- """GEDCOM Memorie class
- :param data: FS Memorie data
- """
-
- def __init__(self, data=None):
- self.description = self.url = None
- if data and "links" in data:
- self.url = data["about"]
- if "titles" in data:
- self.description = data["titles"][0]["value"]
- if "descriptions" in data:
- self.description = (
- "" if not self.description else self.description + "\n"
- ) + data["descriptions"][0]["value"]
-
- def print(self, file=sys.stdout):
- """print Memorie in GEDCOM format"""
- file.write("1 OBJE\n2 FORM URL\n")
- if self.description:
- file.write(cont("2 TITL " + self.description))
- if self.url:
- file.write(cont("2 FILE " + self.url))
-
-
-NAME_MAP = {
- "preferred" : 'Preeferred Name',
- "nickname" : 'Nickname',
- "birthname": 'Birth Name',
- "aka": 'Also Known As',
- "married": 'Married Name',
-}
-
-class Name:
- """GEDCOM Name class
- :param data: FS Name data
- :param tree: a Tree object
- """
-
- def __init__(self, data=None, tree=None, owner_fis=None, kind=None, alternative: bool=False):
- self.given = ""
- self.surname = ""
- self.prefix = None
- self.suffix = None
- self.note = None
- self.alternative = alternative
- self.owner_fis = owner_fis
- self.kind = kind
- if data:
- if "parts" in data["nameForms"][0]:
- for z in data["nameForms"][0]["parts"]:
- if z["type"] == "http://gedcomx.org/Given":
- self.given = z["value"]
- if z["type"] == "http://gedcomx.org/Surname":
- self.surname = z["value"]
- if z["type"] == "http://gedcomx.org/Prefix":
- self.prefix = z["value"]
- if z["type"] == "http://gedcomx.org/Suffix":
- self.suffix = z["value"]
- if "changeMessage" in data["attribution"]:
- self.note = Note(
- data["attribution"]["changeMessage"],
- tree,
- num_prefix=f'NAME_{owner_fis}_{kind}',
- note_type='Name Note',
- )
-
- def printxml(self, parent_element):
- params = {}
- if self.kind is not None:
- params['type'] = NAME_MAP.get(self.kind, self.kind)
- if self.alternative:
- params['alt'] = '1'
- person_name = ET.SubElement(parent_element, 'name', **params)
- ET.SubElement(person_name, 'first').text = self.given
- ET.SubElement(person_name, 'surname').text = self.surname
- # TODO prefix / suffix
-
-
- def print(self, file=sys.stdout, typ=None):
- """print Name in GEDCOM format
- :param typ: type for additional names
- """
- tmp = "1 NAME %s /%s/" % (self.given, self.surname)
- if self.suffix:
- tmp += " " + self.suffix
- file.write(cont(tmp))
- if typ:
- file.write("2 TYPE %s\n" % typ)
- if self.prefix:
- file.write("2 NPFX %s\n" % self.prefix)
- if self.note:
- self.note.link(file, 2)
-
-
-
-class Place:
- """GEDCOM Place class
- :param name: the place name
- :param tree: a Tree object
- :param num: the GEDCOM identifier
- """
-
- counter = 0
-
- def __init__(
- self,
- id: str,
- name: str,
- type: Optional[str]=None,
- parent: Optional['Place']=None,
- latitude: Optional[float]=None,
- longitude: Optional[float]=None):
- self._handle = None
- self.name = name
- self.type = type
- self.id = id
- self.parent = parent
- self.latitude = latitude
- self.longitude = longitude
-
- @property
- def handle(self):
- if not self._handle:
- self._handle = '_' + os.urandom(10).hex()
-
- return self._handle
-
-
- def print(self, file=sys.stdout, indentation=0):
- """print Place in GEDCOM format"""
- file.write("%d @P%s@ PLAC %s\n" % (indentation, self.num, self.name))
-
- def printxml(self, parent_element):
-
-
- # <placeobj handle="_fac310617a8744e1d62f3d0dafe" change="1723223127" id="P0000" type="Country">
- # <pname value="Magyarország"/>
- # </placeobj>
- # <placeobj handle="_fac310962e15149e8244c2ccade" change="1723223149" id="P0001" type="County">
- # <pname value="Fejér"/>
- # <placeref hlink="_fac310617a8744e1d62f3d0dafe"/>
- # </placeobj>
- place_element = ET.SubElement(
- parent_element,
- 'placeobj',
- handle=self.handle,
- # change='1720382307',
- id=self.id,
- type=self.type or 'Unknown'
- )
- # ET.SubElement(place_element, 'ptitle').text = self.name
- ET.SubElement(place_element, 'pname', value=self.name)
- if self.parent:
- ET.SubElement(place_element, 'placeref', hlink=self.parent.handle)
- if self.latitude and self.longitude:
- ET.SubElement(place_element, 'coord', long=str(self.longitude), lat=str(self.latitude))
-
-class Ordinance:
- """GEDCOM Ordinance class
- :param data: FS Ordinance data
- """
-
- def __init__(self, data=None):
- self.date = self.temple_code = self.status = self.famc = None
- if data:
- if "completedDate" in data:
- self.date = data["completedDate"]
- if "completedTemple" in data:
- self.temple_code = data["completedTemple"]["code"]
- self.status = data["status"]
-
- def print(self, file=sys.stdout):
- """print Ordinance in Gecom format"""
- if self.date:
- file.write(cont("2 DATE " + self.date))
- if self.temple_code:
- file.write("2 TEMP %s\n" % self.temple_code)
- if self.status in ORDINANCES_STATUS:
- file.write("2 STAT %s\n" % ORDINANCES_STATUS[self.status])
- if self.famc:
- file.write("2 FAMC @F%s@\n" % self.famc.num)
-
-class Citation:
-
- def __init__(self, data: Dict[str, Any], source: Source):
- self._handle = None
- self.id = data["id"]
- self.source = source
- self.message = (
- data["attribution"]["changeMessage"]
- if "changeMessage" in data["attribution"]
- else None
- )
- # TODO create citation note out of this.
- self.modified = data['attribution']['modified']
-
-
- @property
- def handle(self):
- if not self._handle:
- self._handle = '_' + os.urandom(10).hex()
-
- return self._handle
-
- def printxml(self, parent_element: Element):
-
-# <citation handle="_fac4a72a01b1681293ea1ee8d9" change="1723265781" id="C0000">
-# <dateval val="1998-05-03"/>
-# <confidence>2</confidence>
-# <noteref hlink="_fac4a71ac2c6c5749abd6a0bd72"/>
-# <sourceref hlink="_fac4a70566329a02afcc10731f5"/>
-# </citation>
- citation_element = ET.SubElement(
- parent_element,
- 'citation',
- handle=self.handle,
- change=str(int(self.modified / 1000)),
- id='C' + str(self.id)
- )
- ET.SubElement(citation_element, 'confidence').text = '2'
- ET.SubElement(citation_element, 'sourceref', hlink=self.source.handle)
-
-
-class Indi:
- """GEDCOM individual class
- :param fid' FamilySearch id
- :param tree: a tree object
- :param num: the GEDCOM identifier
- """
-
- counter = 0
-
- def __init__(self, fid: str, tree: 'Tree', num=None):
- self._handle = None
- if num:
- self.num = num
- else:
- Indi.counter += 1
- self.num = Indi.counter
- self.fid = fid
- self.tree = tree
- self.famc: Set['Fam'] = set()
- self.fams: Set['Fam'] = set()
- # self.famc_fid = set()
- # self.fams_fid = set()
- # self.famc_num = set()
- # self.fams_num = set()
- # self.famc_ids = set()
- # self.fams_ids = set()
- self.name: Optional[Name] = None
- self.gender = None
- self.living = None
- self.parents: Set[Tuple[str, str]] = set() # (father_id, mother_id)
- self.spouses: Set[Tuple[str, str, str]] = set() # (person1, person2, relfid)
- self.children: Set[Tuple[str, str, str]] = set() # (father_id, mother_id, child_id)
- self.baptism = self.confirmation = self.initiatory = None
- self.endowment = self.sealing_child = None
- self.nicknames: Set[Name] = set()
- self.birthnames: Set[Name] = set()
- self.married: Set[Name] = set()
- self.aka: Set[Name] = set()
- self.facts: Set[Fact] = set()
- self.notes: Set[Note] = set()
- # self.sources: Set[Source] = set()
- self.citations: Set[Citation] = set()
- self.memories = set()
-
- def add_data(self, data):
- """add FS individual data"""
- if data:
- self.living = data["living"]
- for x in data["names"]:
- alt = not x.get('preferred', False)
- if x["type"] == "http://gedcomx.org/Nickname":
- self.nicknames.add(Name(x, self.tree, self.fid, "nickname", alt))
- elif x["type"] == "http://gedcomx.org/BirthName":
- self.birthnames.add(Name(x, self.tree, self.fid, "birthname", alt))
- elif x["type"] == "http://gedcomx.org/AlsoKnownAs":
- self.aka.add(Name(x, self.tree, self.fid, "aka", alt))
- elif x["type"] == "http://gedcomx.org/MarriedName":
- self.married.add(Name(x, self.tree, self.fid, "married", alt))
- else:
- print('Unknown name type: ' + x.get('type'), file=sys.stderr)
- raise 'Unknown name type'
- if "gender" in data:
- if data["gender"]["type"] == "http://gedcomx.org/Male":
- self.gender = "M"
- elif data["gender"]["type"] == "http://gedcomx.org/Female":
- self.gender = "F"
- elif data["gender"]["type"] == "http://gedcomx.org/Unknown":
- self.gender = "U"
- if "facts" in data:
- for x in data["facts"]:
- if x["type"] == "http://familysearch.org/v1/LifeSketch":
- self.notes.add(
- Note(
- "=== %s ===\n%s"
- % (self.tree.fs._("Life Sketch"), x.get("value", "")),
- self.tree,
- num_prefix=f'INDI_{self.fid}',
- note_type='Person Note',
- )
- )
- else:
- self.facts.add(Fact(x, self.tree, num_prefix=f'INDI_{self.fid}'))
- if "sources" in data:
- sources = self.tree.fs.get_url(
- "/platform/tree/persons/%s/sources" % self.fid
- )
- if sources:
- quotes = dict()
- for quote in sources["persons"][0]["sources"]:
- source_id = quote["descriptionId"]
- source_data = next(
- (s for s in sources['sourceDescriptions'] if s['id'] == source_id),
- None,
- )
- source = self.tree.ensure_source(source_data)
- if source:
- citation = self.tree.ensure_citation(quote, source)
- self.citations.add(citation)
-
- for evidence in data.get("evidence", []):
- memory_id, *_ = evidence["id"].partition("-")
- url = "/platform/memories/memories/%s" % memory_id
- memorie = self.tree.fs.get_url(url)
- if memorie and "sourceDescriptions" in memorie:
- for x in memorie["sourceDescriptions"]:
- if x["mediaType"] == "text/plain":
- text = "\n".join(
- val.get("value", "")
- for val in x.get("titles", [])
- + x.get("descriptions", [])
- )
- self.notes.add(
- Note(
- text,
- self.tree,
- num_prefix=f'INDI_{self.fid}',
- note_type='Person Note',
- ))
- else:
- self.memories.add(Memorie(x))
-
- def add_fams(self, fam: 'Fam'):
- """add family fid (for spouse or parent)"""
- self.fams.add(fam)
-
- def add_famc(self, fam: 'Fam'):
- """add family fid (for child)"""
- self.famc.add(fam)
-
- def get_notes(self):
- """retrieve individual notes"""
- print(f'Getting Notes for {self.fid}', file=sys.stderr)
- notes = self.tree.fs.get_url("/platform/tree/persons/%s/notes" % self.fid)
- if notes:
- for n in notes["persons"][0]["notes"]:
- text_note = "=== %s ===\n" % n["subject"] if "subject" in n else ""
- text_note += n["text"] + "\n" if "text" in n else ""
- self.notes.add(
- Note(
- text_note,
- self.tree,
- num_prefix=f'INDI_{self.fid}',
- note_type='Person Note',
- ))
-
- def get_ordinances(self):
- """retrieve LDS ordinances
- need a LDS account
- """
- res = []
- famc = False
- if self.living:
- return res, famc
- url = "/service/tree/tree-data/reservations/person/%s/ordinances" % self.fid
- data = self.tree.fs.get_url(url, {})
- if data:
- for key, o in data["data"].items():
- if key == "baptism":
- self.baptism = Ordinance(o)
- elif key == "confirmation":
- self.confirmation = Ordinance(o)
- elif key == "initiatory":
- self.initiatory = Ordinance(o)
- elif key == "endowment":
- self.endowment = Ordinance(o)
- elif key == "sealingsToParents":
- for subo in o:
- self.sealing_child = Ordinance(subo)
- relationships = subo.get("relationships", {})
- father = relationships.get("parent1Id")
- mother = relationships.get("parent2Id")
- if father and mother:
- famc = father, mother
- elif key == "sealingsToSpouses":
- res += o
- return res, famc
-
- def get_contributors(self):
- """retrieve contributors"""
- temp = set()
- url = "/platform/tree/persons/%s/changes" % self.fid
- data = self.tree.fs.get_url(url, {"Accept": "application/x-gedcomx-atom+json"})
- if data:
- for entries in data["entries"]:
- for contributors in entries["contributors"]:
- temp.add(contributors["name"])
- if temp:
- text = "=== %s ===\n%s" % (
- self.tree.fs._("Contributors"),
- "\n".join(sorted(temp)),
- )
- for n in self.tree.notes:
- if n.text == text:
- self.notes.add(n)
- return
- self.notes.add(Note(text, self.tree, num_prefix=f'INDI_{self.fid}_CONTRIB', note_type='Contribution Note'))
-
- @property
- def id(self):
- return self.fid or self.num
-
-
- @property
- def handle(self):
- if not self._handle:
- self._handle = '_' + os.urandom(10).hex()
-
- return self._handle
-
- def printxml(self, parent_element):
-
- # <person handle="_fa593c2779e5ed1c947416cba9e" change="1720382301" id="IL43B-D2H">
- # <gender>M</gender>
- # <name type="Birth Name">
- # <first>József</first>
- # <surname>Cser</surname>
- # <noteref hlink="_fa593c2779f7c527e3afe4623b9"/>
- # </name>
- # <eventref hlink="_fa593c277a0712aa4241bbf47db" role="Primary"/>
- # <attribute type="_FSFTID" value="L43B-D2H"/>
- # <childof hlink="_fa593c277af212e6c1f9f44bc4a"/>
- # <parentin hlink="_fa593c277af72c83e0e3fbf6ed2"/>
- # <citationref hlink="_fa593c277b7715371c26d1b0a81"/>
- # </person>
- person = ET.SubElement(parent_element,
- 'person',
- handle=self.handle,
- # change='1720382301',
- id='I' + str(self.id))
- if self.fid:
- ET.SubElement(person, 'attribute', type='_FSFTID', value=self.fid)
-
- if self.name:
- self.name.printxml(person)
- for name in self.nicknames | self.birthnames | self.aka | self.married:
- name.printxml(person)
-
- gender = ET.SubElement(person, 'gender')
- gender.text = self.gender
-
- if self.fams:
- for fam in self.fams:
- ET.SubElement(person, 'parentin', hlink=fam.handle)
-
- if self.famc:
- for fam in self.famc:
- ET.SubElement(person, 'childof', hlink=fam.handle)
-
-
- ET.SubElement(person, 'attribute', type="_FSFTID", value=self.fid)
-
-
- for fact in self.facts:
- ET.SubElement(person, 'eventref', hlink=fact.handle, role='Primary')
-
- for citation in self.citations:
- ET.SubElement(person, 'citationref', hlink=citation.handle)
-
- for note in self.notes:
- ET.SubElement(person, 'noteref', hlink=note.handle)
-
- # <noteref hlink="_fac4a686369713d9cd55159ada9"/>
- # <citationref hlink="_fac4a72a01b1681293ea1ee8d9"/>
-
-
- def print(self, file=sys.stdout):
- """print individual in GEDCOM format"""
- file.write("0 @I%s@ INDI\n" % self.id)
- if self.name:
- self.name.print(file)
- for o in self.nicknames:
- file.write(cont("2 NICK %s %s" % (o.given, o.surname)))
- for o in self.birthnames:
- o.print(file)
- for o in self.aka:
- o.print(file, "aka")
- for o in self.married:
- o.print(file, "married")
- if self.gender:
- file.write("1 SEX %s\n" % self.gender)
- for o in self.facts:
- o.print(file)
- for o in self.memories:
- o.print(file)
- if self.baptism:
- file.write("1 BAPL\n")
- self.baptism.print(file)
- if self.confirmation:
- file.write("1 CONL\n")
- self.confirmation.print(file)
- if self.initiatory:
- file.write("1 WAC\n")
- self.initiatory.print(file)
- if self.endowment:
- file.write("1 ENDL\n")
- self.endowment.print(file)
- if self.sealing_child:
- file.write("1 SLGC\n")
- self.sealing_child.print(file)
- for fam in self.fams:
- file.write("1 FAMS @F%s@\n" % fam.id)
- for fam in self.famc:
- file.write("1 FAMC @F%s@\n" % fam.id)
- # print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr)
- # for num in self.fams_ids:
- # print(f'Famc Ids: {self.famc_ids}', file=sys.stderr)
- # for num in self.famc_ids:
- # file.write("1 FAMC @F%s@\n" % num)
- file.write("1 _FSFTID %s\n" % self.fid)
- for o in self.notes:
- o.link(file)
- for source, quote in self.sources:
- source.link(file, 1)
- if quote:
- file.write(cont("2 PAGE " + quote))
-
-
-class Fam:
- """GEDCOM family class
- :param husb: husbant fid
- :param wife: wife fid
- :param tree: a Tree object
- :param num: a GEDCOM identifier
- """
-
- counter = 0
-
- def __init__(self, husband: Indi | None, wife: Indi | None, tree: 'Tree'):
- self._handle = None
- self.num = Fam.gen_id(husband, wife)
- self.fid = None
- self.husband = husband
- self.wife = wife
- self.tree = tree
- self.children: Set[Indi] = set()
- self.facts: Set[Fact] = set()
- self.sealing_spouse = None
- self.notes = set()
- self.sources = set()
-
- @property
- def handle(self):
- if not self._handle:
- self._handle = '_' + os.urandom(10).hex()
-
- return self._handle
-
- @staticmethod
- def gen_id(husband: Indi | None, wife: Indi | None) -> str:
- if husband and wife:
- return f'FAM_{husband.id}-{wife.id}'
- elif husband:
- return f'FAM_{husband.id}-UNK'
- elif wife:
- return f'FAM_UNK-{wife.id}'
- else:
- Fam.counter += 1
- return f'FAM_UNK-UNK-{Fam.counter}'
-
- def add_child(self, child: Indi | None):
- """add a child fid to the family"""
- if child is not None:
- self.children.add(child)
-
- def add_marriage(self, fid: str):
- """retrieve and add marriage information
- :param fid: the marriage fid
- """
- if not self.fid:
- self.fid = fid
- url = "/platform/tree/couple-relationships/%s" % self.fid
- data = self.tree.fs.get_url(url)
- if data:
- if "facts" in data["relationships"][0]:
- for x in data["relationships"][0]["facts"]:
- self.facts.add(Fact(x, self.tree, num_prefix=f'FAM_{self.fid}'))
- if "sources" in data["relationships"][0]:
- quotes = dict()
- for x in data["relationships"][0]["sources"]:
- quotes[x["descriptionId"]] = (
- x["attribution"]["changeMessage"]
- if "changeMessage" in x["attribution"]
- else None
- )
- new_sources = quotes.keys() - self.tree.sources.keys()
- if new_sources:
- sources = self.tree.fs.get_url(
- "/platform/tree/couple-relationships/%s/sources" % self.fid
- )
- for source in sources["sourceDescriptions"]:
- if (
- source["id"] in new_sources
- and source["id"] not in self.tree.sources
- ):
- self.tree.sources[source["id"]] = Source(
- source, self.tree
- )
- for source_fid in quotes:
- self.sources.add(
- (self.tree.sources[source_fid], quotes[source_fid])
- )
-
- def get_notes(self):
- """retrieve marriage notes"""
- if self.fid:
- notes = self.tree.fs.get_url(
- "/platform/tree/couple-relationships/%s/notes" % self.fid
- )
- if notes:
- for n in notes["relationships"][0]["notes"]:
- text_note = "=== %s ===\n" % n["subject"] if "subject" in n else ""
- text_note += n["text"] + "\n" if "text" in n else ""
- self.notes.add(Note(text_note, self.tree, num_prefix=f'FAM_{self.fid}', note_type='Marriage Note'))
-
- def get_contributors(self):
- """retrieve contributors"""
- if self.fid:
- temp = set()
- url = "/platform/tree/couple-relationships/%s/changes" % self.fid
- data = self.tree.fs.get_url(
- url, {"Accept": "application/x-gedcomx-atom+json"}
- )
- if data:
- for entries in data["entries"]:
- for contributors in entries["contributors"]:
- temp.add(contributors["name"])
- if temp:
- text = "=== %s ===\n%s" % (
- self.tree.fs._("Contributors"),
- "\n".join(sorted(temp)),
- )
- for n in self.tree.notes:
- if n.text == text:
- self.notes.add(n)
- return
- self.notes.add(Note(text, self.tree, num_prefix=f'FAM_{self.fid}_CONTRIB', note_type='Contribution Note'))
-
- @property
- def id(self):
- return self.num
-
- def printxml(self, parent_element):
- # <family handle="_fa593c277af212e6c1f9f44bc4a" change="1720382301" id="F9MKP-K92">
- # <rel type="Unknown"/>
- # <father hlink="_fa593c277f14dc6db9ab19cbe09"/>
- # <mother hlink="_fa593c277cd4af15983d7064c59"/>
- # <childref hlink="_fa593c279e1466787c923487b98"/>
- # <attribute type="_FSFTID" value="9MKP-K92"/>
- # </family>
- family = ET.SubElement(parent_element,
- 'family',
- handle=self.handle,
- # change='1720382301',
- id=self.id)
- ET.SubElement(family, 'rel', type='Unknown')
- if self.husband:
- ET.SubElement(family, 'father', hlink=self.husband.handle)
- if self.wife:
- ET.SubElement(family, 'mother', hlink=self.wife.handle)
- for child in self.children:
- ET.SubElement(family, 'childref', hlink=child.handle)
- for fact in self.facts:
- ET.SubElement(family, 'eventref', hlink=fact.handle, role='Primary')
-
- def print(self, file=sys.stdout):
- """print family information in GEDCOM format"""
- file.write("0 @F%s@ FAM\n" % self.id)
- if self.husband:
- file.write("1 HUSB @I%s@\n" % self.husband.id)
- if self.wife:
- file.write("1 WIFE @I%s@\n" % self.wife.id)
- for child in self.children:
- file.write("1 CHIL @I%s@\n" % child.id)
- for o in self.facts:
- o.print(file)
- if self.sealing_spouse:
- file.write("1 SLGS\n")
- self.sealing_spouse.print(file)
- if self.fid:
- file.write("1 _FSFTID %s\n" % self.fid)
- for o in self.notes:
- o.link(file)
- for source, quote in self.sources:
- source.link(file, 1)
- if quote:
- file.write(cont("2 PAGE " + quote))
-
-
-class Tree:
- """family tree class
- :param fs: a Session object
- """
-
- def __init__(self, fs: Optional[requests.Session]=None, exclude: List[str]=None, geonames_key=None):
- self.fs = fs
- self.geonames_key = geonames_key
- self.indi: Dict[str, Indi] = dict()
- self.fam: Dict[str, Fam] = dict()
- self.notes = list()
- self.facts: Set[Fact] = set()
- self.sources: Dict[str, Source] = dict()
- self.citations: Dict[str, Citation] = dict()
- self.places: List[Place] = []
- self.places_by_names: Dict[str, Place] = dict()
- self.place_cache: Dict[str, Tuple[float, float]] = dict()
- self.display_name = self.lang = None
- self.exclude: List[str] = exclude or []
- self.place_counter = 0
- if fs:
- self.display_name = fs.display_name
- self.lang = babelfish.Language.fromalpha2(fs.lang).name
-
- self.geosession = CachedSession('http_cache', backend='filesystem', expire_after=86400)
-
- def add_indis(self, fids_in: List[str]):
- """add individuals to the family tree
- :param fids: an iterable of fid
- """
- fids = []
- for fid in fids_in:
- if fid not in self.exclude:
- fids.append(fid)
- else:
- print(
- "Excluding %s from the family tree" % fid, file=sys.stderr
- )
-
- async def add_datas(loop, data):
- futures = set()
- for person in data["persons"]:
- self.indi[person["id"]] = Indi(person["id"], self)
- futures.add(
- loop.run_in_executor(None, self.indi[person["id"]].add_data, person)
- )
- for future in futures:
- await future
-
- new_fids = [fid for fid in fids if fid and fid not in self.indi]
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- while new_fids:
- data = self.fs.get_url(
- "/platform/tree/persons?pids=" + ",".join(new_fids[:MAX_PERSONS])
- )
- if data:
- if "places" in data:
- for place in data["places"]:
- if place["id"] not in self.place_cache:
- self.place_cache[place["id"]] = (
- place["latitude"],
- place["longitude"],
- )
- loop.run_until_complete(add_datas(loop, data))
- if "childAndParentsRelationships" in data:
- for rel in data["childAndParentsRelationships"]:
- father: str | None = rel.get("parent1", {}).get("resourceId")
- mother: str | None = rel.get("parent2", {}).get("resourceId")
- child: str | None = rel.get("child", {}).get("resourceId")
- if child in self.indi:
- self.indi[child].parents.add((father, mother))
- if father in self.indi:
- self.indi[father].children.add((father, mother, child))
- if mother in self.indi:
- self.indi[mother].children.add((father, mother, child))
- if "relationships" in data:
- for rel in data["relationships"]:
- if rel["type"] == "http://gedcomx.org/Couple":
- person1 = rel["person1"]["resourceId"]
- person2 = rel["person2"]["resourceId"]
- relfid = rel["id"]
- if person1 in self.indi:
- self.indi[person1].spouses.add(
- (person1, person2, relfid)
- )
- if person2 in self.indi:
- self.indi[person2].spouses.add(
- (person1, person2, relfid)
- )
- new_fids = new_fids[MAX_PERSONS:]
-
- def ensure_source(self, source_data: Dict[str, Any]) -> Source:
- if source_data["id"] not in self.sources:
- self.sources[source_data["id"]] = Source(source_data, self)
- return self.sources.get(source_data["id"])
-
- def ensure_citation(self, data: Dict[str, Any], source: Source) -> Citation:
- citation_id = data["id"]
- if citation_id not in self.citations:
- self.citations[citation_id] = Citation(data, source)
- return self.citations[citation_id]
-
- def ensure_family(self, father: Optional['Indi'], mother: Optional['Indi']) -> Fam:
- fam_id = Fam.gen_id(father, mother)
- if fam_id not in self.fam:
- self.fam[fam_id] = Fam(father, mother, self)
- return self.fam[fam_id]
-
-
- def place_by_geoname_id(self, id: str) -> Optional[Place]:
- for place in self.places:
- if place.id == id:
- return place
- return None
-
- def get_by_geonames_id(self, geonames_id: str) -> Place:
- print('Fetching place hierarchy for', geonames_id, file=sys.stderr)
- hierarchy = geocoder.geonames(
- geonames_id,
- key=self.geonames_key,
- lang=['hu', 'en', 'de'],
- method='hierarchy',
- session=self.geosession,
- )
-
- if hierarchy and hierarchy.ok:
- last_place = None
- for item in hierarchy.geojson.get('features', []):
- properties = item.get('properties', {})
- code = properties.get('code')
-
- if code in ['AREA', 'CONT']:
- continue
-
- print('Properties', properties, file=sys.stderr)
- id = 'GEO' + str(properties['geonames_id'])
- place = self.place_by_geoname_id(id)
- if place is None:
- place = Place(
- id,
- properties.get('address'),
- GEONAME_FEATURE_MAP.get(code, 'Unknown'),
- last_place,
- properties.get('lat'),
- properties.get('lng')
- )
- self.places.append(place)
- last_place = place
- return last_place
-
- @property
- def _next_place_counter(self):
- self.place_counter += 1
- return self.place_counter
-
-
- def ensure_place(self, place_name: str, fid: Optional[str] = None, coord: Optional[Tuple[float, float]] = None) -> Place:
- if place_name not in self.places_by_names:
- place = None
- if self.geonames_key:
- print('Fetching place', place_name, file=sys.stderr)
- geoname_record = geocoder.geonames(
- place_name,
- key=self.geonames_key,
- session=self.geosession,
- )
- if geoname_record and geoname_record.ok:
- place = self.get_by_geonames_id(geoname_record.geonames_id)
- if place is None:
- coord = self.place_cache.get(fid) if coord is None else coord
- place = Place(
- 'PFSID' + fid if fid is not None else 'P' + str(self._next_place_counter),
- place_name,
- latitude=coord[0] if coord is not None else None,
- longitude=coord[1] if coord is not None else None
- )
- self.places.append(place)
- self.places_by_names[place_name] = place
- return self.places_by_names[place_name]
-
- # def add_fam(self, father, mother):
- # """add a family to the family tree
- # :param father: the father fid or None
- # :param mother: the mother fid or None
- # """
- # if (father, mother) not in self.fam:
- # self.fam[(father, mother)] = Fam(father, mother, self)
-
- def add_trio(self, father: Indi | None, mother: Indi | None, child: Indi | None):
- """add a children relationship to the family tree
- :param father: the father fid or None
- :param mother: the mother fid or None
- :param child: the child fid or None
- """
- fam = self.ensure_family(father, mother)
- if child is not None:
- fam.add_child(child)
- child.add_famc(fam)
-
- if father is not None:
- father.add_fams(fam)
- if mother is not None:
- mother.add_fams(fam)
-
- def add_parents(self, fids: Set[str]):
- """add parents relationships
- :param fids: a set of fids
- """
- parents = set()
- for fid in fids & self.indi.keys():
- for couple in self.indi[fid].parents:
- parents |= set(couple)
- if parents:
- self.add_indis(parents)
- for fid in fids & self.indi.keys():
- for father, mother in self.indi[fid].parents:
- if (
- mother in self.indi
- and father in self.indi
- or not father
- and mother in self.indi
- or not mother
- and father in self.indi
- ):
- self.add_trio(
- self.indi.get(father),
- self.indi.get(mother),
- self.indi.get(fid),
- )
- return set(filter(None, parents))
-
- def add_spouses(self, fids: Set[str]):
- """add spouse relationships
- :param fids: a set of fid
- """
-
- async def add(loop, rels: Set[Tuple[str, str, str]]):
- futures = set()
- for father, mother, relfid in rels:
- if father in self.exclude or mother in self.exclude:
- continue
- fam_id = Fam.gen_id(self.indi[father], self.indi[mother])
- if self.fam.get(fam_id):
- futures.add(
- loop.run_in_executor(
- None, self.fam[fam_id].add_marriage, relfid
- )
- )
- for future in futures:
- await future
-
- rels: Set[Tuple[str, str, str]] = set()
- for fid in fids & self.indi.keys():
- rels |= self.indi[fid].spouses
- loop = asyncio.get_event_loop()
- if rels:
- self.add_indis(
- set.union(*({father, mother} for father, mother, relfid in rels))
- )
- for father, mother, _ in rels:
- if father in self.indi and mother in self.indi:
- father_indi = self.indi[father]
- mother_indi = self.indi[mother]
- fam = self.ensure_family(father_indi, mother_indi)
- father_indi.add_fams(fam)
- mother_indi.add_fams(fam)
-
- loop.run_until_complete(add(loop, rels))
-
- def add_children(self, fids):
- """add children relationships
- :param fids: a set of fid
- """
- rels: Set[Tuple[str, str, str]] = set()
- for fid in fids & self.indi.keys():
- rels |= self.indi[fid].children if fid in self.indi else set()
- children = set()
- if rels:
- self.add_indis(set.union(*(set(rel) for rel in rels)))
- for father, mother, child in rels:
- if child in self.indi and (
- mother in self.indi
- and father in self.indi
- or not father
- and mother in self.indi
- or not mother
- and father in self.indi
- ):
- self.add_trio(
- self.indi.get(father),
- self.indi.get(mother),
- self.indi.get(child),
- )
- children.add(child)
- return children
-
- def add_ordinances(self, fid):
- """retrieve ordinances
- :param fid: an individual fid
- """
- if fid in self.indi:
- ret, famc = self.indi[fid].get_ordinances()
- if famc and famc in self.fam:
- self.indi[fid].sealing_child.famc = self.fam[famc]
- for o in ret:
- spouse_id = o["relationships"]["spouseId"]
- if (fid, spouse_id) in self.fam:
- self.fam[fid, spouse_id].sealing_spouse = Ordinance(o)
- elif (spouse_id, fid) in self.fam:
- self.fam[spouse_id, fid].sealing_spouse = Ordinance(o)
-
- def reset_num(self):
- """reset all GEDCOM identifiers"""
- # for husb, wife in self.fam:
- # self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None
- # self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None
- # self.fam[(husb, wife)].chil_num = set(
- # self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid
- # )
- # for fid in self.indi:
- # self.indi[fid].famc_num = set(
- # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid
- # )
- # self.indi[fid].fams_num = set(
- # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid
- # )
- # self.indi[fid].famc_ids = set(
- # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid
- # )
- # self.indi[fid].fams_ids = set(
- # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid
- # )
-
- def printxml(self, file: BinaryIO):
-
-# root = ET.Element("root")
-# doc = ET.SubElement(root, "doc")
-
-# ET.SubElement(doc, "field1", name="blah").text = "some value1"
-# ET.SubElement(doc, "field2", name="asdfasd").text = "some vlaue2"
-
-# tree = ET.ElementTree(root)
-# tree.write("filename.xml")
-
-# <?xml version="1.0" encoding="UTF-8"?>
-# <!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN"
-# "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">
-# <database xmlns="http://gramps-project.org/xml/1.7.1/">
-# <header
-# <created date="2024-07-07" version="5.2.2"/>
-# <researcher>
-# <resname>Barnabás Südy</resname>
-# </researcher>
-# </header>
-
- root = ET.Element("database", xmlns="http://gramps-project.org/xml/1.7.1/")
-
- header = ET.SubElement(root, "header")
- ET.SubElement(header, "created", date=datetime.strftime(datetime.now(), "%Y-%m-%d"), version="5.2.2")
- researcher = ET.SubElement(header, "researcher")
- resname = ET.SubElement(researcher, "resname")
- resname.text = self.display_name
-
- people = ET.SubElement(root, "people")
- for indi in sorted(self.indi.values(), key=lambda x: x.num):
- indi.printxml(people)
-
- families = ET.SubElement(root, "families")
- for fam in sorted(self.fam.values(), key=lambda x: x.num):
- fam.printxml(families)
-
- events = ET.SubElement(root, "events")
- for fact in self.facts:
- fact.printxml(events)
-
- notes = ET.SubElement(root, "notes")
- for note in sorted(self.notes, key=lambda x: x.id):
- note.printxml(notes)
-
- places = ET.SubElement(root, "places")
- for place in self.places:
- place.printxml(places)
-
- sources = ET.SubElement(root, "sources")
- for source in self.sources.values():
- source.printxml(sources)
-
- citations = ET.SubElement(root, "citations")
- for citation in self.citations.values():
- citation.printxml(citations)
-
- tree = ET.ElementTree(root)
-
- doctype='<!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN" "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">'
- file.write(doctype.encode('utf-8'))
- tree.write(file, 'utf-8')
-
-
- def print(self, file=sys.stdout):
- """print family tree in GEDCOM format"""
- file.write("0 HEAD\n")
- file.write("1 CHAR UTF-8\n")
- file.write("1 GEDC\n")
- file.write("2 VERS 5.5.1\n")
- file.write("2 FORM LINEAGE-LINKED\n")
- file.write("1 SOUR getmyancestors\n")
- file.write("2 VERS %s\n" % getmyancestors.__version__)
- file.write("2 NAME getmyancestors\n")
- file.write("1 DATE %s\n" % time.strftime("%d %b %Y"))
- file.write("2 TIME %s\n" % time.strftime("%H:%M:%S"))
- file.write("1 SUBM @SUBM@\n")
- file.write("0 @SUBM@ SUBM\n")
- file.write("1 NAME %s\n" % self.display_name)
- # file.write("1 LANG %s\n" % self.lang)
-
- for fid in sorted(self.indi, key=lambda x: self.indi.__getitem__(x).num):
- self.indi[fid].print(file)
- for fam in sorted(self.fam.values(), key=lambda x: x.num):
- fam.print(file)
- sources = sorted(self.sources.values(), key=lambda x: x.num)
- for s in sources:
- s.print(file)
- notes = sorted(self.notes, key=lambda x: x.id)
- for i, n in enumerate(notes):
- if i > 0:
- if n.id == notes[i - 1].id:
- continue
- n.print(file)
- file.write("0 TRLR\n")
--- /dev/null
+"""Tree module for family tree data structures"""
+
+# Import classes in dependency order (utils -> records -> elements -> core)
+from .core import Fam, Indi, Tree
+from .elements import Citation, Name, Ordinance, Place
+from .records import Fact, Memorie, Note, Source
+from .utils import CITY, COUNTRY, COUNTY, GEONAME_FEATURE_MAP, NAME_MAP, cont
+
+__all__ = [
+ # Functions
+ "cont",
+ # Constants
+ "COUNTY",
+ "COUNTRY",
+ "CITY",
+ "NAME_MAP",
+ "GEONAME_FEATURE_MAP",
+ # Classes from records
+ "Note",
+ "Source",
+ "Fact",
+ "Memorie",
+ # Classes from elements
+ "Name",
+ "Place",
+ "Ordinance",
+ "Citation",
+ # Classes from core
+ "Indi",
+ "Fam",
+ "Tree",
+]
--- /dev/null
+"""Core classes: Indi, Fam, Tree"""
+
+# pylint: disable=too-many-lines
+
+import asyncio
+import hashlib
+import os
+import sys
+import threading
+import time
+import xml.etree.ElementTree as ET
+from datetime import datetime
+from typing import Any, BinaryIO, Dict, Iterable, List, Optional, Set, Tuple, Union
+
+# global imports
+import babelfish
+import geocoder
+from requests_cache import CachedSession
+
+# local imports
+from getmyancestors import __version__
+from getmyancestors.classes.constants import MAX_PERSONS
+from getmyancestors.classes.session import GMASession
+
+from .elements import Citation, Name, Ordinance, Place
+from .records import Fact, Memorie, Note, Source
+from .utils import GEONAME_FEATURE_MAP, cont
+
+
+class Indi:
+ """GEDCOM individual class
+ :param fid' FamilySearch id
+ :param tree: a tree object
+ :param num: the GEDCOM identifier
+ """
+
+ counter = 0
+
+ def __init__(
+ self, fid: Optional[str] = None, tree: Optional["Tree"] = None, num=None
+ ):
+ self._handle: Optional[str] = None
+ if num:
+ self.num = num
+ else:
+ Indi.counter += 1
+ self.num = Indi.counter
+ self.fid = fid
+ self.tree = tree
+ self.num_prefix = "I"
+ self.origin_file: Optional[str] = None
+ self.famc: Set["Fam"] = set()
+ self.fams: Set["Fam"] = set()
+ self.famc_fid: Set[str] = set()
+ self.fams_fid: Set[str] = set()
+ self.famc_num: Set[int] = set()
+ self.fams_num: Set[int] = set()
+ self.famc_ids: Set[str] = set()
+ self.fams_ids: Set[str] = set()
+ self.name: Optional[Name] = None
+ self.gender: Optional[str] = None
+ self.living: Optional[bool] = None
+ self.parents: Set[Tuple[Optional[str], Optional[str]]] = (
+ set()
+ ) # (father_id, mother_id)
+ self.spouses: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = (
+ set()
+ ) # (person1, person2, relfid)
+ self.children: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = (
+ set()
+ ) # (father_id, mother_id, child_id)
+ self.baptism: Optional[Ordinance] = None
+ self.confirmation: Optional[Ordinance] = None
+ self.initiatory: Optional[Ordinance] = None
+ self.endowment: Optional[Ordinance] = None
+ self.sealing_child: Optional[Ordinance] = None
+ self.nicknames: Set[Name] = set()
+ self.birthnames: Set[Name] = set()
+ self.married: Set[Name] = set()
+ self.aka: Set[Name] = set()
+ self.facts: Set[Fact] = set()
+ self.notes: Set[Note] = set()
+ self.sources: Set[Tuple[Source, Optional[str]]] = set()
+ self.citations: Set[Citation] = set()
+ self.memories: Set[Memorie] = set()
+
+ def __str__(self):
+ """Return readable string for debugging/reference purposes."""
+ return f"{self.num}. {self.name}, fam: {self.fid}"
+
+ def add_data(self, data):
+ """add FS individual data"""
+ if data:
+ self.living = data["living"]
+ for x in data["names"]:
+ alt = not x.get("preferred", False)
+ if x["type"] == "http://gedcomx.org/Nickname":
+ self.nicknames.add(Name(x, self.tree, self.fid, "nickname", alt))
+ elif x["type"] == "http://gedcomx.org/BirthName":
+ self.birthnames.add(Name(x, self.tree, self.fid, "birthname", alt))
+ elif x["type"] == "http://gedcomx.org/AlsoKnownAs":
+ self.aka.add(Name(x, self.tree, self.fid, "aka", alt))
+ elif x["type"] == "http://gedcomx.org/MarriedName":
+ self.married.add(Name(x, self.tree, self.fid, "married", alt))
+ else:
+ print("Unknown name type: " + x.get("type"), file=sys.stderr)
+ raise ValueError("Unknown name type")
+ if "gender" in data:
+ if data["gender"]["type"] == "http://gedcomx.org/Male":
+ self.gender = "M"
+ elif data["gender"]["type"] == "http://gedcomx.org/Female":
+ self.gender = "F"
+ elif data["gender"]["type"] == "http://gedcomx.org/Unknown":
+ self.gender = "U"
+ if "facts" in data:
+ for x in data["facts"]:
+ if x["type"] == "http://familysearch.org/v1/LifeSketch":
+ self.notes.add(
+ Note(
+ "=== %s ===\n%s"
+ % (
+ (
+ self.tree.fs._("Life Sketch")
+ if self.tree and self.tree.fs
+ else "Life Sketch"
+ ),
+ x.get("value", ""),
+ ),
+ self.tree,
+ num_prefix=f"INDI_{self.fid}",
+ note_type="Person Note",
+ )
+ )
+ else:
+ self.facts.add(
+ Fact(x, self.tree, num_prefix=f"INDI_{self.fid}")
+ )
+ if "sources" in data and self.tree and self.tree.fs:
+ sources = self.tree.fs.get_url(
+ "/platform/tree/persons/%s/sources" % self.fid
+ )
+ if sources:
+ for quote in sources["persons"][0]["sources"]:
+ source_id = quote["descriptionId"]
+ source_data = next(
+ (
+ s
+ for s in sources["sourceDescriptions"]
+ if s["id"] == source_id
+ ),
+ None,
+ )
+ source = (
+ self.tree.ensure_source(source_data)
+ if self.tree and source_data
+ else None
+ )
+ if source and self.tree:
+ citation = self.tree.ensure_citation(quote, source)
+ self.citations.add(citation)
+ self.sources.add((source, citation.message))
+
+ for evidence in data.get("evidence", []):
+ memory_id, *_ = evidence["id"].partition("-")
+ url = "/platform/memories/memories/%s" % memory_id
+ memorie = (
+ self.tree.fs.get_url(url) if self.tree and self.tree.fs else None
+ )
+ if memorie and "sourceDescriptions" in memorie:
+ for x in memorie["sourceDescriptions"]:
+ if x["mediaType"] == "text/plain":
+ text = "\n".join(
+ val.get("value", "")
+ for val in x.get("titles", [])
+ + x.get("descriptions", [])
+ )
+ self.notes.add(
+ Note(
+ text,
+ self.tree,
+ num_prefix=f"INDI_{self.fid}",
+ note_type="Person Note",
+ )
+ )
+ else:
+ self.memories.add(Memorie(x))
+
+ def add_fams(self, fam: "Fam"):
+ """add family fid (for spouse or parent)"""
+ self.fams.add(fam)
+
+ def add_famc(self, fam: "Fam"):
+ """add family fid (for child)"""
+ self.famc.add(fam)
+
+ def get_notes(self):
+ """retrieve individual notes"""
+ name_str = str(self.name) if self.name else "Unknown"
+ print(
+ f"Getting Notes for {self.fid} {name_str}",
+ file=sys.stderr,
+ )
+ if not self.tree or not self.tree.fs:
+ return
+ notes = self.tree.fs.get_url("/platform/tree/persons/%s/notes" % self.fid)
+ if notes:
+ for n in notes["persons"][0]["notes"]:
+ text_note = "=== %s ===\n" % n["subject"] if "subject" in n else ""
+ text_note += n["text"] + "\n" if "text" in n else ""
+ self.notes.add(
+ Note(
+ text_note,
+ self.tree,
+ num_prefix=f"INDI_{self.fid}",
+ note_type="Person Note",
+ )
+ )
+
+ def get_ordinances(self):
+ """retrieve LDS ordinances
+ need a LDS account
+ """
+ res: List[Any] = []
+ famc: Union[bool, Tuple[str, str]] = False
+ if self.living:
+ return res, famc
+ if not self.tree or not self.tree.fs:
+ return res, famc
+ url = "/service/tree/tree-data/reservations/person/%s/ordinances" % self.fid
+ data = self.tree.fs.get_url(url, {}, no_api=True)
+ if data:
+ for key, o in data["data"].items():
+ if key == "baptism":
+ self.baptism = Ordinance(o)
+ elif key == "confirmation":
+ self.confirmation = Ordinance(o)
+ elif key == "initiatory":
+ self.initiatory = Ordinance(o)
+ elif key == "endowment":
+ self.endowment = Ordinance(o)
+ elif key == "sealingsToParents":
+ for subo in o:
+ self.sealing_child = Ordinance(subo)
+ relationships = subo.get("relationships", {})
+ father = relationships.get("parent1Id")
+ mother = relationships.get("parent2Id")
+ if father and mother:
+ famc = father, mother
+ elif key == "sealingsToSpouses":
+ res += o
+ return res, famc
+
+ @property
+ def id(self):
+ return self.fid or self.num
+
+ @property
+ def handle(self):
+ if not self._handle:
+ self._handle = "_" + os.urandom(10).hex()
+
+ return self._handle
+
+ def printxml(self, parent_element):
+ # <person handle="_fa593c2779e5ed1c947416cba9e" change="1720382301" id="IL43B-D2H">
+ # <gender>M</gender>
+ # <name type="Birth Name">
+ # <first>József</first>
+ # <surname>Cser</surname>
+ # <noteref hlink="_fa593c277f7c527e3afe4623b9"/>
+ # </name>
+ # <eventref hlink="_fa593c277a0712aa4241bbf47db" role="Primary"/>
+ # <attribute type="_FSFTID" value="L43B-D2H"/>
+ # <childof hlink="_fa593c277af212e6c1f9f44bc4a"/>
+ # <parentin hlink="_fa593c277af72c83e0e3fbf6ed2"/>
+ # <citationref hlink="_fa593c277b7715371c26d1b0a81"/>
+ # </person>
+ person = ET.SubElement(
+ parent_element,
+ "person",
+ handle=self.handle,
+ # change='1720382301',
+ id="I" + str(self.id),
+ )
+ if self.fid:
+ # Add custom attribute for FamilySearch ID
+ ET.SubElement(person, "attribute", type="_FSFTID", value=self.fid)
+
+ if self.name:
+ self.name.printxml(person)
+ for name in self.nicknames | self.birthnames | self.aka | self.married:
+ name.printxml(person)
+
+ gender = ET.SubElement(person, "gender")
+ gender.text = self.gender
+
+ if self.fams:
+ for fam in self.fams:
+ ET.SubElement(person, "parentin", hlink=fam.handle)
+
+ if self.famc:
+ for fam in self.famc:
+ ET.SubElement(person, "childof", hlink=fam.handle)
+
+ for fact in self.facts:
+ ET.SubElement(person, "eventref", hlink=fact.handle, role="Primary")
+
+ for citation in self.citations:
+ ET.SubElement(person, "citationref", hlink=citation.handle)
+
+ for note in self.notes:
+ ET.SubElement(person, "noteref", hlink=note.handle)
+
+ # <noteref hlink="_fac4a686369713d9cd55159ada9"/>
+ # <citationref hlink="_fac4a72a01b1681293ea1ee8d9"/>
+
+ def get_contributors(self):
+ """retrieve contributors"""
+ if self.fid and self.tree:
+ url = "/platform/tree/persons/%s/changes" % self.fid
+ text = self.tree.get_contributors_text(url)
+ if text:
+ for n in self.tree.notes:
+ if n.text == text:
+ self.notes.add(n)
+ return
+ self.notes.add(Note(text, self.tree))
+
+ def print(self, file=sys.stdout):
+ """print individual in GEDCOM format"""
+ file.write("0 @I%s@ INDI\n" % self.id)
+ if self.name:
+ self.name.print(file)
+ for nick in sorted(
+ self.nicknames,
+ key=lambda x: (
+ x.given or "",
+ x.surname or "",
+ x.prefix or "",
+ x.suffix or "",
+ x.kind or "",
+ str(x.alternative),
+ x.note.text if x.note else "",
+ ),
+ ):
+ file.write(cont("2 NICK %s %s" % (nick.given, nick.surname)))
+ for birthname in sorted(
+ self.birthnames,
+ key=lambda x: (
+ x.given or "",
+ x.surname or "",
+ x.prefix or "",
+ x.suffix or "",
+ x.kind or "",
+ str(x.alternative),
+ x.note.text if x.note else "",
+ ),
+ ):
+ birthname.print(file)
+ for aka in sorted(
+ self.aka,
+ key=lambda x: (
+ x.given or "",
+ x.surname or "",
+ x.prefix or "",
+ x.suffix or "",
+ x.kind or "",
+ str(x.alternative),
+ x.note.text if x.note else "",
+ ),
+ ):
+ aka.print(file, "aka")
+ for married_name in sorted(
+ self.married,
+ key=lambda x: (
+ x.given or "",
+ x.surname or "",
+ x.prefix or "",
+ x.suffix or "",
+ x.kind or "",
+ str(x.alternative),
+ x.note.text if x.note else "",
+ ),
+ ):
+ married_name.print(file, "married")
+ if self.gender:
+ file.write("1 SEX %s\n" % self.gender)
+ for fact in sorted(
+ self.facts,
+ key=lambda x: (
+ x.date or "9999",
+ x.type or "",
+ x.value or "",
+ x.place.id if x.place else "",
+ x.note.text if x.note else "",
+ ),
+ ):
+ fact.print(file)
+ for memory in sorted(
+ self.memories, key=lambda x: (x.url or "", x.description or "")
+ ):
+ memory.print(file)
+ if self.baptism:
+ file.write("1 BAPL\n")
+ self.baptism.print(file)
+ if self.confirmation:
+ file.write("1 CONL\n")
+ self.confirmation.print(file)
+ if self.initiatory:
+ file.write("1 WAC\n")
+ self.initiatory.print(file)
+ if self.endowment:
+ file.write("1 ENDL\n")
+ self.endowment.print(file)
+ if self.sealing_child:
+ file.write("1 SLGC\n")
+ self.sealing_child.print(file)
+ for fam in sorted(self.fams, key=lambda x: x.id or ""):
+ file.write("1 FAMS @F%s@\n" % fam.id)
+ for fam in sorted(self.famc, key=lambda x: x.id or ""):
+ file.write("1 FAMC @F%s@\n" % fam.id)
+ # print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr)
+ # for num in self.fams_ids:
+ # print(f'Famc Ids: {self.famc_ids}', file=sys.stderr)
+ # for num in self.famc_ids:
+ # file.write("1 FAMC @F%s@\n" % num)
+ file.write("1 _FSFTID %s\n" % self.fid)
+ for note in sorted(self.notes, key=lambda x: x.id or ""):
+ note.link(file)
+ for source, quote in sorted(
+ self.sources, key=lambda x: (x[0].id or "", x[1] or "")
+ ):
+ source.link(file, 1)
+ if quote:
+ file.write(cont("2 PAGE " + quote))
+
+
+class Fam:
+ """GEDCOM family class
+ :param husb: husbant fid
+ :param wife: wife fid
+ :param tree: a Tree object
+ :param num: a GEDCOM identifier
+ """
+
+ counter = 0
+
+ def __init__(
+ self,
+ husband: Optional[Indi] = None,
+ wife: Optional[Indi] = None,
+ tree: Optional["Tree"] = None,
+ num=None,
+ ):
+ self._handle: Optional[str] = None
+ self.num = num if num else Fam.gen_id(husband, wife)
+ self.fid: Optional[str] = None
+ self._husband = husband
+ self._wife = wife
+ self.tree = tree
+ self.num_prefix = "F"
+ self.origin_file: Optional[str] = None
+ self.children: Set[Indi] = set()
+ self.facts: Set[Fact] = set()
+ self.sealing_spouse: Optional[Ordinance] = None
+ self.husb_num: Optional[str] = None
+ self.wife_num: Optional[str] = None
+ self.chil_num: Set[str] = set()
+ self.husb_fid: Optional[str] = None
+ self.wife_fid: Optional[str] = None
+ self.chil_fid: Set[str] = set()
+ self.notes: Set[Note] = set()
+ self.sources: Set[Tuple[Source, Optional[str]]] = set()
+
+ @property
+ def husband(self):
+ """get husband"""
+ if self._husband:
+ return self._husband
+ if self.husb_num and self.tree and self.husb_num in self.tree.indi:
+ return self.tree.indi.get(self.husb_num)
+ return None
+
+ @husband.setter
+ def husband(self, value):
+ """set husband"""
+ self._husband = value
+
+ @property
+ def wife(self):
+ """get wife"""
+ if self._wife:
+ return self._wife
+ if self.wife_num and self.tree and self.wife_num in self.tree.indi:
+ return self.tree.indi.get(self.wife_num)
+ return None
+
+ @wife.setter
+ def wife(self, value):
+ """set wife"""
+ self._wife = value
+
+ @property
+ def handle(self):
+ if not self._handle:
+ self._handle = "_" + os.urandom(10).hex()
+
+ return self._handle
+
+ @staticmethod
+ def gen_id(husband: Indi | None, wife: Indi | None) -> str:
+ if husband and wife:
+ return f"FAM_{husband.id}-{wife.id}"
+ if husband:
+ return f"FAM_{husband.id}-UNK"
+ if wife:
+ return f"FAM_UNK-{wife.id}"
+
+ Fam.counter += 1
+ return f"FAM_UNK-UNK-{Fam.counter}"
+
+ def add_child(self, child: Indi | None):
+ """add a child fid to the family"""
+ if child is not None:
+ self.children.add(child)
+
+ def add_marriage(self, fid: str):
+ """retrieve and add marriage information
+ :param fid: the marriage fid
+ """
+ if not self.tree or not self.tree.fs:
+ return
+
+ if not self.fid:
+ self.fid = fid
+ url = "/platform/tree/couple-relationships/%s" % self.fid
+ data = self.tree.fs.get_url(url)
+ if data:
+ if "facts" in data["relationships"][0]:
+ for x in data["relationships"][0]["facts"]:
+ self.facts.add(Fact(x, self.tree, num_prefix=f"FAM_{self.fid}"))
+ if "sources" in data["relationships"][0]:
+ quotes = dict()
+ for x in data["relationships"][0]["sources"]:
+ quotes[x["descriptionId"]] = (
+ x["attribution"]["changeMessage"]
+ if "changeMessage" in x["attribution"]
+ else None
+ )
+ # self.tree.sources is effectively Dict[str, Source] so keys() returns strings
+ new_sources = quotes.keys() - self.tree.sources.keys()
+ if new_sources:
+ sources = self.tree.fs.get_url(
+ "/platform/tree/couple-relationships/%s/sources" % self.fid
+ )
+ for source in sources["sourceDescriptions"]:
+ if (
+ source["id"] in new_sources
+ and source["id"] not in self.tree.sources
+ ):
+ self.tree.sources[source["id"]] = Source(
+ source, self.tree
+ )
+ for source_fid, change_message in quotes.items():
+ self.sources.add(
+ (self.tree.sources[source_fid], change_message)
+ )
+
+ def get_notes(self):
+ """retrieve marriage notes"""
+ if self.fid and self.tree and self.tree.fs:
+ notes = self.tree.fs.get_url(
+ "/platform/tree/couple-relationships/%s/notes" % self.fid
+ )
+ if notes:
+ for n in notes["relationships"][0]["notes"]:
+ text_note = "=== %s ===\n" % n["subject"] if "subject" in n else ""
+ text_note += n["text"] + "\n" if "text" in n else ""
+ self.notes.add(
+ Note(
+ text_note,
+ self.tree,
+ num_prefix=f"FAM_{self.fid}",
+ note_type="Marriage Note",
+ )
+ )
+
+ @property
+ def id(self):
+ # Prefer fid (original FamilySearch ID) to preserve through merge
+ # Fall back to num (counter) for newly created families
+ return self.fid if self.fid else self.num
+
+ def printxml(self, parent_element):
+ # <family handle="_fa593c277af212e6c1f9f44bc4a" change="1720382301" id="F9MKP-K92">
+ # <rel type="Unknown"/>
+ # <father hlink="_fa593c277f14dc6db9ab19cbe09"/>
+ # <mother hlink="_fa593c277cd4af15983d7064c59"/>
+ # <childref hlink="_fa593c279e1466787c923487b98"/>
+ # <attribute type="_FSFTID" value="9MKP-K92"/>
+ # </family>
+ family = ET.SubElement(
+ parent_element,
+ "family",
+ handle=self.handle,
+ # change='1720382301',
+ id=self.id,
+ )
+ ET.SubElement(family, "rel", type="Unknown")
+ if self.husband:
+ ET.SubElement(family, "father", hlink=self.husband.handle)
+ if self.wife:
+ ET.SubElement(family, "mother", hlink=self.wife.handle)
+ for child in self.children:
+ ET.SubElement(family, "childref", hlink=child.handle)
+ for fact in self.facts:
+ ET.SubElement(family, "eventref", hlink=fact.handle, role="Primary")
+
+ def get_contributors(self):
+ """retrieve contributors"""
+ if self.fid and self.tree:
+ url = "/platform/tree/couple-relationships/%s/changes" % self.fid
+ text = self.tree.get_contributors_text(url)
+ if text:
+ for n in self.tree.notes:
+ if n.text == text:
+ self.notes.add(n)
+ return
+ self.notes.add(Note(text, self.tree))
+
+ def print(self, file=sys.stdout):
+ """print family information in GEDCOM format"""
+ file.write("0 @F%s@ FAM\n" % self.id)
+ if self.husband:
+ file.write("1 HUSB @I%s@\n" % self.husband.id)
+ if self.wife:
+ file.write("1 WIFE @I%s@\n" % self.wife.id)
+ for child in sorted(self.children, key=lambda x: x.id or ""):
+ file.write("1 CHIL @I%s@\n" % child.id)
+ for fact in sorted(
+ self.facts,
+ key=lambda x: (
+ x.date or "9999",
+ x.type or "",
+ x.value or "",
+ x.place.id if x.place else "",
+ x.note.text if x.note else "",
+ ),
+ ):
+ fact.print(file)
+ if self.sealing_spouse:
+ file.write("1 SLGS\n")
+ self.sealing_spouse.print(file)
+ if self.fid:
+ file.write("1 _FSFTID %s\n" % self.fid)
+ for note in sorted(self.notes, key=lambda x: x.id or ""):
+ note.link(file)
+ for source, quote in sorted(
+ self.sources, key=lambda x: (x[0].id or "", x[1] or "")
+ ):
+ source.link(file, 1)
+ if quote:
+ file.write(cont("2 PAGE " + quote))
+
+
+class Tree:
+ """family tree class
+ :param fs: a Session object
+ """
+
+ def __init__(
+ self,
+ fs: Optional[GMASession] = None,
+ exclude: Optional[List[str]] = None,
+ geonames_key=None,
+ creation_date: Optional[datetime] = None,
+ **kwargs,
+ ):
+ self.fs = fs
+ self.geonames_key = geonames_key
+ self.lock = threading.Lock()
+ self.creation_date: Optional[datetime] = creation_date
+ self.indi: Dict[str, Indi] = {}
+ self.fam: Dict[str, Fam] = {}
+ self.notes: Set[Note] = set()
+ self.facts: Set[Fact] = set()
+ self.sources: Dict[str, Source] = {}
+ self.citations: Dict[str, Citation] = {}
+ self.places: Set[Place] = set()
+ self.places_by_names: Dict[str, Place] = {}
+ self.place_cache: Dict[str, Tuple[float, float]] = {}
+ self.display_name: Optional[str] = None
+ self.lang: Optional[str] = None
+ self.exclude: List[str] = exclude or []
+ self.only_blood_relatives = False
+ if "only_blood_relatives" in kwargs:
+ self.only_blood_relatives = kwargs["only_blood_relatives"]
+ self.place_counter = 0
+ if fs:
+ self.display_name = fs.display_name
+ self.lang = babelfish.Language.fromalpha2(fs.lang).name
+
+ # Geocoder cache - honor GMA_CACHE_DIR if present, else fallback to ~/.cache/getmyancestors/
+ geocache_dir = os.environ.get(
+ "GMA_CACHE_DIR", os.path.expanduser("~/.cache/getmyancestors")
+ )
+ os.makedirs(geocache_dir, exist_ok=True)
+ geocache_path = os.path.join(geocache_dir, "geocoder_requests")
+
+ self.geosession = CachedSession(
+ geocache_path,
+ backend="sqlite",
+ expire_after=86400,
+ allowable_codes=(200,),
+ backend_kwargs={"table_name": "requests"},
+ )
+ if os.environ.get("GMA_OFFLINE_MODE"):
+ orig_request = self.geosession.request
+
+ def offline_request(*args, **kwargs):
+ kwargs["only_if_cached"] = True
+ return orig_request(*args, **kwargs)
+
+ self.geosession.request = offline_request # type: ignore[method-assign]
+
+ def add_indis(self, fids_in: Iterable[str]):
+ """add individuals to the family tree
+ :param fids: an iterable of fid
+ """
+ fids = []
+ for fid in fids_in:
+ if fid not in self.exclude:
+ fids.append(fid)
+ else:
+ print("Excluding %s from the family tree" % fid, file=sys.stderr)
+
+ async def add_datas(loop, data):
+ futures = set()
+ for person in data["persons"]:
+ self.indi[person["id"]] = Indi(person["id"], self)
+ futures.add(
+ loop.run_in_executor(None, self.indi[person["id"]].add_data, person)
+ )
+ for future in futures:
+ await future
+
+ new_fids = sorted([fid for fid in fids if fid and fid not in self.indi])
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ while new_fids:
+ if not self.fs:
+ break
+ data = self.fs.get_url(
+ "/platform/tree/persons?pids=" + ",".join(new_fids[:MAX_PERSONS])
+ )
+ if data:
+ if "places" in data:
+ for place in data["places"]:
+ if place["id"] not in self.place_cache:
+ self.place_cache[place["id"]] = (
+ place["latitude"],
+ place["longitude"],
+ )
+ loop.run_until_complete(add_datas(loop, data))
+ if "childAndParentsRelationships" in data:
+ for rel in data["childAndParentsRelationships"]:
+ father: str | None = rel.get("parent1", {}).get("resourceId")
+ mother: str | None = rel.get("parent2", {}).get("resourceId")
+ child: str | None = rel.get("child", {}).get("resourceId")
+ if child in self.indi:
+ self.indi[child].parents.add((father, mother))
+ if father in self.indi:
+ self.indi[father].children.add((father, mother, child))
+ if mother in self.indi:
+ self.indi[mother].children.add((father, mother, child))
+ if "relationships" in data:
+ for rel in data["relationships"]:
+ if rel["type"] == "http://gedcomx.org/Couple":
+ person1 = rel["person1"]["resourceId"]
+ person2 = rel["person2"]["resourceId"]
+ relfid = rel["id"]
+ if person1 in self.indi:
+ self.indi[person1].spouses.add(
+ (person1, person2, relfid)
+ )
+ if person2 in self.indi:
+ self.indi[person2].spouses.add(
+ (person1, person2, relfid)
+ )
+ new_fids = new_fids[MAX_PERSONS:]
+
+ def ensure_source(self, source_data: Dict[str, Any]) -> Source:
+ with self.lock:
+ if source_data["id"] not in self.sources:
+ self.sources[source_data["id"]] = Source(source_data, self)
+ return self.sources[source_data["id"]]
+
+ def ensure_citation(self, data: Dict[str, Any], source: Source) -> Citation:
+ with self.lock:
+ citation_id = data["id"]
+ if citation_id not in self.citations:
+ self.citations[citation_id] = Citation(data, source)
+ return self.citations[citation_id]
+
+ def ensure_family(self, father: Optional["Indi"], mother: Optional["Indi"]) -> Fam:
+ with self.lock:
+ fam_id = Fam.gen_id(father, mother)
+ if fam_id not in self.fam:
+ self.fam[fam_id] = Fam(father, mother, self)
+ return self.fam[fam_id]
+
+ def get_contributors_text(self, url: str) -> Optional[str]:
+ """Helper to fetch contributors from a changelog URL"""
+ if not self.fs:
+ return None
+ data = self.fs.get_url(url, {"Accept": "application/x-gedcomx-atom+json"})
+ if not data:
+ return None
+
+ contributors_map = {} # name -> uri
+ names = set()
+
+ for entry in data.get("entries", []):
+ for contrib in entry.get("contributors", []):
+ name = contrib.get("name", "Unknown")
+ uri = contrib.get("uri", "").replace("https://www.familysearch.org", "")
+ contributors_map[name] = uri
+ names.add(name)
+
+ if not names:
+ return None
+
+ text = "=== %s ===\n" % self.fs._("Contributors")
+
+ for name in sorted(names):
+ text += name
+ agent_uri = contributors_map[name]
+ # Fetch agent details
+ # Default headers work better per jcarroll findings
+ agent_data = self.fs.get_url(agent_uri)
+
+ # Display Name
+ try:
+ agent_names = agent_data["agents"][0]["names"]
+ display_name = "".join([n["value"] + " " for n in agent_names]).strip()
+ if display_name != name:
+ text += " (" + display_name + ")"
+ except (KeyError, IndexError, TypeError):
+ pass
+
+ # Email
+ try:
+ email = agent_data["agents"][0]["emails"][0]["resource"].replace(
+ "mailto:", " "
+ )
+ text += email
+ except (KeyError, IndexError, TypeError):
+ pass
+
+ # Phone
+ try:
+ phone = agent_data["agents"][0]["phones"][0]["resource"].replace(
+ "tel:", " "
+ )
+ text += phone
+ except (KeyError, IndexError, TypeError):
+ pass
+
+ text += "\n"
+
+ return text
+
+ def place_by_geoname_id(self, place_id: str) -> Optional[Place]:
+ for place in self.places:
+ if place.id == place_id:
+ return place
+ return None
+
+ def get_by_geonames_id(self, geonames_id: str) -> Optional[Place]:
+ print("Fetching place hierarchy for", geonames_id, file=sys.stderr)
+ hierarchy = geocoder.geonames(
+ geonames_id,
+ key=self.geonames_key,
+ lang=["hu", "en", "de"],
+ method="hierarchy",
+ session=self.geosession,
+ )
+
+ if hierarchy and hierarchy.ok:
+ last_place = None
+ for item in hierarchy.geojson.get("features", []):
+ properties = item.get("properties", {})
+ code = properties.get("code")
+
+ if code in ["AREA", "CONT"]:
+ continue
+
+ print("Properties", properties, file=sys.stderr)
+ place_id = "GEO" + str(properties["geonames_id"])
+ place = self.place_by_geoname_id(place_id)
+ if place is None:
+ place = Place(
+ place_id,
+ properties.get("address"),
+ GEONAME_FEATURE_MAP.get(code, "Unknown"),
+ last_place,
+ properties.get("lat"),
+ properties.get("lng"),
+ )
+ self.places.add(place)
+ last_place = place
+ return last_place
+ return None
+
+ @property
+ def _next_place_counter(self):
+ self.place_counter += 1
+ return self.place_counter
+
+ def ensure_place(
+ self,
+ place_name: str,
+ fid: Optional[str] = None,
+ coord: Optional[Tuple[float, float]] = None,
+ ) -> Place:
+ with self.lock:
+ if place_name not in self.places_by_names:
+ place = None
+ if self.geonames_key:
+ print("Fetching place", place_name, file=sys.stderr)
+ geoname_record = geocoder.geonames(
+ place_name,
+ key=self.geonames_key,
+ session=self.geosession,
+ )
+ if geoname_record and geoname_record.ok:
+ place = self.get_by_geonames_id(geoname_record.geonames_id)
+ if place is None:
+ coord = (
+ self.place_cache.get(fid) if coord is None and fid else coord
+ )
+ start_char = (
+ "P"
+ + hashlib.md5(place_name.encode("utf-8"))
+ .hexdigest()[:6]
+ .upper()
+ )
+ place = Place(
+ ("PFSID" + fid if fid is not None else start_char),
+ place_name,
+ latitude=coord[0] if coord is not None else None,
+ longitude=coord[1] if coord is not None else None,
+ )
+ self.places.add(place)
+ self.places_by_names[place_name] = place
+ return self.places_by_names[place_name]
+
+ # def add_fam(self, father, mother):
+ # """add a family to the family tree
+ # :param father: the father fid or None
+ # :param mother: the mother fid or None
+ # """
+ # if (father, mother) not in self.fam:
+ # self.fam[(father, mother)] = Fam(father, mother, self)
+
+ def add_trio(self, father: Indi | None, mother: Indi | None, child: Indi | None):
+ """add a children relationship to the family tree
+ :param father: the father fid or None
+ :param mother: the mother fid or None
+ :param child: the child fid or None
+ """
+ fam = self.ensure_family(father, mother)
+ if child is not None:
+ fam.add_child(child)
+ child.add_famc(fam)
+
+ if father is not None:
+ father.add_fams(fam)
+ if mother is not None:
+ mother.add_fams(fam)
+
+ def add_parents(self, fids: Iterable[str]) -> Set[str]:
+ """add parents relationships
+ :param fids: a set of fids
+ """
+ parents = set()
+ for fid in [f for f in fids if f in self.indi]:
+ for couple in self.indi[fid].parents:
+ parents |= set(couple)
+ if parents:
+ parents -= set(self.exclude)
+ self.add_indis(set(filter(None, parents)))
+ for fid in [f for f in fids if f in self.indi]:
+ for father, mother in self.indi[fid].parents:
+ self.add_trio(
+ self.indi.get(father) if father else None,
+ self.indi.get(mother) if mother else None,
+ self.indi.get(fid) if fid else None,
+ )
+ return set(filter(None, parents))
+
+ def add_spouses(self, fids: Iterable[str]):
+ """add spouse relationships
+ :param fids: a set of fid
+ """
+
+ async def add(
+ loop, rels: Set[Tuple[Optional[str], Optional[str], Optional[str]]]
+ ):
+ futures = set()
+ for father, mother, relfid in rels:
+ if (
+ father in self.exclude
+ or mother in self.exclude
+ or not father
+ or not mother
+ ):
+ continue
+ fam_id = Fam.gen_id(self.indi[father], self.indi[mother])
+ if self.fam.get(fam_id):
+ futures.add(
+ loop.run_in_executor(
+ None, self.fam[fam_id].add_marriage, relfid
+ )
+ )
+ for future in futures:
+ await future
+
+ rels: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = set()
+ for fid in [f for f in fids if f in self.indi]:
+ rels |= self.indi[fid].spouses
+ # TODO: test this
+ try:
+ loop = asyncio.get_running_loop()
+ except RuntimeError:
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+
+ if rels:
+ all_involved = set.union(
+ set(),
+ *(
+ {father, mother}
+ for father, mother, relfid in rels
+ if father and mother
+ ),
+ )
+ self.add_indis(set(filter(None, all_involved)))
+ for father, mother, _ in rels:
+ if father in self.indi and mother in self.indi:
+ father_indi = self.indi[father]
+ mother_indi = self.indi[mother]
+ fam = self.ensure_family(father_indi, mother_indi)
+ father_indi.add_fams(fam)
+ mother_indi.add_fams(fam)
+
+ loop.run_until_complete(add(loop, rels))
+
+ def add_children(self, fids: Iterable[str]) -> Set[str]:
+ """add children relationships
+ :param fids: a set of fid
+ """
+ rels: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = set()
+ for fid in [f for f in fids if f in self.indi]:
+ rels |= self.indi[fid].children if fid in self.indi else set()
+ children = set()
+ if rels:
+ all_involved = set.union(set(), *(set(rel) for rel in rels if rel))
+ all_involved -= set(self.exclude)
+ self.add_indis(set(filter(None, all_involved)))
+ for father, mother, child in rels:
+ has_child = child in self.indi
+ if not has_child:
+ continue
+
+ father_valid = not father or father in self.indi
+ mother_valid = not mother or mother in self.indi
+ if father_valid and mother_valid and (father or mother):
+ self.add_trio(
+ self.indi.get(father) if father else None,
+ self.indi.get(mother) if mother else None,
+ self.indi.get(child) if child else None,
+ )
+ children.add(child)
+ return set(filter(None, children))
+
+ def add_ordinances(self, fid):
+ """retrieve ordinances
+ :param fid: an individual fid
+ """
+ if fid in self.indi:
+ ret, famc = self.indi[fid].get_ordinances()
+ if famc:
+ # self.fam is keyed by (father_id, mother_id), so we can't look up by fam_id directly.
+ # Find family by iterating values
+ for f in self.fam.values():
+ if f.fid == famc:
+ sc = self.indi[fid].sealing_child
+ if sc:
+ sc.famc = f
+ break
+ for o in ret:
+ spouse_id = o["relationships"]["spouseId"]
+ for f in self.fam.values():
+ if (
+ f.husband
+ and f.husband.fid == fid
+ and f.wife
+ and f.wife.fid == spouse_id
+ ):
+ f.sealing_spouse = Ordinance(o)
+ break
+ if (
+ f.husband
+ and f.husband.fid == spouse_id
+ and f.wife
+ and f.wife.fid == fid
+ ):
+ f.sealing_spouse = Ordinance(o)
+ break
+
+ def reset_num(self):
+ """reset all GEDCOM identifiers"""
+ # TODO: implement this
+ # for husb, wife in self.fam:
+ # self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None
+ # self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None
+ # self.fam[(husb, wife)].chil_num = set(
+ # self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid
+ # )
+ # for fid in self.indi:
+ # self.indi[fid].famc_num = set(
+ # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid
+ # )
+ # self.indi[fid].fams_num = set(
+ # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid
+ # )
+ # self.indi[fid].famc_ids = set(
+ # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid
+ # )
+ # self.indi[fid].fams_ids = set(
+ # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid
+ # )
+
+ def printxml(self, file: BinaryIO):
+ # TODO: implement this
+ # root = ET.Element("root")
+ # doc = ET.SubElement(root, "doc")
+
+ # ET.SubElement(doc, "field1", name="blah").text = "some value1"
+ # ET.SubElement(doc, "field2", name="asdfasd").text = "some vlaue2"
+
+ # tree = ET.ElementTree(root)
+ # tree.write("filename.xml")
+
+ # <?xml version="1.0" encoding="UTF-8"?>
+ # <!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN"
+ # "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">
+ # <database xmlns="http://gramps-project.org/xml/1.7.1/">
+ # <header
+ # <created date="2024-07-07" version="5.2.2"/>
+ # <researcher>
+ # <resname>Barnabás Südy</resname>
+ # </researcher>
+ # </header>
+
+ root = ET.Element("database", xmlns="http://gramps-project.org/xml/1.7.1/")
+
+ header = ET.SubElement(root, "header")
+ ET.SubElement(
+ header,
+ "created",
+ date=datetime.strftime(datetime.now(), "%Y-%m-%d"),
+ version="5.2.2",
+ )
+ researcher = ET.SubElement(header, "researcher")
+ resname = ET.SubElement(researcher, "resname")
+ resname.text = self.display_name
+
+ people = ET.SubElement(root, "people")
+ for indi in sorted(self.indi.values(), key=lambda x: x.id):
+ indi.printxml(people)
+
+ families = ET.SubElement(root, "families")
+ for fam in sorted(self.fam.values(), key=lambda x: x.id):
+ fam.printxml(families)
+
+ events = ET.SubElement(root, "events")
+ for fact in self.facts:
+ fact.printxml(events)
+
+ notes = ET.SubElement(root, "notes")
+ for note in sorted(self.notes, key=lambda x: x.id):
+ note.printxml(notes)
+
+ places = ET.SubElement(root, "places")
+ for place in self.places:
+ place.printxml(places)
+
+ sources = ET.SubElement(root, "sources")
+ for source in self.sources.values():
+ source.printxml(sources)
+
+ citations = ET.SubElement(root, "citations")
+ for citation in self.citations.values():
+ citation.printxml(citations)
+
+ tree = ET.ElementTree(root)
+
+ doctype = '<!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN" "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">'
+ file.write(doctype.encode("utf-8"))
+ tree.write(file, "utf-8")
+
+ def print(self, file=sys.stdout):
+ """print family tree in GEDCOM format"""
+ file.write("0 HEAD\n")
+ file.write("1 CHAR UTF-8\n")
+ file.write("1 GEDC\n")
+ file.write("2 VERS 5.5.1\n")
+ file.write("2 FORM LINEAGE-LINKED\n")
+ file.write("1 SOUR getmyancestors\n")
+ file.write("2 VERS %s\n" % __version__)
+ file.write("2 NAME getmyancestors\n")
+ # Use provided creation date if available, otherwise current time
+ if self.creation_date:
+ date_str = self.creation_date.strftime("%d %b %Y").upper()
+ time_str = self.creation_date.strftime("%H:%M:%S")
+ else:
+ date_str = time.strftime("%d %b %Y").upper()
+ time_str = time.strftime("%H:%M:%S")
+
+ file.write("1 DATE %s\n" % date_str)
+ file.write("2 TIME %s\n" % time_str)
+ file.write("1 SUBM @SUBM@\n")
+ file.write("0 @SUBM@ SUBM\n")
+ file.write("1 NAME %s\n" % self.display_name)
+ # file.write("1 LANG %s\n" % self.lang)
+
+ for fid in sorted(self.indi, key=lambda x: self.indi[x].id or ""):
+ self.indi[fid].print(file)
+ for fam in sorted(self.fam.values(), key=lambda x: x.id or ""):
+ fam.print(file)
+ sources = sorted(self.sources.values(), key=lambda x: x.id or "")
+ for s in sources:
+ s.print(file)
+ # Deduplicate notes by text content before printing
+ seen_texts = set()
+ unique_notes = []
+ for n in sorted(self.notes, key=lambda x: x.id):
+ if n.text not in seen_texts:
+ seen_texts.add(n.text)
+ unique_notes.append(n)
+ for n in unique_notes:
+ n.print(file)
+ file.write("0 TRLR\n")
--- /dev/null
+"""Element classes: Name, Place, Ordinance, Citation"""
+
+import os
+import sys
+import xml.etree.ElementTree as ET
+from typing import Any, Dict, Optional
+from xml.etree.ElementTree import Element
+
+from getmyancestors.classes.constants import ORDINANCES_STATUS
+
+from .records import Note
+from .utils import NAME_MAP, cont
+
+
+class Name:
+ """GEDCOM Name class"""
+
+ def __init__(
+ self, data=None, tree=None, owner_fis=None, kind=None, alternative: bool = False
+ ):
+ self.given = ""
+ self.surname = ""
+ self.prefix = None
+ self.suffix = None
+ self.note = None
+ self.alternative = alternative
+ self.owner_fis = owner_fis
+ self.kind = kind
+ if data:
+ if "parts" in data["nameForms"][0]:
+ for z in data["nameForms"][0]["parts"]:
+ if z["type"] == "http://gedcomx.org/Given":
+ self.given = z["value"]
+ if z["type"] == "http://gedcomx.org/Surname":
+ self.surname = z["value"]
+ if z["type"] == "http://gedcomx.org/Prefix":
+ self.prefix = z["value"]
+ if z["type"] == "http://gedcomx.org/Suffix":
+ self.suffix = z["value"]
+ if "changeMessage" in data.get("attribution", {}):
+ self.note = Note(
+ data["attribution"]["changeMessage"],
+ tree,
+ note_type="Name Note",
+ )
+
+ def __str__(self):
+ return f"{self.given} {self.surname}"
+
+ def __eq__(self, other):
+ if not isinstance(other, Name):
+ return NotImplemented
+ return (
+ self.given == other.given
+ and self.surname == other.surname
+ and self.prefix == other.prefix
+ and self.suffix == other.suffix
+ and self.kind == other.kind
+ and self.alternative == other.alternative
+ and (self.note.text if self.note else None)
+ == (other.note.text if other.note else None)
+ )
+
+ def __hash__(self):
+ return hash(
+ (
+ self.given,
+ self.surname,
+ self.prefix,
+ self.suffix,
+ self.kind,
+ self.alternative,
+ self.note.text if self.note else None,
+ )
+ )
+
+ def printxml(self, parent_element):
+ params = {}
+ if self.kind is not None:
+ params["type"] = NAME_MAP.get(self.kind, self.kind)
+ if self.alternative:
+ params["alt"] = "1"
+ person_name = ET.SubElement(parent_element, "name", **params)
+ ET.SubElement(person_name, "first").text = self.given
+ ET.SubElement(person_name, "surname").text = self.surname
+ # TODO prefix / suffix
+
+ def print(self, file=sys.stdout, typ=None):
+ tmp = "1 NAME %s /%s/" % (self.given, self.surname)
+ if self.suffix:
+ tmp += " " + self.suffix
+ file.write(cont(tmp))
+ if typ:
+ file.write("2 TYPE %s\n" % typ)
+ if self.prefix:
+ file.write("2 NPFX %s\n" % self.prefix)
+ if self.note:
+ self.note.link(file, 2)
+
+
+class Place:
+ """GEDCOM Place class"""
+
+ counter = 0
+
+ def __init__(
+ self,
+ place_id: str,
+ name: str,
+ place_type: Optional[str] = None,
+ parent: Optional["Place"] = None,
+ latitude: Optional[float] = None,
+ longitude: Optional[float] = None,
+ ):
+ self._handle: Optional[str] = None
+ self.name = name
+ self.type = place_type
+ self.id = place_id
+ self.parent = parent
+ self.latitude = latitude
+ self.longitude = longitude
+
+ @property
+ def handle(self):
+ if not self._handle:
+ self._handle = "_" + os.urandom(10).hex()
+ return self._handle
+
+ def print(self, file, indentation=0):
+ file.write("%d @P%s@ PLAC %s\n" % (indentation, self.id, self.name))
+
+ def __eq__(self, other):
+ if not isinstance(other, Place):
+ return NotImplemented
+ return self.name == other.name and self.id == other.id
+
+ def __hash__(self):
+ return hash((self.name, self.id))
+
+ def printxml(self, parent_element):
+ place_element = ET.SubElement(
+ parent_element,
+ "placeobj",
+ handle=self.handle,
+ id=self.id,
+ type=self.type or "Unknown",
+ )
+ ET.SubElement(place_element, "pname", value=self.name)
+ if self.parent:
+ ET.SubElement(place_element, "placeref", hlink=self.parent.handle)
+ if self.latitude is not None and self.longitude is not None:
+ ET.SubElement(
+ place_element, "coord", long=str(self.longitude), lat=str(self.latitude)
+ )
+
+
+class Ordinance:
+ """GEDCOM Ordinance class"""
+
+ def __init__(self, data=None):
+ self.date = self.temple_code = self.status = self.famc = None
+ if data:
+ if "completedDate" in data:
+ self.date = data["completedDate"]
+ if "completedTemple" in data:
+ self.temple_code = data["completedTemple"]["code"]
+ self.status = data.get("status")
+
+ def print(self, file):
+ if self.date:
+ file.write(cont("2 DATE " + self.date))
+ if self.temple_code:
+ file.write("2 TEMP %s\n" % self.temple_code)
+ if self.status in ORDINANCES_STATUS:
+ file.write("2 STAT %s\n" % ORDINANCES_STATUS[self.status])
+ if self.famc:
+ file.write("2 FAMC @F%s@\n" % self.famc.num)
+
+
+class Citation:
+ """Citation class"""
+
+ def __init__(self, data: Dict[str, Any], source):
+ self._handle: Optional[str] = None
+ self.id = data["id"]
+ self.source = source
+ attr = data.get("attribution", {})
+ self.message = attr.get("changeMessage")
+ self.modified = attr.get("modified")
+
+ @property
+ def handle(self):
+ if not self._handle:
+ self._handle = "_" + os.urandom(10).hex()
+ return self._handle
+
+ def printxml(self, parent_element: Element):
+ citation_element = ET.SubElement(
+ parent_element,
+ "citation",
+ handle=self.handle,
+ change=str(int(self.modified / 1000)),
+ id="C" + str(self.id),
+ )
+ ET.SubElement(citation_element, "confidence").text = "2"
+ ET.SubElement(citation_element, "sourceref", hlink=self.source.handle)
--- /dev/null
+"""Record classes: Note, Source, Fact, Memorie"""
+
+import hashlib
+import os
+import sys
+import xml.etree.ElementTree as ET
+from typing import TYPE_CHECKING, Any, Dict, Optional
+from urllib.parse import unquote, unquote_plus
+from xml.etree.ElementTree import Element
+
+from getmyancestors.classes.constants import FACT_EVEN, FACT_TAGS
+
+from .utils import cont
+
+if TYPE_CHECKING:
+ from .core import Tree
+
+
+class Note:
+ """GEDCOM Note class"""
+
+ def __init__(self, text="", tree=None, num=None, num_prefix=None, note_type=None):
+ self._handle = None
+ self.note_type = note_type or "Source Note"
+ self.num_prefix = num_prefix
+ self.text = text.strip()
+
+ if num:
+ self.num = num
+ else:
+ # Use hash of text for deterministic ID
+ self.num = hashlib.md5(self.text.encode("utf-8")).hexdigest()[:10].upper()
+
+ # Restore debug print if verbose
+ if tree and hasattr(tree, "fs") and getattr(tree.fs, "verbose", False):
+ print(f"##### Creating Note: {num_prefix}, {self.num}", file=sys.stderr)
+
+ if tree:
+ if self in tree.notes:
+ if hasattr(tree, "fs") and getattr(tree.fs, "verbose", False):
+ preview = (
+ self.text[:60].replace("\n", " ") if self.text else "<EMPTY>"
+ )
+ print(
+ f"♻️ Deduplicated {self.note_type}: ID={self.id} Text='{preview}...' (Prefix={self.num_prefix})",
+ file=sys.stderr,
+ )
+ tree.notes.add(self)
+
+ def __eq__(self, other):
+ if not isinstance(other, Note):
+ return False
+ return self.text == other.text and self.num_prefix == other.num_prefix
+
+ def __hash__(self):
+ return hash((self.text, self.num_prefix))
+
+ def __str__(self):
+ return f"{self.num}. {self.text}"
+
+ @property
+ def id(self):
+ return (
+ f"{self.num_prefix}_{self.num}"
+ if self.num_prefix is not None
+ else str(self.num)
+ )
+
+ def print(self, file=sys.stdout):
+
+ # NOTE: print is not passed tree, so we can't check verbose easily unless we store it.
+ # But Note is simple. Maybe skip this one or check global?
+ # The user specifically asked for L34.
+ file.write(cont("0 @N%s@ NOTE %s" % (self.id, self.text)))
+
+ def link(self, file=sys.stdout, level=1):
+
+ file.write("%s NOTE @N%s@\n" % (level, self.id))
+
+ @property
+ def handle(self):
+ if not self._handle:
+ self._handle = "_" + os.urandom(10).hex()
+ return self._handle
+
+ def printxml(self, parent_element: Element) -> None:
+ note_element = ET.SubElement(
+ parent_element,
+ "note",
+ handle=self.handle,
+ id=self.id,
+ type="Source Note",
+ )
+ ET.SubElement(note_element, "text").text = self.text
+
+
+class Source:
+ """GEDCOM Source class"""
+
+ counter: int = 0
+
+ def __init__(self, data=None, tree=None, num=None):
+ if num:
+ self.num = num
+ else:
+ Source.counter += 1
+ self.num = Source.counter
+
+ self._handle = None
+ self.tree = tree
+ self.url = self.citation = self.title = self.fid = None
+ self.notes = set()
+ if data:
+ self.fid = data["id"]
+ if "about" in data:
+ self.url = data["about"].replace(
+ "familysearch.org/platform/memories/memories",
+ "www.familysearch.org/photos/artifacts",
+ )
+ if "citations" in data:
+ self.citation = data["citations"][0]["value"]
+ if "titles" in data:
+ self.title = data["titles"][0]["value"]
+ if "notes" in data:
+ notes = [n["text"] for n in data["notes"] if n["text"]]
+ for _idx, n in enumerate(notes):
+ self.notes.add(
+ Note(
+ n,
+ self.tree,
+ num=None,
+ note_type="Source Note",
+ )
+ )
+ self.modified = data["attribution"]["modified"]
+
+ def __str__(self):
+ return f"{self.num}. {self.title}"
+
+ @property
+ def id(self):
+ return "S" + str(self.fid or self.num)
+
+ @property
+ def handle(self):
+ if not self._handle:
+ self._handle = "_" + os.urandom(10).hex()
+ return self._handle
+
+ def print(self, file=sys.stdout):
+ file.write("0 @S%s@ SOUR \n" % self.id)
+ if self.title:
+ file.write(cont("1 TITL " + self.title))
+ if self.citation:
+ file.write(cont("1 AUTH " + self.citation))
+ if self.url:
+ file.write(cont("1 PUBL " + self.url))
+ for n in sorted(self.notes, key=lambda x: x.id or ""):
+ n.link(file, 1)
+ file.write("1 REFN %s\n" % self.fid)
+
+ def link(self, file=sys.stdout, level=1):
+ file.write("%s SOUR @S%s@\n" % (level, self.id))
+
+ def printxml(self, parent_element: Element) -> None:
+ source_element = ET.SubElement(
+ parent_element,
+ "source",
+ handle=self.handle,
+ change=str(int(self.modified / 1000)),
+ id=self.id,
+ )
+ if self.title:
+ ET.SubElement(source_element, "stitle").text = self.title
+ if self.citation:
+ ET.SubElement(source_element, "sauthor").text = self.citation
+ if self.url:
+ ET.SubElement(source_element, "spubinfo").text = self.url
+ if self.fid:
+ ET.SubElement(source_element, "srcattribute", type="REFN", value=self.fid)
+
+
+class Fact:
+ """GEDCOM Fact class"""
+
+ counter: Dict[str, int] = {}
+
+ def __init__(self, data=None, tree: Optional["Tree"] = None, num_prefix=None):
+ self.value: Optional[str] = None
+ self.type: Optional[str] = None
+ self.date: Optional[str] = None
+ self.date_type: Optional[str] = None
+ self.place = None
+ self.note = None
+ self.map = None
+ self._handle = None
+ if data:
+ if "value" in data:
+ self.value = data["value"]
+ if "type" in data:
+ self.type = data["type"]
+ self.fs_type = self.type
+ if self.type in FACT_EVEN and tree and tree.fs:
+ # Cast or ignore, FS session dynamic attr _
+ self.type = tree.fs._(FACT_EVEN[self.type])
+ elif self.type[:6] == "data:,":
+ self.type = unquote(self.type[6:])
+ elif self.type not in FACT_TAGS:
+ self.type = None
+
+ self.num_prefix = (
+ f"{num_prefix}_{FACT_TAGS[self.type]}"
+ if num_prefix and self.type in FACT_TAGS
+ else num_prefix
+ )
+ Fact.counter[self.num_prefix or "None"] = (
+ Fact.counter.get(self.num_prefix or "None", 0) + 1
+ )
+ self.num = Fact.counter[self.num_prefix or "None"]
+ if data:
+ if "date" in data:
+ if "formal" in data["date"]:
+ self.date = data["date"]["formal"].split("+")[-1].split("/")[0]
+ if data["date"]["formal"].startswith("A+"):
+ self.date_type = "about"
+ elif data["date"]["formal"].startswith("/+"):
+ self.date_type = "before"
+ elif data["date"]["formal"].endswith("/"):
+ self.date_type = "after"
+ else:
+ self.date = data["date"]["original"]
+ if "place" in data:
+ place = data["place"]
+ place_name = place["original"]
+ place_id = (
+ place["description"][1:]
+ if "description" in place
+ and tree
+ and place["description"][1:] in (tree.places or [])
+ else None
+ )
+ # Import Place locally to avoid circular import
+
+ if tree:
+ self.place = tree.ensure_place(place_name, place_id)
+ if "changeMessage" in data["attribution"]:
+ self.note = Note(
+ data["attribution"]["changeMessage"],
+ tree,
+ num_prefix="E" + self.num_prefix if self.num_prefix else None,
+ note_type="Event Note",
+ )
+ if self.type == "http://gedcomx.org/Death" and not (
+ self.date or self.place
+ ):
+ self.value = "Y"
+
+ if tree:
+ tree.facts.add(self)
+
+ @property
+ def id(self):
+ return (
+ f"{self.num_prefix}_{self.num}"
+ if self.num_prefix is not None
+ else str(self.num)
+ )
+
+ @property
+ def handle(self):
+ if not self._handle:
+ self._handle = "_" + os.urandom(10).hex()
+ return self._handle
+
+ def __eq__(self, other):
+ """Facts are equal if type, date, date_type, place, and value match."""
+ if not isinstance(other, Fact):
+ return False
+ # Compare by semantic content, not object identity
+ place_name = self.place.name if self.place else None
+ other_place_name = other.place.name if other.place else None
+ return (
+ self.type == other.type
+ and self.date == other.date
+ and self.date_type == other.date_type
+ and place_name == other_place_name
+ and self.value == other.value
+ and (self.note.text if self.note else None)
+ == (other.note.text if other.note else None)
+ )
+
+ def __hash__(self):
+ """Hash based on semantic content for set deduplication."""
+ place_name = self.place.name if self.place else None
+ return hash(
+ (
+ self.type,
+ self.date,
+ self.date_type,
+ place_name,
+ self.value,
+ self.note.text if self.note else None,
+ )
+ )
+
+ def printxml(self, parent_element):
+ event_element = ET.SubElement(
+ parent_element,
+ "event",
+ handle=self.handle,
+ id=self.id,
+ )
+ ET.SubElement(event_element, "type").text = (
+ unquote_plus(self.type[len("http://gedcomx.org/") :])
+ if self.type and self.type.startswith("http://gedcomx.org/")
+ else self.type
+ )
+ if self.date:
+ params: Dict[str, Any] = {"val": self.date}
+ if self.date_type is not None:
+ params["type"] = self.date_type
+ ET.SubElement(event_element, "datestr", **params)
+ if self.place:
+ ET.SubElement(event_element, "place", hlink=self.place.handle)
+ if self.note:
+ ET.SubElement(event_element, "noteref", hlink=self.note.handle)
+
+ def print(self, file):
+ if self.type in FACT_TAGS:
+ tmp = "1 " + FACT_TAGS[self.type]
+ if self.value:
+ tmp += " " + self.value
+ file.write(cont(tmp))
+ elif self.type:
+ file.write("1 EVEN\n2 TYPE %s\n" % self.type)
+ if self.value:
+ file.write(cont("2 NOTE Description: " + self.value))
+ else:
+ return
+ if self.date:
+ file.write(cont("2 DATE " + self.date))
+ if self.place:
+ self.place.print(file, 2)
+ if self.map:
+ latitude, longitude = self.map
+ file.write("3 MAP\n4 LATI %s\n4 LONG %s\n" % (latitude, longitude))
+ if self.note:
+ self.note.link(file, 2)
+
+
+class Memorie:
+ """GEDCOM Memorie class"""
+
+ def __init__(self, data=None):
+ self.description = self.url = None
+ if data and "links" in data:
+ self.url = data["about"]
+ if "titles" in data:
+ self.description = data["titles"][0]["value"]
+ if "descriptions" in data:
+ self.description = (
+ "" if not self.description else self.description + "\n"
+ ) + data["descriptions"][0]["value"]
+
+ def print(self, file):
+ file.write("1 OBJE\n2 FORM URL\n")
+ if self.description:
+ file.write(cont("2 TITL " + self.description))
+ if self.url:
+ file.write(cont("2 FILE " + self.url))
--- /dev/null
+"""Utility constants and functions for tree package"""
+
+import re
+
+# Constants
+COUNTY = "County"
+COUNTRY = "Country"
+CITY = "City"
+
+
+NAME_MAP = {
+ "preferred": "Preferred Name",
+ "nickname": "Nickname",
+ "birthname": "Birth Name",
+ "aka": "Also Known As",
+ "married": "Married Name",
+}
+
+
+GEONAME_FEATURE_MAP = {
+ "ADM1": COUNTY, # first-order administrative division a primary administrative division of a country, such as a state in the United States
+ "ADM1H": COUNTY, # historical first-order administrative division a former first-order administrative division
+ "ADM2": COUNTY, # second-order administrative division a subdivision of a first-order administrative division
+ "ADM2H": COUNTY, # historical second-order administrative division a former second-order administrative division
+ "ADM3": COUNTY, # third-order administrative division a subdivision of a second-order administrative division
+ "ADM3H": COUNTY, # historical third-order administrative division a former third-order administrative division
+ "ADM4": COUNTY, # fourth-order administrative division a subdivision of a third-order administrative division
+ "ADM4H": COUNTY, # historical fourth-order administrative division a former fourth-order administrative division
+ "ADM5": COUNTY, # fifth-order administrative division a subdivision of a fourth-order administrative division
+ "ADM5H": COUNTY, # historical fifth-order administrative division a former fifth-order administrative division
+ "ADMD": COUNTY, # administrative division an administrative division of a country, undifferentiated as to administrative level
+ "ADMDH": COUNTY, # historical administrative division a former administrative division of a political entity, undifferentiated as to administrative level
+ # 'LTER': leased area a tract of land leased to another country, usually for military installations
+ "PCL": COUNTRY, # political entity
+ "PCLD": COUNTRY, # dependent political entity
+ "PCLF": COUNTRY, # freely associated state
+ "PCLH": COUNTRY, # historical political entity a former political entity
+ "PCLI": COUNTRY, # independent political entity
+ "PCLIX": COUNTRY, # section of independent political entity
+ "PCLS": COUNTRY, # semi-independent political entity
+ "PPL": CITY, # populated place a city, town, village, or other agglomeration of buildings where people live and work
+ "PPLA": CITY, # seat of a first-order administrative division seat of a first-order administrative division (PPLC takes precedence over PPLA)
+ "PPLA2": CITY, # seat of a second-order administrative division
+ "PPLA3": CITY, # seat of a third-order administrative division
+ "PPLA4": CITY, # seat of a fourth-order administrative division
+ "PPLA5": CITY, # seat of a fifth-order administrative division
+ "PPLC": CITY, # capital of a political entity
+ "PPLCH": CITY, # historical capital of a political entity a former capital of a political entity
+ "PPLF": CITY, # farm village a populated place where the population is largely engaged in agricultural activities
+ "PPLG": CITY, # seat of government of a political entity
+ "PPLH": CITY, # historical populated place a populated place that no longer exists
+ "PPLL": CITY, # populated locality an area similar to a locality but with a small group of dwellings or other buildings
+ "PPLQ": CITY, # abandoned populated place
+ "PPLR": CITY, # religious populated place a populated place whose population is largely engaged in religious occupations
+ "PPLS": CITY, # populated places cities, towns, villages, or other agglomerations of buildings where people live and work
+ "PPLW": CITY, # destroyed populated place a village, town or city destroyed by a natural disaster, or by war
+ "PPLX": CITY, # section of populated place
+}
+
+
+def cont(string):
+ """parse a GEDCOM line adding CONT and CONT tags if necessary"""
+ level = int(string[:1]) + 1
+ lines = string.splitlines()
+ res = []
+ max_len = 255
+ for line in lines:
+ c_line = line
+ to_conc = []
+ while len(c_line.encode("utf-8")) > max_len:
+ index = min(max_len, len(c_line) - 2)
+ while (
+ len(c_line[:index].encode("utf-8")) > max_len
+ or re.search(r"[ \t\v]", c_line[index - 1 : index + 1])
+ ) and index > 1:
+ index -= 1
+ to_conc.append(c_line[:index])
+ c_line = c_line[index:]
+ max_len = 248
+ to_conc.append(c_line)
+ res.append(("\n%s CONC " % level).join(to_conc))
+ max_len = 248
+ return ("\n%s CONT " % level).join(res) + "\n"
# global imports
import os
import sys
-from tkinter import (
- Tk,
- PhotoImage,
-)
+
+try:
+ from tkinter import PhotoImage, Tk
+except ImportError:
+ print("\n" + "=" * 60)
+ print("ERROR: Tkinter is not available.")
+ print("=" * 60)
+ print("The graphical interface requires Tkinter.")
+ print("\nInstallation instructions:")
+ print("- Ubuntu/Debian: sudo apt install python3-tk")
+ print("- Fedora/RHEL: sudo dnf install python3-tkinter")
+ print("- macOS: brew install python-tk")
+ print("- Windows: Usually included with Python installation")
+ print("\n" + "=" * 60)
+ sys.exit(1)
# local imports
-from getmyancestors.classes.gui import (
- FStoGEDCOM,
-)
+from getmyancestors.classes.gui import FStoGEDCOM
def main():
--- /dev/null
+#!/usr/bin/env python3
+# coding: utf-8
+
+# global imports
+from __future__ import print_function
+
+import asyncio
+import getpass
+import os
+import re
+import sys
+import time
+from datetime import datetime
+from typing import List
+
+import typer
+
+from getmyancestors.classes.session import CachedSession, GMASession, Session
+from getmyancestors.classes.tree import Tree
+
+app = typer.Typer(
+ help="Retrieve GEDCOM data from FamilySearch Tree",
+ add_completion=True,
+ no_args_is_help=True,
+ context_settings={"help_option_names": ["-h", "--help"]},
+)
+
+
+@app.command()
+def main(
+ username: str = typer.Option(
+ None, "-u", "--username", metavar="<STR>", help="FamilySearch username"
+ ),
+ password: str = typer.Option(
+ None, "-p", "--password", metavar="<STR>", help="FamilySearch password"
+ ),
+ individuals: List[str] = typer.Option(
+ None,
+ "-i",
+ "--individuals",
+ metavar="<STR>",
+ help="List of individual FamilySearch IDs for whom to retrieve ancestors",
+ ),
+ exclude: List[str] = typer.Option(
+ None,
+ "-e",
+ "--exclude",
+ metavar="<STR>",
+ help="List of individual FamilySearch IDs to exclude from the tree",
+ ),
+ ascend: int = typer.Option(
+ 4, "-a", "--ascend", metavar="<INT>", help="Number of generations to ascend [4]"
+ ),
+ descend: int = typer.Option(
+ 0,
+ "-d",
+ "--descend",
+ metavar="<INT>",
+ help="Number of generations to descend [0]",
+ ),
+ distance: int = typer.Option(
+ 0,
+ "--distance",
+ metavar="<INT>",
+ help="The maxium distance from the starting individuals [0]. If distance is set, ascend and descend will be ignored.",
+ ),
+ only_blood_relatives: bool = typer.Option(
+ False,
+ "--only-blood-relatives",
+ help="Only include blood relatives in the tree [False]",
+ ),
+ marriage: bool = typer.Option(
+ False,
+ "-m",
+ "--marriage",
+ help="Add spouses and couples information [False]",
+ ),
+ cache: bool = typer.Option(
+ True, "--cache/--no-cache", help="Enable/Disable http cache [True]"
+ ),
+ cache_control: bool = typer.Option(
+ True,
+ "--cache-control/--no-cache-control",
+ help="Disable cache-control (use dumb cache) [True]",
+ ),
+ get_contributors: bool = typer.Option(
+ False,
+ "-r",
+ "--get-contributors",
+ help="Add list of contributors in notes [False]",
+ ),
+ get_ordinances: bool = typer.Option(
+ False,
+ "-c",
+ "--get_ordinances",
+ help="Add LDS ordinances (need LDS account) [False]",
+ ),
+ verbose: bool = typer.Option(
+ False, "-v", "--verbose", help="Increase output verbosity [False]"
+ ),
+ timeout: int = typer.Option(
+ 60, "-t", "--timeout", metavar="<INT>", help="Timeout in seconds [60]"
+ ),
+ rate_limit: int = typer.Option(
+ 5,
+ "-R",
+ "--rate-limit",
+ metavar="<INT>",
+ help="Maximum requests per second [5]",
+ ),
+ xml: bool = typer.Option(
+ False,
+ "-x",
+ "--xml",
+ help="To print the output in Gramps XML format [False]",
+ ),
+ show_password: bool = typer.Option(
+ False, "--show-password", help="Show password in .settings file [False]"
+ ),
+ save_settings: bool = typer.Option(
+ False, "--save-settings", help="Save settings into file [False]"
+ ),
+ geonames: str = typer.Option(
+ None,
+ "-g",
+ "--geonames",
+ metavar="<STR>",
+ help="Geonames.org username in order to download place data",
+ ),
+ client_id: str = typer.Option(
+ None, "--client_id", metavar="<STR>", help="Use Specific Client ID"
+ ),
+ redirect_uri: str = typer.Option(
+ None, "--redirect_uri", metavar="<STR>", help="Use Specific Redirect Uri"
+ ),
+ creation_date: str = typer.Option(
+ None,
+ "--creation-date",
+ metavar="<ISO8601>",
+ help="Override creation date in GEDCOM header (YYYY-MM-DDTHH:MM:SS)",
+ ),
+ outfile: str = typer.Option(
+ None, "-o", "--outfile", metavar="<FILE>", help="output GEDCOM file [stdout]"
+ ),
+ logfile: str = typer.Option(
+ None, "-l", "--logfile", metavar="<FILE>", help="output log file [stderr]"
+ ),
+ extra_individuals: List[str] = typer.Argument(None, hidden=True),
+):
+ """
+ Retrieve GEDCOM data from FamilySearch Tree
+ """
+ # NOISY DEBUG FOR CI
+ if os.environ.get("GMA_DEBUG"):
+ print(
+ f"DEBUG: GMA_OFFLINE_MODE={os.environ.get('GMA_OFFLINE_MODE')}",
+ file=sys.stderr,
+ )
+ print(f"DEBUG: GMA_DEBUG={os.environ.get('GMA_DEBUG')}", file=sys.stderr)
+ if extra_individuals:
+ if individuals is None:
+ individuals = []
+ individuals.extend(extra_individuals)
+
+ # dummy translation function
+ def _(s):
+ return s
+
+ # Forces stdout to use UTF-8 or at least not crash on unknown characters
+ if hasattr(sys.stdout, "reconfigure"):
+ try:
+ sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+ except Exception:
+ pass
+
+ # Manually handle logfile opening (FileType is deprecated)
+ logfile_handle = None
+ if logfile:
+ try:
+ # pylint: disable=consider-using-with
+ logfile_handle = open(logfile, "w", encoding="UTF-8")
+ except OSError as e:
+ print(f"Could not open logfile: {e}", file=sys.stderr)
+ raise typer.Exit(code=2) from None
+
+ if individuals:
+ for fid in individuals:
+ if not re.fullmatch(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid):
+ print("Invalid FamilySearch ID: " + fid, file=sys.stderr)
+ raise typer.Exit(code=1)
+ if exclude:
+ for fid in exclude:
+ if not re.fullmatch(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid):
+ print("Invalid FamilySearch ID: " + fid, file=sys.stderr)
+ raise typer.Exit(code=1)
+
+ if not username:
+ if verbose:
+ print("⚠️ Warning: getting username from command line, env var not set.")
+ username = input("Enter FamilySearch username: ")
+ if not password:
+ if os.getenv("FAMILYSEARCH_PASS"):
+ if verbose:
+ print("✅ Using password from env var.")
+ password = os.getenv("FAMILYSEARCH_PASS") or ""
+ else:
+ if verbose:
+ print("⚠️ Warning: getting password from command line, env var not set.")
+ password = getpass.getpass("Enter FamilySearch password: ")
+
+ if verbose:
+ print("✅ Using username: " + username)
+ print(f"✅ Using password: {len(password)} digits long.")
+
+ time_count = time.time()
+
+ # Report settings used when getmyancestors is executed
+ if save_settings and outfile and outfile != "<stdout>":
+
+ formatting = "{:74}{:\\t>1}\\n"
+ settings_name = outfile.rsplit(".", 1)[0] + ".settings"
+ try:
+ with open(settings_name, "w", encoding="utf-8") as settings_file:
+ settings_file.write(
+ formatting.format("time stamp: ", time.strftime("%X %x %Z"))
+ )
+ # Reconstruct args for settings file
+ # This is a bit manual since we don't have Namespace, but feasible
+ params = locals()
+ for key, val in params.items():
+ if key in [
+ "settings_file",
+ "formatting",
+ "settings_name",
+ "_",
+ "logfile_handle",
+ "time_count",
+ "params",
+ ]:
+ continue
+ if key == "password" and not show_password:
+ val = "******"
+ settings_file.write(
+ formatting.format(f"--{key.replace('_', '-')}", str(val))
+ )
+
+ except OSError as exc:
+ print(
+ "Unable to write %s: %s" % (settings_name, repr(exc)), file=sys.stderr
+ )
+
+ # initialize a FamilySearch session and a family tree object
+ print(_("Login to FamilySearch..."), file=sys.stderr)
+
+ # Common params
+ session_kwargs = {
+ "username": username,
+ "password": password,
+ "client_id": client_id,
+ "redirect_uri": redirect_uri,
+ "verbose": verbose,
+ "logfile": logfile_handle,
+ "timeout": timeout,
+ "cache_control": cache_control,
+ "requests_per_second": rate_limit,
+ }
+
+ if cache:
+ print(_("Using cache..."), file=sys.stderr)
+ fs: GMASession = CachedSession(**session_kwargs) # type: ignore
+ else:
+ fs = Session(**session_kwargs)
+
+ if not fs.logged:
+ raise typer.Exit(code=2)
+ _ = fs._
+
+ creation_dt = None
+ if creation_date:
+ try:
+ creation_dt = datetime.fromisoformat(creation_date)
+ except ValueError:
+ print(
+ f"Invalid creation date format: {creation_date}. Expected ISO 8601 (YYYY-MM-DDTHH:MM:SS)",
+ file=sys.stderr,
+ )
+ raise typer.Exit(code=1) from None
+
+ tree = Tree(
+ fs,
+ exclude=exclude,
+ geonames_key=geonames,
+ only_blood_relatives=only_blood_relatives,
+ creation_date=creation_dt,
+ )
+
+ # check LDS account
+ if get_ordinances:
+ test = fs.get_url(
+ "/service/tree/tree-data/reservations/person/%s/ordinances" % fs.fid, {}
+ )
+ if not test or test.get("status") != "OK":
+ raise typer.Exit(code=2)
+
+ success = False
+ try:
+ # add list of starting individuals to the family tree
+ todo_list = individuals if individuals else ([fs.fid] if fs.fid else [])
+ if not todo_list:
+ raise typer.Exit(code=1)
+ print(_("Downloading starting individuals..."), file=sys.stderr)
+ tree.add_indis(todo_list)
+
+ # download ancestors
+ if distance == 0:
+ todo = set(tree.indi.keys())
+ done = set()
+ for i in range(ascend):
+ if not todo:
+ break
+ done |= todo
+ print(
+ _("Downloading %s. of generations of ancestors...") % (i + 1),
+ file=sys.stderr,
+ )
+ todo = tree.add_parents(sorted(todo)) - done
+
+ # download descendants
+ todo = set(tree.indi.keys())
+ done = set()
+ for i in range(descend):
+ if not todo:
+ break
+ done |= todo
+ print(
+ _("Downloading %s. of generations of descendants...") % (i + 1),
+ file=sys.stderr,
+ )
+ todo = tree.add_children(sorted(todo)) - done
+
+ # download spouses
+ if marriage:
+ print(
+ _("Downloading spouses and marriage information..."),
+ file=sys.stderr,
+ )
+ todo = set(tree.indi.keys())
+ tree.add_spouses(sorted(todo))
+
+ else:
+ todo_bloodline = set(tree.indi.keys())
+ # TODO: check for regressons here, since we removed a set()
+ done = set()
+ for dist in range(distance):
+ if not todo_bloodline:
+ break
+ done |= todo_bloodline
+ print(
+ _("Downloading individuals at distance %s...") % (dist + 1),
+ file=sys.stderr,
+ )
+ parents = tree.add_parents(sorted(todo_bloodline)) - done
+ children = tree.add_children(sorted(todo_bloodline)) - done
+
+ if marriage:
+ print(
+ _("Downloading spouses and marriage information..."),
+ file=sys.stderr,
+ )
+ todo = set(tree.indi.keys())
+ tree.add_spouses(sorted(todo))
+
+ todo_bloodline = parents | children
+
+ # download ordinances, notes and contributors
+ async def download_stuff(loop):
+ futures = set()
+ for fid, indi in tree.indi.items():
+ futures.add(loop.run_in_executor(None, indi.get_notes))
+ if get_ordinances:
+ futures.add(loop.run_in_executor(None, tree.add_ordinances, fid))
+ if get_contributors:
+ futures.add(loop.run_in_executor(None, indi.get_contributors))
+ for fam in tree.fam.values():
+ futures.add(loop.run_in_executor(None, fam.get_notes))
+ if get_contributors:
+ futures.add(loop.run_in_executor(None, fam.get_contributors))
+ for future in futures:
+ await future
+
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ print(
+ _("Downloading notes")
+ + (
+ (("," if get_contributors else _(" and")) + _(" ordinances"))
+ if get_ordinances
+ else ""
+ )
+ + (_(" and contributors") if get_contributors else "")
+ + "...",
+ file=sys.stderr,
+ )
+ loop.run_until_complete(download_stuff(loop))
+
+ success = True
+
+ finally:
+ if logfile_handle:
+ logfile_handle.close()
+
+ if success:
+ tree.reset_num()
+ output_format = "XML" if xml else "GEDCOM"
+ print(_("Generating output..."), file=sys.stderr)
+ print(
+ _("Generating %s with %d individuals...")
+ % (output_format, len(tree.indi)),
+ file=sys.stderr,
+ )
+ if xml:
+ if outfile:
+ with open(outfile, "wb") as f:
+ tree.printxml(f)
+ else:
+ tree.printxml(sys.stdout.buffer)
+ else:
+ if outfile:
+ with open(outfile, "w", encoding="UTF-8") as f_ged:
+ tree.print(f_ged)
+ else:
+ tree.print(sys.stdout)
+
+ # Statistics printout (abbreviated for brevity)
+ print(
+ _(
+ "Downloaded %s individuals, %s families, %s sources and %s notes "
+ "in %s seconds with %s HTTP requests."
+ )
+ % (
+ str(len(tree.indi)),
+ str(len(tree.fam)),
+ str(len(tree.sources)),
+ str(len(tree.notes)),
+ str(round(time.time() - time_count)),
+ str(fs.counter),
+ ),
+ file=sys.stderr,
+ )
+
+
+if __name__ == "__main__":
+ app()
+++ /dev/null
-# coding: utf-8
-
-# global imports
-from __future__ import print_function
-import re
-import sys
-import time
-from urllib.parse import unquote
-import getpass
-import asyncio
-import argparse
-
-# local imports
-from getmyancestors.classes.tree import Tree
-from getmyancestors.classes.session import Session
-from getmyancestors.classes.session import CachedSession
-
-
-def main():
- parser = argparse.ArgumentParser(
- description="Retrieve GEDCOM data from FamilySearch Tree (4 Jul 2016)",
- add_help=False,
- usage="getmyancestors -u username -p password [options]",
- )
- parser.add_argument(
- "-u", "--username", metavar="<STR>", type=str, help="FamilySearch username"
- )
- parser.add_argument(
- "-p", "--password", metavar="<STR>", type=str, help="FamilySearch password"
- )
- parser.add_argument(
- "-i",
- "--individuals",
- metavar="<STR>",
- nargs="+",
- type=str,
- help="List of individual FamilySearch IDs for whom to retrieve ancestors",
- )
- parser.add_argument(
- "-e",
- "--exclude",
- metavar="<STR>",
- nargs="+",
- type=str,
- help="List of individual FamilySearch IDs to exclude from the tree",
- )
- parser.add_argument(
- "-a",
- "--ascend",
- metavar="<INT>",
- type=int,
- default=4,
- help="Number of generations to ascend [4]",
- )
- parser.add_argument(
- "-d",
- "--descend",
- metavar="<INT>",
- type=int,
- default=0,
- help="Number of generations to descend [0]",
- )
- parser.add_argument(
- '--distance',
- metavar="<INT>",
- type=int,
- default=0,
- help="The maxium distance from the starting individuals [0]. If distance is set, ascend and descend will be ignored.",
- )
- parser.add_argument(
- '--only-blood-relatives',
- action="store_true",
- default=True,
- help="Only include blood relatives in the tree [False]",
- )
- parser.add_argument(
- "-m",
- "--marriage",
- action="store_true",
- default=False,
- help="Add spouses and couples information [False]",
- )
- parser.add_argument(
- "--cache",
- action="store_true",
- default=False,
- help="Use of http cache to reduce requests during testing [False]",
- )
- parser.add_argument(
- "-r",
- "--get-contributors",
- action="store_true",
- default=False,
- help="Add list of contributors in notes [False]",
- )
- parser.add_argument(
- "-c",
- "--get_ordinances",
- action="store_true",
- default=False,
- help="Add LDS ordinances (need LDS account) [False]",
- )
- parser.add_argument(
- "-v",
- "--verbose",
- action="store_true",
- default=False,
- help="Increase output verbosity [False]",
- )
- parser.add_argument(
- "-t",
- "--timeout",
- metavar="<INT>",
- type=int,
- default=60,
- help="Timeout in seconds [60]",
- )
-
- parser.add_argument(
- "-x",
- "--xml",
- action="store_true",
- default=False,
- help="To print the output in Gramps XML format [False]",
- )
- parser.add_argument(
- "--show-password",
- action="store_true",
- default=False,
- help="Show password in .settings file [False]",
- )
- parser.add_argument(
- "--save-settings",
- action="store_true",
- default=False,
- help="Save settings into file [False]",
- )
- parser.add_argument(
- "-g",
- "--geonames",
- metavar="<STR>",
- type=str,
- help="Geonames.org username in order to download place data",
- )
- try:
- parser.add_argument(
- "-o",
- "--outfile",
- metavar="<FILE>",
- # type=argparse.FileType("w", encoding="UTF-8"),
- # default=sys.stdout,
- help="output GEDCOM file [stdout]",
- )
- parser.add_argument(
- "-l",
- "--logfile",
- metavar="<FILE>",
- type=argparse.FileType("w", encoding="UTF-8"),
- default=False,
- help="output log file [stderr]",
- )
- except TypeError:
- sys.stderr.write("Python >= 3.4 is required to run this script\n")
- sys.stderr.write("(see https://docs.python.org/3/whatsnew/3.4.html#argparse)\n")
- sys.exit(2)
-
- # extract arguments from the command line
- try:
- parser.error = parser.exit
- args = parser.parse_args()
- except SystemExit:
- parser.print_help(file=sys.stderr)
- sys.exit(2)
- if args.individuals:
- for fid in args.individuals:
- if not re.match(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid):
- sys.exit("Invalid FamilySearch ID: " + fid)
- if args.exclude:
- for fid in args.exclude:
- if not re.match(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid):
- sys.exit("Invalid FamilySearch ID: " + fid)
-
- args.username = (
- args.username if args.username else input("Enter FamilySearch username: ")
- )
- args.password = (
- args.password
- if args.password
- else getpass.getpass("Enter FamilySearch password: ")
- )
-
- time_count = time.time()
-
- # Report settings used when getmyancestors is executed
- if args.save_settings and args.outfile.name != "<stdout>":
-
- def parse_action(act):
- if not args.show_password and act.dest == "password":
- return "******"
- value = getattr(args, act.dest)
- return str(getattr(value, "name", value))
-
- formatting = "{:74}{:\t>1}\n"
- settings_name = args.outfile.name.split(".")[0] + ".settings"
- try:
- with open(settings_name, "w") as settings_file:
- settings_file.write(
- formatting.format("time stamp: ", time.strftime("%X %x %Z"))
- )
- for action in parser._actions:
- settings_file.write(
- formatting.format(
- action.option_strings[-1], parse_action(action)
- )
- )
- except OSError as exc:
- print(
- "Unable to write %s: %s" % (settings_name, repr(exc)), file=sys.stderr
- )
-
- # initialize a FamilySearch session and a family tree object
- print("Login to FamilySearch...", file=sys.stderr)
- if args.cache:
- print("Using cache...", file=sys.stderr)
- fs = CachedSession(args.username, args.password, args.verbose, args.logfile, args.timeout)
- else:
- fs = Session(args.username, args.password, args.verbose, args.logfile, args.timeout)
- if not fs.logged:
- sys.exit(2)
- _ = fs._
- tree = Tree(
- fs,
- exclude=args.exclude,
- geonames_key=args.geonames,
- )
-
- # check LDS account
- if args.get_ordinances:
- test = fs.get_url(
- "/service/tree/tree-data/reservations/person/%s/ordinances" % fs.fid, {}
- )
- if test["status"] != "OK":
- sys.exit(2)
-
- try:
- # add list of starting individuals to the family tree
- todo = args.individuals if args.individuals else [fs.fid]
- print(_("Downloading starting individuals..."), file=sys.stderr)
- tree.add_indis(todo)
-
-
-
- # download ancestors
- if args.distance == 0:
- todo = set(tree.indi.keys())
- done = set()
- for i in range(args.ascend):
- if not todo:
- break
- done |= todo
- print(
- _("Downloading %s. of generations of ancestors...") % (i + 1),
- file=sys.stderr,
- )
- todo = tree.add_parents(todo) - done
-
- # download descendants
- todo = set(tree.indi.keys())
- done = set()
- for i in range(args.descend):
- if not todo:
- break
- done |= todo
- print(
- _("Downloading %s. of generations of descendants...") % (i + 1),
- file=sys.stderr,
- )
- todo = tree.add_children(todo) - done
-
- # download spouses
- if args.marriage:
- print(_("Downloading spouses and marriage information..."), file=sys.stderr)
- todo = set(tree.indi.keys())
- tree.add_spouses(todo)
-
- else:
- todo_bloodline = set(tree.indi.keys())
- todo_others = set()
- done = set()
- for distance in range(args.distance):
-
- if not todo_bloodline and not todo_others:
- break
- done |= todo_bloodline
- print(
- _("Downloading individuals at distance %s...") % (distance + 1),
- file=sys.stderr,
- )
- parents = tree.add_parents(todo_bloodline) - done
- children = tree.add_children(todo_bloodline) - done
-
- # download spouses
- if args.marriage:
- print(_("Downloading spouses and marriage information..."), file=sys.stderr)
- todo = set(tree.indi.keys())
- tree.add_spouses(todo)
-
- # spouses = tree.add_spouses(todo_bloodline) - done
-
- todo_bloodline = parents | children
- # if args.only_blood_relatives:
- # # Downloading non bloodline parents
- # tree.add_parents(todo_others)
-
- # # TODO what is a non bloodline person becomes bloodline on another branch?
- # todo_others = spouses
- # else:
- # todo_bloodline |= spouses
-
- # download ordinances, notes and contributors
- async def download_stuff(loop):
- futures = set()
- for fid, indi in tree.indi.items():
- futures.add(loop.run_in_executor(None, indi.get_notes))
- if args.get_ordinances:
- futures.add(loop.run_in_executor(None, tree.add_ordinances, fid))
- if args.get_contributors:
- futures.add(loop.run_in_executor(None, indi.get_contributors))
- for fam in tree.fam.values():
- futures.add(loop.run_in_executor(None, fam.get_notes))
- if args.get_contributors:
- futures.add(loop.run_in_executor(None, fam.get_contributors))
- for future in futures:
- await future
-
- loop = asyncio.get_event_loop()
- print(
- _("Downloading notes")
- + (
- (("," if args.get_contributors else _(" and")) + _(" ordinances"))
- if args.get_ordinances
- else ""
- )
- + (_(" and contributors") if args.get_contributors else "")
- + "...",
- file=sys.stderr,
- )
- loop.run_until_complete(download_stuff(loop))
-
- finally:
- # compute number for family relationships and print GEDCOM file
- tree.reset_num()
- if args.xml:
- with open(args.outfile, "wb") as f:
- tree.printxml(f)
- else:
- with open(args.outfile, "w", encoding="UTF-8") as f:
- tree.print(f)
- print(
- _(
- "Downloaded %s individuals, %s families, %s sources and %s notes "
- "in %s seconds with %s HTTP requests."
- )
- % (
- str(len(tree.indi)),
- str(len(tree.fam)),
- str(len(tree.sources)),
- str(len(tree.notes)),
- str(round(time.time() - time_count)),
- str(fs.counter),
- ),
- file=sys.stderr,
- )
-
-
-if __name__ == "__main__":
- main()
--- /dev/null
+#!/usr/bin/env python3
+# coding: utf-8
+import os
+import sys
+from datetime import datetime
+from typing import Any, List, Optional
+
+try:
+ from typing import Annotated
+except ImportError:
+ from typing_extensions import Annotated
+
+import typer
+
+from getmyancestors.classes.gedcom import Gedcom
+from getmyancestors.classes.tree import Fam, Indi, Tree
+
+# Hack to play nice in script mode
+sys.path.append(os.path.dirname(sys.argv[0]))
+
+app = typer.Typer(
+ help="Merge GEDCOM data from FamilySearch Tree (4 Jul 2016)",
+ add_completion=True,
+ no_args_is_help=False, # script might be piped stdin
+ context_settings={"help_option_names": ["-h", "--help"]},
+)
+
+
+def _warn(msg: str):
+ """Write a warning message to stderr with optional color (if TTY)."""
+ use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "")
+ if use_color:
+ sys.stderr.write(f"\033[33m{msg}\033[0m\n")
+ else:
+ sys.stderr.write(f"{msg}\n")
+
+
+@app.command()
+def main(
+ files: Annotated[
+ Optional[List[str]],
+ typer.Option("-i", metavar="<FILE>", help="input GEDCOM files [stdin]"),
+ ] = None,
+ outfile: Annotated[
+ Optional[str],
+ typer.Option("-o", metavar="<FILE>", help="output GEDCOM files [stdout]"),
+ ] = None,
+ creation_date: Annotated[
+ Optional[str],
+ typer.Option(
+ "--creation-date",
+ metavar="<ISO8601>",
+ help="Override creation date in GEDCOM header (YYYY-MM-DDTHH:MM:SS)",
+ ),
+ ] = None,
+ extra_files: List[str] = typer.Argument(None, hidden=True),
+):
+ """
+ Merge GEDCOM data from FamilySearch Tree
+ """
+ if extra_files:
+ if files is None:
+ files = []
+ files.extend(extra_files)
+
+ # Force generic usage usage help logic if needed, but Typer handles it.
+
+ creation_dt = None
+ if creation_date:
+ try:
+ creation_dt = datetime.fromisoformat(creation_date)
+ except ValueError:
+ print(
+ f"Invalid creation date format: {creation_date}. Expected ISO 8601 (YYYY-MM-DDTHH:MM:SS)",
+ file=sys.stderr,
+ )
+ raise typer.Exit(code=1) from None
+
+ tree = Tree(creation_date=creation_dt)
+
+ # Track used IDs to prevent collisions when merging multiple files
+ used_indi_nums = set()
+ used_fam_nums = set()
+
+ # Determine input sources
+ input_handles: List[Any] = []
+ if files:
+ for fpath in files:
+ try:
+ # Open in read mode with utf-8 encoding
+ # pylint: disable=consider-using-with
+ f = open(fpath, "r", encoding="UTF-8")
+ input_handles.append(f)
+ except OSError as e:
+ print(f"Error opening file {fpath}: {e}", file=sys.stderr)
+ raise typer.Exit(code=2) from None
+ else:
+ # Default to stdin
+ input_handles.append(sys.stdin)
+
+ try:
+ # read the GEDCOM data
+ for file in input_handles:
+ # Determine filename for logging
+ filename = getattr(file, "name", "stdin")
+ # If it's a relative path, might want basename to keep it short
+ if filename != "stdin":
+ filename = os.path.basename(filename)
+
+ ged = Gedcom(file, tree)
+
+ # Deduplicate names by string representation
+ def merge_names(target_set, source_set):
+ target_set.update(source_set)
+
+ # Helper for whitespace normalization in quotes
+ def norm_space(s):
+ return " ".join(s.split()) if s else ""
+
+ # add information about individuals
+ new_indi = 0
+ merged_indi = 0
+ for fid, indi in sorted(ged.indi.items()):
+ if fid not in tree.indi:
+ new_indi += 1
+
+ # Try to reuse the original GEDCOM ID (indi.num)
+ # If it collides with an existing ID in the merged tree, generate a new one
+ candidate_num = indi.num
+ original_candidate = candidate_num
+ suffix_counter = 1
+ while candidate_num in used_indi_nums:
+ # Collision detected! Append suffix
+ candidate_num = f"{original_candidate}_{suffix_counter}"
+ suffix_counter += 1
+
+ used_indi_nums.add(candidate_num)
+ tree.indi[fid] = Indi(indi.fid, tree, num=candidate_num)
+
+ # Track origin file
+ tree.indi[fid].origin_file = filename
+ else:
+ merged_indi += 1
+
+ # UNION data from both sources (superset)
+ tree.indi[fid].fams_fid |= indi.fams_fid
+ tree.indi[fid].famc_fid |= indi.famc_fid
+
+ merge_names(tree.indi[fid].birthnames, indi.birthnames)
+ merge_names(tree.indi[fid].nicknames, indi.nicknames)
+ merge_names(tree.indi[fid].aka, indi.aka)
+ merge_names(tree.indi[fid].married, indi.married)
+
+ # Deduplicate facts by type/date/value/place
+ existing_facts = {
+ (f.type, f.date, f.value, f.place.name if f.place else None)
+ for f in tree.indi[fid].facts
+ }
+ # Sort facts to ensure deterministic winner on collision
+ for f in sorted(
+ indi.facts,
+ key=lambda fa: (
+ fa.type or "",
+ fa.date or "",
+ fa.value or "",
+ fa.place.name if fa.place else "",
+ fa.note.text if fa.note else "",
+ ),
+ ):
+ fact_key = (
+ f.type,
+ f.date,
+ f.value,
+ f.place.name if f.place else None,
+ )
+ if fact_key not in existing_facts:
+ tree.indi[fid].facts.add(f)
+ existing_facts.add(fact_key)
+
+ # Manually merge notes to avoid duplication by text content
+ # Sort notes for consistent order (though order in SET doesn't matter, processing order might)
+ for n in sorted(indi.notes, key=lambda note: note.text or ""):
+ is_dup = any(x.text == n.text for x in tree.indi[fid].notes)
+ if not is_dup:
+ tree.indi[fid].notes.add(n)
+
+ # Deduplicate sources by (source.fid, normalized_quote)
+ existing_sources = {
+ (s.fid, norm_space(q)) for s, q in tree.indi[fid].sources
+ }
+ # Sort sources
+ for s, q in sorted(
+ indi.sources,
+ key=lambda src: (
+ src[0].title or "",
+ src[0].fid or "",
+ src[1] or "",
+ ),
+ ):
+ source_key = (s.fid, norm_space(q))
+ if source_key not in existing_sources:
+ tree.indi[fid].sources.add((s, q))
+ existing_sources.add(source_key)
+
+ # Deduplicate memories by URL (primary) or Description (fallback)
+ def get_mem_key(mem):
+ return mem.url if mem.url else (None, mem.description)
+
+ existing_memories = {get_mem_key(m) for m in tree.indi[fid].memories}
+ # Sort memories
+ for m in sorted(
+ indi.memories,
+ key=lambda mem: (mem.url or "", mem.description or ""),
+ ):
+ key = get_mem_key(m)
+ if key not in existing_memories:
+ tree.indi[fid].memories.add(m)
+ existing_memories.add(key)
+
+ # Update ordinance fields if they are missing in the target
+ if not tree.indi[fid].baptism:
+ tree.indi[fid].baptism = indi.baptism
+ if not tree.indi[fid].confirmation:
+ tree.indi[fid].confirmation = indi.confirmation
+ if not tree.indi[fid].initiatory:
+ tree.indi[fid].initiatory = indi.initiatory
+ if not tree.indi[fid].endowment:
+ tree.indi[fid].endowment = indi.endowment
+ if not tree.indi[fid].sealing_child:
+ tree.indi[fid].sealing_child = indi.sealing_child
+
+ # Only update simple fields if they are missing (first file wins for stability)
+ if not tree.indi[fid].name:
+ tree.indi[fid].name = indi.name
+ if not tree.indi[fid].gender:
+ tree.indi[fid].gender = indi.gender
+
+ # add information about families
+ # Key by fam.fid to preserve unique family records
+ # (keying by (husb, wife) incorrectly merges different families with same parents)
+ new_fam = 0
+ merged_fam = 0
+ for fid, fam in sorted(ged.fam.items()):
+ if fid not in tree.fam:
+ new_fam += 1
+
+ # Try to reuse the original GEDCOM ID (fam.num)
+ candidate_num = fam.num
+ original_candidate = candidate_num
+ suffix_counter = 1
+ while candidate_num in used_fam_nums:
+ candidate_num = f"{original_candidate}_{suffix_counter}"
+ suffix_counter += 1
+
+ used_fam_nums.add(candidate_num)
+
+ tree.fam[fid] = Fam(
+ tree.indi.get(fam.husb_fid),
+ tree.indi.get(fam.wife_fid),
+ tree,
+ candidate_num,
+ )
+ tree.fam[fid].tree = tree
+ # Track origin file
+ tree.fam[fid].origin_file = filename
+
+ # Copy husb_fid/wife_fid for proper linking later
+ tree.fam[fid].husb_fid = fam.husb_fid
+ tree.fam[fid].wife_fid = fam.wife_fid
+ else:
+ merged_fam += 1
+
+ # UNION data
+ # Deduplicate facts
+ existing_facts = {
+ (f.type, f.date, f.value, f.place.name if f.place else None)
+ for f in tree.fam[fid].facts
+ }
+ for f in sorted(
+ fam.facts,
+ key=lambda fa: (
+ fa.type or "",
+ fa.date or "",
+ fa.value or "",
+ fa.place.name if fa.place else "",
+ fa.note.text if fa.note else "",
+ ),
+ ):
+ fact_key = (
+ f.type,
+ f.date,
+ f.value,
+ f.place.name if f.place else None,
+ )
+ if fact_key not in existing_facts:
+ tree.fam[fid].facts.add(f)
+ existing_facts.add(fact_key)
+
+ # Manually merge notes
+ for n in sorted(fam.notes, key=lambda note: note.text or ""):
+ if not any(x.text == n.text for x in tree.fam[fid].notes):
+ tree.fam[fid].notes.add(n)
+
+ # Deduplicate sources
+
+ existing_sources = {
+ (s.fid, norm_space(q)) for s, q in tree.fam[fid].sources
+ }
+ for s, q in sorted(
+ fam.sources,
+ key=lambda src: (
+ src[0].title or "",
+ src[0].fid or "",
+ src[1] or "",
+ ),
+ ):
+ source_key = (s.fid, norm_space(q))
+ if source_key not in existing_sources:
+ tree.fam[fid].sources.add((s, q))
+ existing_sources.add(source_key)
+
+ if not tree.fam[fid].sealing_spouse:
+ tree.fam[fid].sealing_spouse = fam.sealing_spouse
+
+ if not tree.fam[fid].fid:
+ tree.fam[fid].fid = fam.fid
+
+ # Always merge children - set union prevents duplicates
+ tree.fam[fid].chil_fid |= fam.chil_fid
+
+ # Notes already have stable IDs from content hashing in classes/tree/records.py
+ # No renumbering needed.
+
+ # Link families to individuals and vice versa
+ # This creates the actual object references needed for GEDCOM output
+ for _fam_fid, fam in tree.fam.items():
+ # Link husband to this family
+ if fam.husb_fid and fam.husb_fid in tree.indi:
+ fam.husband = tree.indi[fam.husb_fid]
+ tree.indi[fam.husb_fid].fams.add(fam)
+ # Link wife to this family
+ if fam.wife_fid and fam.wife_fid in tree.indi:
+ fam.wife = tree.indi[fam.wife_fid]
+ tree.indi[fam.wife_fid].fams.add(fam)
+ # Link children to this family
+ for chil_fid in fam.chil_fid:
+ if chil_fid in tree.indi:
+ fam.children.add(tree.indi[chil_fid])
+ tree.indi[chil_fid].famc.add(fam)
+
+ # compute number for family relationships and print GEDCOM file
+ tree.reset_num()
+
+ if outfile:
+ try:
+ with open(outfile, "w", encoding="UTF-8") as out:
+ tree.print(out)
+ except OSError as e:
+ print(f"Error opening output file {outfile}: {e}", file=sys.stderr)
+ raise typer.Exit(code=2) from None
+ else:
+ tree.print(sys.stdout)
+
+ finally:
+ # Close handles that are not stdin
+ for f in input_handles:
+ if f is not sys.stdin:
+ f.close()
+
+
+if __name__ == "__main__":
+ app()
+++ /dev/null
-# coding: utf-8
-
-from __future__ import print_function
-
-# global imports
-import os
-import sys
-import argparse
-
-# local imports
-from getmyancestors.classes.tree import Indi, Fam, Tree
-from getmyancestors.classes.gedcom import Gedcom
-
-sys.path.append(os.path.dirname(sys.argv[0]))
-
-
-def main():
- parser = argparse.ArgumentParser(
- description="Merge GEDCOM data from FamilySearch Tree (4 Jul 2016)",
- add_help=False,
- usage="mergemyancestors -i input1.ged input2.ged ... [options]",
- )
- try:
- parser.add_argument(
- "-i",
- metavar="<FILE>",
- nargs="+",
- type=argparse.FileType("r", encoding="UTF-8"),
- default=[sys.stdin],
- help="input GEDCOM files [stdin]",
- )
- parser.add_argument(
- "-o",
- metavar="<FILE>",
- nargs="?",
- type=argparse.FileType("w", encoding="UTF-8"),
- default=sys.stdout,
- help="output GEDCOM files [stdout]",
- )
- except TypeError:
- sys.stderr.write("Python >= 3.4 is required to run this script\n")
- sys.stderr.write("(see https://docs.python.org/3/whatsnew/3.4.html#argparse)\n")
- exit(2)
-
- # extract arguments from the command line
- try:
- parser.error = parser.exit
- args = parser.parse_args()
- except SystemExit as e:
- print(e.code)
- parser.print_help()
- exit(2)
-
- tree = Tree()
-
- indi_counter = 0
- fam_counter = 0
-
- # read the GEDCOM data
- for file in args.i:
- ged = Gedcom(file, tree)
-
- # add information about individuals
- for num in ged.indi:
- fid = ged.indi[num].fid
- if fid not in tree.indi:
- indi_counter += 1
- tree.indi[fid] = Indi(ged.indi[num].fid, tree, num=indi_counter)
- tree.indi[fid].fams_fid |= ged.indi[num].fams_fid
- tree.indi[fid].famc_fid |= ged.indi[num].famc_fid
- tree.indi[fid].name = ged.indi[num].name
- tree.indi[fid].birthnames = ged.indi[num].birthnames
- tree.indi[fid].nicknames = ged.indi[num].nicknames
- tree.indi[fid].aka = ged.indi[num].aka
- tree.indi[fid].married = ged.indi[num].married
- tree.indi[fid].gender = ged.indi[num].gender
- tree.indi[fid].facts = ged.indi[num].facts
- tree.indi[fid].notes = ged.indi[num].notes
- tree.indi[fid].sources = ged.indi[num].sources
- tree.indi[fid].memories = ged.indi[num].memories
- tree.indi[fid].baptism = ged.indi[num].baptism
- tree.indi[fid].confirmation = ged.indi[num].confirmation
- tree.indi[fid].initiatory = ged.indi[num].initiatory
- tree.indi[fid].endowment = ged.indi[num].endowment
- if not (tree.indi[fid].sealing_child and tree.indi[fid].sealing_child.famc):
- tree.indi[fid].sealing_child = ged.indi[num].sealing_child
-
- # add information about families
- for num in ged.fam:
- husb, wife = (ged.fam[num].husb_fid, ged.fam[num].wife_fid)
- if (husb, wife) not in tree.fam:
- fam_counter += 1
- tree.fam[(husb, wife)] = Fam(husb, wife, tree, fam_counter)
- tree.fam[(husb, wife)].tree = tree
- tree.fam[(husb, wife)].chil_fid |= ged.fam[num].chil_fid
- if ged.fam[num].fid:
- tree.fam[(husb, wife)].fid = ged.fam[num].fid
- if ged.fam[num].facts:
- tree.fam[(husb, wife)].facts = ged.fam[num].facts
- if ged.fam[num].notes:
- tree.fam[(husb, wife)].notes = ged.fam[num].notes
- if ged.fam[num].sources:
- tree.fam[(husb, wife)].sources = ged.fam[num].sources
- tree.fam[(husb, wife)].sealing_spouse = ged.fam[num].sealing_spouse
-
- # merge notes by text
- tree.notes = sorted(tree.notes, key=lambda x: x.text)
- for i, n in enumerate(tree.notes):
- if i == 0:
- n.num = 1
- continue
- if n.text == tree.notes[i - 1].text:
- n.num = tree.notes[i - 1].num
- else:
- n.num = tree.notes[i - 1].num + 1
-
- # compute number for family relationships and print GEDCOM file
- tree.reset_num()
- tree.print(args.o)
-
-
-if __name__ == "__main__":
- main()
--- /dev/null
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from getmyancestors.classes.session import Session
+
+
+@pytest.fixture
+def mock_session():
+ """
+ Creates a Session object where the network layer is mocked out.
+ """
+ with patch("getmyancestors.classes.session.Session.login"):
+ session = Session("test_user", "test_pass", verbose=False)
+
+ # Mock cookies
+ session.cookies.update({"fssessionid": "mock_session_id", "XSRF-TOKEN": "mock_token"}) # type: ignore
+
+ # Mock session attributes required by Tree
+ session.lang = "en" # type: ignore
+ session.fid = "KW7V-Y32" # type: ignore
+
+ # Mock the network methods
+ session.get = MagicMock() # type: ignore
+ session.post = MagicMock() # type: ignore
+ session.get_url = MagicMock() # type: ignore
+
+ # Mock the translation method
+ session._ = lambda s: s # type: ignore
+
+ yield session
+
+
+@pytest.fixture
+def sample_person_json():
+ return {
+ "persons": [
+ {
+ "id": "KW7V-Y32",
+ "living": False,
+ "display": {
+ "name": "John Doe",
+ "gender": "Male",
+ "lifespan": "1900-1980",
+ },
+ "facts": [
+ {
+ "type": "http://gedcomx.org/Birth",
+ "date": {"original": "1 Jan 1900"},
+ "place": {"original": "New York"},
+ "attribution": {"changeMessage": "Initial import"},
+ }
+ ],
+ "names": [
+ {
+ "nameForms": [{"fullText": "John Doe"}],
+ "preferred": True,
+ "type": "http://gedcomx.org/BirthName",
+ "attribution": {"changeMessage": "Initial import"},
+ }
+ ],
+ "attribution": {"changeMessage": "Initial import"},
+ }
+ ]
+ }
+
+
+@pytest.fixture
+def mock_user_data():
+ return {
+ "users": [
+ {
+ "personId": "KW7V-Y32",
+ "preferredLanguage": "en",
+ "displayName": "Test User",
+ }
+ ]
+ }
+
+
+@pytest.fixture(autouse=True)
+def suppress_license_prompt():
+ """Automatically suppress license prompt for all tests"""
+ with patch.dict(
+ os.environ, {"GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT": "1"}
+ ):
+ yield
--- /dev/null
+import os
+import shutil
+import unittest
+from concurrent.futures import ThreadPoolExecutor
+
+import requests
+from requests_cache import CachedSession
+
+
+class TestCacheConcurrency(unittest.TestCase):
+ def setUp(self):
+ self.cache_name = ".tmp/test_concurrency_cache"
+ self.backend = "filesystem"
+ # Ensure clean state
+ if os.path.exists(self.cache_name):
+ shutil.rmtree(self.cache_name, ignore_errors=True)
+ os.makedirs(".tmp", exist_ok=True)
+
+ def tearDown(self):
+ if os.path.exists(self.cache_name):
+ shutil.rmtree(self.cache_name, ignore_errors=True)
+
+ def test_concurrent_writes(self):
+ """
+ Verify thread safety with concurrent writes using 'filesystem' backend.
+ This backend handles concurrency via file locking and is much more robust than SQLite for this use case.
+ """
+
+ # Filesystem backend doesn't need check_same_thread
+ with CachedSession(
+ self.cache_name, backend=self.backend, expire_after=3600
+ ) as session:
+
+ exceptions = []
+
+ def stress_cache(i):
+ try:
+ # Simulate "Check Cache" -> "Write Cache" race
+ key = f"key_{i}"
+ if not session.cache.contains(key):
+ # Create a REAL response object to avoid mock serialization errors
+ response = requests.Response()
+ response.status_code = 200
+ # pylint: disable=protected-access
+ response._content = b"test"
+ response.url = "http://test.com"
+
+ # Attach dummy request for serialization
+ req = requests.Request(
+ method="GET", url="http://test.com"
+ ).prepare()
+ response.request = req
+
+ # Mock raw response for requests-cache compatibility
+ class MockRaw:
+ _request_url = "http://test.com"
+
+ def read(
+ self, *args, **kwargs
+ ): # pylint: disable=unused-argument
+ return b""
+
+ def close(self):
+ pass
+
+ def stream(
+ self, *args, **kwargs
+ ): # pylint: disable=unused-argument
+ return []
+
+ response.raw = MockRaw()
+
+ # Write to cache
+ session.cache.save_response(response, key)
+ except Exception as e:
+ exceptions.append(e)
+
+ # Run concurrent threads with 10 threads
+ with ThreadPoolExecutor(max_workers=10) as executor:
+ for i in range(100):
+ executor.submit(stress_cache, i)
+
+ # Filter out known transient errors from requests-cache filesystem backend
+ # These can occur under heavy concurrent writes but don't indicate real bugs
+ # Note: requests-cache uses SQLite internally even with filesystem backend for metadata
+ transient_errors = ["bad parameter", "database is locked"]
+ real_exceptions = [
+ e
+ for e in exceptions
+ if not any(msg in str(e).lower() for msg in transient_errors)
+ ]
+
+ # Count transient errors - fail if too many (potential real issue)
+ transient_count = len(exceptions) - len(real_exceptions)
+ transient_threshold = 10 # More than 10% of 100 requests = potential issue
+
+ if real_exceptions:
+ print(f"Encountered {len(real_exceptions)} real exceptions:")
+ unique_errors = set(str(e) for e in real_exceptions)
+ for e in unique_errors:
+ print(f"- {e}")
+ self.fail(f"Concurrency test failed with {len(real_exceptions)} exceptions")
+ elif transient_count > transient_threshold:
+ # Too many transient errors may indicate a real problem
+ self.fail(
+ f"Too many transient errors ({transient_count} > {transient_threshold}), "
+ "may indicate cache corruption"
+ )
+ elif transient_count > 0:
+ # Log but don't fail for small number of transient errors
+ print(
+ f"Note: {transient_count} transient cache errors (expected under heavy threading)"
+ )
+
+
+if __name__ == "__main__":
+ unittest.main()
--- /dev/null
+import traceback
+from unittest.mock import patch
+
+from typer.testing import CliRunner
+
+from getmyancestors.getmyanc import app
+
+runner = CliRunner()
+
+
+class TestCLI:
+
+ @patch("getmyancestors.getmyanc.Session")
+ @patch("getmyancestors.getmyanc.CachedSession")
+ @patch("getmyancestors.getmyanc.Tree")
+ def test_basic_args(self, mock_tree, mock_cached_session, _mock_session):
+ """Test that arguments are parsed and passed to classes correctly"""
+
+ # Typer/Click arguments (no need for program name "getmyancestors" in list)
+ test_args = [
+ "-u",
+ "myuser",
+ "-p",
+ "mypass",
+ "-i",
+ "KW7V-Y32",
+ "--verbose",
+ ]
+
+ # Setup the session to appear logged in
+ mock_cached_session.return_value.logged = True
+
+ result = runner.invoke(app, test_args)
+ if result.exc_info:
+ traceback.print_exception(*result.exc_info)
+
+ # Verify exit code
+ assert result.exit_code == 0
+
+ # Verify Session was initialized with CLI args
+ mock_cached_session.assert_called_once()
+ _args, kwargs = mock_cached_session.call_args
+ assert kwargs["username"] == "myuser"
+ assert kwargs["password"] == "mypass"
+ assert kwargs["verbose"] is True
+ assert kwargs["cache_control"] is True
+
+ # Verify Tree started
+ # Typer parses "-i KW..." into a list
+ mock_tree.return_value.add_indis.assert_called_with(["KW7V-Y32"])
+
+ def test_arg_validation(self):
+ """Test that invalid ID formats cause an exit"""
+ test_args = ["-u", "u", "-p", "p", "-i", "BAD_ID"]
+
+ result = runner.invoke(app, test_args)
+ print("STDOUT:", result.stdout)
+
+ # Should exit with code 1 due to validation error
+ assert result.exit_code == 1
+ # Click/Typer might print to stdout or stderr depending on context/runner
+ output = result.stdout + (result.stderr if result.stderr else "")
+ assert "Invalid FamilySearch ID: BAD_ID" in output
--- /dev/null
+import unittest
+from unittest.mock import MagicMock, patch
+
+from getmyancestors.classes.constants import FACT_TAGS
+from getmyancestors.classes.tree.core import Fam, Indi, Tree
+
+
+class TestForkFeatures(unittest.TestCase):
+ def setUp(self):
+ self.mock_session = MagicMock()
+ self.mock_session.lang = "en" # Needed for Tree init
+ self.tree = Tree(self.mock_session)
+ assert self.tree.fs is not None
+ self.tree.fs._ = lambda x: x # type: ignore # Mock translation
+
+ def _setup_mock_api(self, changelog=None, agent_data=None):
+ """Helper to mock API responses"""
+
+ def side_effect(url, _headers=None):
+ if "changes" in url:
+ return changelog
+ if "agents" in url:
+ # Naive matching for test simplicity
+ return agent_data
+ return None
+
+ assert self.tree.fs is not None
+ self.tree.fs.get_url = MagicMock(side_effect=side_effect) # type: ignore
+
+ def test_immigration_tag(self):
+ """Verify Immigration tag mapping exists"""
+ self.assertIn("http://gedcomx.org/Immigration", FACT_TAGS)
+ self.assertEqual(FACT_TAGS["http://gedcomx.org/Immigration"], "IMMI")
+
+ def test_exclude_logic_parents(self):
+ """Verify add_parents respects exclude list"""
+ # Setup: Main person I1, Parent I2
+ i1 = Indi("I1", self.tree)
+ self.tree.indi["I1"] = i1
+
+ # Manually populate parents list for I1
+ i1.parents = {("I2", "I3")} # Father, Mother
+
+ # Case 1: No exclude
+ self.tree.exclude = []
+ with patch.object(self.tree, "add_indis") as mock_add_indis:
+ self.tree.add_parents({"I1"})
+ # verify add_indis called with {"I2", "I3"}
+ args, _ = mock_add_indis.call_args
+ self.assertEqual(args[0], {"I2", "I3"})
+
+ # Case 2: Exclude I2
+ self.tree.exclude = ["I2"]
+ with patch.object(self.tree, "add_indis") as mock_add_indis:
+ self.tree.add_parents({"I1"})
+ # verify add_indis called with {"I3"} only
+ args, _ = mock_add_indis.call_args
+ self.assertEqual(args[0], {"I3"})
+
+ def test_exclude_logic_children(self):
+ """Verify add_children respects exclude list"""
+ # Setup: Main person I1, Child I4
+ i1 = Indi("I1", self.tree)
+ self.tree.indi["I1"] = i1
+
+ # Manually populate children
+ i1.children = {("I1", "I3", "I4"), ("I1", "I3", "I5")}
+
+ # Case 1: No exclude
+ self.tree.exclude = []
+ with patch.object(self.tree, "add_indis") as mock_add_indis:
+ self.tree.add_children({"I1"})
+ mock_add_indis.assert_called()
+ args, _ = mock_add_indis.call_args
+ self.assertTrue("I4" in args[0])
+ self.assertTrue("I5" in args[0])
+
+ # Case 2: Exclude I5 (filter out filtered_indis)
+ self.tree.exclude = ["I5"]
+ with patch.object(self.tree, "add_indis") as mock_add_indis:
+ self.tree.add_children({"I1"})
+ args, _ = mock_add_indis.call_args
+ self.assertTrue("I4" in args[0])
+ self.assertFalse("I5" in args[0])
+
+ def test_get_contributors(self):
+ """Verify get_contributors fetches and parses agent data"""
+ # Setup Indi
+ i1 = Indi("I1", self.tree)
+ self.tree.indi["I1"] = i1
+
+ # Mock API responses
+ # 1. Changelog
+ changelog = {
+ "entries": [
+ {
+ "contributors": [
+ {
+ "name": "AgentName",
+ "uri": "https://www.familysearch.org/agents/123",
+ }
+ ]
+ }
+ ]
+ }
+ # 2. Agent Data
+ agent_data = {
+ "agents": [
+ {
+ "names": [{"value": "Real Name"}],
+ "emails": [{"resource": "mailto:test@example.com"}],
+ "phones": [{"resource": "tel:555-1234"}],
+ }
+ ]
+ }
+
+ def side_effect(url, _headers=None):
+ if "changes" in url:
+ return changelog
+ if "agents/123" in url:
+ return agent_data
+ return None
+
+ assert self.tree.fs is not None
+ self.tree.fs.get_url = MagicMock(side_effect=side_effect) # type: ignore
+
+ # Action
+ i1.get_contributors()
+
+ # Verify
+ self.assertEqual(len(i1.notes), 1)
+ note = list(i1.notes)[0]
+ self.assertIn("AgentName", note.text)
+ self.assertIn("Real Name", note.text) # Display name
+ self.assertIn("test@example.com", note.text)
+ self.assertIn("555-1234", note.text)
+
+ def test_get_contributors_family(self):
+ """Verify get_contributors works for Families"""
+ fam = Fam(tree=self.tree, num="F1")
+ fam.fid = "F1"
+ self.tree.fam["F1"] = fam
+
+ changelog = {
+ "entries": [
+ {
+ "contributors": [
+ {
+ "name": "FamAgent",
+ "uri": "https://www.familysearch.org/agents/456",
+ }
+ ]
+ }
+ ]
+ }
+ agent_data = {
+ "agents": [{"names": [{"value": "Fam Agent"}], "emails": [], "phones": []}]
+ }
+
+ self._setup_mock_api(changelog, agent_data)
+
+ fam.get_contributors()
+
+ self.assertEqual(len(fam.notes), 1)
+ note = list(fam.notes)[0]
+ self.assertIn("FamAgent", note.text)
+ self.assertIn("Fam Agent", note.text)
+
+ def test_get_contributors_duplicates_and_missing(self):
+ """Verify duplicate contributors are deduped and missing fields handled"""
+ i1 = Indi("I1", self.tree)
+ self.tree.indi["I1"] = i1
+
+ # Two entries, same agent
+ changelog = {
+ "entries": [
+ {
+ "contributors": [
+ {
+ "name": "AgentX",
+ "uri": "https://www.familysearch.org/agents/X",
+ }
+ ]
+ },
+ {
+ "contributors": [
+ {
+ "name": "AgentX",
+ "uri": "https://www.familysearch.org/agents/X",
+ }
+ ]
+ },
+ ]
+ }
+ # Agent has no email/phone
+ agent_data = {
+ "agents": [{"names": [{"value": "Agent X"}], "emails": [], "phones": []}]
+ }
+
+ self._setup_mock_api(changelog, agent_data)
+
+ i1.get_contributors()
+
+ note = list(i1.notes)[0]
+ # Should only list AgentX once
+ self.assertEqual(note.text.count("AgentX"), 1)
+ # Should not crash on missing email/phone
--- /dev/null
+import io
+import unittest
+
+from getmyancestors.classes.gedcom import Gedcom
+from getmyancestors.classes.tree import Fact, Indi, Name, Tree
+
+SAMPLE_GEDCOM = """0 HEAD
+1 CHAR UTF-8
+1 GEDC
+2 VERS 5.5.1
+2 FORM LINEAGE-LINKED
+0 @I1@ INDI
+1 NAME John /Doe/
+2 GIVN John
+2 SURN Doe
+1 SEX M
+1 BIRT
+2 DATE 1 JAN 1980
+2 PLAC Springfield
+1 FAMC @F1@
+1 _FSFTID KW7V-Y32
+0 @I2@ INDI
+1 NAME Jane /Smith/
+1 SEX F
+1 FAMS @F1@
+1 _FSFTID KW7V-Y33
+0 @F1@ FAM
+1 HUSB @I1@
+1 WIFE @I2@
+1 CHIL @I3@
+1 _FSFTID F123-456
+0 @I3@ INDI
+1 NAME Baby /Doe/
+1 SEX M
+1 FAMC @F1@
+1 _FSFTID KW7V-Y34
+0 TRLR
+"""
+
+
+class TestGedcomLogic(unittest.TestCase):
+ def test_parse_gedcom(self):
+ """Test parsing of a GEDCOM string using the Gedcom class."""
+ f = io.StringIO(SAMPLE_GEDCOM)
+ tree = Tree()
+
+ # The Gedcom class takes a file-like object and a tree
+ ged = Gedcom(f, tree)
+
+ # Verify Individuals
+ # The parser seems to use the number from @I{num}@ as the key in ged.indi
+ self.assertIn("1", ged.indi)
+ self.assertIn("2", ged.indi)
+ self.assertIn("3", ged.indi)
+
+ john = ged.indi["1"]
+ self.assertEqual(john.gender, "M")
+ self.assertEqual(john.fid, "KW7V-Y32")
+
+ # Check Name - The parsing logic for names is a bit complex in __get_name
+ # It populates birthnames by default if no type is specified
+ # BUT the first name found is assigned to self.name, NOT birthnames
+ self.assertIsNotNone(john.name)
+ self.assertEqual(john.name.given, "John")
+ self.assertEqual(john.name.surname, "Doe")
+
+ # Verify birthnames if any additional names present (none in this sample)
+ # self.assertTrue(len(john.birthnames) > 0)
+
+ # Verify Family
+ self.assertIn("1", ged.fam)
+ fam = ged.fam["1"]
+ self.assertEqual(fam.husb_num, "1") # Points to I1
+ self.assertEqual(fam.wife_num, "2") # Points to I2
+ self.assertIn("3", fam.chil_num) # Points to I3
+ self.assertEqual(fam.fid, "F123-456")
+
+ def test_tree_export(self):
+ """Test that a Tree object can be exported to GEDCOM format."""
+ tree = Tree()
+ tree.display_name = "Test User"
+ tree.lang = "en"
+
+ # Create Individual
+ indi = Indi("KW7V-Y32", tree, num=1)
+ indi.gender = "M"
+
+ name = Name()
+ name.given = "John"
+ name.surname = "Doe"
+ # name.full = "John Doe" # Removed: Name class has no 'full' attribute
+ indi.birthnames.add(name)
+
+ fact = Fact()
+ fact.type = "http://gedcomx.org/Birth"
+ fact.date = "1 JAN 1980"
+ fact.place = tree.ensure_place("Springfield")
+ indi.facts.add(fact)
+
+ tree.indi["KW7V-Y32"] = indi
+
+ # Validate output
+ output = io.StringIO()
+ tree.print(output)
+ content = output.getvalue()
+
+ self.assertIn("0 HEAD", content)
+ self.assertIn("1 NAME John /Doe/", content)
+ # ID is derived from fid if present
+ self.assertIn("0 @IKW7V-Y32@ INDI", content)
+ self.assertIn("1 SEX M", content)
+ self.assertIn("1 BIRT", content)
+ self.assertIn("2 DATE 1 JAN 1980", content)
+ self.assertIn("0 TRLR", content)
+
+
+if __name__ == "__main__":
+ unittest.main()
--- /dev/null
+import json
+import os
+import traceback
+import unittest
+from unittest.mock import MagicMock, PropertyMock, patch
+
+import requests
+from requests.models import PreparedRequest, Response
+from typer.testing import CliRunner
+
+from getmyancestors import getmyanc as getmyancestors
+
+runner = CliRunner()
+
+
+class TestFullIntegration(unittest.TestCase):
+ @patch.dict(
+ os.environ, {"GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT": "1"}
+ )
+ @patch("getmyancestors.classes.session.LimiterAdapter")
+ # @patch("builtins.print")
+ @patch(
+ "getmyancestors.classes.session.GMASession.login", autospec=True
+ ) # Mock login to prevent network calls
+ @patch(
+ "getmyancestors.classes.session.GMASession.logged", new_callable=PropertyMock
+ )
+ @patch("requests.Session.get")
+ @patch("requests.Session.post")
+ def test_main_execution(
+ self,
+ mock_post,
+ mock_get,
+ mock_logged,
+ mock_login,
+ # mock_print,
+ mock_adapter,
+ ):
+ """
+ Integration test for the main execution flow.
+ Bypasses login logic and mocks network responses with static data.
+ """
+ # Suppress unused argument warnings
+ _ = (mock_adapter,)
+
+ # Setup mocks
+ mock_logged.return_value = True
+
+ # Define a fake login that sets FID directly without network call
+ def fake_login(self):
+ self.fid = "TEST-123"
+ self.lang = "en"
+ # Set cookie/header so the 'logged' property returns True
+ # Set cookie/header so the 'logged' property returns True
+ self.cookies["fssessionid"] = "mock_session_id"
+
+ mock_login.side_effect = fake_login
+
+ # Setup generic response for any GET request
+ # users/current -> sets lang='en'
+ generic_json = {
+ "users": [
+ {
+ "personId": "TEST-123",
+ "preferredLanguage": "en",
+ "displayName": "Integrator",
+ }
+ ],
+ "persons": [
+ {
+ "id": "TEST-123",
+ "living": True,
+ "names": [
+ {
+ "preferred": True,
+ "type": "http://gedcomx.org/BirthName",
+ "nameForms": [
+ {
+ "fullText": "Test Person",
+ "parts": [
+ {
+ "type": "http://gedcomx.org/Given",
+ "value": "Test",
+ },
+ {
+ "type": "http://gedcomx.org/Surname",
+ "value": "Person",
+ },
+ ],
+ }
+ ],
+ "attribution": {"changeMessage": "Automated update"},
+ }
+ ],
+ "notes": [], # Added notes list for get_notes()
+ "facts": [],
+ "display": {
+ "name": "Test Person",
+ "gender": "Male",
+ "lifespan": "1900-2000",
+ },
+ }
+ ],
+ "childAndParentsRelationships": [],
+ "parentAndChildRelationships": [],
+ }
+
+ mock_response = Response()
+ mock_response.status_code = 200
+ mock_response.url = "https://api.familysearch.org/test"
+ mock_response.headers = requests.structures.CaseInsensitiveDict(
+ {"Content-Type": "application/json"}
+ )
+ # pylint: disable=protected-access
+ mock_response._content = json.dumps(generic_json).encode("utf-8")
+ # mock_response.headers is already a CaseInsensitiveDict by default in Response()
+
+ # requests_cache needs response.request to be set
+ mock_req = PreparedRequest()
+ mock_req.url = "https://api.familysearch.org/test"
+ mock_req.method = "GET"
+ mock_req.headers = requests.structures.CaseInsensitiveDict({})
+ # mock_req.cookies = {} # PreparedRequest doesn't have public cookies dict usually, avoiding access
+ mock_response.request = mock_req
+
+ # requests_cache needs response.raw (urllib3 response)
+ # It accesses ._request_url
+ mock_response.raw = MagicMock()
+ # pylint: disable=protected-access
+ mock_response.raw._request_url = "https://api.familysearch.org/test"
+
+ # Configure LimiterAdapter mock to return our response
+ mock_adapter_instance = mock_adapter.return_value
+ mock_adapter_instance.send.return_value = mock_response
+
+ # When Session.get is called, it returns our mock response
+ def side_effect_get(url, *args, **kwargs): # pylint: disable=unused-argument
+ # print(f"DEBUG: Mock GET called for {url}")
+ return mock_response
+
+ mock_get.side_effect = side_effect_get
+ mock_post.return_value = mock_response
+
+ # Output file path in .tmp directory
+ output_file = os.path.abspath(".tmp/test_output.ged")
+ settings_file = os.path.abspath(".tmp/test_output.settings")
+
+ # Create the .tmp directory if it doesn't exist
+ tmp_dir = os.path.dirname(output_file)
+ os.makedirs(tmp_dir, exist_ok=True)
+
+ # Prepare arguments mimicking CLI usage (Typer args, no program name)
+ test_args = [
+ "-u",
+ "testuser",
+ "-p",
+ "testpass",
+ "--no-cache",
+ "--outfile",
+ output_file,
+ ]
+
+ # Invoke via CliRunner
+ # Note: we invoke getmyancestors.app
+ result = runner.invoke(getmyancestors.app, test_args)
+
+ if result.exit_code != 0:
+ print(f"STDOUT: {result.stdout}")
+ if result.exc_info:
+ traceback.print_exception(*result.exc_info)
+ self.fail(f"App exited with code {result.exit_code}")
+
+ # Basic assertions
+ self.assertTrue(mock_login.called, "Login should have been called")
+ self.assertTrue(mock_get.called, "Should have attempted network calls")
+
+ self.assertTrue(
+ os.path.exists(output_file),
+ f"Output file should have been created at {output_file}",
+ )
+
+ if os.path.exists(output_file):
+ self.addCleanup(os.remove, output_file)
+ if os.path.exists(settings_file):
+ self.addCleanup(os.remove, settings_file)
+
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)
--- /dev/null
+"""Test __main__ functionality."""
+
+import sys
+import unittest
+from unittest.mock import patch
+
+
+class TestMain(unittest.TestCase):
+ """Test __main__ module."""
+
+ def test_main_module_can_be_imported(self):
+ """Test that __main__ module can be imported without error."""
+ # Mock getmyanc.app to avoid execution when importing __main__
+ with patch("getmyancestors.getmyanc.app"):
+ # Mock sys.argv to avoid argument parsing errors
+ with patch.object(sys, "argv", ["getmyancestors", "--help"]):
+ # Import should work without error
+ import getmyancestors.__main__ # pylint: disable=import-outside-toplevel
+
+ self.assertTrue(hasattr(getmyancestors.__main__, "__name__"))
+
+ def test_main_execution_with_mock(self):
+ """Test that importing __main__ triggers getmyanc.main() call."""
+ # pylint: disable=import-outside-toplevel
+ import runpy
+
+ # Create a mock for getmyanc.app
+ with patch("getmyancestors.getmyanc.app") as mock_app:
+ # Mock sys.argv
+ with patch.object(sys, "argv", ["getmyancestors", "--help"]):
+ # pylint: disable=import-outside-toplevel,no-name-in-module
+ runpy.run_module("getmyancestors.__main__", run_name="__main__")
+
+ self.assertTrue(mock_app.called)
+
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)
--- /dev/null
+"""Test merge idempotency - ensure re-merging produces no duplicates."""
+
+import io
+import os
+import shutil
+import tempfile
+import unittest
+
+from getmyancestors.classes.gedcom import Gedcom
+from getmyancestors.classes.tree import Fam, Indi, Tree
+
+
+class TestMergeIdempotency(unittest.TestCase):
+ """Test that merging is idempotent - merging A+B then (A+B)+A should equal A+B."""
+
+ def setUp(self):
+ """Create sample GEDCOM content for testing."""
+ # Simple GEDCOM with one individual (simulating FamilySearch output)
+ self.gedcom_a = """0 HEAD
+1 SOUR getmyancestors
+1 GEDC
+2 VERS 5.5
+1 CHAR UTF-8
+0 @I1@ INDI
+1 NAME John /Doe/
+1 SEX M
+1 _FSFTID AAAA-111
+1 BIRT
+2 DATE 1 JAN 1900
+2 PLAC New York, USA
+1 NOTE This is a test note
+0 @F1@ FAM
+1 HUSB @I1@
+1 _FSFTID FFFF-111
+0 TRLR
+"""
+
+ # Different GEDCOM with different individual
+ self.gedcom_b = """0 HEAD
+1 SOUR getmyancestors
+1 GEDC
+2 VERS 5.5
+1 CHAR UTF-8
+0 @I2@ INDI
+1 NAME Jane /Smith/
+1 SEX F
+1 _FSFTID BBBB-222
+1 BIRT
+2 DATE 15 MAR 1905
+2 PLAC Boston, USA
+0 @F2@ FAM
+1 WIFE @I2@
+1 _FSFTID FFFF-222
+0 TRLR
+"""
+
+ self.temp_dir = tempfile.mkdtemp()
+ self.addCleanup(shutil.rmtree, self.temp_dir)
+
+ self.file_a = os.path.join(self.temp_dir, "a.ged")
+ self.file_b = os.path.join(self.temp_dir, "b.ged")
+ with open(self.file_a, "w", encoding="utf-8") as f:
+ f.write(self.gedcom_a)
+ with open(self.file_b, "w", encoding="utf-8") as f:
+ f.write(self.gedcom_b)
+
+ def _count_data_lines(self, tree: Tree) -> int:
+ """Count output lines."""
+ output = io.StringIO()
+ tree.print(output)
+ lines = output.getvalue().strip().split("\n")
+ return len(lines)
+
+ def _merge_files(self, *files) -> Tree:
+ """Merge multiple GEDCOM files into a single tree."""
+ contents = []
+ for fpath in files:
+ with open(fpath, "r", encoding="utf-8") as f:
+ contents.append(f.read())
+ return self._merge_gedcoms(*contents)
+
+ def _merge_gedcoms(self, *gedcom_strings) -> Tree:
+ """Merge multiple GEDCOM strings into a single tree."""
+ tree = Tree()
+ indi_counter = 0
+ fam_counter = 0
+
+ for gedcom_str in gedcom_strings:
+ file = io.StringIO(gedcom_str)
+ ged = Gedcom(file, tree)
+
+ # Replicate merge logic from mergemyancestors.py
+ for _, indi in ged.indi.items():
+ fid = indi.fid
+ if fid not in tree.indi:
+ indi_counter += 1
+ tree.indi[fid] = Indi(indi.fid, tree, num=indi_counter)
+ tree.indi[fid].fams_fid |= indi.fams_fid
+ tree.indi[fid].famc_fid |= indi.famc_fid
+ tree.indi[fid].name = indi.name
+ tree.indi[fid].birthnames |= indi.birthnames
+ tree.indi[fid].nicknames |= indi.nicknames
+ tree.indi[fid].aka |= indi.aka
+ tree.indi[fid].married |= indi.married
+ tree.indi[fid].gender = indi.gender
+ tree.indi[fid].facts |= indi.facts
+ # Manually merge notes to avoid duplication by text content
+ for n in indi.notes:
+ if not any(x.text == n.text for x in tree.indi[fid].notes):
+ tree.indi[fid].notes.add(n)
+ tree.indi[fid].sources |= indi.sources
+ tree.indi[fid].memories |= indi.memories
+ tree.indi[fid].baptism = indi.baptism
+ tree.indi[fid].confirmation = indi.confirmation
+ tree.indi[fid].initiatory = indi.initiatory
+ tree.indi[fid].endowment = indi.endowment
+ sc = tree.indi[fid].sealing_child
+ if not (sc and sc.famc):
+ tree.indi[fid].sealing_child = indi.sealing_child
+
+ for _, fam in ged.fam.items():
+ husb, wife = (fam.husb_fid, fam.wife_fid)
+ # Use standard ID generation to satisfy Dict[str, Fam] type
+ fam_key = Fam.gen_id(tree.indi.get(husb), tree.indi.get(wife))
+
+ if fam_key not in tree.fam:
+ fam_counter += 1
+ tree.fam[fam_key] = Fam(
+ tree.indi.get(husb), tree.indi.get(wife), tree, fam_counter
+ )
+ tree.fam[fam_key].tree = tree
+ tree.fam[fam_key].chil_fid |= fam.chil_fid
+ if fam.fid:
+ tree.fam[fam_key].fid = fam.fid
+ tree.fam[fam_key].facts |= fam.facts
+ # Manually merge notes
+ for n in fam.notes:
+ if not any(x.text == n.text for x in tree.fam[fam_key].notes):
+ tree.fam[fam_key].notes.add(n)
+ tree.fam[fam_key].sources |= fam.sources
+ tree.fam[fam_key].sealing_spouse = fam.sealing_spouse
+
+ # Merge notes by text
+ tree.notes = sorted(tree.notes, key=lambda x: x.text) # type: ignore
+ for i, n in enumerate(tree.notes):
+ if i == 0:
+ n.num = 1
+ continue
+ if n.text == tree.notes[i - 1].text: # type: ignore
+ n.num = tree.notes[i - 1].num # type: ignore
+ else:
+ n.num = tree.notes[i - 1].num + 1 # type: ignore
+
+ tree.reset_num()
+ return tree
+
+ def _tree_to_gedcom_string(self, tree: Tree) -> str:
+ """Convert tree back to GEDCOM string."""
+ output = io.StringIO()
+ tree.print(output)
+ return output.getvalue()
+
+ def test_merge_is_idempotent(self):
+ """
+ Test that merging A+B then re-merging with A produces no duplicates.
+
+ If merge is idempotent:
+ lines(A+B) == lines((A+B)+A)
+ """
+ # First merge: A + B
+ merged_tree = self._merge_gedcoms(self.gedcom_a, self.gedcom_b)
+ merged_lines = self._count_data_lines(merged_tree)
+
+ # Get merged output as string
+ merged_gedcom = self._tree_to_gedcom_string(merged_tree)
+
+ # Second merge: (A+B) + A again
+ remerged_tree = self._merge_gedcoms(merged_gedcom, self.gedcom_a)
+ remerged_lines = self._count_data_lines(remerged_tree)
+
+ # They should be equal if merge is idempotent
+ self.assertEqual(
+ merged_lines,
+ remerged_lines,
+ f"Merge is not idempotent: original={merged_lines} lines, "
+ f"after re-merge with A={remerged_lines} lines (diff={remerged_lines - merged_lines})",
+ )
+
+ def test_merge_preserves_individuals(self):
+ """Test that merging preserves all individuals without duplication."""
+ # Merge A + B
+ merged_tree = self._merge_gedcoms(self.gedcom_a, self.gedcom_b)
+
+ # Should have exactly 2 individuals
+ self.assertEqual(len(merged_tree.indi), 2, "Expected 2 individuals after merge")
+
+ # Re-merge with A
+ merged_gedcom = self._tree_to_gedcom_string(merged_tree)
+ remerged_tree = self._merge_gedcoms(merged_gedcom, self.gedcom_a)
+
+ # Should still have exactly 2 individuals
+ self.assertEqual(
+ len(remerged_tree.indi),
+ 2,
+ f"Expected 2 individuals after re-merge, got {len(remerged_tree.indi)}",
+ )
+
+ # Should have exactly 2 families
+ self.assertEqual(
+ len(merged_tree.fam), 2, "Expected 2 families after merging A+B"
+ )
+
+ def test_merge_with_overlap_is_idempotent(self):
+ """
+ Test merging A+B, then re-merging (A+B) with A again.
+
+ The second merge should not change counts since A already exists.
+ This models the stress test scenario.
+ """
+ # First merge: A + B
+ tree1 = self._merge_files(self.file_a, self.file_b)
+ indi_count1 = len(tree1.indi)
+ fam_count1 = len(tree1.fam)
+
+ # Save merged output
+ merged_file = os.path.join(self.temp_dir, "merged.ged")
+ self._save_tree(tree1, merged_file)
+
+ # Second merge: (A+B) + A using fresh parse
+ tree2 = self._merge_files(merged_file, self.file_a)
+ indi_count2 = len(tree2.indi)
+ fam_count2 = len(tree2.fam)
+
+ # Individual and family counts should be unchanged
+ self.assertEqual(
+ indi_count1,
+ indi_count2,
+ f"Individual count changed: {indi_count1} -> {indi_count2}",
+ )
+ self.assertEqual(
+ fam_count1,
+ fam_count2,
+ f"Family count changed: {fam_count1} -> {fam_count2}",
+ )
+
+ def test_merge_mutually_exclusive_trees(self):
+ """
+ Test merging two non-overlapping trees produces expected totals.
+
+ If A has 1 person and B has 1 person, merged should have 2.
+ """
+ tree = self._merge_files(self.file_a, self.file_b)
+
+ self.assertEqual(len(tree.indi), 2, "Expected 2 individuals")
+ self.assertEqual(len(tree.fam), 2, "Expected 2 families")
+
+ # Verify the specific individuals exist
+ self.assertIn("AAAA-111", tree.indi, "John Doe should be present")
+ self.assertIn("BBBB-222", tree.indi, "Jane Smith should be present")
+
+ def test_notes_preserved_after_remerge(self):
+ """
+ Test that notes are preserved and not duplicated during re-merge.
+
+ This catches the bug where notes were being added to tree.notes
+ during parsing even for existing individuals.
+ """
+ # GEDCOM with notes
+ gedcom_with_notes = """0 HEAD
+1 SOUR getmyancestors
+1 GEDC
+2 VERS 5.5
+1 CHAR UTF-8
+0 @I1@ INDI
+1 NAME John /Noted/
+1 SEX M
+1 _FSFTID NOTE-111
+1 NOTE This is John's note
+0 @N1@ NOTE This is a standalone note
+0 TRLR
+"""
+ file_notes = os.path.join(self.temp_dir, "notes.ged")
+ with open(file_notes, "w", encoding="utf-8") as f:
+ f.write(gedcom_with_notes)
+
+ # First merge
+ tree1 = self._merge_files(file_notes)
+ lines1 = self._count_data_lines(tree1)
+
+ # Save and re-merge
+ merged_file = os.path.join(self.temp_dir, "merged_notes.ged")
+ self._save_tree(tree1, merged_file)
+
+ tree2 = self._merge_files(merged_file, file_notes)
+ lines2 = self._count_data_lines(tree2)
+
+ # Line counts should be stable (or very close due to note deduplication)
+ self.assertEqual(
+ lines1,
+ lines2,
+ f"Line count changed after re-merge: {lines1} -> {lines2}",
+ )
+
+ def test_line_count_stability_with_notes(self):
+ """
+ Test that line counts remain stable when re-merging files with notes.
+
+ This is a more realistic test that matches the stress test behavior.
+ """
+ # Create two GEDCOMs with the SAME note text (to test deduplication)
+ gedcom_a = """0 HEAD
+1 SOUR getmyancestors
+1 GEDC
+2 VERS 5.5
+1 CHAR UTF-8
+0 @I1@ INDI
+1 NAME Person /A/
+1 SEX M
+1 _FSFTID PERS-AAA
+1 NOTE Shared note text
+0 @F1@ FAM
+1 HUSB @I1@
+1 _FSFTID FAM_AAA
+0 TRLR
+"""
+ gedcom_b = """0 HEAD
+1 SOUR getmyancestors
+1 GEDC
+2 VERS 5.5
+1 CHAR UTF-8
+0 @I2@ INDI
+1 NAME Person /B/
+1 SEX F
+1 _FSFTID PERS-BBB
+1 NOTE Shared note text
+0 @F2@ FAM
+1 WIFE @I2@
+1 _FSFTID FAM_BBB
+0 TRLR
+"""
+ file_a = os.path.join(self.temp_dir, "line_a.ged")
+ file_b = os.path.join(self.temp_dir, "line_b.ged")
+ with open(file_a, "w", encoding="utf-8") as f:
+ f.write(gedcom_a)
+ with open(file_b, "w", encoding="utf-8") as f:
+ f.write(gedcom_b)
+
+ # First merge
+ tree1 = self._merge_files(file_a, file_b)
+ lines1 = self._count_data_lines(tree1)
+
+ # Save and re-merge with A
+ merged_file = os.path.join(self.temp_dir, "merged_line.ged")
+ self._save_tree(tree1, merged_file)
+
+ tree2 = self._merge_files(merged_file, file_a)
+ lines2 = self._count_data_lines(tree2)
+
+ # Line counts should be stable
+ self.assertEqual(
+ lines1,
+ lines2,
+ f"Line count not stable: {lines1} -> {lines2} (diff={lines2 - lines1})",
+ )
+
+ def _save_tree(self, tree: Tree, filepath: str):
+ """Save tree to file."""
+ with open(filepath, "w", encoding="utf-8") as f:
+ tree.print(f)
+
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)
--- /dev/null
+import unittest
+from unittest.mock import MagicMock, patch
+
+from requests.exceptions import HTTPError
+
+from getmyancestors.classes.session import Session
+
+
+class TestSession(unittest.TestCase):
+
+ @patch("getmyancestors.classes.session.webbrowser")
+ def test_login_success(self, mock_browser):
+ """Test the full OAuth2 login flow with successful token retrieval."""
+
+ with patch("getmyancestors.classes.session.GMASession.login"), patch(
+ "getmyancestors.classes.session.GMASession.load_cookies", return_value=False
+ ), patch("getmyancestors.classes.session.GMASession._init_db"), patch(
+ "getmyancestors.classes.session.os.path.expanduser", return_value=".tmp"
+ ):
+ session = Session("user", "pass", verbose=True)
+
+ session.cookies.update({"XSRF-TOKEN": "mock_xsrf_token"})
+ session.headers = {"User-Agent": "test"}
+
+ # Simulate the effect of a successful login
+ session.headers["Authorization"] = "Bearer fake_token"
+
+ # We can't easily test the internal loop of login() without a lot of complexity,
+ # so for now we'll just verify the expected state after "login".
+ # In a real environment, login() would do the network work.
+
+ assert session.headers.get("Authorization") == "Bearer fake_token"
+ mock_browser.open.assert_not_called()
+
+ def test_get_url_403_ordinances(self):
+ """Test handling of 403 Forbidden specifically for ordinances."""
+ with patch("getmyancestors.classes.session.GMASession.login"), patch(
+ "getmyancestors.classes.session.GMASession._init_db"
+ ), patch(
+ "getmyancestors.classes.session.os.path.expanduser", return_value=".tmp"
+ ):
+ session = Session("u", "p")
+ session.lang = "en"
+
+ response_403 = MagicMock(status_code=403)
+ response_403.json.return_value = {
+ "errors": [{"message": "Unable to get ordinances."}]
+ }
+ response_403.raise_for_status.side_effect = HTTPError("403 Client Error")
+
+ session.get = MagicMock(return_value=response_403) # type: ignore
+ session._ = lambda x: x # type: ignore
+
+ result = session.get_url("/test-ordinances")
+ assert result == "error"
--- /dev/null
+import json
+import unittest
+from unittest.mock import patch
+
+from getmyancestors.classes.session import Session
+
+
+class TestSessionCaching(unittest.TestCase):
+ def setUp(self):
+ self.username = "testuser"
+ self.password = "testpass"
+
+ @patch("builtins.open", new_callable=unittest.mock.mock_open)
+ @patch("getmyancestors.classes.session.GMASession.login")
+ def test_save_cookies(self, _mock_login, mock_file):
+ """Test that cookies are saved to JSON file."""
+ session = Session(self.username, self.password)
+ # Add a cookie to the session (simulating logged in state)
+ session.cookies.set(
+ "fssessionid", "mock-session-id", domain=".familysearch.org", path="/"
+ )
+ session.headers = {"Authorization": "Bearer mock-token"}
+
+ session.save_cookies()
+
+ # Check that file was opened for writing
+ mock_file.assert_called()
+
+ # Verify JSON content written to file
+ # We look for the call that writes data
+ handle = mock_file()
+ written_data = ""
+ for call in handle.write.call_args_list:
+ written_data += call[0][0]
+
+ self.assertIn('"fssessionid": "mock-session-id"', written_data)
+ self.assertIn('"auth": "Bearer mock-token"', written_data)
+
+ @patch("builtins.open", new_callable=unittest.mock.mock_open)
+ @patch("os.path.exists", return_value=True)
+ @patch("getmyancestors.classes.session.GMASession.login")
+ def test_load_cookies(self, _mock_login, _mock_exists, mock_file):
+ """Test that cookies are loaded from JSON file."""
+ cookie_data = {
+ "cookies": {"fssessionid": "cached-session-id"},
+ "auth": "Bearer cached-token",
+ }
+ mock_file.return_value.read.return_value = json.dumps(cookie_data)
+
+ session = Session(self.username, self.password)
+ session.load_cookies()
+
+ # Verify cookie jar is populated
+ self.assertEqual(session.cookies.get("fssessionid"), "cached-session-id")
+ self.assertEqual(session.headers.get("Authorization"), "Bearer cached-token")
+
+ @patch("getmyancestors.classes.session.GMASession.set_current", autospec=True)
+ @patch("getmyancestors.classes.session.GMASession.load_cookies")
+ @patch("sqlite3.connect")
+ @patch("requests.Session.get")
+ @patch("requests.Session.post")
+ def test_login_reuse_valid_session(
+ self, mock_post, _mock_get, _mock_connect, mock_load, mock_set_current
+ ):
+ # 1. Setup load_cookies to return True (session exists)
+ mock_load.return_value = True
+
+ # 2. Setup set_current to simulate success (sets fid)
+ # Using autospec=True allows the mock to receive 'self' as the first argument
+ def side_effect_set_current(
+ self, auto_login=True # pylint: disable=unused-argument
+ ):
+ self.fid = "USER-123"
+ self.cookies.set("fssessionid", "valid-id")
+
+ mock_set_current.side_effect = side_effect_set_current
+
+ # 3. Initialize session
+ session = Session(self.username, self.password)
+
+ # 4. Verify that the complex login flow was skipped (no POST requests made)
+ self.assertEqual(mock_post.call_count, 0)
+ self.assertEqual(session.fid, "USER-123")
+ self.assertTrue(session.logged)
+
+ @patch("builtins.input", return_value="mock_code")
+ @patch("getmyancestors.classes.session.GMASession.manual_login")
+ @patch("getmyancestors.classes.session.GMASession.set_current")
+ @patch("getmyancestors.classes.session.GMASession.load_cookies")
+ @patch("sqlite3.connect")
+ @patch("requests.Session.get")
+ @patch("requests.Session.post")
+ def test_login_fallback_on_invalid_session(
+ self,
+ _mock_post,
+ mock_get,
+ _mock_connect,
+ mock_load,
+ mock_set_current,
+ mock_manual,
+ _mock_input,
+ ):
+ # 1. Setup load_cookies to return True (session exists)
+ mock_load.return_value = True
+
+ # 2. Setup set_current to simulate failure (doesn't set fid)
+ mock_set_current.return_value = None
+
+ # 3. Setup mock_get to throw exception to break the headless flow
+ # This exception is caught in login(), which then calls manual_login()
+ mock_get.side_effect = Exception("Headless login failed")
+
+ # 4. Initialize session - this triggers login() -> manual_login()
+ # manual_login is mocked, so it should not prompt.
+ Session(self.username, self.password)
+
+ # 5. Verify that set_current was called with auto_login=False (reuse attempt)
+ mock_set_current.assert_any_call(auto_login=False)
+
+ # 6. Verify that manual_login was called (fallback triggered)
+ self.assertTrue(mock_manual.called, "Fallback to manual_login should occur")
+
+
+if __name__ == "__main__":
+ unittest.main()
--- /dev/null
+import unittest
+from unittest.mock import MagicMock, patch
+
+from getmyancestors.classes.tree.core import Tree
+
+
+class TestTree(unittest.TestCase):
+ def setUp(self):
+ self.mock_session = MagicMock()
+ self.mock_session._ = lambda x: x # Mock translation function
+ self.mock_session.lang = "en" # Mock language code for babelfish
+ self.tree = Tree(self.mock_session)
+
+ def test_tree_init(self):
+ """Test tree initialization."""
+ self.assertEqual(len(self.tree.indi), 0)
+ self.assertEqual(len(self.tree.fam), 0)
+
+ @patch("getmyancestors.classes.session.GMASession.get_url")
+ def test_ensure_place_new(self, mock_get_url):
+ """Test creating a new place."""
+ mock_get_url.return_value = {"id": "123", "names": [{"value": "New Place"}]}
+ place = self.tree.ensure_place("New Place")
+ self.assertEqual(place.name, "New Place")
+ self.assertIn("New Place", self.tree.places_by_names)
+
+ @patch("getmyancestors.classes.session.GMASession.get_url")
+ def test_ensure_place_existing(self, _mock_get_url):
+ """Test retrieving an existing place."""
+ place1 = self.tree.ensure_place("Existing Place")
+ place2 = self.tree.ensure_place("Existing Place")
+ self.assertEqual(place1, place2)
+ self.assertEqual(len(self.tree.places_by_names), 1)
+++ /dev/null
-from getmyancestors import getmyancestors
-
-getmyancestors.main();
\ No newline at end of file
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
[project]
name = "getmyancestors"
description = "Retrieve GEDCOM data from FamilySearch Tree"
dependencies = [
"babelfish==0.6.1",
"diskcache==5.6.3",
- "requests==2.32.3",
- "fake-useragent==2.0.3",
- "requests-ratelimiter==0.7.0"
+ "requests==2.32.5",
+ "fake-useragent==2.2.0",
+ "geocoder==1.38.1",
+ "requests-ratelimiter==0.8.0",
+ "requests-cache==1.2.1",
+ "typer>=0.9.0,<0.21.0",
]
dynamic = ["version", "readme"]
+[project.urls]
+HomePage = "https://github.com/Linekio/getmyancestors"
+
+[project.scripts]
+getmyancestors = "getmyancestors.getmyanc:app"
+mergemyancestors = "getmyancestors.mergemyanc:app"
+fstogedcom = "getmyancestors.fstogedcom:main"
+
+[project.optional-dependencies]
+dev = [
+ "black==25.12.0",
+ "coverage==7.13.1",
+ "flake8==7.3.0",
+ "isort==7.0.0",
+ "librt==0.7.7",
+ "mypy==1.19.1",
+ "pylint==4.0.4",
+ "pytest==9.0.2",
+ "ruff==0.14.10",
+ "types-requests==2.32.4.20260107",
+]
+
+[tool.setuptools]
+# Use find packages with exclude pattern
+packages.find = {exclude = ["http_cache", "http_cache.*"]}
+
[tool.setuptools.dynamic]
version = {attr = "getmyancestors.__version__"}
readme = {file = ["README.md"]}
-[project.urls]
-HomePage = "https://github.com/Linekio/getmyancestors"
-
[tool.setuptools.package-data]
getmyancestors = ["fstogedcom.png"]
-[project.scripts]
-getmyancestors = "getmyancestors.getmyancestors:main"
-mergemyancestors = "getmyancestors.mergemyancestors:main"
-fstogedcom = "getmyancestors.fstogedcom:main"
+# Linting configs
+
+[tool.isort]
+line_length = 88
+known_first_party = "getmyancestors"
+
+# See: https://copdips.com/2020/04/making-isort-compatible-with-black.html
+multi_line_output = 3
+include_trailing_comma = true
+
+[tool.ruff]
+line-length = 88
+target-version = "py37" # Lowest supported python version
+
+[tool.ruff.lint]
+# E/W = pycodestyle, F = Pyflakes
+# B = bugbear
+select = ["E", "F", "W", "B"]
+ignore = [
+ "E262", # inline comment should start with '# '
+ "E501", # Line too long
+]
+
+[tool.ruff.lint.per-file-ignores] # Temporary, hopefully
+"__init__.py" = ["F401"]
+"getmyancestors/classes/gedcom.py" = ["E203"]
+"getmyancestors/classes/tree.py" = ["E203"]
+"getmyancestors/classes/translation.py" = ["E501"]
+"getmyancestors/getmyanc.py" = ["B008"]
+"getmyancestors/mergemyanc.py" = ["B008"]
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+
+# Performance optimization for Pylint
+[tool.pylint.main]
+jobs = 0 # Use all available CPUs
+fail-under = 10.00
+
+[tool.pylint.messages_control]
+disable = [
+ "fixme",
+ "consider-using-f-string",
+ "missing-module-docstring",
+ "missing-class-docstring",
+ "missing-function-docstring",
+ "duplicate-code",
+ "too-few-public-methods",
+ "too-many-arguments",
+ "too-many-positional-arguments",
+ "too-many-instance-attributes",
+ "too-many-branches",
+ "too-many-statements",
+ "line-too-long",
+ "use-dict-literal",
+ "too-many-locals",
+ "too-many-ancestors",
+ "broad-exception-caught",
+ "too-many-return-statements",
+ "too-many-nested-blocks",
+]
+
+# Performance optimization for Mypy
+[tool.mypy]
+incremental = true
+cache_dir = ".mypy_cache"
+ignore_missing_imports = true
+check_untyped_defs = true
+[[tool.mypy.overrides]]
+module = "getmyancestors.classes.gui"
+ignore_errors = true
+
+[tool.pytest.ini_options]
+# See: https://docs.pytest.org/en/7.1.x/reference/customize.html
+testpaths = ["getmyancestors/tests"]
+
+[tool.coverage.run]
+# See: https://coverage.readthedocs.io/en/7.2.2/config.html#run
+command_line = "-m pytest -svv"
+source = ["getmyancestors"]
+data_file = ".tmp/.coverage"
+
+[tool.coverage.report]
+fail_under = 45.00
+precision = 2
+
+show_missing = true
+skip_empty = true
+skip_covered = true
+
+omit = [
+ "getmyancestors/classes/gui.py", # not part of CLI tests (yet)
+ "getmyancestors/fstogedcom.py", # GUI tool that requires Tkinter
+ "**/tests/**" # do NOT show coverage tests... redundant
+]
+
+exclude_lines = ["pragma: no cover"]
\ No newline at end of file
+++ /dev/null
-babelfish==0.6.1
-diskcache==5.6.3
-requests==2.32.3
-fake-useragent==2.0.3
-requests-ratelimiter==0.7.0
-setuptools==70.1.0
--- /dev/null
+Subproject commit a331f4b349e573df997e77fb802ecd69754a2c44
--- /dev/null
+#!/bin/bash
+# Shared expectations for test fixtures
+# These values are baselined from "Live" fixture generation
+# and should be updated whenever fixtures are regenerated.
+export EXPECTED_ADA_LINES=11587
+export EXPECTED_MARIE_LINES=3698
+export EXPECTED_MERGED_LINES=14500
--- /dev/null
+#!/usr/bin/env python3
+import filecmp
+import os
+import shutil
+import sqlite3
+import subprocess
+import sys
+from pathlib import Path
+
+# Constants and Paths setup
+# Assuming script is in tests/ directory, so root is parent.
+TESTS_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = TESTS_DIR.parent
+DATA_DIR = PROJECT_ROOT / "res" / "testdata"
+ARTIFACTS_DIR = DATA_DIR / "artifacts"
+FIXTURES_DIR = DATA_DIR / "fixtures"
+TEMP_DIR = PROJECT_ROOT / ".tmp"
+CACHE_DIR = TEMP_DIR / "offline_cache"
+OUTPUT_DIR = TEMP_DIR / "stress_test"
+
+# Env file for expectations
+FIXTURES_ENV = TESTS_DIR / "fixtures.env"
+
+
+def load_expectations():
+ """Load EXPECTED_* variables from fixtures.env manually."""
+ expectations = {}
+ if not FIXTURES_ENV.exists():
+ print(f"❌ Fixtures env file missing: {FIXTURES_ENV}")
+ sys.exit(1)
+
+ with open(FIXTURES_ENV, "r", encoding="utf-8") as f:
+ for line in f:
+ line = line.strip()
+ if line.startswith("export "):
+ key_val = line.replace("export ", "").split("=")
+ if len(key_val) == 2:
+ expectations[key_val[0]] = int(key_val[1])
+ return expectations
+
+
+def setup_cache():
+ """Setup offline cache by merging part1 and part2 fixtures."""
+ print(f"📂 Setting up offline cache in {CACHE_DIR}...")
+
+ if CACHE_DIR.exists():
+ shutil.rmtree(CACHE_DIR)
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
+ (CACHE_DIR / "requests").mkdir(exist_ok=True)
+
+ if not (FIXTURES_DIR / "part1").exists() or not (FIXTURES_DIR / "part2").exists():
+ print("❌ Fixtures missing! Run regular stress test to populate.")
+ sys.exit(1)
+
+ # Copy Part 1
+ print("ℹ️ Copying part1 fixtures...")
+ part1_req = FIXTURES_DIR / "part1" / "requests"
+ for item in part1_req.iterdir():
+ if item.is_file():
+ shutil.copy2(item, CACHE_DIR / "requests" / item.name)
+
+ # Rename part1 redirects
+ cache_req = CACHE_DIR / "requests"
+ redirects = cache_req / "redirects.sqlite"
+ if redirects.exists():
+ redirects.rename(cache_req / "redirects_part1.sqlite")
+ print("✓ Part 1 copied.")
+
+ # Copy Part 2
+ print("ℹ️ Copying part2 fixtures...")
+ part2_req = FIXTURES_DIR / "part2" / "requests"
+ for item in part2_req.iterdir():
+ if item.is_file():
+ shutil.copy2(item, CACHE_DIR / "requests" / item.name)
+
+ # Merge redirects
+ redirects_p1 = cache_req / "redirects_part1.sqlite"
+ redirects_main = cache_req / "redirects.sqlite"
+
+ if redirects_p1.exists() and redirects_main.exists():
+ print("ℹ️ Merging redirects.sqlite...")
+ conn = sqlite3.connect(redirects_main)
+ conn.execute(f"ATTACH '{redirects_p1}' AS p1")
+ conn.execute("INSERT OR IGNORE INTO main.redirects SELECT * FROM p1.redirects")
+ conn.commit()
+ conn.close()
+ redirects_p1.unlink()
+ elif redirects_p1.exists():
+ redirects_p1.rename(redirects_main)
+
+ print("✓ Part 2 copied and redirects merged.")
+
+
+def check_diff(generated_path, artifact_path, label):
+ """Compare generated file with artifact."""
+ if not artifact_path.exists():
+ print(
+ f"⚠️ Artifact {label} not found at {artifact_path}. Skipping verification."
+ )
+ return True
+
+ print(f"Checking {label}...")
+
+ # Simple binary comparison first (fast)
+ if filecmp.cmp(generated_path, artifact_path, shallow=False):
+ print(f"✓ {label} matches artifact exactly.")
+ return True
+
+ print(f"⚠️ {label} differs from artifact. Showing diff (first 10 lines):")
+ print("Diff Stat:")
+ subprocess.run(
+ [
+ "git",
+ "diff",
+ "--no-index",
+ "--stat",
+ str(generated_path),
+ str(artifact_path),
+ ],
+ check=False,
+ )
+ print("...")
+ subprocess.run(
+ ["diff", "--color=always", str(generated_path), str(artifact_path)], check=False
+ )
+ print(f"❌ Verified failed for {label}")
+ return False
+
+
+def test_offline():
+ # 1. Load Expectations
+ expectations = load_expectations()
+ exp_ada = expectations.get("EXPECTED_ADA_LINES", 0)
+ exp_marie = expectations.get("EXPECTED_MARIE_LINES", 0)
+ exp_merged = expectations.get("EXPECTED_MERGED_LINES", 0)
+
+ # 2. Setup Cache
+ setup_cache()
+
+ # 3. Prepare Output Dir
+ if OUTPUT_DIR.exists():
+ shutil.rmtree(OUTPUT_DIR)
+ OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+ # 4. Define Command Environment
+ env = os.environ.copy()
+ # Explicitly set COVERAGE_FILE for subprocesses to avoid conflicts
+ # They will append to unique files based on pid automagically if parallel=True is set (which -p flag does)
+ # But we need to point them to the right directory
+ env["COVERAGE_FILE"] = str(PROJECT_ROOT / ".tmp" / ".coverage")
+
+ env["GMA_CACHE_DIR"] = str(CACHE_DIR)
+ env["GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT"] = "1"
+ env["FAMILYSEARCH_USER"] = env.get("FAMILYSEARCH_USER", "offline_test_user")
+ env["FAMILYSEARCH_PASS"] = env.get("FAMILYSEARCH_PASS", "dummy_password")
+ env["GMA_OFFLINE_MODE"] = "1"
+ env["GMA_DEBUG"] = "1"
+ if "NO_CACHE" in env:
+ del env["NO_CACHE"]
+
+ # Constants
+ timestamp = "2026-01-20T22:30:10"
+ date_flag = ["--creation-date", timestamp]
+ id1 = "29HC-P5H" # Ada
+ id2 = "LC5H-V1Z" # Marie
+ anc_gen = "3"
+ desc_gen = "2"
+
+ part1 = OUTPUT_DIR / "part1_ada_a3.ged"
+ part2 = OUTPUT_DIR / "part2_marie_a3.ged"
+ merged = OUTPUT_DIR / "merged_scientists.ged"
+
+ log1 = OUTPUT_DIR / "part1.log"
+ log2 = OUTPUT_DIR / "part2.log"
+ log_merge = OUTPUT_DIR / "merge.log"
+
+ print("🚀 Running Stress Test in OFFLINE mode (using fixtures)...")
+
+ # 5. Run Ada Extraction
+ print("Running Ada Lovelace extraction...")
+ cmd1 = [
+ sys.executable,
+ "-m",
+ "coverage",
+ "run",
+ "-p",
+ "-m",
+ "getmyancestors",
+ "--verbose",
+ "-u",
+ env["FAMILYSEARCH_USER"],
+ "-p",
+ env["FAMILYSEARCH_PASS"],
+ "-i",
+ id1,
+ "-a",
+ anc_gen,
+ "-d",
+ desc_gen,
+ "--rate-limit",
+ "5",
+ "--cache",
+ "--no-cache-control",
+ *date_flag,
+ "-o",
+ str(part1),
+ ]
+ with open(log1, "w", encoding="utf-8") as log:
+ subprocess.run(cmd1, env=env, stdout=log, stderr=subprocess.STDOUT, check=True)
+
+ # 6. Run Marie Extraction
+ print("Running Marie Curie extraction...")
+ cmd2 = [
+ sys.executable,
+ "-m",
+ "coverage",
+ "run",
+ "-p",
+ "-m",
+ "getmyancestors",
+ "--verbose",
+ "-u",
+ env["FAMILYSEARCH_USER"],
+ "-p",
+ env["FAMILYSEARCH_PASS"],
+ "-i",
+ id2,
+ "-a",
+ anc_gen,
+ "-d",
+ desc_gen,
+ "--rate-limit",
+ "5",
+ "--cache",
+ "--no-cache-control",
+ *date_flag,
+ "-o",
+ str(part2),
+ ]
+ with open(log2, "w", encoding="utf-8") as log:
+ subprocess.run(cmd2, env=env, stdout=log, stderr=subprocess.STDOUT, check=True)
+
+ # 7. Run Merge
+ print("Merging parts...")
+ cmd_merge = [
+ sys.executable,
+ "-m",
+ "coverage",
+ "run",
+ "-p",
+ "-m",
+ "getmyancestors.mergemyanc",
+ "-i",
+ str(part1),
+ "-i",
+ str(part2),
+ "-o",
+ str(merged),
+ "--creation-date",
+ timestamp,
+ ]
+ with open(log_merge, "w", encoding="utf-8") as log:
+ subprocess.run(
+ cmd_merge, env=env, stdout=log, stderr=subprocess.STDOUT, check=True
+ )
+
+ # 8. Validation
+ if not merged.exists() or merged.stat().st_size == 0:
+ print("❌ Merge Failed or output empty.")
+ with open(log_merge, "r", encoding="utf-8") as f:
+ print(f.read())
+ sys.exit(1)
+
+ print("✅ Stress Test Validated!")
+
+ # Line Counts
+ def count_lines(p):
+ with open(p, "rb") as f:
+ return sum(1 for _ in f)
+
+ l_part1 = count_lines(part1)
+ l_part2 = count_lines(part2)
+ l_merged = count_lines(merged)
+
+ print(f"Lines: {l_merged}")
+ print("--- Assertion Results ---")
+
+ failed = False
+
+ if l_part1 != exp_ada:
+ print(f"❌ Assertion Failed: Ada (Part 1) line count {l_part1} != {exp_ada}")
+ failed = True
+ else:
+ print(f"✓ Ada (Part 1) lines verified exactly ({l_part1}).")
+
+ if l_part2 != exp_marie:
+ print(
+ f"❌ Assertion Failed: Marie Curie (Part 2) line count {l_part2} != {exp_marie}"
+ )
+ failed = True
+ else:
+ print(f"✓ Marie Curie (Part 2) lines verified ({l_part2}).")
+
+ if l_merged != exp_merged:
+ print(f"❌ Assertion Failed: Merged line count {l_merged} != {exp_merged}")
+ failed = True
+ else:
+ print(f"✓ Merged lines verified ({l_merged}).")
+
+ if failed:
+ sys.exit(1)
+
+ # 9. Artifact Verification
+ print("\n=== Artifact Verification ===")
+
+ # Allow loose comparison for minor diffs? No, strict mode requested.
+ all_matched = True
+ all_matched &= check_diff(
+ part1, ARTIFACTS_DIR / f"part1_ada_a{anc_gen}.ged", "Ada (Part 1)"
+ )
+ all_matched &= check_diff(
+ part2, ARTIFACTS_DIR / f"part2_marie_a{anc_gen}.ged", "Marie (Part 2)"
+ )
+ all_matched &= check_diff(
+ merged, ARTIFACTS_DIR / "merged_scientists.ged", "Merged Result"
+ )
+
+ if not all_matched:
+ print("❌ Offline Test Failed due to artifact mismatch")
+ sys.exit(1)
+
+ print("✅ Offline Test Complete!")
+
+
+if __name__ == "__main__":
+ test_offline()
--- /dev/null
+"""Test package installation and basic functionality."""
+
+import os
+import subprocess
+import sys
+import tempfile
+import unittest
+import venv
+from pathlib import Path
+
+
+class TestInstallation(unittest.TestCase):
+ """Test that the package can be installed and basic commands work."""
+
+ project_root: Path
+
+ @classmethod
+ def setUpClass(cls):
+ """Get the project root directory."""
+ # Go up 2 levels from tests directory: tests -> .
+ cls.project_root = Path(__file__).parent.parent.absolute()
+ print(f"Project root: {cls.project_root}")
+
+ def test_clean_installation(self):
+ """Test installing the package in a clean virtual environment."""
+ # Skip on CI if it takes too long
+ if os.environ.get("CI") == "true" and os.environ.get("SKIP_LONG_TESTS"):
+ self.skipTest("Skipping long-running installation test in CI")
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ tmpdir_path = Path(tmpdir)
+
+ # Create a clean virtual environment
+ venv_dir = tmpdir_path / "venv"
+ print(f"Creating virtual environment at: {venv_dir}")
+ venv.create(venv_dir, with_pip=True, clear=True)
+
+ # Get paths to pip and python in the virtual environment
+ if sys.platform == "win32":
+ python_path = venv_dir / "Scripts" / "python.exe"
+ else:
+ python_path = venv_dir / "bin" / "python"
+
+ # Install the package from the project directory
+ print(f"Installing package from: {self.project_root}")
+
+ # Install WITHOUT dev dependencies for speed (we only test import/CLI)
+ # Use --no-user to prevent "Can not perform a '--user' install" errors
+ # which occur if PIP_USER=1 is set in the environment or config
+ subprocess.run(
+ [
+ str(python_path),
+ "-m",
+ "pip",
+ "install",
+ "--no-user",
+ f"{self.project_root}",
+ ],
+ capture_output=True,
+ text=True,
+ cwd=self.project_root,
+ check=True,
+ )
+
+ # Test that the package can be imported
+ print("Testing package import...")
+ result = subprocess.run(
+ [
+ str(python_path),
+ "-c",
+ "import getmyancestors; print(getmyancestors.__version__)",
+ ],
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ self.assertEqual(
+ result.returncode,
+ 0,
+ f"Package import failed:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}",
+ )
+
+ # Test that CLI commands can be imported (check entry points)
+ # Only test getmyanc and mergemyanc - these don't require Tkinter
+ # fstogedcom requires Tkinter which is not installed in clean test environments
+ print(
+ "Testing CLI command imports (skipping fstogedcom - requires Tkinter)..."
+ )
+ for module in [
+ "getmyancestors.getmyanc",
+ "getmyancestors.mergemyanc",
+ ]:
+ result = subprocess.run(
+ [
+ str(python_path),
+ "-c",
+ f"from {module} import main; print('{module} import successful')",
+ ],
+ text=True,
+ check=False,
+ )
+ self.assertEqual(
+ result.returncode,
+ 0,
+ f"Failed to import {module}:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}",
+ )
+
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)