From: Shane Jaroch Date: Thu, 22 Jan 2026 22:01:33 +0000 (-0500) Subject: Major refactor: Typer CLI, comprehensive tests, and CI improvements X-Git-Url: https://git.nutra.tk/v2?a=commitdiff_plain;h=d7b494250a78248dc82e92a2d4f958d741a6a45c;p=gamesguru%2Fgetmyancestors.git Major refactor: Typer CLI, comprehensive tests, and CI improvements - Migrate CLI to Typer for tab completion and better argument handling - Split tree.py into modular tree/ package (core, records, elements, utils) - Add comprehensive test suite with merge idempotency, session caching, etc. - Add offline test fixtures via git submodule (res/testdata) - Set up GitHub Actions CI with Coveralls integration - Add git-sqlite-filter for reproducible SQLite fixture handling - Improve merge deduplication for names, facts, notes, and sources - Fix session cookie/caching and rate limiting - Add Makefile for common dev tasks Modernizes the tool with a new CLI, faster and safer networking, and a refactored tree/session architecture to improve reliability and performance. Adds CI and tests, and consolidates packaging for easier development. - **New Features** - New CLI with exclude list and distance options; UTF‑8-safe output. - HTTP caching (requests-cache), optional cache-control, and request rate limiting; persistent cookie cache. - Offline mode to use cached data without logging in. - Place enrichment via Geonames/geocoder with structured Place objects. - Clear GUI Tkinter error; first‑run license prompt stored locally. - Immigration fact mapping and contributor attribution from change logs. - **Refactors** - New GMASession with cached session; switched to FS IDs; tree split into elements/records; legacy getmyancestors.py removed; entry points updated. - Added unit/integration tests and GitHub Actions (Linux/macOS/Windows); Makefile targets; linting/formatting gates. - Dependencies consolidated in pyproject; added requests-cache, requests-ratelimiter, geocoder; removed requirements.txt. - Fixed merge duplication; idempotent merges via set-based dedup in mergemyanc. --- diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..b83b506 --- /dev/null +++ b/.envrc @@ -0,0 +1,9 @@ +if [ -f .venv/bin/activate ]; then + source .venv/bin/activate +fi +unset PS1 +if [ -f .env ]; then + dotenv .env +fi +export PYTHONPATH=. + diff --git a/.geminiignore b/.geminiignore new file mode 100644 index 0000000..ee08843 --- /dev/null +++ b/.geminiignore @@ -0,0 +1,13 @@ +!.gemini/ +!test_debug.py + +.venv/ +build +*.egg-info +!.envrc +!.env + +.coverage +http_cache +.pytest_cache +.tmp diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..660ac87 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# SQLite files use git-sqlite-filter for readable diffs +*.sqlite filter=sqlite diff=sqlite diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bb8bcc1..467a313 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -4,4 +4,4 @@ Current behavior: Expected behavior: -Corresponding Familysearch link (optional): \ No newline at end of file +Corresponding Familysearch link (optional): diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..b25f1d4 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,81 @@ +--- +name: ci + +"on": + push: {} + +permissions: + contents: read + +jobs: + test-core: + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + env: + SKIP_VENV: 1 + steps: + - name: Install git-sqlite-filter + run: | + pip install pipx + pipx install git+https://github.com/gamesguru/git-sqlite-filter.git@c2a4089 + pipx ensurepath + echo "$HOME/.local/bin" >> $GITHUB_PATH + git config --global filter.sqlite.clean "git-sqlite-clean %f" + git config --global filter.sqlite.smudge "git-sqlite-smudge %f" + git config --global filter.sqlite.required true + + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Fetch master + run: git fetch origin master + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: 3 + cache: "pip" + + - name: Install dependencies + run: pip install ".[dev]" + + - name: Format Check + run: make format + if: runner.os == 'Linux' + + - name: Lint + run: make lint + if: runner.os == 'Linux' + + - name: Test [Unit] + run: make test/unit + + - name: Test [Offline Verification] + shell: bash + # Fails due to bash script not being cross-platform out of the box + continue-on-error: ${{ matrix.os == 'windows-latest' }} + run: make test/offline + + - name: Combine Coverage + run: make test/cov + + - name: Coveralls + uses: coverallsapp/github-action@v2 + with: + parallel: true + flag-name: run-${{ matrix.os }} + + finish: + needs: test-core + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: Coveralls Finished + uses: coverallsapp/github-action@v2 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + parallel-finished: true diff --git a/.gitignore b/.gitignore index 0679b96..ab1628e 100644 --- a/.gitignore +++ b/.gitignore @@ -133,15 +133,21 @@ dmypy.json # Redis dump.rdb -# Dotfiles -.* -!.gitignore -!.readthedocs.yml - # vscode .vscode/ # getmyancestors stuff *.log +*.txt *.settings -*.ged \ No newline at end of file +*.ged +*.db +*.sqlite +*.sqlite3 + +!.geminiignore +/test_debug.py + +.tmp/ +.tmp +tests/cache/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e661fd3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/data"] + path = res/testdata + url = https://gitlab.com/gg-io/getmyancestors-tests-data.git diff --git a/.mailmap b/.mailmap new file mode 100644 index 0000000..01b8a5d --- /dev/null +++ b/.mailmap @@ -0,0 +1,35 @@ +# Benoît Fontaine +Linekio Benoît Fontaine +Linekio Linekio +Linekio +Linekio +Linekio +Linekio linek +Linekio Benoît +Linekio benoit-phytocontrol + +# Adriaan Joubert +adriaanjoubert <45142747+adriaanjoubert@users.noreply.github.com> +adriaanjoubert Adriaan Joubert + +# Fred Wright +fhgwright Fred Wright + +# Melroy van den Berg +melroy89 Melroy van den Berg + +# Giulio Genovese +a2800276 Giulio Genovese + +# Tim Becker +freeseek Tim Becker + +# Jadson Matos +jadsongmatos jadsongmatos + +# Clean IDs (easy mappings) +changeling +bsudy +gamesguru +josemando +sebdu66 <52951441+sebdu66@users.noreply.github.com> diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f0eae5c --- /dev/null +++ b/Makefile @@ -0,0 +1,142 @@ +SHELL:=/bin/bash +PYTHON ?= python3 +.DEFAULT_GOAL=_help + +.PHONY: _help +_help: + @printf "\nUsage: make , valid commands:\n\n" + @grep -h "##H@@" $(MAKEFILE_LIST) | grep -v IGNORE_ME | sed -e 's/##H@@//' | column -t -s $$'\t' + +# help: ## Show this help +# @grep -Eh '\s##\s' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' + + +# -include .env + + +.PHONY: test/unit +test/unit: ##H@@ Run Unit tests only + $(PYTHON) -m coverage run -p -m pytest getmyancestors/tests + +# Installation +.PHONY: deps +deps: ##H@@ Install dependencies + $(PYTHON) -m pip install --no-user ".[dev]" + +# Installation tests +.PHONY: test/install +test/install: ##H@@ Run installation tests + $(PYTHON) -m coverage run -p -m pytest tests/test_installation.py + +.PHONY: test/offline +test/offline: ##H@@ Run offline verification (requires fixtures) + $(PYTHON) -m pytest tests/offline_test.py + + +# Generate targets for all test files (enables autocomplete) +TEST_FILES := $(wildcard getmyancestors/tests/test_*.py) +TEST_TARGETS := $(patsubst getmyancestors/tests/%.py,test/unit/%,$(TEST_FILES)) + +.PHONY: $(TEST_TARGETS) +$(TEST_TARGETS): test/unit/%: + pytest getmyancestors/tests/$*.py -v + +.PHONY: test/ +test/: ##H@@ Run unit & E2E tests +test/: test/unit test/offline test/cov + +.PHONY: test/cov +test/cov: ##H@@ Combine all coverage data and show report + -$(PYTHON) -m coverage combine + $(PYTHON) -m coverage report + + +REMOTE_HEAD ?= origin/master +PY_CHANGED_FILES ?= $(shell git diff --name-only --diff-filter=MACU $(REMOTE_HEAD) '*.py') +PY_CHANGED_FILES_FLAG ?= $(if $(PY_CHANGED_FILES),1,) +SH_ALL_FILES ?= $(shell git ls-files '*.sh') +PRETTIER_ALL_FILES ?= $(shell git ls-files '*.js' '*.css' '*.html' '*.md' '*.yaml' '*.yml') + +.PHONY: format +format: ##H@@ Format with black & isort + # ================================================== + # formatting + # ================================================== + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # prettier (optional) + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -prettier --write ${PRETTIER_ALL_FILES} + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # shfmt (optional) + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -shfmt -w ${SH_ALL_FILES} + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # isort + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \ + isort ${PY_CHANGED_FILES}; \ + fi + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # black + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \ + black ${PY_CHANGED_FILES}; \ + fi + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # ruff (format) + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \ + ruff check --fix --exit-zero $(ARGS) ${PY_CHANGED_FILES}; \ + fi + +.PHONY: lint/ +lint/: lint/ruff lint/pylint lint/mypy +lint/: ##H@@ Lint with ruff, pylint, and mypy + +.PHONY: lint +lint: lint/ + +.PHONY: lint/ruff +lint/ruff: + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # ruff (lint) + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \ + ruff check ${PY_CHANGED_FILES}; \ + fi + +.PHONY: lint/pylint +lint/pylint: + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # pylint + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \ + pylint -j 0 ${PY_CHANGED_FILES}; \ + fi + +.PHONY: lint/mypy +lint/mypy: + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # mypy + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -if [ -n "${PY_CHANGED_FILES_FLAG}" ]; then \ + mypy ${PY_CHANGED_FILES}; \ + fi + +.PHONY: pylint +pylint: lint/pylint + +.PHONY: mypy +mypy: lint/mypy + + +.PHONY: clean +clean: ##H@@ Clean up build files/cache + rm -rf *.egg-info build dist .coverage .coverage.* + rm -rf .tmp .pytest_cache .ruff_cache .mypy_cache + # One unified find command to clean python artifacts while ignoring .venv + find . -type d -name ".venv" -prune -o \ + \( \( -name "__pycache__" -o -name "http_cache" \) -type d -o \ + \( -name "*.pyc" -o -name "*.pyo" -o -name "*.pyd" -o -name "*.so" \) -type f \) \ + -exec rm -rf {} + + @echo "✓ Cleaned build files, caches, and test artifacts" diff --git a/README.md b/README.md index 100fb5e..97b840f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -getmyancestors -============== +# getmyancestors _getmyancestors_ is a python3 package that downloads family trees in GEDCOM format from FamilySearch. @@ -9,9 +8,7 @@ The project is maintained at https://github.com/Linekio/getmyancestors. Visit he This script requires python3 and the modules indicated in the requirements.txt file. To install the modules, run in your terminal: - -Installation -============ +# Installation The easiest way to install _getmyancestors_ is to use pip: @@ -21,74 +18,19 @@ Otherwise, you can download the source package and then execute in the folder: `pip install .` -How to use -========== - -With graphical user interface: - -``` -fstogedcom -``` - -Command line examples: - -Download four generations of ancestors for the main individual in your tree and output gedcom on stdout (will prompt for username and password): - -``` -getmyancestors -``` - -Download four generations of ancestors and output gedcom to a file while generating a verbode stderr (will prompt for username and password): - -``` -getmyancestors -o out.ged -v -``` - -Download four generations of ancestors for individual LF7T-Y4C and generate a verbose log file: - -``` -getmyancestors -u username -p password -i LF7T-Y4C -o out.ged -l out.log -v -``` +For development with linting and testing tools: -Download six generations of ancestors for individual LF7T-Y4C and generate a verbose log file: +`pip install ".[dev]"` -``` -getmyancestors -a 6 -u username -p password -i LF7T-Y4C -o out.ged -l out.log -v -``` +### GUI Installation (optional) -Download four generations of ancestors for individual LF7T-Y4C including all their children and their children spouses: +For the graphical interface (`fstogedcom`), you may need to install Tkinter: -``` -getmyancestors -d 1 -m -u username -p password -i LF7T-Y4C -o out.ged -``` +- **Ubuntu/Debian**: `sudo apt install python3-tk` +- **Fedora/RHEL**: `sudo dnf install python3-tkinter` +- **macOS**: `brew install python-tk` or use the official Python installer +- **Windows**: Usually included with Python installation -Download six generations of ancestors for individuals L4S5-9X4 and LHWG-18F including all their children, grandchildren and their spouses: +# How to use -``` -getmyancestors -a 6 -d 2 -m -u username -p password -i L4S5-9X4 LHWG-18F -o out.ged -``` - -Download four generations of ancestors for individual LF7T-Y4C including LDS ordinances (need LDS account) - -``` -getmyancestors -c -u username -p password -i LF7T-Y4C -o out.ged -``` - -Merge two Gedcom files - -``` -mergemyancestors -i file1.ged file2.ged -o out.ged -``` - - -Support -======= - -Submit questions or suggestions, or feature requests by opening an Issue at https://github.com/Linekio/getmyancestors/issues - -Donation -======== - -If this project help you, you can give me a tip :) - -[![paypal](https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=98X3CY93XTAYJ) +With graphical user interface: diff --git a/getmyancestors/__init__.py b/getmyancestors/__init__.py index 79d2b16..91a0d77 100644 --- a/getmyancestors/__init__.py +++ b/getmyancestors/__init__.py @@ -1,6 +1,4 @@ # coding: utf-8 -from . import getmyancestors -from . import mergemyancestors -__version__ = "1.0.6" +__version__ = "1.1.2" diff --git a/getmyancestors/__main__.py b/getmyancestors/__main__.py index 3b766b3..9eba18a 100644 --- a/getmyancestors/__main__.py +++ b/getmyancestors/__main__.py @@ -1,3 +1,4 @@ -from getmyancestors import getmyancestors +from .getmyanc import app -getmyancestors.main() +if __name__ == "__main__": + app() diff --git a/getmyancestors/classes/constants.py b/getmyancestors/classes/constants.py index 9b80a64..3f428f1 100644 --- a/getmyancestors/classes/constants.py +++ b/getmyancestors/classes/constants.py @@ -4,11 +4,11 @@ MAX_PERSONS = 200 FACT_TAG_EVENT_TYPE = { - 'BIRT': 'Birth', - 'DEAT': 'Death', - 'BURI': 'Burial', - 'CREM': 'Cremation', - 'NATU': 'Naturalization', + "BIRT": "Birth", + "DEAT": "Death", + "BURI": "Burial", + "CREM": "Cremation", + "NATU": "Naturalization", } FACT_TAGS = { @@ -32,6 +32,7 @@ FACT_TAGS = { "http://gedcomx.org/Cremation": "CREM", "http://gedcomx.org/Caste": "CAST", "http://gedcomx.org/Nationality": "NATI", + "http://gedcomx.org/Immigration": "IMMI", } FACT_EVEN = { @@ -52,8 +53,9 @@ ORDINANCES_STATUS = { "NotNeeded": "INFANT", } + # mergemyancestors constants and functions -def reversed_dict(d): +def reversed_dict(d: dict) -> dict: return {val: key for key, val in d.items()} diff --git a/getmyancestors/classes/gedcom.py b/getmyancestors/classes/gedcom.py index 3dbb10d..7b7b17e 100644 --- a/getmyancestors/classes/gedcom.py +++ b/getmyancestors/classes/gedcom.py @@ -1,15 +1,27 @@ -# mergemyancestors classes +import os +import sys +from typing import Optional + +from getmyancestors.classes.constants import FACT_TYPES, ORDINANCES from getmyancestors.classes.tree import ( - Indi, Fact, Fam, + Indi, Memorie, Name, Note, Ordinance, Source, ) -from getmyancestors.classes.constants import FACT_TYPES, ORDINANCES + + +def _warn(msg: str): + """Write a warning message to stderr with optional color (if TTY).""" + use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "") + if use_color: + sys.stderr.write(f"\033[33m{msg}\033[0m\n") + else: + sys.stderr.write(f"{msg}\n") class Gedcom: @@ -21,8 +33,8 @@ class Gedcom: self.tree = tree self.level = 0 self.pointer = None - self.tag = None - self.data = None + self.tag: Optional[str] = None + self.data: Optional[str] = None self.flag = False self.indi = dict() self.fam = dict() @@ -34,22 +46,22 @@ class Gedcom: def __parse(self): """Parse the GEDCOM file into self.tree""" while self.__get_line(): - if self.tag == "INDI": - self.num = int(self.pointer[2 : len(self.pointer) - 1]) + if self.tag == "INDI" and self.pointer: + self.num = self.pointer[2 : len(self.pointer) - 1] self.indi[self.num] = Indi(tree=self.tree, num=self.num) self.__get_indi() - elif self.tag == "FAM": - self.num = int(self.pointer[2 : len(self.pointer) - 1]) + elif self.tag == "FAM" and self.pointer: + self.num = self.pointer[2 : len(self.pointer) - 1] if self.num not in self.fam: self.fam[self.num] = Fam(tree=self.tree, num=self.num) self.__get_fam() - elif self.tag == "NOTE": - self.num = int(self.pointer[2 : len(self.pointer) - 1]) + elif self.tag == "NOTE" and self.pointer: + self.num = self.pointer[2 : len(self.pointer) - 1] if self.num not in self.note: self.note[self.num] = Note(tree=self.tree, num=self.num) self.__get_note() elif self.tag == "SOUR" and self.pointer: - self.num = int(self.pointer[2 : len(self.pointer) - 1]) + self.num = self.pointer[2 : len(self.pointer) - 1] if self.num not in self.sour: self.sour[self.num] = Source(num=self.num) self.__get_source() @@ -107,16 +119,19 @@ class Gedcom: elif self.tag == "SLGC": self.indi[self.num].sealing_child = self.__get_ordinance() elif self.tag == "FAMS": - self.indi[self.num].fams_num.add(int(self.data[2 : len(self.data) - 1])) + if self.data: + self.indi[self.num].fams_num.add(self.data[2 : len(self.data) - 1]) elif self.tag == "FAMC": - self.indi[self.num].famc_num.add(int(self.data[2 : len(self.data) - 1])) + if self.data: + self.indi[self.num].famc_num.add(self.data[2 : len(self.data) - 1]) elif self.tag == "_FSFTID": self.indi[self.num].fid = self.data elif self.tag == "NOTE": - num = int(self.data[2 : len(self.data) - 1]) - if num not in self.note: - self.note[num] = Note(tree=self.tree, num=num) - self.indi[self.num].notes.add(self.note[num]) + if self.data: + num = self.data[2 : len(self.data) - 1] + if num not in self.note: + self.note[num] = Note(tree=self.tree, num=num) + self.indi[self.num].notes.add(self.note[num]) elif self.tag == "SOUR": self.indi[self.num].sources.add(self.__get_link_source()) elif self.tag == "OBJE": @@ -127,11 +142,14 @@ class Gedcom: """Parse a family""" while self.__get_line() and self.level > 0: if self.tag == "HUSB": - self.fam[self.num].husb_num = int(self.data[2 : len(self.data) - 1]) + if self.data: + self.fam[self.num].husb_num = self.data[2 : len(self.data) - 1] elif self.tag == "WIFE": - self.fam[self.num].wife_num = int(self.data[2 : len(self.data) - 1]) + if self.data: + self.fam[self.num].wife_num = self.data[2 : len(self.data) - 1] elif self.tag == "CHIL": - self.fam[self.num].chil_num.add(int(self.data[2 : len(self.data) - 1])) + if self.data: + self.fam[self.num].chil_num.add(self.data[2 : len(self.data) - 1]) elif self.tag in FACT_TYPES: self.fam[self.num].facts.add(self.__get_fact()) elif self.tag == "SLGS": @@ -139,10 +157,11 @@ class Gedcom: elif self.tag == "_FSFTID": self.fam[self.num].fid = self.data elif self.tag == "NOTE": - num = int(self.data[2 : len(self.data) - 1]) - if num not in self.note: - self.note[num] = Note(tree=self.tree, num=num) - self.fam[self.num].notes.add(self.note[num]) + if self.data: + num = self.data[2 : len(self.data) - 1] + if num not in self.note: + self.note[num] = Note(tree=self.tree, num=num) + self.fam[self.num].notes.add(self.note[num]) elif self.tag == "SOUR": self.fam[self.num].sources.add(self.__get_link_source()) self.flag = True @@ -171,13 +190,14 @@ class Gedcom: added = True elif self.tag == "NICK": nick = Name() - nick.given = self.data + nick.given = self.data or "" self.indi[self.num].nicknames.add(nick) elif self.tag == "NOTE": - num = int(self.data[2 : len(self.data) - 1]) - if num not in self.note: - self.note[num] = Note(tree=self.tree, num=num) - name.note = self.note[num] + if self.data: + num = self.data[2 : len(self.data) - 1] + if num not in self.note: + self.note[num] = Note(tree=self.tree, num=num) + name.note = self.note[num] if not added: self.indi[self.num].birthnames.add(name) self.flag = True @@ -194,21 +214,22 @@ class Gedcom: if self.tag == "DATE": fact.date = self.__get_text() elif self.tag == "PLAC": - fact.place = self.__get_text() + fact.place = self.tree.ensure_place(self.__get_text()) elif self.tag == "MAP": fact.map = self.__get_map() elif self.tag == "NOTE": - if self.data[:12] == "Description:": + if self.data and self.data[:12] == "Description:": fact.value = self.data[13:] continue - num = int(self.data[2 : len(self.data) - 1]) - if num not in self.note: - self.note[num] = Note(tree=self.tree, num=num) - fact.note = self.note[num] + if self.data: + num = self.data[2 : len(self.data) - 1] + if num not in self.note: + self.note[num] = Note(tree=self.tree, num=num) + fact.note = self.note[num] elif self.tag == "CONT": - fact.value += "\n" + self.data + fact.value = (fact.value or "") + "\n" + (self.data or "") elif self.tag == "CONC": - fact.value += self.data + fact.value = (fact.value or "") + (self.data or "") self.flag = True return fact @@ -226,12 +247,12 @@ class Gedcom: def __get_text(self): """Parse a multiline text""" - text = self.data + text = self.data or "" while self.__get_line(): if self.tag == "CONT": - text += "\n" + self.data + text += "\n" + (self.data if self.data else "") elif self.tag == "CONC": - text += self.data + text += self.data if self.data else "" else: break self.flag = True @@ -253,15 +274,19 @@ class Gedcom: else: self.tree.sources[self.data] = self.sour[self.num] elif self.tag == "NOTE": - num = int(self.data[2 : len(self.data) - 1]) - if num not in self.note: - self.note[num] = Note(tree=self.tree, num=num) - self.sour[self.num].notes.add(self.note[num]) + if self.data: + num = self.data[2 : len(self.data) - 1] + if num not in self.note: + self.note[num] = Note(tree=self.tree, num=num) + self.sour[self.num].notes.add(self.note[num]) self.flag = True def __get_link_source(self): """Parse a link to a source""" - num = int(self.data[2 : len(self.data) - 1]) + num = "0" + if self.data: + num = self.data[2 : len(self.data) - 1] + if num not in self.sour: self.sour[num] = Source(num=num) page = None @@ -298,28 +323,54 @@ class Gedcom: elif self.tag == "STAT": ordinance.status = ORDINANCES[self.data] elif self.tag == "FAMC": - num = int(self.data[2 : len(self.data) - 1]) - if num not in self.fam: - self.fam[num] = Fam(tree=self.tree, num=num) - ordinance.famc = self.fam[num] + if self.data: + num = self.data[2 : len(self.data) - 1] + if num not in self.fam: + self.fam[num] = Fam(tree=self.tree, num=num) + ordinance.famc = self.fam[num] self.flag = True return ordinance def __add_id(self): """Reset GEDCOM identifiers""" - for num in self.fam: - if self.fam[num].husb_num: - self.fam[num].husb_fid = self.indi[self.fam[num].husb_num].fid - if self.fam[num].wife_num: - self.fam[num].wife_fid = self.indi[self.fam[num].wife_num].fid - for chil in self.fam[num].chil_num: - self.fam[num].chil_fid.add(self.indi[chil].fid) - for num in self.indi: - for famc in self.indi[num].famc_num: - self.indi[num].famc_fid.add( - (self.fam[famc].husb_fid, self.fam[famc].wife_fid) + # Set fallback fid from GEDCOM pointer if _FSFTID was not present + for num, indi in self.indi.items(): + if indi.fid is None: + name_str = str(indi.name) if indi.name else "Unknown" + _warn( + f"Warning: Individual @I{num}@ ({name_str}) missing _FSFTID tag, " + f"using GEDCOM pointer as fallback." ) - for fams in self.indi[num].fams_num: - self.indi[num].fams_fid.add( - (self.fam[fams].husb_fid, self.fam[fams].wife_fid) + indi.fid = num # Use GEDCOM pointer ID as fallback + + for num, fam in self.fam.items(): + if fam.fid is None: + husb_name = "Unknown" + if fam.husb_num and fam.husb_num in self.indi: + h = self.indi[fam.husb_num] + husb_name = str(h.name) if h.name else "Unknown" + + wife_name = "Unknown" + if fam.wife_num and fam.wife_num in self.indi: + w = self.indi[fam.wife_num] + wife_name = str(w.name) if w.name else "Unknown" + + _warn( + f"Warning: Family @F{num}@ ({husb_name} & {wife_name}) missing _FSFTID tag, " + f"using GEDCOM pointer as fallback." ) + fam.fid = num # Use GEDCOM pointer ID as fallback + + for _num, fam in self.fam.items(): + if fam.husb_num: + fam.husb_fid = self.indi[fam.husb_num].fid + if fam.wife_num: + fam.wife_fid = self.indi[fam.wife_num].fid + for chil in fam.chil_num: + fam.chil_fid.add(self.indi[chil].fid) + for _num, indi in self.indi.items(): + for famc in indi.famc_num: + # Store fam.fid instead of (husb, wife) tuple for consistent keying + indi.famc_fid.add(self.fam[famc].fid) + for fams in indi.fams_num: + indi.fams_fid.add(self.fam[fams].fid) diff --git a/getmyancestors/classes/gui.py b/getmyancestors/classes/gui.py index 4b4c7d9..bc27bd1 100644 --- a/getmyancestors/classes/gui.py +++ b/getmyancestors/classes/gui.py @@ -1,26 +1,20 @@ # fstogedcom classes and functions +import asyncio import os import re -import time -import asyncio import tempfile +import time from threading import Thread +from tkinter import IntVar, Menu, StringVar, TclError, filedialog, messagebox +from tkinter.ttk import Button, Checkbutton, Entry, Frame, Label, Notebook, Treeview +from typing import Literal, cast + from diskcache import Cache -from tkinter import ( - StringVar, - IntVar, - filedialog, - messagebox, - Menu, - TclError, -) -from tkinter.ttk import Frame, Label, Entry, Button, Checkbutton, Treeview, Notebook - -from getmyancestors.classes.tree import Indi, Fam, Tree from getmyancestors.classes.gedcom import Gedcom from getmyancestors.classes.session import Session from getmyancestors.classes.translation import translations +from getmyancestors.classes.tree import Fam, Indi, Tree tmp_dir = os.path.join(tempfile.gettempdir(), "fstogedcom") cache = Cache(tmp_dir) @@ -48,8 +42,11 @@ class EntryWithMenu(Entry): state = "normal" except TclError: state = "disabled" - menu.add_command(label=_("Copy"), command=self.copy, state=state) - menu.add_command(label=_("Cut"), command=self.cut, state=state) + + # Cast to Literal for mypy + state_lit = cast(Literal["normal", "disabled"], state) + menu.add_command(label=_("Copy"), command=self.copy, state=state_lit) + menu.add_command(label=_("Cut"), command=self.cut, state=state_lit) menu.add_command(label=_("Paste"), command=self.paste) menu.post(event.x_root, event.y_root) @@ -96,9 +93,19 @@ class FilesToMerge(Treeview): _("Error"), message=_("File not found: ") + os.path.basename(filename) ) return - file = open(filename, "r", encoding="utf-8") - new_id = self.insert("", 0, text=os.path.basename(filename)) - self.files[new_id] = file + try: + # pylint: disable=consider-using-with + file = open(filename, "r", encoding="utf-8") + except OSError as e: + messagebox.showinfo(_("Error"), message=_("Error opening file: ") + str(e)) + return + + try: + new_id = self.insert("", 0, text=os.path.basename(filename)) + self.files[new_id] = file + except TclError: + file.close() + messagebox.showinfo(_("Error"), message=_("Error adding file to list")) def popup(self, event): """open menu to remove item""" @@ -176,46 +183,46 @@ class Merge(Frame): ged = Gedcom(file, tree) # add informations about individuals - for num in ged.indi: - fid = ged.indi[num].fid + for _num, indi in ged.indi.items(): + fid = indi.fid if fid not in tree.indi: indi_counter += 1 tree.indi[fid] = Indi(tree=tree, num=indi_counter) tree.indi[fid].tree = tree - tree.indi[fid].fid = ged.indi[num].fid - tree.indi[fid].fams_fid |= ged.indi[num].fams_fid - tree.indi[fid].famc_fid |= ged.indi[num].famc_fid - tree.indi[fid].name = ged.indi[num].name - tree.indi[fid].birthnames = ged.indi[num].birthnames - tree.indi[fid].nicknames = ged.indi[num].nicknames - tree.indi[fid].aka = ged.indi[num].aka - tree.indi[fid].married = ged.indi[num].married - tree.indi[fid].gender = ged.indi[num].gender - tree.indi[fid].facts = ged.indi[num].facts - tree.indi[fid].notes = ged.indi[num].notes - tree.indi[fid].sources = ged.indi[num].sources - tree.indi[fid].memories = ged.indi[num].memories - tree.indi[fid].baptism = ged.indi[num].baptism - tree.indi[fid].confirmation = ged.indi[num].confirmation - tree.indi[fid].endowment = ged.indi[num].endowment + tree.indi[fid].fid = indi.fid + tree.indi[fid].fams_fid |= indi.fams_fid + tree.indi[fid].famc_fid |= indi.famc_fid + tree.indi[fid].name = indi.name + tree.indi[fid].birthnames |= indi.birthnames + tree.indi[fid].nicknames |= indi.nicknames + tree.indi[fid].aka |= indi.aka + tree.indi[fid].married |= indi.married + tree.indi[fid].gender = indi.gender + tree.indi[fid].facts |= indi.facts + tree.indi[fid].notes |= indi.notes + tree.indi[fid].sources |= indi.sources + tree.indi[fid].memories |= indi.memories + tree.indi[fid].baptism = indi.baptism + tree.indi[fid].confirmation = indi.confirmation + tree.indi[fid].endowment = indi.endowment if not ( tree.indi[fid].sealing_child and tree.indi[fid].sealing_child.famc ): - tree.indi[fid].sealing_child = ged.indi[num].sealing_child + tree.indi[fid].sealing_child = indi.sealing_child # add informations about families - for num in ged.fam: - husb, wife = (ged.fam[num].husb_fid, ged.fam[num].wife_fid) + for _num, fam in ged.fam.items(): + husb, wife = (fam.husb_fid, fam.wife_fid) if (husb, wife) not in tree.fam: fam_counter += 1 tree.fam[(husb, wife)] = Fam(husb, wife, tree, fam_counter) tree.fam[(husb, wife)].tree = tree - tree.fam[(husb, wife)].chil_fid |= ged.fam[num].chil_fid - tree.fam[(husb, wife)].fid = ged.fam[num].fid - tree.fam[(husb, wife)].facts = ged.fam[num].facts - tree.fam[(husb, wife)].notes = ged.fam[num].notes - tree.fam[(husb, wife)].sources = ged.fam[num].sources - tree.fam[(husb, wife)].sealing_spouse = ged.fam[num].sealing_spouse + tree.fam[(husb, wife)].chil_fid |= fam.chil_fid + tree.fam[(husb, wife)].fid = fam.fid + tree.fam[(husb, wife)].facts |= fam.facts + tree.fam[(husb, wife)].notes |= fam.notes + tree.fam[(husb, wife)].sources |= fam.sources + tree.fam[(husb, wife)].sealing_spouse = fam.sealing_spouse # merge notes by text tree.notes = sorted(tree.notes, key=lambda x: x.text) @@ -258,7 +265,13 @@ class SignIn(Frame): self.save_password = IntVar() self.save_password.set(cache.get("save_password") or 0) - check_save_password = Checkbutton(self, text=_("Save Password"), variable=self.save_password, onvalue=1, offvalue=0) + check_save_password = Checkbutton( + self, + text=_("Save Password"), + variable=self.save_password, + onvalue=1, + offvalue=0, + ) label_username.grid(row=0, column=0, pady=15, padx=(0, 5)) entry_username.grid(row=0, column=1) @@ -468,7 +481,7 @@ class Download(Frame): def login(self): """log in FamilySearch""" - global _ + global _ # pylint: disable=global-statement username = self.sign_in.username.get() password = self.sign_in.password.get() if not (username and password): @@ -478,6 +491,7 @@ class Download(Frame): return self.btn_valid.config(state="disabled") self.info(_("Login to FamilySearch...")) + # pylint: disable=consider-using-with self.logfile = open("download.log", "w", encoding="utf-8") self.fs = Session( self.sign_in.username.get(), @@ -511,8 +525,13 @@ class Download(Frame): cache.delete("save_password") cache.add("save_password", save_pass) - url = "/service/tree/tree-data/reservations/person/%s/ordinances" % self.fs.fid - lds_account = self.fs.get_url(url, {}).get("status") == "OK" + url = ( + "https://www.familysearch.org/service/tree/tree-data/reservations/person/%s/ordinances" + % self.fs.fid + ) + # Restore no_api=True to query main site service instead of API + response = self.fs.get_url(url, {}, no_api=True) + lds_account = response and response.get("status") == "OK" self.options = Options(self.form, lds_account) self.info("") self.sign_in.destroy() diff --git a/getmyancestors/classes/session.py b/getmyancestors/classes/session.py index 30ed47a..630aea2 100644 --- a/getmyancestors/classes/session.py +++ b/getmyancestors/classes/session.py @@ -1,20 +1,78 @@ -# global imports +import contextlib +import getpass +import hashlib +import json +import logging +import os +import re +import sqlite3 import sys +import threading import time -from urllib.parse import urlparse, parse_qs +import traceback +import webbrowser +from urllib.parse import parse_qs, urlencode, urlparse import requests from requests_cache import CachedSession as CSession -from fake_useragent import UserAgent - from requests_ratelimiter import LimiterAdapter -# local imports from getmyancestors.classes.translation import translations +DEFAULT_CLIENT_ID = "a02j000000KTRjpAAH" +DEFAULT_REDIRECT_URI = "https://misbach.github.io/fs-auth/index_raw.html" + + +class SecureLogFilter(logging.Filter): + """Filter to censor sensitive data in logs""" + + SENSITIVE_RE = re.compile( + r"(Authorization: Bearer |Cookie: |XSRF-TOKEN=|SESSION=|password=|_csrf=|username=)[^ \r\n&]+" + ) + + def filter(self, record): + if isinstance(record.msg, (str, bytes)): + msg = ( + record.msg + if isinstance(record.msg, str) + else record.msg.decode("utf-8", "ignore") + ) + record.msg = self.SENSITIVE_RE.sub(r"\1***", msg) + return True + + +LICENSE_AGREEMENT = """ +================================================================================ + getmyancestors - License & Terms of Use +================================================================================ + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +By using this software to access FamilySearch, you also agree to: -# class Session(requests.Session): -class GMASession: +1. Comply with FamilySearch's Terms of Use (https://www.familysearch.org/terms) +2. Not abuse the API through excessive requests or automated scraping +3. If you experience a bug or a network loop, close the program and file a bug! +4. Only use the tool for personal, non-commercial purposes. +5. Respect the privacy of living individuals in any downloaded data +6. Accept that FamilySearch may revoke API access for violations + +DO NOT USE THE TOOL EXCESSIVELY! +DOWNLOAD YOUR FAMILY'S GEDCOM AND USE IT OFFLINE. +BE RESPECTFUL OF FAMILYSEARCH'S SERVERS AND RESPECT THEIR TERMS OF USE. + +================================================================================ +""" + + +class GMASession(requests.Session): """Create a FamilySearch session :param username and password: valid FamilySearch credentials :param verbose: True to active verbose mode @@ -22,47 +80,327 @@ class GMASession: :param timeout: time before retry a request """ - def __init__(self, username, password, verbose=False, logfile=False, timeout=60): - # super().__init__('http_cache', backend='filesystem', expire_after=86400) - # super().__init__() + DEFAULT_HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept-Language": "en-US,en;q=0.9", + } + + def __init__( + self, + username, + password, + client_id=None, + redirect_uri=None, + verbose=False, + logfile=None, + timeout=60, + requests_per_second=5, + ): + requests.Session.__init__(self) self.username = username self.password = password + self.lock = threading.Lock() + self.client_id = client_id or DEFAULT_CLIENT_ID + if redirect_uri: + self.redirect_uri = redirect_uri + else: + self.redirect_uri = DEFAULT_REDIRECT_URI + # Warn about using fallback redirect URI - check TTY before coloring + # Suppress in offline mode as we don't login + if not os.environ.get("GMA_OFFLINE_MODE"): + use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "") + msg = ( + "⚠ WARNING: Using fallback redirect URI (misbach.github.io)\n" + " This is a third-party OAuth callback. Consider registering your own.\n" + " See: https://www.familysearch.org/developers/\n" + ) + if use_color: + sys.stderr.write(f"\033[33m{msg}\033[0m") + else: + sys.stderr.write(msg) self.verbose = verbose self.logfile = logfile self.timeout = timeout - self.fid = self.lang = self.display_name = None + self.fid = None + self.lang = None + self.display_name = None self.counter = 0 - self.headers = {"User-Agent": UserAgent().firefox} - # Apply a rate-limit (5 requests per second) to all requests - adapter = LimiterAdapter(per_second=5) - self.mount('http://', adapter) - self.mount('https://', adapter) + # Persistence setup - use ~/.cache/getmyancestors/ by default + cache_dir = os.environ.get( + "GMA_CACHE_DIR", os.path.expanduser("~/.cache/getmyancestors") + ) + os.makedirs(cache_dir, exist_ok=True) + self.db_path = os.path.join(cache_dir, "session.sqlite") + # Cookie file is now stored in cache directory too + self.cookie_file = os.path.join(cache_dir, "cookies.json") + self._init_db() + self.check_license() - self.login() + # Debug logging toggle + # Debug logging toggle + if os.environ.get("GMA_DEBUG"): + logger = logging.getLogger() + logger.setLevel(logging.DEBUG) + # Add secure filter + secure_filter = SecureLogFilter() + for handler in logger.handlers: + handler.addFilter(secure_filter) + if not logger.handlers: + handler = logging.StreamHandler(sys.stderr) + handler.addFilter(secure_filter) + logger.addHandler(handler) + + # Optional: Enable full HTTP level logging if GMA_TRACE is set + if os.environ.get("GMA_TRACE"): + import http.client as http_client # pylint: disable=import-outside-toplevel + + http_client.HTTPConnection.debuglevel = 1 + self.write_log( + "🐞 TRACE MODE ENABLED - WARNING: Logs will contain sensitive data unless filtered by SecureLogFilter." + ) + + self.write_log("🐞 DEBUG MODE ENABLED - Censored logging active.") + + # Hardcode robust User-Agent to avoid bot detection + with self.lock: + self.headers.update(self.DEFAULT_HEADERS) + + # Apply a rate-limit (default 5 requests per second) to all requests + # Credit: Josemando Sobral + adapter = LimiterAdapter(per_second=requests_per_second) + self.mount("https://", adapter) + + # Defer login to subclasses to ensure initialization is complete + # self.login() + + def _init_db(self): + """Initialize SQLite database for session storage""" + with sqlite3.connect(self.db_path) as conn: + conn.execute( + "CREATE TABLE IF NOT EXISTS session (key TEXT PRIMARY KEY, value TEXT)" + ) + conn.commit() + + def check_license(self): + """Check if user has accepted the current license agreement""" + # Allow tests/CI to bypass this check explicitly + if os.environ.get("GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT"): + return + + # Hash combines license text AND username so acceptance is per-user + current_hash = hashlib.sha256( + (LICENSE_AGREEMENT + self.username).encode("utf-8") + ).hexdigest() + accepted_hash = None + + # 1. Check external license file + # We store license acceptance in a separate JSON file so it survives cache clearing + license_file = os.path.join( + os.path.dirname(self.db_path), "..", "license-agreement.json" + ) + license_file = os.path.abspath(license_file) + + if os.path.exists(license_file): + try: + with open(license_file, "r", encoding="utf-8") as f: + data = json.load(f) + if data.get("license_hash") == current_hash: + accepted_hash = data["license_hash"] + except Exception: + pass # Ignore file errors + + if accepted_hash == current_hash: + return + + # 2. Prompt user if mismatch (NO lock held) + if not sys.stdin.isatty(): + sys.stderr.write( + "ERROR: License agreement has changed or not yet been accepted.\n" + "Please run this tool interactively to accept the license.\n" + ) + sys.exit(1) + + print(LICENSE_AGREEMENT) + try: + response = ( + input("Do you agree to the terms above? (yes/no): ").strip().lower() + ) + if response != "yes": + print("License not accepted. Exiting.") + sys.exit(1) + + # 3. Write new hash to JSON file + try: + data = {"license_hash": current_hash} + with open(license_file, "w", encoding="utf-8") as f: + json.dump(data, f) + except Exception as e: + # Fallback or just print warning if we can't save + if self.verbose: + print( + f"Warning: Could not save license agreement to {license_file}: {e}" + ) + + print("License accepted.\n") + + except (EOFError, KeyboardInterrupt): + print("\nLicense acceptance cancelled. Exiting.") + sys.exit(1) @property def logged(self): - return bool(self.cookies.get("fssessionid")) + with self.lock: + return bool( + self.cookies.get("fssessionid") or self.headers.get("Authorization") + ) + + def save_cookies(self): + """save cookies and authorization header to JSON (explicitly NOT sqlite for security)""" + try: + with self.lock: + cookies_export = requests.utils.dict_from_cookiejar(self.cookies) + auth_header = self.headers.get("Authorization") + + data = { + "cookies": cookies_export, + "auth": auth_header, + } + # Save to separate JSON file + cookie_file = os.path.join( + os.path.dirname(self.db_path), "..", "cookies.json" + ) + cookie_file = os.path.abspath(cookie_file) + + with open(cookie_file, "w", encoding="utf-8") as f: + json.dump(data, f) + + if self.verbose: + self.write_log("Session saved to JSON: " + cookie_file) + except Exception as e: + self.write_log("Error saving session: " + str(e)) + + def load_cookies(self): + """load cookies and authorization header from JSON""" + cookie_file = os.path.join(os.path.dirname(self.db_path), "..", "cookies.json") + cookie_file = os.path.abspath(cookie_file) + + if os.path.exists(cookie_file): + try: + with open(cookie_file, "r", encoding="utf-8") as f: + data = json.load(f) + self._apply_session_data(data) + if self.verbose: + self.write_log("Session loaded from JSON: " + cookie_file) + return True + except Exception as e: + self.write_log("Error loading session from JSON: " + str(e)) + + # 2. Legacy Migration: checking old cookie file if it exists + if os.path.exists(self.cookie_file): + try: + with open(self.cookie_file, "r", encoding="utf-8") as f: + data = json.load(f) + self._apply_session_data(data) + # We do NOT auto-save to new JSON here to respect read-only/security. + # It will save to new JSON only on next login/save_cookies call. + if self.verbose: + self.write_log( + "Session loaded (migrated) from legacy JSON: " + + self.cookie_file + ) + return True + except Exception as e: + self.write_log("Error loading legacy cookie file: " + str(e)) + + return False + + def _apply_session_data(self, data): + """Internal helper to apply session dict to current session""" + if isinstance(data, dict) and ("cookies" in data or "auth" in data): + cookies_dict = data.get("cookies", {}) + auth_header = data.get("auth") + else: + cookies_dict = data + auth_header = None + + with self.lock: + self.cookies.update(requests.utils.cookiejar_from_dict(cookies_dict)) + if auth_header: + self.headers.update({"Authorization": auth_header}) + + # ANSI color codes for terminal output + COLOR_RESET = "\033[0m" + COLOR_RED = "\033[91m" + COLOR_YELLOW = "\033[93m" + + def write_log(self, text, level="info"): + """write text in the log file with optional color""" + timestamp = time.strftime("%Y-%m-%d %H:%M:%S") + log = f"[{timestamp}]: {text}\n" - def write_log(self, text): - """write text in the log file""" - log = "[%s]: %s\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), text) if self.verbose: - sys.stderr.write(log) + # Apply color if TTY or FORCE_COLOR is set (for piped output like tee) + use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "") + if level == "error" and use_color: + sys.stderr.write(f"{self.COLOR_RED}{log}{self.COLOR_RESET}") + elif level == "warning" and use_color: + sys.stderr.write(f"{self.COLOR_YELLOW}{log}{self.COLOR_RESET}") + else: + sys.stderr.write(log) + if self.logfile: - self.logfile.write(log) + self.logfile.write(log) # No color in log files + # pylint: disable=inconsistent-return-statements def login(self): """retrieve FamilySearch session ID (https://familysearch.org/developers/docs/guides/oauth2) """ - while True: + if self.load_cookies(): + if self.verbose: + self.write_log("Attempting to reuse cached session...") + # Use auto_login=False to prevent recursion if session is invalid + # Force network verification to prevent infinite loops with stale cache + context = ( + self.cache_disabled() + if hasattr(self, "cache_disabled") + else contextlib.nullcontext() + ) + with context: + self.set_current(auto_login=False) + if self.logged and self.fid: + if self.verbose: + self.write_log("Successfully reused cached session.") + return True + if self.verbose: + self.write_log("Cached session invalid or expired.") + + # Define context manager for disabling cache + if hasattr(self, "cache_disabled"): + cache_context = self.cache_disabled() + else: + cache_context = contextlib.nullcontext() + + with cache_context: try: + if not self.username or not self.password: + return self.manual_login() + + # Clear cookies to ensure fresh start for new login + with self.lock: + self.cookies.clear() + url = "https://www.familysearch.org/auth/familysearch/login" self.write_log("Downloading: " + url) - self.get(url, headers=self.headers) - xsrf = self.cookies["XSRF-TOKEN"] + + # Use the temp session for requests + self.get(url, headers=self.headers, timeout=self.timeout) + xsrf = self.cookies.get("XSRF-TOKEN") + if not xsrf: + self.write_log("No XSRF token found. Switching to manual login.") + return self.manual_login() + url = "https://ident.familysearch.org/login" self.write_log("Downloading: " + url) res = self.post( @@ -73,164 +411,471 @@ class GMASession: "password": self.password, }, headers=self.headers, + timeout=self.timeout, ) + try: data = res.json() except ValueError: - self.write_log("Invalid auth request") - self.write_log(res.headers) - self.write_log(res.text) - - raise "Invalid auth request" - # continue - if "loginError" in data: - self.write_log(data["loginError"]) - return + self.write_log(f"Headless Login Failed. Status: {res.status_code}") + self.write_log(f"Response Preview: {res.text[:200]}") + self.write_log("Switching to manual login.") + return self.manual_login() + if "redirectUrl" not in data: - self.write_log(res.text) - continue + self.write_log("Redirect URL not found in response.") + return self.manual_login() url = data["redirectUrl"] self.write_log("Downloading: " + url) - res = self.get(url, headers=self.headers) - res.raise_for_status() + self.get(url, headers=self.headers, timeout=self.timeout) - url = f"https://ident.familysearch.org/cis-web/oauth2/v3/authorization?response_type=code&scope=openid profile email qualifies_for_affiliate_account country&client_id=a02j000000KTRjpAAH&redirect_uri=https://misbach.github.io/fs-auth/index_raw.html&username={self.username}" + params = urlencode( + { + "response_type": "code", + "scope": "openid profile email qualifies_for_affiliate_account country", + "client_id": self.client_id, + "redirect_uri": self.redirect_uri, + "username": self.username, + } + ) + url = f"https://ident.familysearch.org/cis-web/oauth2/v3/authorization?{params}" self.write_log("Downloading: " + url) - response = self.get(url, allow_redirects=False, headers=self.headers) - location = response.headers["location"] - code = parse_qs(urlparse(location).query).get("code") + + # Allow redirects so we follow the chain to the callback URI + response = self.get( + url, + allow_redirects=True, + headers=self.headers, + timeout=self.timeout, + ) + + # Check if we landed on the redirect URI (or have the code in the URL) + final_url = response.url + code = None + + if "code=" in final_url: + code = parse_qs(urlparse(final_url).query).get("code") + + # If not in final URL, check history (in case of a meta refresh or stop) + if not code and response.history: + for resp in response.history: + if "code=" in resp.headers.get("Location", ""): + code = parse_qs( + urlparse(resp.headers["Location"]).query + ).get("code") + if code: + break + + if not code: + self.write_log(f"Code not found in URL: {final_url}") + return self.manual_login(response.url) + + if isinstance(code, list): + code_str = code[0] + else: + code_str = code + + # Use raw requests to avoid cache interference just in case url = "https://ident.familysearch.org/cis-web/oauth2/v3/token" self.write_log("Downloading: " + url) - res = self.post( + res = requests.post( url, data={ "grant_type": "authorization_code", - "client_id": "a02j000000KTRjpAAH", - "code": code, - "redirect_uri": "https://misbach.github.io/fs-auth/index_raw.html", + "client_id": self.client_id, + "code": code_str, + "redirect_uri": self.redirect_uri, }, headers=self.headers, + timeout=self.timeout, ) - try: - data = res.json() - except ValueError: - self.write_log("Invalid auth request") - continue + data = res.json() + if "access_token" in data: + with self.lock: + self.headers.update( + {"Authorization": f"Bearer {data['access_token']}"} + ) + self.set_current(auto_login=False) + if self.logged: + self.save_cookies() + return True + except Exception as e: + self.write_log("Headless login error: " + str(e)) + self.write_log(traceback.format_exc()) + return self.manual_login() + + # pylint: disable=inconsistent-return-statements + def manual_login(self, auth_url=None): + """Perform manual login""" + if not auth_url: + auth_url = f"https://ident.familysearch.org/cis-web/oauth2/v3/authorization?response_type=code&scope=openid profile email qualifies_for_affiliate_account country&client_id={self.client_id}&redirect_uri={self.redirect_uri}&username={self.username}" - if "access_token" not in data: - self.write_log(res.text) + print("\n" + "=" * 60) + print("Headless login failed. Manual login required.") + print("=" * 60) + print(f"Opening browser to login: {auth_url}") + + # Only open browser if we really are in a terminal context, but user asked to stop? + # We will open it because otherwise they can't login. + try: + webbrowser.open(auth_url) + except Exception: # Catch specific exception + pass + + print("\n" + "-" * 30) + print("MANUAL FALLBACK:") + print("1. Log in to FamilySearch in the opened window.") + print("2. Once logged in, you will be redirected.") + print( + "3. Copy the 'code' from the URL or simply copy the FULL destination URL." + ) + print( + " (If it says 'code already used', assume you need to re-login or check for Access Token)" + ) + print("-" * 30) + + while True: + try: + user_input = getpass.getpass( + "Paste the code, token, or full redirect URL here: " + ).strip() + if not user_input: + sys.exit(2) + + code = None + session_id = None + + # Check for Access Token first + if "access_token=" in user_input: + try: + parsed = urlparse(user_input) + if parsed.fragment: + qs = parse_qs(parsed.fragment) + if "access_token" in qs: + session_id = qs["access_token"][0] + if not session_id and parsed.query: + qs = parse_qs(parsed.query) + if "access_token" in qs: + session_id = qs["access_token"][0] + except Exception: # Catch specific exception + pass + + if ( + not session_id + and len(user_input) > 50 + and "=" not in user_input + and "http" not in user_input + ): + session_id = user_input + + if session_id: + with self.lock: + self.headers.update({"Authorization": f"Bearer {session_id}"}) + self.cookies.set( + "fssessionid", session_id, domain=".familysearch.org" + ) + self.set_current(auto_login=False) + if self.logged and self.fid: + self.save_cookies() + print("\nSuccess! Session established via Token.") + return True + + print("\nToken appeared invalid. Try again.") continue - access_token = data["access_token"] - self.headers.update({"Authorization": f"Bearer {access_token}"}) - except requests.exceptions.ReadTimeout: - self.write_log("Read timed out") - continue - except requests.exceptions.ConnectionError: - self.write_log("Connection aborted") - time.sleep(self.timeout) - continue - except requests.exceptions.HTTPError: - self.write_log("HTTPError") - time.sleep(self.timeout) - continue - except KeyError: - self.write_log("KeyError") - time.sleep(self.timeout) - continue - except ValueError: - self.write_log("ValueError") - time.sleep(self.timeout) - continue - if self.logged: - self.set_current() - break + # Check for Code + if "code=" in user_input: + try: + parsed = urlparse(user_input) + qs = parse_qs(parsed.query) + if "code" in qs: + code = qs["code"][0] + except Exception: # Catch specific exception + pass + elif len(user_input) < 50: + code = user_input + + if code: + url = "https://ident.familysearch.org/cis-web/oauth2/v3/token" + try: + # Raw request to avoid cache + res = requests.post( + url, + data={ + "grant_type": "authorization_code", + "client_id": self.client_id, + "code": code, + "redirect_uri": self.redirect_uri, + }, + headers=self.headers, + timeout=self.timeout, + ) + + data = res.json() + if "access_token" in data: + session_id = data["access_token"] + with self.lock: + self.headers.update( + {"Authorization": f"Bearer {session_id}"} + ) + self.cookies.set( + "fssessionid", + session_id, + domain=".familysearch.org", + ) + self.set_current(auto_login=False) + if self.logged and self.fid: + self.save_cookies() + print("\nSuccess! Session established via Code.") + return True + + error_desc = data.get( + "error_description", data.get("error", "Unknown error") + ) + print(f"\nToken exchange failed: {error_desc}") + + except Exception as e: + print(f"\nError during token exchange: {e}") + + print("Invalid input or failed login. Please try again.") - def get_url(self, url, headers=None): + except (EOFError, KeyboardInterrupt): + print("\nLogin cancelled.") + sys.exit(2) + + def get_url(self, url, headers=None, auto_login=True, no_api=False): """retrieve JSON structure from a FamilySearch URL""" self.counter += 1 if headers is None: headers = {"Accept": "application/x-gedcomx-v1+json"} - headers.update(self.headers) + # headers.update(self.headers) - redundant, requests merges session headers automatically while True: try: self.write_log("Downloading: " + url) + # Used HEAD logic here (explicit API URL) + full_url = url if no_api else "https://api.familysearch.org" + url r = self.get( - "https://api.familysearch.org" + url, + full_url, timeout=self.timeout, headers=headers, ) except requests.exceptions.ReadTimeout: - self.write_log("Read timed out") + self.write_log("Read timed out", level="warning") continue except requests.exceptions.ConnectionError: - self.write_log("Connection aborted") + self.write_log("Connection aborted", level="warning") time.sleep(self.timeout) continue - self.write_log("Status code: %s" % r.status_code) + except sqlite3.InterfaceError as e: + # Cache corruption from threading - log and retry without cache + self.write_log( + "Cache error (sqlite3.InterfaceError): %s - Retrying without cache" + % e, + level="warning", + ) + with self.cache_disabled(): # type: ignore[attr-defined] + try: + r = self.get( + full_url, + timeout=self.timeout, + headers=headers, + ) + except requests.exceptions.RequestException as retry_err: + self.write_log( + "Retry blocked by network error: %s" % retry_err, + level="warning", + ) + # Let the outer loop retry or fail gracefully + continue + # Color status codes based on severity + if r.status_code >= 500: + self.write_log("Status code: %s" % r.status_code, level="error") + elif r.status_code >= 400: + self.write_log("Status code: %s" % r.status_code, level="warning") + else: + self.write_log("Status code: %s" % r.status_code) + if self.verbose and hasattr(r, "from_cache") and r.from_cache: + self.write_log("CACHE HIT: " + url) if r.status_code == 204: return None - if r.status_code in {404, 405, 410, 500}: - self.write_log("WARNING: " + url) + if r.status_code in {404, 405, 410, 500, 503, 504}: + self.write_log("WARNING: " + url, level="warning") return None if r.status_code == 401: - self.login() - continue + if auto_login: + self.login() + continue + + return None try: r.raise_for_status() except requests.exceptions.HTTPError: - self.write_log("HTTPError") + self.write_log("HTTPError", level="error") + # Log full request/response details for all HTTP errors + self.write_log( + " Request: GET https://api.familysearch.org%s" % url, + level="warning", + ) + self.write_log( + ( + " Response: %s" % r.text[:500] + if len(r.text) > 500 + else " Response: %s" % r.text + ), + level="warning", + ) if r.status_code == 403: - if ( - "message" in r.json()["errors"][0] - and r.json()["errors"][0]["message"] - == "Unable to get ordinances." - ): + try: + error_data = r.json() + if ( + "errors" in error_data + and error_data["errors"] + and error_data["errors"][0].get("message") + == "Unable to get ordinances." + ): + self.write_log( + "Unable to get ordinances. " + "Try with an LDS account or without option -c.", + level="error", + ) + return "error" + error_msg = error_data["errors"][0].get("message", "") self.write_log( - "Unable to get ordinances. " - "Try with an LDS account or without option -c." + "WARNING: code 403 from %s %s" % (url, error_msg), + level="warning", + ) + except (ValueError, KeyError, IndexError): + self.write_log( + "WARNING: code 403 from %s (no error details)" % url, + level="warning", ) - return "error" - self.write_log( - "WARNING: code 403 from %s %s" - % (url, r.json()["errors"][0]["message"] or "") - ) return None time.sleep(self.timeout) continue try: return r.json() except Exception as e: - self.write_log("WARNING: corrupted file from %s, error: %s" % (url, e)) + self.write_log( + "WARNING: corrupted file from %s, error: %s" % (url, e), + level="warning", + ) + return None - def set_current(self): + def set_current(self, auto_login=True): """retrieve FamilySearch current user ID, name and language""" url = "/platform/users/current" - data = self.get_url(url) + data = self.get_url(url, auto_login=auto_login) if data: self.fid = data["users"][0]["personId"] self.lang = data["users"][0]["preferredLanguage"] self.display_name = data["users"][0]["displayName"] def _(self, string): - """translate a string into user's language - TODO replace translation file for gettext format - """ - if string in translations and self.lang in translations[string]: + """translate a string into user's language""" + if self.lang and string in translations and self.lang in translations[string]: return translations[string][self.lang] return string class CachedSession(GMASession, CSession): + # pylint: disable=abstract-method + def __init__( + self, + username, + password, + client_id=None, + redirect_uri=None, + verbose=False, + logfile=False, + timeout=60, + cache_control=True, + requests_per_second=5, + ): + # Cache setup - use ~/.cache/getmyancestors/ by default + cache_dir = os.environ.get( + "GMA_CACHE_DIR", os.path.expanduser("~/.cache/getmyancestors") + ) + os.makedirs(cache_dir, exist_ok=True) + cache_path = os.path.join(cache_dir, "requests") - def __init__(self, username, password, verbose=False, logfile=False, timeout=60): - CSession.__init__(self, 'http_cache', backend='filesystem', expire_after=86400) - GMASession.__init__(self, username, password, verbose=verbose, logfile=logfile, timeout=timeout) -class Session(GMASession, requests.Session): + GMASession.__init__( + self, + username, + password, + client_id, + redirect_uri, + verbose=verbose, + logfile=logfile, + timeout=timeout, + requests_per_second=requests_per_second, + ) - def __init__(self, username, password, verbose=False, logfile=False, timeout=60): - requests.Session.__init__(self) - GMASession.__init__(self, username, password, verbose=verbose, logfile=logfile, timeout=timeout) + # Offline mode adjustments + offline_mode = bool(os.environ.get("GMA_OFFLINE_MODE")) + expire_after = -1 if offline_mode else 86400 + + # Use Filesystem backend as per requirement + CSession.__init__( + self, + cache_path, + backend="filesystem", + expire_after=expire_after, + allowable_codes=(200, 204), + cache_control=cache_control, # Enable HTTP conditional requests (ETag/Last-Modified) + allow_to_fetch_missing=(not offline_mode), # prevent fetch on miss + ) + # Re-apply default headers as CSession.__init__ might have wiped them + with self.lock: + self.headers.update(self.DEFAULT_HEADERS) + # Check for offline mode via environment variable + if os.environ.get("GMA_OFFLINE_MODE"): + self.write_log( + "🔧 OFFLINE MODE ENABLED - skipping login and using cached data only." + ) + # In offline mode, skip login - all requests must come from cache + # Satisfaction for self.logged property + with self.lock: + self.headers.update({"Authorization": "Bearer OFFLINE"}) + self.fid = "OFFLINE" + self.lang = "en" + self.display_name = "Offline Mode" + else: + self.login() + + def request(self, *args, **kwargs): + """Override request to block network in offline mode""" + if os.environ.get("GMA_OFFLINE_MODE"): + # Set only_if_cached to True for requests-cache + kwargs["only_if_cached"] = True + return super().request(*args, **kwargs) + + +class Session(GMASession): + def __init__( + self, + username, + password, + client_id=None, + redirect_uri=None, + verbose=False, + logfile=False, + timeout=60, + # pylint: disable=unused-argument + cache_control=True, # Ignored for non-cached sessions + requests_per_second=5, + ): + GMASession.__init__( + self, + username, + password, + client_id, + redirect_uri, + verbose=verbose, + logfile=logfile, + timeout=timeout, + requests_per_second=requests_per_second, + ) + self.login() diff --git a/getmyancestors/classes/translation.py b/getmyancestors/classes/translation.py index 06532ba..d125eea 100644 --- a/getmyancestors/classes/translation.py +++ b/getmyancestors/classes/translation.py @@ -118,16 +118,16 @@ translations = { "Cut": {"fr": "Couper"}, "Paste": {"fr": "Coller"}, "Username:": { - "fr": "Nom d'utilisateur :", - "de": "Benutzername:", + "fr": "Nom d'utilisateur :", + "de": "Benutzername:", }, "Password:": { - "fr": "Mot de passe :", - "de": "Passwort:", + "fr": "Mot de passe :", + "de": "Passwort:", }, "Save Password": { - "fr": "Enregistrer le mot de passe", - "de": "Passwort speichern", + "fr": "Enregistrer le mot de passe", + "de": "Passwort speichern", }, "ID already exist": {"fr": "Cet identifiant existe déjà"}, "Invalid FamilySearch ID: ": {"fr": "Identifiant FamilySearch invalide : "}, diff --git a/getmyancestors/classes/tree.py b/getmyancestors/classes/tree.py deleted file mode 100644 index 9091150..0000000 --- a/getmyancestors/classes/tree.py +++ /dev/null @@ -1,1523 +0,0 @@ -import sys -import re -import time -import asyncio -import os -from urllib.parse import unquote, unquote_plus -from datetime import datetime -from typing import Set, Dict, List, Tuple, Union, Optional, BinaryIO, Any -# global imports -import babelfish -import geocoder -import requests -import xml.etree.cElementTree as ET -from xml.etree.cElementTree import Element -from requests_cache import CachedSession - -# local imports -import getmyancestors -from getmyancestors.classes.constants import ( - MAX_PERSONS, - FACT_EVEN, - FACT_TAGS, - ORDINANCES_STATUS, -) - - -COUNTY = 'County' -COUNTRY = 'Country' -CITY = 'City' - -GEONAME_FEATURE_MAP = { - 'ADM1': COUNTY, # first-order administrative division a primary administrative division of a country, such as a state in the United States - 'ADM1H': COUNTY, # historical first-order administrative division a former first-order administrative division - 'ADM2': COUNTY, # second-order administrative division a subdivision of a first-order administrative division - 'ADM2H': COUNTY, # historical second-order administrative division a former second-order administrative division - 'ADM3': COUNTY, # third-order administrative division a subdivision of a second-order administrative division - 'ADM3H': COUNTY, # historical third-order administrative division a former third-order administrative division - 'ADM4': COUNTY, # fourth-order administrative division a subdivision of a third-order administrative division - 'ADM4H': COUNTY, # historical fourth-order administrative division a former fourth-order administrative division - 'ADM5': COUNTY, # fifth-order administrative division a subdivision of a fourth-order administrative division - 'ADM5H': COUNTY, # historical fifth-order administrative division a former fifth-order administrative division - 'ADMD': COUNTY, # administrative division an administrative division of a country, undifferentiated as to administrative level - 'ADMDH': COUNTY, # historical administrative division a former administrative division of a political entity, undifferentiated as to administrative level - # 'LTER': leased area a tract of land leased to another country, usually for military installations - 'PCL': COUNTRY, # political entity - 'PCLD': COUNTRY, # dependent political entity - 'PCLF': COUNTRY, # freely associated state - 'PCLH': COUNTRY, # historical political entity a former political entity - 'PCLI': COUNTRY, # independent political entity - 'PCLIX': COUNTRY, # section of independent political entity - 'PCLS': COUNTRY, # semi-independent political entity - - 'PPL': CITY, # populated place a city, town, village, or other agglomeration of buildings where people live and work - 'PPLA': CITY, # seat of a first-order administrative division seat of a first-order administrative division (PPLC takes precedence over PPLA) - 'PPLA2': CITY, # seat of a second-order administrative division - 'PPLA3': CITY, # seat of a third-order administrative division - 'PPLA4': CITY, # seat of a fourth-order administrative division - 'PPLA5': CITY, # seat of a fifth-order administrative division - 'PPLC': CITY, # capital of a political entity - 'PPLCH': CITY, # historical capital of a political entity a former capital of a political entity - 'PPLF': CITY, # farm village a populated place where the population is largely engaged in agricultural activities - 'PPLG': CITY, # seat of government of a political entity - 'PPLH': CITY, # historical populated place a populated place that no longer exists - 'PPLL': CITY, # populated locality an area similar to a locality but with a small group of dwellings or other buildings - 'PPLQ': CITY, # abandoned populated place - 'PPLR': CITY, # religious populated place a populated place whose population is largely engaged in religious occupations - 'PPLS': CITY, # populated places cities, towns, villages, or other agglomerations of buildings where people live and work - 'PPLW': CITY, # destroyed populated place a village, town or city destroyed by a natural disaster, or by war - 'PPLX': CITY, # section of populated place - -} - -# getmyancestors classes and functions -def cont(string): - """parse a GEDCOM line adding CONT and CONT tags if necessary""" - level = int(string[:1]) + 1 - lines = string.splitlines() - res = list() - max_len = 255 - for line in lines: - c_line = line - to_conc = list() - while len(c_line.encode("utf-8")) > max_len: - index = min(max_len, len(c_line) - 2) - while ( - len(c_line[:index].encode("utf-8")) > max_len - or re.search(r"[ \t\v]", c_line[index - 1 : index + 1]) - ) and index > 1: - index -= 1 - to_conc.append(c_line[:index]) - c_line = c_line[index:] - max_len = 248 - to_conc.append(c_line) - res.append(("\n%s CONC " % level).join(to_conc)) - max_len = 248 - return ("\n%s CONT " % level).join(res) + "\n" - -class Note: - """GEDCOM Note class - :param text: the Note content - :param tree: a Tree object - :param num: the GEDCOM identifier - """ - - counter = {} - - def __init__(self, text="", tree=None, num=None, num_prefix=None, note_type=None): - self._handle = None - self.note_type = note_type or 'Source Note' - self.num_prefix = num_prefix - if num: - self.num = num - else: - Note.counter[num_prefix or 'None'] = Note.counter.get(num_prefix or 'None', 0) + 1 - self.num = Note.counter[num_prefix or 'None'] - print(f'##### Creating Note: {num_prefix}, {self.num}', file=sys.stderr) - self.text = text.strip() - - if tree: - tree.notes.append(self) - - @property - def id(self): - return f'{self.num_prefix}_{self.num}' if self.num_prefix != None else self.num - - def print(self, file=sys.stdout): - """print Note in GEDCOM format""" - print(f'Note: {self.text}', file=sys.stderr) - file.write(cont("0 @N%s@ NOTE %s" % (self.id, self.text))) - - def link(self, file=sys.stdout, level=1): - """print the reference in GEDCOM format""" - print(f'Linking Note: {self.id}', file=sys.stderr) - file.write("%s NOTE @N%s@\n" % (level, self.id)) - - - @property - def handle(self): - if not self._handle: - self._handle = '_' + os.urandom(10).hex() - - return self._handle - - def printxml(self, parent_element: Element) -> None: - note_element = ET.SubElement( - parent_element, - 'note', - handle=self.handle, - # change='1720382308', - id=self.id, - type='Source Note' - ) - ET.SubElement(note_element, 'text').text = self.text - -class Source: - """GEDCOM Source class - :param data: FS Source data - :param tree: a Tree object - :param num: the GEDCOM identifier - """ - - counter = 0 - - def __init__(self, data=None, tree=None, num=None): - if num: - self.num = num - else: - Source.counter += 1 - self.num = Source.counter - - self._handle = None - - self.tree = tree - self.url = self.citation = self.title = self.fid = None - self.notes = set() - if data: - self.fid = data["id"] - if "about" in data: - self.url = data["about"].replace( - "familysearch.org/platform/memories/memories", - "www.familysearch.org/photos/artifacts", - ) - if "citations" in data: - self.citation = data["citations"][0]["value"] - if "titles" in data: - self.title = data["titles"][0]["value"] - if "notes" in data: - notes = [ n['text'] for n in data["notes"] if n["text"] ] - for idx, n in enumerate(notes): - self.notes.add(Note( - n, - self.tree, - num="S%s-%s" % (self.id, idx), - note_type='Source Note' - )) - self.modified = data['attribution']['modified'] - - @property - def id(self): - return 'S' + str(self.fid or self.num) - - - @property - def handle(self): - if not self._handle: - self._handle = '_' + os.urandom(10).hex() - - return self._handle - - def print(self, file=sys.stdout): - """print Source in GEDCOM format""" - file.write("0 @S%s@ SOUR \n" % self.id) - if self.title: - file.write(cont("1 TITL " + self.title)) - if self.citation: - file.write(cont("1 AUTH " + self.citation)) - if self.url: - file.write(cont("1 PUBL " + self.url)) - for n in self.notes: - n.link(file, 1) - file.write("1 REFN %s\n" % self.fid) - - def link(self, file=sys.stdout, level=1): - """print the reference in GEDCOM format""" - file.write("%s SOUR @S%s@\n" % (level, self.id)) - - def printxml(self, parent_element: Element) -> None: - - # - # Palkovics Cser József, "Hungary Civil Registration, 1895-1980" - # "Hungary Civil Registration, 1895-1980", , <i>FamilySearch</i> (https://www.familysearch.org/ark:/61903/1:1:6JBQ-NKWD : Thu Mar 07 10:23:43 UTC 2024), Entry for Palkovics Cser József and Palkovics Cser István, 27 Aug 1928. - # https://familysearch.org/ark:/61903/1:1:6JBQ-NKWD - # - # - source_element = ET.SubElement( - parent_element, - 'source', - handle=self.handle, - change=str(int(self.modified / 1000)), - id=self.id - ) - if self.title: - ET.SubElement(source_element, 'stitle').text = self.title - if self.citation: - ET.SubElement(source_element, 'sauthor').text = self.citation - if self.url: - ET.SubElement(source_element, 'spubinfo').text = self.url - if self.fid: - ET.SubElement(source_element, 'srcattribute', type='REFN', value=self.fid) - - -class Fact: - """GEDCOM Fact class - :param data: FS Fact data - :param tree: a tree object - """ - - counter = {} - - def __init__(self, data=None, tree: Optional['Tree']=None, num_prefix=None): - self.value = self.type = self.date = None - self.date_type = None - self.place: Optional[Place] = None - self.note = None - self._handle: Optional[str] = None - if data: - if "value" in data: - self.value = data["value"] - if "type" in data: - self.type = data["type"] - self.fs_type = self.type - if self.type in FACT_EVEN: - self.type = tree.fs._(FACT_EVEN[self.type]) - elif self.type[:6] == "data:,": - self.type = unquote(self.type[6:]) - elif self.type not in FACT_TAGS: - self.type = None - - - self.num_prefix = f'{num_prefix}_{FACT_TAGS[self.type]}' if num_prefix and self.type in FACT_TAGS else num_prefix - Fact.counter[self.num_prefix or 'None'] = Fact.counter.get(self.num_prefix or 'None', 0) + 1 - self.num = Fact.counter[self.num_prefix or 'None'] - if data: - if "date" in data: - if 'formal' in data['date']: - self.date = data['date']['formal'].split('+')[-1].split('/')[0] - if data['date']['formal'].startswith('A+'): - self.date_type = 'about' - if data['date']['formal'].startswith('/+'): - self.date_type = 'before' - if data['date']['formal'].endswith('/'): - self.date_type = 'after' - else: - self.date = data["date"]["original"] - if "place" in data: - place = data["place"] - place_name = place["original"] - place_id = place["description"][1:] if "description" in place and place["description"][1:] in tree.places else None - self.place = tree.ensure_place(place_name, place_id) - if "changeMessage" in data["attribution"]: - self.note = Note( - data["attribution"]["changeMessage"], - tree, - num_prefix='E' + self.num_prefix if self.num_prefix else None, - note_type='Event Note', - ) - if self.type == "http://gedcomx.org/Death" and not ( - self.date or self.place - ): - self.value = "Y" - - if tree: - tree.facts.add(self) - - - @property - def id(self): - return f'{self.num_prefix}_{self.num}' if self.num_prefix != None else self.num - - - @property - def handle(self): - if not self._handle: - self._handle = '_' + os.urandom(10).hex() - - return self._handle - - def printxml(self, parent_element): - - event_element = ET.SubElement( - parent_element, - 'event', - handle=self.handle, - # change='1720382301', - id=self.id - ) - - ET.SubElement(event_element, 'type').text = ( - unquote_plus(self.type[len('http://gedcomx.org/'):]) - if self.type.startswith('http://gedcomx.org/') - else self.type - ) - # FACT_TAGS.get(self.type, self.type) - if self.date: - params={ - 'val': self.date, - } - if self.date_type is not None: - params['type'] = self.date_type - ET.SubElement(event_element, 'datestr', **params) - if self.place: - ET.SubElement(event_element, 'place', hlink=self.place.handle) - if self.note: - ET.SubElement(event_element, 'noteref', hlink=self.note.handle) - - def print(self, file=sys.stdout): - """print Fact in GEDCOM format - the GEDCOM TAG depends on the type, defined in FACT_TAGS - """ - if self.type in FACT_TAGS: - tmp = "1 " + FACT_TAGS[self.type] - if self.value: - tmp += " " + self.value - file.write(cont(tmp)) - elif self.type: - file.write("1 EVEN\n2 TYPE %s\n" % self.type) - if self.value: - file.write(cont("2 NOTE Description: " + self.value)) - else: - return - if self.date: - file.write(cont("2 DATE " + self.date)) - if self.place: - self.place.print(file, 2) - if self.map: - latitude, longitude = self.map - file.write("3 MAP\n4 LATI %s\n4 LONG %s\n" % (latitude, longitude)) - if self.note: - self.note.link(file, 2) - - -class Memorie: - """GEDCOM Memorie class - :param data: FS Memorie data - """ - - def __init__(self, data=None): - self.description = self.url = None - if data and "links" in data: - self.url = data["about"] - if "titles" in data: - self.description = data["titles"][0]["value"] - if "descriptions" in data: - self.description = ( - "" if not self.description else self.description + "\n" - ) + data["descriptions"][0]["value"] - - def print(self, file=sys.stdout): - """print Memorie in GEDCOM format""" - file.write("1 OBJE\n2 FORM URL\n") - if self.description: - file.write(cont("2 TITL " + self.description)) - if self.url: - file.write(cont("2 FILE " + self.url)) - - -NAME_MAP = { - "preferred" : 'Preeferred Name', - "nickname" : 'Nickname', - "birthname": 'Birth Name', - "aka": 'Also Known As', - "married": 'Married Name', -} - -class Name: - """GEDCOM Name class - :param data: FS Name data - :param tree: a Tree object - """ - - def __init__(self, data=None, tree=None, owner_fis=None, kind=None, alternative: bool=False): - self.given = "" - self.surname = "" - self.prefix = None - self.suffix = None - self.note = None - self.alternative = alternative - self.owner_fis = owner_fis - self.kind = kind - if data: - if "parts" in data["nameForms"][0]: - for z in data["nameForms"][0]["parts"]: - if z["type"] == "http://gedcomx.org/Given": - self.given = z["value"] - if z["type"] == "http://gedcomx.org/Surname": - self.surname = z["value"] - if z["type"] == "http://gedcomx.org/Prefix": - self.prefix = z["value"] - if z["type"] == "http://gedcomx.org/Suffix": - self.suffix = z["value"] - if "changeMessage" in data["attribution"]: - self.note = Note( - data["attribution"]["changeMessage"], - tree, - num_prefix=f'NAME_{owner_fis}_{kind}', - note_type='Name Note', - ) - - def printxml(self, parent_element): - params = {} - if self.kind is not None: - params['type'] = NAME_MAP.get(self.kind, self.kind) - if self.alternative: - params['alt'] = '1' - person_name = ET.SubElement(parent_element, 'name', **params) - ET.SubElement(person_name, 'first').text = self.given - ET.SubElement(person_name, 'surname').text = self.surname - # TODO prefix / suffix - - - def print(self, file=sys.stdout, typ=None): - """print Name in GEDCOM format - :param typ: type for additional names - """ - tmp = "1 NAME %s /%s/" % (self.given, self.surname) - if self.suffix: - tmp += " " + self.suffix - file.write(cont(tmp)) - if typ: - file.write("2 TYPE %s\n" % typ) - if self.prefix: - file.write("2 NPFX %s\n" % self.prefix) - if self.note: - self.note.link(file, 2) - - - -class Place: - """GEDCOM Place class - :param name: the place name - :param tree: a Tree object - :param num: the GEDCOM identifier - """ - - counter = 0 - - def __init__( - self, - id: str, - name: str, - type: Optional[str]=None, - parent: Optional['Place']=None, - latitude: Optional[float]=None, - longitude: Optional[float]=None): - self._handle = None - self.name = name - self.type = type - self.id = id - self.parent = parent - self.latitude = latitude - self.longitude = longitude - - @property - def handle(self): - if not self._handle: - self._handle = '_' + os.urandom(10).hex() - - return self._handle - - - def print(self, file=sys.stdout, indentation=0): - """print Place in GEDCOM format""" - file.write("%d @P%s@ PLAC %s\n" % (indentation, self.num, self.name)) - - def printxml(self, parent_element): - - - # - # - # - # - # - # - # - place_element = ET.SubElement( - parent_element, - 'placeobj', - handle=self.handle, - # change='1720382307', - id=self.id, - type=self.type or 'Unknown' - ) - # ET.SubElement(place_element, 'ptitle').text = self.name - ET.SubElement(place_element, 'pname', value=self.name) - if self.parent: - ET.SubElement(place_element, 'placeref', hlink=self.parent.handle) - if self.latitude and self.longitude: - ET.SubElement(place_element, 'coord', long=str(self.longitude), lat=str(self.latitude)) - -class Ordinance: - """GEDCOM Ordinance class - :param data: FS Ordinance data - """ - - def __init__(self, data=None): - self.date = self.temple_code = self.status = self.famc = None - if data: - if "completedDate" in data: - self.date = data["completedDate"] - if "completedTemple" in data: - self.temple_code = data["completedTemple"]["code"] - self.status = data["status"] - - def print(self, file=sys.stdout): - """print Ordinance in Gecom format""" - if self.date: - file.write(cont("2 DATE " + self.date)) - if self.temple_code: - file.write("2 TEMP %s\n" % self.temple_code) - if self.status in ORDINANCES_STATUS: - file.write("2 STAT %s\n" % ORDINANCES_STATUS[self.status]) - if self.famc: - file.write("2 FAMC @F%s@\n" % self.famc.num) - -class Citation: - - def __init__(self, data: Dict[str, Any], source: Source): - self._handle = None - self.id = data["id"] - self.source = source - self.message = ( - data["attribution"]["changeMessage"] - if "changeMessage" in data["attribution"] - else None - ) - # TODO create citation note out of this. - self.modified = data['attribution']['modified'] - - - @property - def handle(self): - if not self._handle: - self._handle = '_' + os.urandom(10).hex() - - return self._handle - - def printxml(self, parent_element: Element): - -# -# -# 2 -# -# -# - citation_element = ET.SubElement( - parent_element, - 'citation', - handle=self.handle, - change=str(int(self.modified / 1000)), - id='C' + str(self.id) - ) - ET.SubElement(citation_element, 'confidence').text = '2' - ET.SubElement(citation_element, 'sourceref', hlink=self.source.handle) - - -class Indi: - """GEDCOM individual class - :param fid' FamilySearch id - :param tree: a tree object - :param num: the GEDCOM identifier - """ - - counter = 0 - - def __init__(self, fid: str, tree: 'Tree', num=None): - self._handle = None - if num: - self.num = num - else: - Indi.counter += 1 - self.num = Indi.counter - self.fid = fid - self.tree = tree - self.famc: Set['Fam'] = set() - self.fams: Set['Fam'] = set() - # self.famc_fid = set() - # self.fams_fid = set() - # self.famc_num = set() - # self.fams_num = set() - # self.famc_ids = set() - # self.fams_ids = set() - self.name: Optional[Name] = None - self.gender = None - self.living = None - self.parents: Set[Tuple[str, str]] = set() # (father_id, mother_id) - self.spouses: Set[Tuple[str, str, str]] = set() # (person1, person2, relfid) - self.children: Set[Tuple[str, str, str]] = set() # (father_id, mother_id, child_id) - self.baptism = self.confirmation = self.initiatory = None - self.endowment = self.sealing_child = None - self.nicknames: Set[Name] = set() - self.birthnames: Set[Name] = set() - self.married: Set[Name] = set() - self.aka: Set[Name] = set() - self.facts: Set[Fact] = set() - self.notes: Set[Note] = set() - # self.sources: Set[Source] = set() - self.citations: Set[Citation] = set() - self.memories = set() - - def add_data(self, data): - """add FS individual data""" - if data: - self.living = data["living"] - for x in data["names"]: - alt = not x.get('preferred', False) - if x["type"] == "http://gedcomx.org/Nickname": - self.nicknames.add(Name(x, self.tree, self.fid, "nickname", alt)) - elif x["type"] == "http://gedcomx.org/BirthName": - self.birthnames.add(Name(x, self.tree, self.fid, "birthname", alt)) - elif x["type"] == "http://gedcomx.org/AlsoKnownAs": - self.aka.add(Name(x, self.tree, self.fid, "aka", alt)) - elif x["type"] == "http://gedcomx.org/MarriedName": - self.married.add(Name(x, self.tree, self.fid, "married", alt)) - else: - print('Unknown name type: ' + x.get('type'), file=sys.stderr) - raise 'Unknown name type' - if "gender" in data: - if data["gender"]["type"] == "http://gedcomx.org/Male": - self.gender = "M" - elif data["gender"]["type"] == "http://gedcomx.org/Female": - self.gender = "F" - elif data["gender"]["type"] == "http://gedcomx.org/Unknown": - self.gender = "U" - if "facts" in data: - for x in data["facts"]: - if x["type"] == "http://familysearch.org/v1/LifeSketch": - self.notes.add( - Note( - "=== %s ===\n%s" - % (self.tree.fs._("Life Sketch"), x.get("value", "")), - self.tree, - num_prefix=f'INDI_{self.fid}', - note_type='Person Note', - ) - ) - else: - self.facts.add(Fact(x, self.tree, num_prefix=f'INDI_{self.fid}')) - if "sources" in data: - sources = self.tree.fs.get_url( - "/platform/tree/persons/%s/sources" % self.fid - ) - if sources: - quotes = dict() - for quote in sources["persons"][0]["sources"]: - source_id = quote["descriptionId"] - source_data = next( - (s for s in sources['sourceDescriptions'] if s['id'] == source_id), - None, - ) - source = self.tree.ensure_source(source_data) - if source: - citation = self.tree.ensure_citation(quote, source) - self.citations.add(citation) - - for evidence in data.get("evidence", []): - memory_id, *_ = evidence["id"].partition("-") - url = "/platform/memories/memories/%s" % memory_id - memorie = self.tree.fs.get_url(url) - if memorie and "sourceDescriptions" in memorie: - for x in memorie["sourceDescriptions"]: - if x["mediaType"] == "text/plain": - text = "\n".join( - val.get("value", "") - for val in x.get("titles", []) - + x.get("descriptions", []) - ) - self.notes.add( - Note( - text, - self.tree, - num_prefix=f'INDI_{self.fid}', - note_type='Person Note', - )) - else: - self.memories.add(Memorie(x)) - - def add_fams(self, fam: 'Fam'): - """add family fid (for spouse or parent)""" - self.fams.add(fam) - - def add_famc(self, fam: 'Fam'): - """add family fid (for child)""" - self.famc.add(fam) - - def get_notes(self): - """retrieve individual notes""" - print(f'Getting Notes for {self.fid}', file=sys.stderr) - notes = self.tree.fs.get_url("/platform/tree/persons/%s/notes" % self.fid) - if notes: - for n in notes["persons"][0]["notes"]: - text_note = "=== %s ===\n" % n["subject"] if "subject" in n else "" - text_note += n["text"] + "\n" if "text" in n else "" - self.notes.add( - Note( - text_note, - self.tree, - num_prefix=f'INDI_{self.fid}', - note_type='Person Note', - )) - - def get_ordinances(self): - """retrieve LDS ordinances - need a LDS account - """ - res = [] - famc = False - if self.living: - return res, famc - url = "/service/tree/tree-data/reservations/person/%s/ordinances" % self.fid - data = self.tree.fs.get_url(url, {}) - if data: - for key, o in data["data"].items(): - if key == "baptism": - self.baptism = Ordinance(o) - elif key == "confirmation": - self.confirmation = Ordinance(o) - elif key == "initiatory": - self.initiatory = Ordinance(o) - elif key == "endowment": - self.endowment = Ordinance(o) - elif key == "sealingsToParents": - for subo in o: - self.sealing_child = Ordinance(subo) - relationships = subo.get("relationships", {}) - father = relationships.get("parent1Id") - mother = relationships.get("parent2Id") - if father and mother: - famc = father, mother - elif key == "sealingsToSpouses": - res += o - return res, famc - - def get_contributors(self): - """retrieve contributors""" - temp = set() - url = "/platform/tree/persons/%s/changes" % self.fid - data = self.tree.fs.get_url(url, {"Accept": "application/x-gedcomx-atom+json"}) - if data: - for entries in data["entries"]: - for contributors in entries["contributors"]: - temp.add(contributors["name"]) - if temp: - text = "=== %s ===\n%s" % ( - self.tree.fs._("Contributors"), - "\n".join(sorted(temp)), - ) - for n in self.tree.notes: - if n.text == text: - self.notes.add(n) - return - self.notes.add(Note(text, self.tree, num_prefix=f'INDI_{self.fid}_CONTRIB', note_type='Contribution Note')) - - @property - def id(self): - return self.fid or self.num - - - @property - def handle(self): - if not self._handle: - self._handle = '_' + os.urandom(10).hex() - - return self._handle - - def printxml(self, parent_element): - - # - # M - # - # József - # Cser - # - # - # - # - # - # - # - # - person = ET.SubElement(parent_element, - 'person', - handle=self.handle, - # change='1720382301', - id='I' + str(self.id)) - if self.fid: - ET.SubElement(person, 'attribute', type='_FSFTID', value=self.fid) - - if self.name: - self.name.printxml(person) - for name in self.nicknames | self.birthnames | self.aka | self.married: - name.printxml(person) - - gender = ET.SubElement(person, 'gender') - gender.text = self.gender - - if self.fams: - for fam in self.fams: - ET.SubElement(person, 'parentin', hlink=fam.handle) - - if self.famc: - for fam in self.famc: - ET.SubElement(person, 'childof', hlink=fam.handle) - - - ET.SubElement(person, 'attribute', type="_FSFTID", value=self.fid) - - - for fact in self.facts: - ET.SubElement(person, 'eventref', hlink=fact.handle, role='Primary') - - for citation in self.citations: - ET.SubElement(person, 'citationref', hlink=citation.handle) - - for note in self.notes: - ET.SubElement(person, 'noteref', hlink=note.handle) - - # - # - - - def print(self, file=sys.stdout): - """print individual in GEDCOM format""" - file.write("0 @I%s@ INDI\n" % self.id) - if self.name: - self.name.print(file) - for o in self.nicknames: - file.write(cont("2 NICK %s %s" % (o.given, o.surname))) - for o in self.birthnames: - o.print(file) - for o in self.aka: - o.print(file, "aka") - for o in self.married: - o.print(file, "married") - if self.gender: - file.write("1 SEX %s\n" % self.gender) - for o in self.facts: - o.print(file) - for o in self.memories: - o.print(file) - if self.baptism: - file.write("1 BAPL\n") - self.baptism.print(file) - if self.confirmation: - file.write("1 CONL\n") - self.confirmation.print(file) - if self.initiatory: - file.write("1 WAC\n") - self.initiatory.print(file) - if self.endowment: - file.write("1 ENDL\n") - self.endowment.print(file) - if self.sealing_child: - file.write("1 SLGC\n") - self.sealing_child.print(file) - for fam in self.fams: - file.write("1 FAMS @F%s@\n" % fam.id) - for fam in self.famc: - file.write("1 FAMC @F%s@\n" % fam.id) - # print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr) - # for num in self.fams_ids: - # print(f'Famc Ids: {self.famc_ids}', file=sys.stderr) - # for num in self.famc_ids: - # file.write("1 FAMC @F%s@\n" % num) - file.write("1 _FSFTID %s\n" % self.fid) - for o in self.notes: - o.link(file) - for source, quote in self.sources: - source.link(file, 1) - if quote: - file.write(cont("2 PAGE " + quote)) - - -class Fam: - """GEDCOM family class - :param husb: husbant fid - :param wife: wife fid - :param tree: a Tree object - :param num: a GEDCOM identifier - """ - - counter = 0 - - def __init__(self, husband: Indi | None, wife: Indi | None, tree: 'Tree'): - self._handle = None - self.num = Fam.gen_id(husband, wife) - self.fid = None - self.husband = husband - self.wife = wife - self.tree = tree - self.children: Set[Indi] = set() - self.facts: Set[Fact] = set() - self.sealing_spouse = None - self.notes = set() - self.sources = set() - - @property - def handle(self): - if not self._handle: - self._handle = '_' + os.urandom(10).hex() - - return self._handle - - @staticmethod - def gen_id(husband: Indi | None, wife: Indi | None) -> str: - if husband and wife: - return f'FAM_{husband.id}-{wife.id}' - elif husband: - return f'FAM_{husband.id}-UNK' - elif wife: - return f'FAM_UNK-{wife.id}' - else: - Fam.counter += 1 - return f'FAM_UNK-UNK-{Fam.counter}' - - def add_child(self, child: Indi | None): - """add a child fid to the family""" - if child is not None: - self.children.add(child) - - def add_marriage(self, fid: str): - """retrieve and add marriage information - :param fid: the marriage fid - """ - if not self.fid: - self.fid = fid - url = "/platform/tree/couple-relationships/%s" % self.fid - data = self.tree.fs.get_url(url) - if data: - if "facts" in data["relationships"][0]: - for x in data["relationships"][0]["facts"]: - self.facts.add(Fact(x, self.tree, num_prefix=f'FAM_{self.fid}')) - if "sources" in data["relationships"][0]: - quotes = dict() - for x in data["relationships"][0]["sources"]: - quotes[x["descriptionId"]] = ( - x["attribution"]["changeMessage"] - if "changeMessage" in x["attribution"] - else None - ) - new_sources = quotes.keys() - self.tree.sources.keys() - if new_sources: - sources = self.tree.fs.get_url( - "/platform/tree/couple-relationships/%s/sources" % self.fid - ) - for source in sources["sourceDescriptions"]: - if ( - source["id"] in new_sources - and source["id"] not in self.tree.sources - ): - self.tree.sources[source["id"]] = Source( - source, self.tree - ) - for source_fid in quotes: - self.sources.add( - (self.tree.sources[source_fid], quotes[source_fid]) - ) - - def get_notes(self): - """retrieve marriage notes""" - if self.fid: - notes = self.tree.fs.get_url( - "/platform/tree/couple-relationships/%s/notes" % self.fid - ) - if notes: - for n in notes["relationships"][0]["notes"]: - text_note = "=== %s ===\n" % n["subject"] if "subject" in n else "" - text_note += n["text"] + "\n" if "text" in n else "" - self.notes.add(Note(text_note, self.tree, num_prefix=f'FAM_{self.fid}', note_type='Marriage Note')) - - def get_contributors(self): - """retrieve contributors""" - if self.fid: - temp = set() - url = "/platform/tree/couple-relationships/%s/changes" % self.fid - data = self.tree.fs.get_url( - url, {"Accept": "application/x-gedcomx-atom+json"} - ) - if data: - for entries in data["entries"]: - for contributors in entries["contributors"]: - temp.add(contributors["name"]) - if temp: - text = "=== %s ===\n%s" % ( - self.tree.fs._("Contributors"), - "\n".join(sorted(temp)), - ) - for n in self.tree.notes: - if n.text == text: - self.notes.add(n) - return - self.notes.add(Note(text, self.tree, num_prefix=f'FAM_{self.fid}_CONTRIB', note_type='Contribution Note')) - - @property - def id(self): - return self.num - - def printxml(self, parent_element): - # - # - # - # - # - # - # - family = ET.SubElement(parent_element, - 'family', - handle=self.handle, - # change='1720382301', - id=self.id) - ET.SubElement(family, 'rel', type='Unknown') - if self.husband: - ET.SubElement(family, 'father', hlink=self.husband.handle) - if self.wife: - ET.SubElement(family, 'mother', hlink=self.wife.handle) - for child in self.children: - ET.SubElement(family, 'childref', hlink=child.handle) - for fact in self.facts: - ET.SubElement(family, 'eventref', hlink=fact.handle, role='Primary') - - def print(self, file=sys.stdout): - """print family information in GEDCOM format""" - file.write("0 @F%s@ FAM\n" % self.id) - if self.husband: - file.write("1 HUSB @I%s@\n" % self.husband.id) - if self.wife: - file.write("1 WIFE @I%s@\n" % self.wife.id) - for child in self.children: - file.write("1 CHIL @I%s@\n" % child.id) - for o in self.facts: - o.print(file) - if self.sealing_spouse: - file.write("1 SLGS\n") - self.sealing_spouse.print(file) - if self.fid: - file.write("1 _FSFTID %s\n" % self.fid) - for o in self.notes: - o.link(file) - for source, quote in self.sources: - source.link(file, 1) - if quote: - file.write(cont("2 PAGE " + quote)) - - -class Tree: - """family tree class - :param fs: a Session object - """ - - def __init__(self, fs: Optional[requests.Session]=None, exclude: List[str]=None, geonames_key=None): - self.fs = fs - self.geonames_key = geonames_key - self.indi: Dict[str, Indi] = dict() - self.fam: Dict[str, Fam] = dict() - self.notes = list() - self.facts: Set[Fact] = set() - self.sources: Dict[str, Source] = dict() - self.citations: Dict[str, Citation] = dict() - self.places: List[Place] = [] - self.places_by_names: Dict[str, Place] = dict() - self.place_cache: Dict[str, Tuple[float, float]] = dict() - self.display_name = self.lang = None - self.exclude: List[str] = exclude or [] - self.place_counter = 0 - if fs: - self.display_name = fs.display_name - self.lang = babelfish.Language.fromalpha2(fs.lang).name - - self.geosession = CachedSession('http_cache', backend='filesystem', expire_after=86400) - - def add_indis(self, fids_in: List[str]): - """add individuals to the family tree - :param fids: an iterable of fid - """ - fids = [] - for fid in fids_in: - if fid not in self.exclude: - fids.append(fid) - else: - print( - "Excluding %s from the family tree" % fid, file=sys.stderr - ) - - async def add_datas(loop, data): - futures = set() - for person in data["persons"]: - self.indi[person["id"]] = Indi(person["id"], self) - futures.add( - loop.run_in_executor(None, self.indi[person["id"]].add_data, person) - ) - for future in futures: - await future - - new_fids = [fid for fid in fids if fid and fid not in self.indi] - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - while new_fids: - data = self.fs.get_url( - "/platform/tree/persons?pids=" + ",".join(new_fids[:MAX_PERSONS]) - ) - if data: - if "places" in data: - for place in data["places"]: - if place["id"] not in self.place_cache: - self.place_cache[place["id"]] = ( - place["latitude"], - place["longitude"], - ) - loop.run_until_complete(add_datas(loop, data)) - if "childAndParentsRelationships" in data: - for rel in data["childAndParentsRelationships"]: - father: str | None = rel.get("parent1", {}).get("resourceId") - mother: str | None = rel.get("parent2", {}).get("resourceId") - child: str | None = rel.get("child", {}).get("resourceId") - if child in self.indi: - self.indi[child].parents.add((father, mother)) - if father in self.indi: - self.indi[father].children.add((father, mother, child)) - if mother in self.indi: - self.indi[mother].children.add((father, mother, child)) - if "relationships" in data: - for rel in data["relationships"]: - if rel["type"] == "http://gedcomx.org/Couple": - person1 = rel["person1"]["resourceId"] - person2 = rel["person2"]["resourceId"] - relfid = rel["id"] - if person1 in self.indi: - self.indi[person1].spouses.add( - (person1, person2, relfid) - ) - if person2 in self.indi: - self.indi[person2].spouses.add( - (person1, person2, relfid) - ) - new_fids = new_fids[MAX_PERSONS:] - - def ensure_source(self, source_data: Dict[str, Any]) -> Source: - if source_data["id"] not in self.sources: - self.sources[source_data["id"]] = Source(source_data, self) - return self.sources.get(source_data["id"]) - - def ensure_citation(self, data: Dict[str, Any], source: Source) -> Citation: - citation_id = data["id"] - if citation_id not in self.citations: - self.citations[citation_id] = Citation(data, source) - return self.citations[citation_id] - - def ensure_family(self, father: Optional['Indi'], mother: Optional['Indi']) -> Fam: - fam_id = Fam.gen_id(father, mother) - if fam_id not in self.fam: - self.fam[fam_id] = Fam(father, mother, self) - return self.fam[fam_id] - - - def place_by_geoname_id(self, id: str) -> Optional[Place]: - for place in self.places: - if place.id == id: - return place - return None - - def get_by_geonames_id(self, geonames_id: str) -> Place: - print('Fetching place hierarchy for', geonames_id, file=sys.stderr) - hierarchy = geocoder.geonames( - geonames_id, - key=self.geonames_key, - lang=['hu', 'en', 'de'], - method='hierarchy', - session=self.geosession, - ) - - if hierarchy and hierarchy.ok: - last_place = None - for item in hierarchy.geojson.get('features', []): - properties = item.get('properties', {}) - code = properties.get('code') - - if code in ['AREA', 'CONT']: - continue - - print('Properties', properties, file=sys.stderr) - id = 'GEO' + str(properties['geonames_id']) - place = self.place_by_geoname_id(id) - if place is None: - place = Place( - id, - properties.get('address'), - GEONAME_FEATURE_MAP.get(code, 'Unknown'), - last_place, - properties.get('lat'), - properties.get('lng') - ) - self.places.append(place) - last_place = place - return last_place - - @property - def _next_place_counter(self): - self.place_counter += 1 - return self.place_counter - - - def ensure_place(self, place_name: str, fid: Optional[str] = None, coord: Optional[Tuple[float, float]] = None) -> Place: - if place_name not in self.places_by_names: - place = None - if self.geonames_key: - print('Fetching place', place_name, file=sys.stderr) - geoname_record = geocoder.geonames( - place_name, - key=self.geonames_key, - session=self.geosession, - ) - if geoname_record and geoname_record.ok: - place = self.get_by_geonames_id(geoname_record.geonames_id) - if place is None: - coord = self.place_cache.get(fid) if coord is None else coord - place = Place( - 'PFSID' + fid if fid is not None else 'P' + str(self._next_place_counter), - place_name, - latitude=coord[0] if coord is not None else None, - longitude=coord[1] if coord is not None else None - ) - self.places.append(place) - self.places_by_names[place_name] = place - return self.places_by_names[place_name] - - # def add_fam(self, father, mother): - # """add a family to the family tree - # :param father: the father fid or None - # :param mother: the mother fid or None - # """ - # if (father, mother) not in self.fam: - # self.fam[(father, mother)] = Fam(father, mother, self) - - def add_trio(self, father: Indi | None, mother: Indi | None, child: Indi | None): - """add a children relationship to the family tree - :param father: the father fid or None - :param mother: the mother fid or None - :param child: the child fid or None - """ - fam = self.ensure_family(father, mother) - if child is not None: - fam.add_child(child) - child.add_famc(fam) - - if father is not None: - father.add_fams(fam) - if mother is not None: - mother.add_fams(fam) - - def add_parents(self, fids: Set[str]): - """add parents relationships - :param fids: a set of fids - """ - parents = set() - for fid in fids & self.indi.keys(): - for couple in self.indi[fid].parents: - parents |= set(couple) - if parents: - self.add_indis(parents) - for fid in fids & self.indi.keys(): - for father, mother in self.indi[fid].parents: - if ( - mother in self.indi - and father in self.indi - or not father - and mother in self.indi - or not mother - and father in self.indi - ): - self.add_trio( - self.indi.get(father), - self.indi.get(mother), - self.indi.get(fid), - ) - return set(filter(None, parents)) - - def add_spouses(self, fids: Set[str]): - """add spouse relationships - :param fids: a set of fid - """ - - async def add(loop, rels: Set[Tuple[str, str, str]]): - futures = set() - for father, mother, relfid in rels: - if father in self.exclude or mother in self.exclude: - continue - fam_id = Fam.gen_id(self.indi[father], self.indi[mother]) - if self.fam.get(fam_id): - futures.add( - loop.run_in_executor( - None, self.fam[fam_id].add_marriage, relfid - ) - ) - for future in futures: - await future - - rels: Set[Tuple[str, str, str]] = set() - for fid in fids & self.indi.keys(): - rels |= self.indi[fid].spouses - loop = asyncio.get_event_loop() - if rels: - self.add_indis( - set.union(*({father, mother} for father, mother, relfid in rels)) - ) - for father, mother, _ in rels: - if father in self.indi and mother in self.indi: - father_indi = self.indi[father] - mother_indi = self.indi[mother] - fam = self.ensure_family(father_indi, mother_indi) - father_indi.add_fams(fam) - mother_indi.add_fams(fam) - - loop.run_until_complete(add(loop, rels)) - - def add_children(self, fids): - """add children relationships - :param fids: a set of fid - """ - rels: Set[Tuple[str, str, str]] = set() - for fid in fids & self.indi.keys(): - rels |= self.indi[fid].children if fid in self.indi else set() - children = set() - if rels: - self.add_indis(set.union(*(set(rel) for rel in rels))) - for father, mother, child in rels: - if child in self.indi and ( - mother in self.indi - and father in self.indi - or not father - and mother in self.indi - or not mother - and father in self.indi - ): - self.add_trio( - self.indi.get(father), - self.indi.get(mother), - self.indi.get(child), - ) - children.add(child) - return children - - def add_ordinances(self, fid): - """retrieve ordinances - :param fid: an individual fid - """ - if fid in self.indi: - ret, famc = self.indi[fid].get_ordinances() - if famc and famc in self.fam: - self.indi[fid].sealing_child.famc = self.fam[famc] - for o in ret: - spouse_id = o["relationships"]["spouseId"] - if (fid, spouse_id) in self.fam: - self.fam[fid, spouse_id].sealing_spouse = Ordinance(o) - elif (spouse_id, fid) in self.fam: - self.fam[spouse_id, fid].sealing_spouse = Ordinance(o) - - def reset_num(self): - """reset all GEDCOM identifiers""" - # for husb, wife in self.fam: - # self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None - # self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None - # self.fam[(husb, wife)].chil_num = set( - # self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid - # ) - # for fid in self.indi: - # self.indi[fid].famc_num = set( - # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid - # ) - # self.indi[fid].fams_num = set( - # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid - # ) - # self.indi[fid].famc_ids = set( - # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid - # ) - # self.indi[fid].fams_ids = set( - # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid - # ) - - def printxml(self, file: BinaryIO): - -# root = ET.Element("root") -# doc = ET.SubElement(root, "doc") - -# ET.SubElement(doc, "field1", name="blah").text = "some value1" -# ET.SubElement(doc, "field2", name="asdfasd").text = "some vlaue2" - -# tree = ET.ElementTree(root) -# tree.write("filename.xml") - -# -# -# -#
-# -# Barnabás Südy -# -#
- - root = ET.Element("database", xmlns="http://gramps-project.org/xml/1.7.1/") - - header = ET.SubElement(root, "header") - ET.SubElement(header, "created", date=datetime.strftime(datetime.now(), "%Y-%m-%d"), version="5.2.2") - researcher = ET.SubElement(header, "researcher") - resname = ET.SubElement(researcher, "resname") - resname.text = self.display_name - - people = ET.SubElement(root, "people") - for indi in sorted(self.indi.values(), key=lambda x: x.num): - indi.printxml(people) - - families = ET.SubElement(root, "families") - for fam in sorted(self.fam.values(), key=lambda x: x.num): - fam.printxml(families) - - events = ET.SubElement(root, "events") - for fact in self.facts: - fact.printxml(events) - - notes = ET.SubElement(root, "notes") - for note in sorted(self.notes, key=lambda x: x.id): - note.printxml(notes) - - places = ET.SubElement(root, "places") - for place in self.places: - place.printxml(places) - - sources = ET.SubElement(root, "sources") - for source in self.sources.values(): - source.printxml(sources) - - citations = ET.SubElement(root, "citations") - for citation in self.citations.values(): - citation.printxml(citations) - - tree = ET.ElementTree(root) - - doctype='' - file.write(doctype.encode('utf-8')) - tree.write(file, 'utf-8') - - - def print(self, file=sys.stdout): - """print family tree in GEDCOM format""" - file.write("0 HEAD\n") - file.write("1 CHAR UTF-8\n") - file.write("1 GEDC\n") - file.write("2 VERS 5.5.1\n") - file.write("2 FORM LINEAGE-LINKED\n") - file.write("1 SOUR getmyancestors\n") - file.write("2 VERS %s\n" % getmyancestors.__version__) - file.write("2 NAME getmyancestors\n") - file.write("1 DATE %s\n" % time.strftime("%d %b %Y")) - file.write("2 TIME %s\n" % time.strftime("%H:%M:%S")) - file.write("1 SUBM @SUBM@\n") - file.write("0 @SUBM@ SUBM\n") - file.write("1 NAME %s\n" % self.display_name) - # file.write("1 LANG %s\n" % self.lang) - - for fid in sorted(self.indi, key=lambda x: self.indi.__getitem__(x).num): - self.indi[fid].print(file) - for fam in sorted(self.fam.values(), key=lambda x: x.num): - fam.print(file) - sources = sorted(self.sources.values(), key=lambda x: x.num) - for s in sources: - s.print(file) - notes = sorted(self.notes, key=lambda x: x.id) - for i, n in enumerate(notes): - if i > 0: - if n.id == notes[i - 1].id: - continue - n.print(file) - file.write("0 TRLR\n") diff --git a/getmyancestors/classes/tree/__init__.py b/getmyancestors/classes/tree/__init__.py new file mode 100644 index 0000000..d47e240 --- /dev/null +++ b/getmyancestors/classes/tree/__init__.py @@ -0,0 +1,32 @@ +"""Tree module for family tree data structures""" + +# Import classes in dependency order (utils -> records -> elements -> core) +from .core import Fam, Indi, Tree +from .elements import Citation, Name, Ordinance, Place +from .records import Fact, Memorie, Note, Source +from .utils import CITY, COUNTRY, COUNTY, GEONAME_FEATURE_MAP, NAME_MAP, cont + +__all__ = [ + # Functions + "cont", + # Constants + "COUNTY", + "COUNTRY", + "CITY", + "NAME_MAP", + "GEONAME_FEATURE_MAP", + # Classes from records + "Note", + "Source", + "Fact", + "Memorie", + # Classes from elements + "Name", + "Place", + "Ordinance", + "Citation", + # Classes from core + "Indi", + "Fam", + "Tree", +] diff --git a/getmyancestors/classes/tree/core.py b/getmyancestors/classes/tree/core.py new file mode 100644 index 0000000..046e8a0 --- /dev/null +++ b/getmyancestors/classes/tree/core.py @@ -0,0 +1,1255 @@ +"""Core classes: Indi, Fam, Tree""" + +# pylint: disable=too-many-lines + +import asyncio +import hashlib +import os +import sys +import threading +import time +import xml.etree.ElementTree as ET +from datetime import datetime +from typing import Any, BinaryIO, Dict, Iterable, List, Optional, Set, Tuple, Union + +# global imports +import babelfish +import geocoder +from requests_cache import CachedSession + +# local imports +from getmyancestors import __version__ +from getmyancestors.classes.constants import MAX_PERSONS +from getmyancestors.classes.session import GMASession + +from .elements import Citation, Name, Ordinance, Place +from .records import Fact, Memorie, Note, Source +from .utils import GEONAME_FEATURE_MAP, cont + + +class Indi: + """GEDCOM individual class + :param fid' FamilySearch id + :param tree: a tree object + :param num: the GEDCOM identifier + """ + + counter = 0 + + def __init__( + self, fid: Optional[str] = None, tree: Optional["Tree"] = None, num=None + ): + self._handle: Optional[str] = None + if num: + self.num = num + else: + Indi.counter += 1 + self.num = Indi.counter + self.fid = fid + self.tree = tree + self.num_prefix = "I" + self.origin_file: Optional[str] = None + self.famc: Set["Fam"] = set() + self.fams: Set["Fam"] = set() + self.famc_fid: Set[str] = set() + self.fams_fid: Set[str] = set() + self.famc_num: Set[int] = set() + self.fams_num: Set[int] = set() + self.famc_ids: Set[str] = set() + self.fams_ids: Set[str] = set() + self.name: Optional[Name] = None + self.gender: Optional[str] = None + self.living: Optional[bool] = None + self.parents: Set[Tuple[Optional[str], Optional[str]]] = ( + set() + ) # (father_id, mother_id) + self.spouses: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = ( + set() + ) # (person1, person2, relfid) + self.children: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = ( + set() + ) # (father_id, mother_id, child_id) + self.baptism: Optional[Ordinance] = None + self.confirmation: Optional[Ordinance] = None + self.initiatory: Optional[Ordinance] = None + self.endowment: Optional[Ordinance] = None + self.sealing_child: Optional[Ordinance] = None + self.nicknames: Set[Name] = set() + self.birthnames: Set[Name] = set() + self.married: Set[Name] = set() + self.aka: Set[Name] = set() + self.facts: Set[Fact] = set() + self.notes: Set[Note] = set() + self.sources: Set[Tuple[Source, Optional[str]]] = set() + self.citations: Set[Citation] = set() + self.memories: Set[Memorie] = set() + + def __str__(self): + """Return readable string for debugging/reference purposes.""" + return f"{self.num}. {self.name}, fam: {self.fid}" + + def add_data(self, data): + """add FS individual data""" + if data: + self.living = data["living"] + for x in data["names"]: + alt = not x.get("preferred", False) + if x["type"] == "http://gedcomx.org/Nickname": + self.nicknames.add(Name(x, self.tree, self.fid, "nickname", alt)) + elif x["type"] == "http://gedcomx.org/BirthName": + self.birthnames.add(Name(x, self.tree, self.fid, "birthname", alt)) + elif x["type"] == "http://gedcomx.org/AlsoKnownAs": + self.aka.add(Name(x, self.tree, self.fid, "aka", alt)) + elif x["type"] == "http://gedcomx.org/MarriedName": + self.married.add(Name(x, self.tree, self.fid, "married", alt)) + else: + print("Unknown name type: " + x.get("type"), file=sys.stderr) + raise ValueError("Unknown name type") + if "gender" in data: + if data["gender"]["type"] == "http://gedcomx.org/Male": + self.gender = "M" + elif data["gender"]["type"] == "http://gedcomx.org/Female": + self.gender = "F" + elif data["gender"]["type"] == "http://gedcomx.org/Unknown": + self.gender = "U" + if "facts" in data: + for x in data["facts"]: + if x["type"] == "http://familysearch.org/v1/LifeSketch": + self.notes.add( + Note( + "=== %s ===\n%s" + % ( + ( + self.tree.fs._("Life Sketch") + if self.tree and self.tree.fs + else "Life Sketch" + ), + x.get("value", ""), + ), + self.tree, + num_prefix=f"INDI_{self.fid}", + note_type="Person Note", + ) + ) + else: + self.facts.add( + Fact(x, self.tree, num_prefix=f"INDI_{self.fid}") + ) + if "sources" in data and self.tree and self.tree.fs: + sources = self.tree.fs.get_url( + "/platform/tree/persons/%s/sources" % self.fid + ) + if sources: + for quote in sources["persons"][0]["sources"]: + source_id = quote["descriptionId"] + source_data = next( + ( + s + for s in sources["sourceDescriptions"] + if s["id"] == source_id + ), + None, + ) + source = ( + self.tree.ensure_source(source_data) + if self.tree and source_data + else None + ) + if source and self.tree: + citation = self.tree.ensure_citation(quote, source) + self.citations.add(citation) + self.sources.add((source, citation.message)) + + for evidence in data.get("evidence", []): + memory_id, *_ = evidence["id"].partition("-") + url = "/platform/memories/memories/%s" % memory_id + memorie = ( + self.tree.fs.get_url(url) if self.tree and self.tree.fs else None + ) + if memorie and "sourceDescriptions" in memorie: + for x in memorie["sourceDescriptions"]: + if x["mediaType"] == "text/plain": + text = "\n".join( + val.get("value", "") + for val in x.get("titles", []) + + x.get("descriptions", []) + ) + self.notes.add( + Note( + text, + self.tree, + num_prefix=f"INDI_{self.fid}", + note_type="Person Note", + ) + ) + else: + self.memories.add(Memorie(x)) + + def add_fams(self, fam: "Fam"): + """add family fid (for spouse or parent)""" + self.fams.add(fam) + + def add_famc(self, fam: "Fam"): + """add family fid (for child)""" + self.famc.add(fam) + + def get_notes(self): + """retrieve individual notes""" + name_str = str(self.name) if self.name else "Unknown" + print( + f"Getting Notes for {self.fid} {name_str}", + file=sys.stderr, + ) + if not self.tree or not self.tree.fs: + return + notes = self.tree.fs.get_url("/platform/tree/persons/%s/notes" % self.fid) + if notes: + for n in notes["persons"][0]["notes"]: + text_note = "=== %s ===\n" % n["subject"] if "subject" in n else "" + text_note += n["text"] + "\n" if "text" in n else "" + self.notes.add( + Note( + text_note, + self.tree, + num_prefix=f"INDI_{self.fid}", + note_type="Person Note", + ) + ) + + def get_ordinances(self): + """retrieve LDS ordinances + need a LDS account + """ + res: List[Any] = [] + famc: Union[bool, Tuple[str, str]] = False + if self.living: + return res, famc + if not self.tree or not self.tree.fs: + return res, famc + url = "/service/tree/tree-data/reservations/person/%s/ordinances" % self.fid + data = self.tree.fs.get_url(url, {}, no_api=True) + if data: + for key, o in data["data"].items(): + if key == "baptism": + self.baptism = Ordinance(o) + elif key == "confirmation": + self.confirmation = Ordinance(o) + elif key == "initiatory": + self.initiatory = Ordinance(o) + elif key == "endowment": + self.endowment = Ordinance(o) + elif key == "sealingsToParents": + for subo in o: + self.sealing_child = Ordinance(subo) + relationships = subo.get("relationships", {}) + father = relationships.get("parent1Id") + mother = relationships.get("parent2Id") + if father and mother: + famc = father, mother + elif key == "sealingsToSpouses": + res += o + return res, famc + + @property + def id(self): + return self.fid or self.num + + @property + def handle(self): + if not self._handle: + self._handle = "_" + os.urandom(10).hex() + + return self._handle + + def printxml(self, parent_element): + # + # M + # + # József + # Cser + # + # + # + # + # + # + # + # + person = ET.SubElement( + parent_element, + "person", + handle=self.handle, + # change='1720382301', + id="I" + str(self.id), + ) + if self.fid: + # Add custom attribute for FamilySearch ID + ET.SubElement(person, "attribute", type="_FSFTID", value=self.fid) + + if self.name: + self.name.printxml(person) + for name in self.nicknames | self.birthnames | self.aka | self.married: + name.printxml(person) + + gender = ET.SubElement(person, "gender") + gender.text = self.gender + + if self.fams: + for fam in self.fams: + ET.SubElement(person, "parentin", hlink=fam.handle) + + if self.famc: + for fam in self.famc: + ET.SubElement(person, "childof", hlink=fam.handle) + + for fact in self.facts: + ET.SubElement(person, "eventref", hlink=fact.handle, role="Primary") + + for citation in self.citations: + ET.SubElement(person, "citationref", hlink=citation.handle) + + for note in self.notes: + ET.SubElement(person, "noteref", hlink=note.handle) + + # + # + + def get_contributors(self): + """retrieve contributors""" + if self.fid and self.tree: + url = "/platform/tree/persons/%s/changes" % self.fid + text = self.tree.get_contributors_text(url) + if text: + for n in self.tree.notes: + if n.text == text: + self.notes.add(n) + return + self.notes.add(Note(text, self.tree)) + + def print(self, file=sys.stdout): + """print individual in GEDCOM format""" + file.write("0 @I%s@ INDI\n" % self.id) + if self.name: + self.name.print(file) + for nick in sorted( + self.nicknames, + key=lambda x: ( + x.given or "", + x.surname or "", + x.prefix or "", + x.suffix or "", + x.kind or "", + str(x.alternative), + x.note.text if x.note else "", + ), + ): + file.write(cont("2 NICK %s %s" % (nick.given, nick.surname))) + for birthname in sorted( + self.birthnames, + key=lambda x: ( + x.given or "", + x.surname or "", + x.prefix or "", + x.suffix or "", + x.kind or "", + str(x.alternative), + x.note.text if x.note else "", + ), + ): + birthname.print(file) + for aka in sorted( + self.aka, + key=lambda x: ( + x.given or "", + x.surname or "", + x.prefix or "", + x.suffix or "", + x.kind or "", + str(x.alternative), + x.note.text if x.note else "", + ), + ): + aka.print(file, "aka") + for married_name in sorted( + self.married, + key=lambda x: ( + x.given or "", + x.surname or "", + x.prefix or "", + x.suffix or "", + x.kind or "", + str(x.alternative), + x.note.text if x.note else "", + ), + ): + married_name.print(file, "married") + if self.gender: + file.write("1 SEX %s\n" % self.gender) + for fact in sorted( + self.facts, + key=lambda x: ( + x.date or "9999", + x.type or "", + x.value or "", + x.place.id if x.place else "", + x.note.text if x.note else "", + ), + ): + fact.print(file) + for memory in sorted( + self.memories, key=lambda x: (x.url or "", x.description or "") + ): + memory.print(file) + if self.baptism: + file.write("1 BAPL\n") + self.baptism.print(file) + if self.confirmation: + file.write("1 CONL\n") + self.confirmation.print(file) + if self.initiatory: + file.write("1 WAC\n") + self.initiatory.print(file) + if self.endowment: + file.write("1 ENDL\n") + self.endowment.print(file) + if self.sealing_child: + file.write("1 SLGC\n") + self.sealing_child.print(file) + for fam in sorted(self.fams, key=lambda x: x.id or ""): + file.write("1 FAMS @F%s@\n" % fam.id) + for fam in sorted(self.famc, key=lambda x: x.id or ""): + file.write("1 FAMC @F%s@\n" % fam.id) + # print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr) + # for num in self.fams_ids: + # print(f'Famc Ids: {self.famc_ids}', file=sys.stderr) + # for num in self.famc_ids: + # file.write("1 FAMC @F%s@\n" % num) + file.write("1 _FSFTID %s\n" % self.fid) + for note in sorted(self.notes, key=lambda x: x.id or ""): + note.link(file) + for source, quote in sorted( + self.sources, key=lambda x: (x[0].id or "", x[1] or "") + ): + source.link(file, 1) + if quote: + file.write(cont("2 PAGE " + quote)) + + +class Fam: + """GEDCOM family class + :param husb: husbant fid + :param wife: wife fid + :param tree: a Tree object + :param num: a GEDCOM identifier + """ + + counter = 0 + + def __init__( + self, + husband: Optional[Indi] = None, + wife: Optional[Indi] = None, + tree: Optional["Tree"] = None, + num=None, + ): + self._handle: Optional[str] = None + self.num = num if num else Fam.gen_id(husband, wife) + self.fid: Optional[str] = None + self._husband = husband + self._wife = wife + self.tree = tree + self.num_prefix = "F" + self.origin_file: Optional[str] = None + self.children: Set[Indi] = set() + self.facts: Set[Fact] = set() + self.sealing_spouse: Optional[Ordinance] = None + self.husb_num: Optional[str] = None + self.wife_num: Optional[str] = None + self.chil_num: Set[str] = set() + self.husb_fid: Optional[str] = None + self.wife_fid: Optional[str] = None + self.chil_fid: Set[str] = set() + self.notes: Set[Note] = set() + self.sources: Set[Tuple[Source, Optional[str]]] = set() + + @property + def husband(self): + """get husband""" + if self._husband: + return self._husband + if self.husb_num and self.tree and self.husb_num in self.tree.indi: + return self.tree.indi.get(self.husb_num) + return None + + @husband.setter + def husband(self, value): + """set husband""" + self._husband = value + + @property + def wife(self): + """get wife""" + if self._wife: + return self._wife + if self.wife_num and self.tree and self.wife_num in self.tree.indi: + return self.tree.indi.get(self.wife_num) + return None + + @wife.setter + def wife(self, value): + """set wife""" + self._wife = value + + @property + def handle(self): + if not self._handle: + self._handle = "_" + os.urandom(10).hex() + + return self._handle + + @staticmethod + def gen_id(husband: Indi | None, wife: Indi | None) -> str: + if husband and wife: + return f"FAM_{husband.id}-{wife.id}" + if husband: + return f"FAM_{husband.id}-UNK" + if wife: + return f"FAM_UNK-{wife.id}" + + Fam.counter += 1 + return f"FAM_UNK-UNK-{Fam.counter}" + + def add_child(self, child: Indi | None): + """add a child fid to the family""" + if child is not None: + self.children.add(child) + + def add_marriage(self, fid: str): + """retrieve and add marriage information + :param fid: the marriage fid + """ + if not self.tree or not self.tree.fs: + return + + if not self.fid: + self.fid = fid + url = "/platform/tree/couple-relationships/%s" % self.fid + data = self.tree.fs.get_url(url) + if data: + if "facts" in data["relationships"][0]: + for x in data["relationships"][0]["facts"]: + self.facts.add(Fact(x, self.tree, num_prefix=f"FAM_{self.fid}")) + if "sources" in data["relationships"][0]: + quotes = dict() + for x in data["relationships"][0]["sources"]: + quotes[x["descriptionId"]] = ( + x["attribution"]["changeMessage"] + if "changeMessage" in x["attribution"] + else None + ) + # self.tree.sources is effectively Dict[str, Source] so keys() returns strings + new_sources = quotes.keys() - self.tree.sources.keys() + if new_sources: + sources = self.tree.fs.get_url( + "/platform/tree/couple-relationships/%s/sources" % self.fid + ) + for source in sources["sourceDescriptions"]: + if ( + source["id"] in new_sources + and source["id"] not in self.tree.sources + ): + self.tree.sources[source["id"]] = Source( + source, self.tree + ) + for source_fid, change_message in quotes.items(): + self.sources.add( + (self.tree.sources[source_fid], change_message) + ) + + def get_notes(self): + """retrieve marriage notes""" + if self.fid and self.tree and self.tree.fs: + notes = self.tree.fs.get_url( + "/platform/tree/couple-relationships/%s/notes" % self.fid + ) + if notes: + for n in notes["relationships"][0]["notes"]: + text_note = "=== %s ===\n" % n["subject"] if "subject" in n else "" + text_note += n["text"] + "\n" if "text" in n else "" + self.notes.add( + Note( + text_note, + self.tree, + num_prefix=f"FAM_{self.fid}", + note_type="Marriage Note", + ) + ) + + @property + def id(self): + # Prefer fid (original FamilySearch ID) to preserve through merge + # Fall back to num (counter) for newly created families + return self.fid if self.fid else self.num + + def printxml(self, parent_element): + # + # + # + # + # + # + # + family = ET.SubElement( + parent_element, + "family", + handle=self.handle, + # change='1720382301', + id=self.id, + ) + ET.SubElement(family, "rel", type="Unknown") + if self.husband: + ET.SubElement(family, "father", hlink=self.husband.handle) + if self.wife: + ET.SubElement(family, "mother", hlink=self.wife.handle) + for child in self.children: + ET.SubElement(family, "childref", hlink=child.handle) + for fact in self.facts: + ET.SubElement(family, "eventref", hlink=fact.handle, role="Primary") + + def get_contributors(self): + """retrieve contributors""" + if self.fid and self.tree: + url = "/platform/tree/couple-relationships/%s/changes" % self.fid + text = self.tree.get_contributors_text(url) + if text: + for n in self.tree.notes: + if n.text == text: + self.notes.add(n) + return + self.notes.add(Note(text, self.tree)) + + def print(self, file=sys.stdout): + """print family information in GEDCOM format""" + file.write("0 @F%s@ FAM\n" % self.id) + if self.husband: + file.write("1 HUSB @I%s@\n" % self.husband.id) + if self.wife: + file.write("1 WIFE @I%s@\n" % self.wife.id) + for child in sorted(self.children, key=lambda x: x.id or ""): + file.write("1 CHIL @I%s@\n" % child.id) + for fact in sorted( + self.facts, + key=lambda x: ( + x.date or "9999", + x.type or "", + x.value or "", + x.place.id if x.place else "", + x.note.text if x.note else "", + ), + ): + fact.print(file) + if self.sealing_spouse: + file.write("1 SLGS\n") + self.sealing_spouse.print(file) + if self.fid: + file.write("1 _FSFTID %s\n" % self.fid) + for note in sorted(self.notes, key=lambda x: x.id or ""): + note.link(file) + for source, quote in sorted( + self.sources, key=lambda x: (x[0].id or "", x[1] or "") + ): + source.link(file, 1) + if quote: + file.write(cont("2 PAGE " + quote)) + + +class Tree: + """family tree class + :param fs: a Session object + """ + + def __init__( + self, + fs: Optional[GMASession] = None, + exclude: Optional[List[str]] = None, + geonames_key=None, + creation_date: Optional[datetime] = None, + **kwargs, + ): + self.fs = fs + self.geonames_key = geonames_key + self.lock = threading.Lock() + self.creation_date: Optional[datetime] = creation_date + self.indi: Dict[str, Indi] = {} + self.fam: Dict[str, Fam] = {} + self.notes: Set[Note] = set() + self.facts: Set[Fact] = set() + self.sources: Dict[str, Source] = {} + self.citations: Dict[str, Citation] = {} + self.places: Set[Place] = set() + self.places_by_names: Dict[str, Place] = {} + self.place_cache: Dict[str, Tuple[float, float]] = {} + self.display_name: Optional[str] = None + self.lang: Optional[str] = None + self.exclude: List[str] = exclude or [] + self.only_blood_relatives = False + if "only_blood_relatives" in kwargs: + self.only_blood_relatives = kwargs["only_blood_relatives"] + self.place_counter = 0 + if fs: + self.display_name = fs.display_name + self.lang = babelfish.Language.fromalpha2(fs.lang).name + + # Geocoder cache - honor GMA_CACHE_DIR if present, else fallback to ~/.cache/getmyancestors/ + geocache_dir = os.environ.get( + "GMA_CACHE_DIR", os.path.expanduser("~/.cache/getmyancestors") + ) + os.makedirs(geocache_dir, exist_ok=True) + geocache_path = os.path.join(geocache_dir, "geocoder_requests") + + self.geosession = CachedSession( + geocache_path, + backend="sqlite", + expire_after=86400, + allowable_codes=(200,), + backend_kwargs={"table_name": "requests"}, + ) + if os.environ.get("GMA_OFFLINE_MODE"): + orig_request = self.geosession.request + + def offline_request(*args, **kwargs): + kwargs["only_if_cached"] = True + return orig_request(*args, **kwargs) + + self.geosession.request = offline_request # type: ignore[method-assign] + + def add_indis(self, fids_in: Iterable[str]): + """add individuals to the family tree + :param fids: an iterable of fid + """ + fids = [] + for fid in fids_in: + if fid not in self.exclude: + fids.append(fid) + else: + print("Excluding %s from the family tree" % fid, file=sys.stderr) + + async def add_datas(loop, data): + futures = set() + for person in data["persons"]: + self.indi[person["id"]] = Indi(person["id"], self) + futures.add( + loop.run_in_executor(None, self.indi[person["id"]].add_data, person) + ) + for future in futures: + await future + + new_fids = sorted([fid for fid in fids if fid and fid not in self.indi]) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + while new_fids: + if not self.fs: + break + data = self.fs.get_url( + "/platform/tree/persons?pids=" + ",".join(new_fids[:MAX_PERSONS]) + ) + if data: + if "places" in data: + for place in data["places"]: + if place["id"] not in self.place_cache: + self.place_cache[place["id"]] = ( + place["latitude"], + place["longitude"], + ) + loop.run_until_complete(add_datas(loop, data)) + if "childAndParentsRelationships" in data: + for rel in data["childAndParentsRelationships"]: + father: str | None = rel.get("parent1", {}).get("resourceId") + mother: str | None = rel.get("parent2", {}).get("resourceId") + child: str | None = rel.get("child", {}).get("resourceId") + if child in self.indi: + self.indi[child].parents.add((father, mother)) + if father in self.indi: + self.indi[father].children.add((father, mother, child)) + if mother in self.indi: + self.indi[mother].children.add((father, mother, child)) + if "relationships" in data: + for rel in data["relationships"]: + if rel["type"] == "http://gedcomx.org/Couple": + person1 = rel["person1"]["resourceId"] + person2 = rel["person2"]["resourceId"] + relfid = rel["id"] + if person1 in self.indi: + self.indi[person1].spouses.add( + (person1, person2, relfid) + ) + if person2 in self.indi: + self.indi[person2].spouses.add( + (person1, person2, relfid) + ) + new_fids = new_fids[MAX_PERSONS:] + + def ensure_source(self, source_data: Dict[str, Any]) -> Source: + with self.lock: + if source_data["id"] not in self.sources: + self.sources[source_data["id"]] = Source(source_data, self) + return self.sources[source_data["id"]] + + def ensure_citation(self, data: Dict[str, Any], source: Source) -> Citation: + with self.lock: + citation_id = data["id"] + if citation_id not in self.citations: + self.citations[citation_id] = Citation(data, source) + return self.citations[citation_id] + + def ensure_family(self, father: Optional["Indi"], mother: Optional["Indi"]) -> Fam: + with self.lock: + fam_id = Fam.gen_id(father, mother) + if fam_id not in self.fam: + self.fam[fam_id] = Fam(father, mother, self) + return self.fam[fam_id] + + def get_contributors_text(self, url: str) -> Optional[str]: + """Helper to fetch contributors from a changelog URL""" + if not self.fs: + return None + data = self.fs.get_url(url, {"Accept": "application/x-gedcomx-atom+json"}) + if not data: + return None + + contributors_map = {} # name -> uri + names = set() + + for entry in data.get("entries", []): + for contrib in entry.get("contributors", []): + name = contrib.get("name", "Unknown") + uri = contrib.get("uri", "").replace("https://www.familysearch.org", "") + contributors_map[name] = uri + names.add(name) + + if not names: + return None + + text = "=== %s ===\n" % self.fs._("Contributors") + + for name in sorted(names): + text += name + agent_uri = contributors_map[name] + # Fetch agent details + # Default headers work better per jcarroll findings + agent_data = self.fs.get_url(agent_uri) + + # Display Name + try: + agent_names = agent_data["agents"][0]["names"] + display_name = "".join([n["value"] + " " for n in agent_names]).strip() + if display_name != name: + text += " (" + display_name + ")" + except (KeyError, IndexError, TypeError): + pass + + # Email + try: + email = agent_data["agents"][0]["emails"][0]["resource"].replace( + "mailto:", " " + ) + text += email + except (KeyError, IndexError, TypeError): + pass + + # Phone + try: + phone = agent_data["agents"][0]["phones"][0]["resource"].replace( + "tel:", " " + ) + text += phone + except (KeyError, IndexError, TypeError): + pass + + text += "\n" + + return text + + def place_by_geoname_id(self, place_id: str) -> Optional[Place]: + for place in self.places: + if place.id == place_id: + return place + return None + + def get_by_geonames_id(self, geonames_id: str) -> Optional[Place]: + print("Fetching place hierarchy for", geonames_id, file=sys.stderr) + hierarchy = geocoder.geonames( + geonames_id, + key=self.geonames_key, + lang=["hu", "en", "de"], + method="hierarchy", + session=self.geosession, + ) + + if hierarchy and hierarchy.ok: + last_place = None + for item in hierarchy.geojson.get("features", []): + properties = item.get("properties", {}) + code = properties.get("code") + + if code in ["AREA", "CONT"]: + continue + + print("Properties", properties, file=sys.stderr) + place_id = "GEO" + str(properties["geonames_id"]) + place = self.place_by_geoname_id(place_id) + if place is None: + place = Place( + place_id, + properties.get("address"), + GEONAME_FEATURE_MAP.get(code, "Unknown"), + last_place, + properties.get("lat"), + properties.get("lng"), + ) + self.places.add(place) + last_place = place + return last_place + return None + + @property + def _next_place_counter(self): + self.place_counter += 1 + return self.place_counter + + def ensure_place( + self, + place_name: str, + fid: Optional[str] = None, + coord: Optional[Tuple[float, float]] = None, + ) -> Place: + with self.lock: + if place_name not in self.places_by_names: + place = None + if self.geonames_key: + print("Fetching place", place_name, file=sys.stderr) + geoname_record = geocoder.geonames( + place_name, + key=self.geonames_key, + session=self.geosession, + ) + if geoname_record and geoname_record.ok: + place = self.get_by_geonames_id(geoname_record.geonames_id) + if place is None: + coord = ( + self.place_cache.get(fid) if coord is None and fid else coord + ) + start_char = ( + "P" + + hashlib.md5(place_name.encode("utf-8")) + .hexdigest()[:6] + .upper() + ) + place = Place( + ("PFSID" + fid if fid is not None else start_char), + place_name, + latitude=coord[0] if coord is not None else None, + longitude=coord[1] if coord is not None else None, + ) + self.places.add(place) + self.places_by_names[place_name] = place + return self.places_by_names[place_name] + + # def add_fam(self, father, mother): + # """add a family to the family tree + # :param father: the father fid or None + # :param mother: the mother fid or None + # """ + # if (father, mother) not in self.fam: + # self.fam[(father, mother)] = Fam(father, mother, self) + + def add_trio(self, father: Indi | None, mother: Indi | None, child: Indi | None): + """add a children relationship to the family tree + :param father: the father fid or None + :param mother: the mother fid or None + :param child: the child fid or None + """ + fam = self.ensure_family(father, mother) + if child is not None: + fam.add_child(child) + child.add_famc(fam) + + if father is not None: + father.add_fams(fam) + if mother is not None: + mother.add_fams(fam) + + def add_parents(self, fids: Iterable[str]) -> Set[str]: + """add parents relationships + :param fids: a set of fids + """ + parents = set() + for fid in [f for f in fids if f in self.indi]: + for couple in self.indi[fid].parents: + parents |= set(couple) + if parents: + parents -= set(self.exclude) + self.add_indis(set(filter(None, parents))) + for fid in [f for f in fids if f in self.indi]: + for father, mother in self.indi[fid].parents: + self.add_trio( + self.indi.get(father) if father else None, + self.indi.get(mother) if mother else None, + self.indi.get(fid) if fid else None, + ) + return set(filter(None, parents)) + + def add_spouses(self, fids: Iterable[str]): + """add spouse relationships + :param fids: a set of fid + """ + + async def add( + loop, rels: Set[Tuple[Optional[str], Optional[str], Optional[str]]] + ): + futures = set() + for father, mother, relfid in rels: + if ( + father in self.exclude + or mother in self.exclude + or not father + or not mother + ): + continue + fam_id = Fam.gen_id(self.indi[father], self.indi[mother]) + if self.fam.get(fam_id): + futures.add( + loop.run_in_executor( + None, self.fam[fam_id].add_marriage, relfid + ) + ) + for future in futures: + await future + + rels: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = set() + for fid in [f for f in fids if f in self.indi]: + rels |= self.indi[fid].spouses + # TODO: test this + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + if rels: + all_involved = set.union( + set(), + *( + {father, mother} + for father, mother, relfid in rels + if father and mother + ), + ) + self.add_indis(set(filter(None, all_involved))) + for father, mother, _ in rels: + if father in self.indi and mother in self.indi: + father_indi = self.indi[father] + mother_indi = self.indi[mother] + fam = self.ensure_family(father_indi, mother_indi) + father_indi.add_fams(fam) + mother_indi.add_fams(fam) + + loop.run_until_complete(add(loop, rels)) + + def add_children(self, fids: Iterable[str]) -> Set[str]: + """add children relationships + :param fids: a set of fid + """ + rels: Set[Tuple[Optional[str], Optional[str], Optional[str]]] = set() + for fid in [f for f in fids if f in self.indi]: + rels |= self.indi[fid].children if fid in self.indi else set() + children = set() + if rels: + all_involved = set.union(set(), *(set(rel) for rel in rels if rel)) + all_involved -= set(self.exclude) + self.add_indis(set(filter(None, all_involved))) + for father, mother, child in rels: + has_child = child in self.indi + if not has_child: + continue + + father_valid = not father or father in self.indi + mother_valid = not mother or mother in self.indi + if father_valid and mother_valid and (father or mother): + self.add_trio( + self.indi.get(father) if father else None, + self.indi.get(mother) if mother else None, + self.indi.get(child) if child else None, + ) + children.add(child) + return set(filter(None, children)) + + def add_ordinances(self, fid): + """retrieve ordinances + :param fid: an individual fid + """ + if fid in self.indi: + ret, famc = self.indi[fid].get_ordinances() + if famc: + # self.fam is keyed by (father_id, mother_id), so we can't look up by fam_id directly. + # Find family by iterating values + for f in self.fam.values(): + if f.fid == famc: + sc = self.indi[fid].sealing_child + if sc: + sc.famc = f + break + for o in ret: + spouse_id = o["relationships"]["spouseId"] + for f in self.fam.values(): + if ( + f.husband + and f.husband.fid == fid + and f.wife + and f.wife.fid == spouse_id + ): + f.sealing_spouse = Ordinance(o) + break + if ( + f.husband + and f.husband.fid == spouse_id + and f.wife + and f.wife.fid == fid + ): + f.sealing_spouse = Ordinance(o) + break + + def reset_num(self): + """reset all GEDCOM identifiers""" + # TODO: implement this + # for husb, wife in self.fam: + # self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None + # self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None + # self.fam[(husb, wife)].chil_num = set( + # self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid + # ) + # for fid in self.indi: + # self.indi[fid].famc_num = set( + # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid + # ) + # self.indi[fid].fams_num = set( + # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid + # ) + # self.indi[fid].famc_ids = set( + # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid + # ) + # self.indi[fid].fams_ids = set( + # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid + # ) + + def printxml(self, file: BinaryIO): + # TODO: implement this + # root = ET.Element("root") + # doc = ET.SubElement(root, "doc") + + # ET.SubElement(doc, "field1", name="blah").text = "some value1" + # ET.SubElement(doc, "field2", name="asdfasd").text = "some vlaue2" + + # tree = ET.ElementTree(root) + # tree.write("filename.xml") + + # + # + # + #
+ # + # Barnabás Südy + # + #
+ + root = ET.Element("database", xmlns="http://gramps-project.org/xml/1.7.1/") + + header = ET.SubElement(root, "header") + ET.SubElement( + header, + "created", + date=datetime.strftime(datetime.now(), "%Y-%m-%d"), + version="5.2.2", + ) + researcher = ET.SubElement(header, "researcher") + resname = ET.SubElement(researcher, "resname") + resname.text = self.display_name + + people = ET.SubElement(root, "people") + for indi in sorted(self.indi.values(), key=lambda x: x.id): + indi.printxml(people) + + families = ET.SubElement(root, "families") + for fam in sorted(self.fam.values(), key=lambda x: x.id): + fam.printxml(families) + + events = ET.SubElement(root, "events") + for fact in self.facts: + fact.printxml(events) + + notes = ET.SubElement(root, "notes") + for note in sorted(self.notes, key=lambda x: x.id): + note.printxml(notes) + + places = ET.SubElement(root, "places") + for place in self.places: + place.printxml(places) + + sources = ET.SubElement(root, "sources") + for source in self.sources.values(): + source.printxml(sources) + + citations = ET.SubElement(root, "citations") + for citation in self.citations.values(): + citation.printxml(citations) + + tree = ET.ElementTree(root) + + doctype = '' + file.write(doctype.encode("utf-8")) + tree.write(file, "utf-8") + + def print(self, file=sys.stdout): + """print family tree in GEDCOM format""" + file.write("0 HEAD\n") + file.write("1 CHAR UTF-8\n") + file.write("1 GEDC\n") + file.write("2 VERS 5.5.1\n") + file.write("2 FORM LINEAGE-LINKED\n") + file.write("1 SOUR getmyancestors\n") + file.write("2 VERS %s\n" % __version__) + file.write("2 NAME getmyancestors\n") + # Use provided creation date if available, otherwise current time + if self.creation_date: + date_str = self.creation_date.strftime("%d %b %Y").upper() + time_str = self.creation_date.strftime("%H:%M:%S") + else: + date_str = time.strftime("%d %b %Y").upper() + time_str = time.strftime("%H:%M:%S") + + file.write("1 DATE %s\n" % date_str) + file.write("2 TIME %s\n" % time_str) + file.write("1 SUBM @SUBM@\n") + file.write("0 @SUBM@ SUBM\n") + file.write("1 NAME %s\n" % self.display_name) + # file.write("1 LANG %s\n" % self.lang) + + for fid in sorted(self.indi, key=lambda x: self.indi[x].id or ""): + self.indi[fid].print(file) + for fam in sorted(self.fam.values(), key=lambda x: x.id or ""): + fam.print(file) + sources = sorted(self.sources.values(), key=lambda x: x.id or "") + for s in sources: + s.print(file) + # Deduplicate notes by text content before printing + seen_texts = set() + unique_notes = [] + for n in sorted(self.notes, key=lambda x: x.id): + if n.text not in seen_texts: + seen_texts.add(n.text) + unique_notes.append(n) + for n in unique_notes: + n.print(file) + file.write("0 TRLR\n") diff --git a/getmyancestors/classes/tree/elements.py b/getmyancestors/classes/tree/elements.py new file mode 100644 index 0000000..08ca269 --- /dev/null +++ b/getmyancestors/classes/tree/elements.py @@ -0,0 +1,206 @@ +"""Element classes: Name, Place, Ordinance, Citation""" + +import os +import sys +import xml.etree.ElementTree as ET +from typing import Any, Dict, Optional +from xml.etree.ElementTree import Element + +from getmyancestors.classes.constants import ORDINANCES_STATUS + +from .records import Note +from .utils import NAME_MAP, cont + + +class Name: + """GEDCOM Name class""" + + def __init__( + self, data=None, tree=None, owner_fis=None, kind=None, alternative: bool = False + ): + self.given = "" + self.surname = "" + self.prefix = None + self.suffix = None + self.note = None + self.alternative = alternative + self.owner_fis = owner_fis + self.kind = kind + if data: + if "parts" in data["nameForms"][0]: + for z in data["nameForms"][0]["parts"]: + if z["type"] == "http://gedcomx.org/Given": + self.given = z["value"] + if z["type"] == "http://gedcomx.org/Surname": + self.surname = z["value"] + if z["type"] == "http://gedcomx.org/Prefix": + self.prefix = z["value"] + if z["type"] == "http://gedcomx.org/Suffix": + self.suffix = z["value"] + if "changeMessage" in data.get("attribution", {}): + self.note = Note( + data["attribution"]["changeMessage"], + tree, + note_type="Name Note", + ) + + def __str__(self): + return f"{self.given} {self.surname}" + + def __eq__(self, other): + if not isinstance(other, Name): + return NotImplemented + return ( + self.given == other.given + and self.surname == other.surname + and self.prefix == other.prefix + and self.suffix == other.suffix + and self.kind == other.kind + and self.alternative == other.alternative + and (self.note.text if self.note else None) + == (other.note.text if other.note else None) + ) + + def __hash__(self): + return hash( + ( + self.given, + self.surname, + self.prefix, + self.suffix, + self.kind, + self.alternative, + self.note.text if self.note else None, + ) + ) + + def printxml(self, parent_element): + params = {} + if self.kind is not None: + params["type"] = NAME_MAP.get(self.kind, self.kind) + if self.alternative: + params["alt"] = "1" + person_name = ET.SubElement(parent_element, "name", **params) + ET.SubElement(person_name, "first").text = self.given + ET.SubElement(person_name, "surname").text = self.surname + # TODO prefix / suffix + + def print(self, file=sys.stdout, typ=None): + tmp = "1 NAME %s /%s/" % (self.given, self.surname) + if self.suffix: + tmp += " " + self.suffix + file.write(cont(tmp)) + if typ: + file.write("2 TYPE %s\n" % typ) + if self.prefix: + file.write("2 NPFX %s\n" % self.prefix) + if self.note: + self.note.link(file, 2) + + +class Place: + """GEDCOM Place class""" + + counter = 0 + + def __init__( + self, + place_id: str, + name: str, + place_type: Optional[str] = None, + parent: Optional["Place"] = None, + latitude: Optional[float] = None, + longitude: Optional[float] = None, + ): + self._handle: Optional[str] = None + self.name = name + self.type = place_type + self.id = place_id + self.parent = parent + self.latitude = latitude + self.longitude = longitude + + @property + def handle(self): + if not self._handle: + self._handle = "_" + os.urandom(10).hex() + return self._handle + + def print(self, file, indentation=0): + file.write("%d @P%s@ PLAC %s\n" % (indentation, self.id, self.name)) + + def __eq__(self, other): + if not isinstance(other, Place): + return NotImplemented + return self.name == other.name and self.id == other.id + + def __hash__(self): + return hash((self.name, self.id)) + + def printxml(self, parent_element): + place_element = ET.SubElement( + parent_element, + "placeobj", + handle=self.handle, + id=self.id, + type=self.type or "Unknown", + ) + ET.SubElement(place_element, "pname", value=self.name) + if self.parent: + ET.SubElement(place_element, "placeref", hlink=self.parent.handle) + if self.latitude is not None and self.longitude is not None: + ET.SubElement( + place_element, "coord", long=str(self.longitude), lat=str(self.latitude) + ) + + +class Ordinance: + """GEDCOM Ordinance class""" + + def __init__(self, data=None): + self.date = self.temple_code = self.status = self.famc = None + if data: + if "completedDate" in data: + self.date = data["completedDate"] + if "completedTemple" in data: + self.temple_code = data["completedTemple"]["code"] + self.status = data.get("status") + + def print(self, file): + if self.date: + file.write(cont("2 DATE " + self.date)) + if self.temple_code: + file.write("2 TEMP %s\n" % self.temple_code) + if self.status in ORDINANCES_STATUS: + file.write("2 STAT %s\n" % ORDINANCES_STATUS[self.status]) + if self.famc: + file.write("2 FAMC @F%s@\n" % self.famc.num) + + +class Citation: + """Citation class""" + + def __init__(self, data: Dict[str, Any], source): + self._handle: Optional[str] = None + self.id = data["id"] + self.source = source + attr = data.get("attribution", {}) + self.message = attr.get("changeMessage") + self.modified = attr.get("modified") + + @property + def handle(self): + if not self._handle: + self._handle = "_" + os.urandom(10).hex() + return self._handle + + def printxml(self, parent_element: Element): + citation_element = ET.SubElement( + parent_element, + "citation", + handle=self.handle, + change=str(int(self.modified / 1000)), + id="C" + str(self.id), + ) + ET.SubElement(citation_element, "confidence").text = "2" + ET.SubElement(citation_element, "sourceref", hlink=self.source.handle) diff --git a/getmyancestors/classes/tree/records.py b/getmyancestors/classes/tree/records.py new file mode 100644 index 0000000..0443c72 --- /dev/null +++ b/getmyancestors/classes/tree/records.py @@ -0,0 +1,370 @@ +"""Record classes: Note, Source, Fact, Memorie""" + +import hashlib +import os +import sys +import xml.etree.ElementTree as ET +from typing import TYPE_CHECKING, Any, Dict, Optional +from urllib.parse import unquote, unquote_plus +from xml.etree.ElementTree import Element + +from getmyancestors.classes.constants import FACT_EVEN, FACT_TAGS + +from .utils import cont + +if TYPE_CHECKING: + from .core import Tree + + +class Note: + """GEDCOM Note class""" + + def __init__(self, text="", tree=None, num=None, num_prefix=None, note_type=None): + self._handle = None + self.note_type = note_type or "Source Note" + self.num_prefix = num_prefix + self.text = text.strip() + + if num: + self.num = num + else: + # Use hash of text for deterministic ID + self.num = hashlib.md5(self.text.encode("utf-8")).hexdigest()[:10].upper() + + # Restore debug print if verbose + if tree and hasattr(tree, "fs") and getattr(tree.fs, "verbose", False): + print(f"##### Creating Note: {num_prefix}, {self.num}", file=sys.stderr) + + if tree: + if self in tree.notes: + if hasattr(tree, "fs") and getattr(tree.fs, "verbose", False): + preview = ( + self.text[:60].replace("\n", " ") if self.text else "" + ) + print( + f"♻️ Deduplicated {self.note_type}: ID={self.id} Text='{preview}...' (Prefix={self.num_prefix})", + file=sys.stderr, + ) + tree.notes.add(self) + + def __eq__(self, other): + if not isinstance(other, Note): + return False + return self.text == other.text and self.num_prefix == other.num_prefix + + def __hash__(self): + return hash((self.text, self.num_prefix)) + + def __str__(self): + return f"{self.num}. {self.text}" + + @property + def id(self): + return ( + f"{self.num_prefix}_{self.num}" + if self.num_prefix is not None + else str(self.num) + ) + + def print(self, file=sys.stdout): + + # NOTE: print is not passed tree, so we can't check verbose easily unless we store it. + # But Note is simple. Maybe skip this one or check global? + # The user specifically asked for L34. + file.write(cont("0 @N%s@ NOTE %s" % (self.id, self.text))) + + def link(self, file=sys.stdout, level=1): + + file.write("%s NOTE @N%s@\n" % (level, self.id)) + + @property + def handle(self): + if not self._handle: + self._handle = "_" + os.urandom(10).hex() + return self._handle + + def printxml(self, parent_element: Element) -> None: + note_element = ET.SubElement( + parent_element, + "note", + handle=self.handle, + id=self.id, + type="Source Note", + ) + ET.SubElement(note_element, "text").text = self.text + + +class Source: + """GEDCOM Source class""" + + counter: int = 0 + + def __init__(self, data=None, tree=None, num=None): + if num: + self.num = num + else: + Source.counter += 1 + self.num = Source.counter + + self._handle = None + self.tree = tree + self.url = self.citation = self.title = self.fid = None + self.notes = set() + if data: + self.fid = data["id"] + if "about" in data: + self.url = data["about"].replace( + "familysearch.org/platform/memories/memories", + "www.familysearch.org/photos/artifacts", + ) + if "citations" in data: + self.citation = data["citations"][0]["value"] + if "titles" in data: + self.title = data["titles"][0]["value"] + if "notes" in data: + notes = [n["text"] for n in data["notes"] if n["text"]] + for _idx, n in enumerate(notes): + self.notes.add( + Note( + n, + self.tree, + num=None, + note_type="Source Note", + ) + ) + self.modified = data["attribution"]["modified"] + + def __str__(self): + return f"{self.num}. {self.title}" + + @property + def id(self): + return "S" + str(self.fid or self.num) + + @property + def handle(self): + if not self._handle: + self._handle = "_" + os.urandom(10).hex() + return self._handle + + def print(self, file=sys.stdout): + file.write("0 @S%s@ SOUR \n" % self.id) + if self.title: + file.write(cont("1 TITL " + self.title)) + if self.citation: + file.write(cont("1 AUTH " + self.citation)) + if self.url: + file.write(cont("1 PUBL " + self.url)) + for n in sorted(self.notes, key=lambda x: x.id or ""): + n.link(file, 1) + file.write("1 REFN %s\n" % self.fid) + + def link(self, file=sys.stdout, level=1): + file.write("%s SOUR @S%s@\n" % (level, self.id)) + + def printxml(self, parent_element: Element) -> None: + source_element = ET.SubElement( + parent_element, + "source", + handle=self.handle, + change=str(int(self.modified / 1000)), + id=self.id, + ) + if self.title: + ET.SubElement(source_element, "stitle").text = self.title + if self.citation: + ET.SubElement(source_element, "sauthor").text = self.citation + if self.url: + ET.SubElement(source_element, "spubinfo").text = self.url + if self.fid: + ET.SubElement(source_element, "srcattribute", type="REFN", value=self.fid) + + +class Fact: + """GEDCOM Fact class""" + + counter: Dict[str, int] = {} + + def __init__(self, data=None, tree: Optional["Tree"] = None, num_prefix=None): + self.value: Optional[str] = None + self.type: Optional[str] = None + self.date: Optional[str] = None + self.date_type: Optional[str] = None + self.place = None + self.note = None + self.map = None + self._handle = None + if data: + if "value" in data: + self.value = data["value"] + if "type" in data: + self.type = data["type"] + self.fs_type = self.type + if self.type in FACT_EVEN and tree and tree.fs: + # Cast or ignore, FS session dynamic attr _ + self.type = tree.fs._(FACT_EVEN[self.type]) + elif self.type[:6] == "data:,": + self.type = unquote(self.type[6:]) + elif self.type not in FACT_TAGS: + self.type = None + + self.num_prefix = ( + f"{num_prefix}_{FACT_TAGS[self.type]}" + if num_prefix and self.type in FACT_TAGS + else num_prefix + ) + Fact.counter[self.num_prefix or "None"] = ( + Fact.counter.get(self.num_prefix or "None", 0) + 1 + ) + self.num = Fact.counter[self.num_prefix or "None"] + if data: + if "date" in data: + if "formal" in data["date"]: + self.date = data["date"]["formal"].split("+")[-1].split("/")[0] + if data["date"]["formal"].startswith("A+"): + self.date_type = "about" + elif data["date"]["formal"].startswith("/+"): + self.date_type = "before" + elif data["date"]["formal"].endswith("/"): + self.date_type = "after" + else: + self.date = data["date"]["original"] + if "place" in data: + place = data["place"] + place_name = place["original"] + place_id = ( + place["description"][1:] + if "description" in place + and tree + and place["description"][1:] in (tree.places or []) + else None + ) + # Import Place locally to avoid circular import + + if tree: + self.place = tree.ensure_place(place_name, place_id) + if "changeMessage" in data["attribution"]: + self.note = Note( + data["attribution"]["changeMessage"], + tree, + num_prefix="E" + self.num_prefix if self.num_prefix else None, + note_type="Event Note", + ) + if self.type == "http://gedcomx.org/Death" and not ( + self.date or self.place + ): + self.value = "Y" + + if tree: + tree.facts.add(self) + + @property + def id(self): + return ( + f"{self.num_prefix}_{self.num}" + if self.num_prefix is not None + else str(self.num) + ) + + @property + def handle(self): + if not self._handle: + self._handle = "_" + os.urandom(10).hex() + return self._handle + + def __eq__(self, other): + """Facts are equal if type, date, date_type, place, and value match.""" + if not isinstance(other, Fact): + return False + # Compare by semantic content, not object identity + place_name = self.place.name if self.place else None + other_place_name = other.place.name if other.place else None + return ( + self.type == other.type + and self.date == other.date + and self.date_type == other.date_type + and place_name == other_place_name + and self.value == other.value + and (self.note.text if self.note else None) + == (other.note.text if other.note else None) + ) + + def __hash__(self): + """Hash based on semantic content for set deduplication.""" + place_name = self.place.name if self.place else None + return hash( + ( + self.type, + self.date, + self.date_type, + place_name, + self.value, + self.note.text if self.note else None, + ) + ) + + def printxml(self, parent_element): + event_element = ET.SubElement( + parent_element, + "event", + handle=self.handle, + id=self.id, + ) + ET.SubElement(event_element, "type").text = ( + unquote_plus(self.type[len("http://gedcomx.org/") :]) + if self.type and self.type.startswith("http://gedcomx.org/") + else self.type + ) + if self.date: + params: Dict[str, Any] = {"val": self.date} + if self.date_type is not None: + params["type"] = self.date_type + ET.SubElement(event_element, "datestr", **params) + if self.place: + ET.SubElement(event_element, "place", hlink=self.place.handle) + if self.note: + ET.SubElement(event_element, "noteref", hlink=self.note.handle) + + def print(self, file): + if self.type in FACT_TAGS: + tmp = "1 " + FACT_TAGS[self.type] + if self.value: + tmp += " " + self.value + file.write(cont(tmp)) + elif self.type: + file.write("1 EVEN\n2 TYPE %s\n" % self.type) + if self.value: + file.write(cont("2 NOTE Description: " + self.value)) + else: + return + if self.date: + file.write(cont("2 DATE " + self.date)) + if self.place: + self.place.print(file, 2) + if self.map: + latitude, longitude = self.map + file.write("3 MAP\n4 LATI %s\n4 LONG %s\n" % (latitude, longitude)) + if self.note: + self.note.link(file, 2) + + +class Memorie: + """GEDCOM Memorie class""" + + def __init__(self, data=None): + self.description = self.url = None + if data and "links" in data: + self.url = data["about"] + if "titles" in data: + self.description = data["titles"][0]["value"] + if "descriptions" in data: + self.description = ( + "" if not self.description else self.description + "\n" + ) + data["descriptions"][0]["value"] + + def print(self, file): + file.write("1 OBJE\n2 FORM URL\n") + if self.description: + file.write(cont("2 TITL " + self.description)) + if self.url: + file.write(cont("2 FILE " + self.url)) diff --git a/getmyancestors/classes/tree/utils.py b/getmyancestors/classes/tree/utils.py new file mode 100644 index 0000000..a4426f9 --- /dev/null +++ b/getmyancestors/classes/tree/utils.py @@ -0,0 +1,83 @@ +"""Utility constants and functions for tree package""" + +import re + +# Constants +COUNTY = "County" +COUNTRY = "Country" +CITY = "City" + + +NAME_MAP = { + "preferred": "Preferred Name", + "nickname": "Nickname", + "birthname": "Birth Name", + "aka": "Also Known As", + "married": "Married Name", +} + + +GEONAME_FEATURE_MAP = { + "ADM1": COUNTY, # first-order administrative division a primary administrative division of a country, such as a state in the United States + "ADM1H": COUNTY, # historical first-order administrative division a former first-order administrative division + "ADM2": COUNTY, # second-order administrative division a subdivision of a first-order administrative division + "ADM2H": COUNTY, # historical second-order administrative division a former second-order administrative division + "ADM3": COUNTY, # third-order administrative division a subdivision of a second-order administrative division + "ADM3H": COUNTY, # historical third-order administrative division a former third-order administrative division + "ADM4": COUNTY, # fourth-order administrative division a subdivision of a third-order administrative division + "ADM4H": COUNTY, # historical fourth-order administrative division a former fourth-order administrative division + "ADM5": COUNTY, # fifth-order administrative division a subdivision of a fourth-order administrative division + "ADM5H": COUNTY, # historical fifth-order administrative division a former fifth-order administrative division + "ADMD": COUNTY, # administrative division an administrative division of a country, undifferentiated as to administrative level + "ADMDH": COUNTY, # historical administrative division a former administrative division of a political entity, undifferentiated as to administrative level + # 'LTER': leased area a tract of land leased to another country, usually for military installations + "PCL": COUNTRY, # political entity + "PCLD": COUNTRY, # dependent political entity + "PCLF": COUNTRY, # freely associated state + "PCLH": COUNTRY, # historical political entity a former political entity + "PCLI": COUNTRY, # independent political entity + "PCLIX": COUNTRY, # section of independent political entity + "PCLS": COUNTRY, # semi-independent political entity + "PPL": CITY, # populated place a city, town, village, or other agglomeration of buildings where people live and work + "PPLA": CITY, # seat of a first-order administrative division seat of a first-order administrative division (PPLC takes precedence over PPLA) + "PPLA2": CITY, # seat of a second-order administrative division + "PPLA3": CITY, # seat of a third-order administrative division + "PPLA4": CITY, # seat of a fourth-order administrative division + "PPLA5": CITY, # seat of a fifth-order administrative division + "PPLC": CITY, # capital of a political entity + "PPLCH": CITY, # historical capital of a political entity a former capital of a political entity + "PPLF": CITY, # farm village a populated place where the population is largely engaged in agricultural activities + "PPLG": CITY, # seat of government of a political entity + "PPLH": CITY, # historical populated place a populated place that no longer exists + "PPLL": CITY, # populated locality an area similar to a locality but with a small group of dwellings or other buildings + "PPLQ": CITY, # abandoned populated place + "PPLR": CITY, # religious populated place a populated place whose population is largely engaged in religious occupations + "PPLS": CITY, # populated places cities, towns, villages, or other agglomerations of buildings where people live and work + "PPLW": CITY, # destroyed populated place a village, town or city destroyed by a natural disaster, or by war + "PPLX": CITY, # section of populated place +} + + +def cont(string): + """parse a GEDCOM line adding CONT and CONT tags if necessary""" + level = int(string[:1]) + 1 + lines = string.splitlines() + res = [] + max_len = 255 + for line in lines: + c_line = line + to_conc = [] + while len(c_line.encode("utf-8")) > max_len: + index = min(max_len, len(c_line) - 2) + while ( + len(c_line[:index].encode("utf-8")) > max_len + or re.search(r"[ \t\v]", c_line[index - 1 : index + 1]) + ) and index > 1: + index -= 1 + to_conc.append(c_line[:index]) + c_line = c_line[index:] + max_len = 248 + to_conc.append(c_line) + res.append(("\n%s CONC " % level).join(to_conc)) + max_len = 248 + return ("\n%s CONT " % level).join(res) + "\n" diff --git a/getmyancestors/fstogedcom.py b/getmyancestors/fstogedcom.py index db4faef..f4e939b 100644 --- a/getmyancestors/fstogedcom.py +++ b/getmyancestors/fstogedcom.py @@ -4,15 +4,24 @@ # global imports import os import sys -from tkinter import ( - Tk, - PhotoImage, -) + +try: + from tkinter import PhotoImage, Tk +except ImportError: + print("\n" + "=" * 60) + print("ERROR: Tkinter is not available.") + print("=" * 60) + print("The graphical interface requires Tkinter.") + print("\nInstallation instructions:") + print("- Ubuntu/Debian: sudo apt install python3-tk") + print("- Fedora/RHEL: sudo dnf install python3-tkinter") + print("- macOS: brew install python-tk") + print("- Windows: Usually included with Python installation") + print("\n" + "=" * 60) + sys.exit(1) # local imports -from getmyancestors.classes.gui import ( - FStoGEDCOM, -) +from getmyancestors.classes.gui import FStoGEDCOM def main(): diff --git a/getmyancestors/getmyanc.py b/getmyancestors/getmyanc.py new file mode 100755 index 0000000..418af68 --- /dev/null +++ b/getmyancestors/getmyanc.py @@ -0,0 +1,453 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +# global imports +from __future__ import print_function + +import asyncio +import getpass +import os +import re +import sys +import time +from datetime import datetime +from typing import List + +import typer + +from getmyancestors.classes.session import CachedSession, GMASession, Session +from getmyancestors.classes.tree import Tree + +app = typer.Typer( + help="Retrieve GEDCOM data from FamilySearch Tree", + add_completion=True, + no_args_is_help=True, + context_settings={"help_option_names": ["-h", "--help"]}, +) + + +@app.command() +def main( + username: str = typer.Option( + None, "-u", "--username", metavar="", help="FamilySearch username" + ), + password: str = typer.Option( + None, "-p", "--password", metavar="", help="FamilySearch password" + ), + individuals: List[str] = typer.Option( + None, + "-i", + "--individuals", + metavar="", + help="List of individual FamilySearch IDs for whom to retrieve ancestors", + ), + exclude: List[str] = typer.Option( + None, + "-e", + "--exclude", + metavar="", + help="List of individual FamilySearch IDs to exclude from the tree", + ), + ascend: int = typer.Option( + 4, "-a", "--ascend", metavar="", help="Number of generations to ascend [4]" + ), + descend: int = typer.Option( + 0, + "-d", + "--descend", + metavar="", + help="Number of generations to descend [0]", + ), + distance: int = typer.Option( + 0, + "--distance", + metavar="", + help="The maxium distance from the starting individuals [0]. If distance is set, ascend and descend will be ignored.", + ), + only_blood_relatives: bool = typer.Option( + False, + "--only-blood-relatives", + help="Only include blood relatives in the tree [False]", + ), + marriage: bool = typer.Option( + False, + "-m", + "--marriage", + help="Add spouses and couples information [False]", + ), + cache: bool = typer.Option( + True, "--cache/--no-cache", help="Enable/Disable http cache [True]" + ), + cache_control: bool = typer.Option( + True, + "--cache-control/--no-cache-control", + help="Disable cache-control (use dumb cache) [True]", + ), + get_contributors: bool = typer.Option( + False, + "-r", + "--get-contributors", + help="Add list of contributors in notes [False]", + ), + get_ordinances: bool = typer.Option( + False, + "-c", + "--get_ordinances", + help="Add LDS ordinances (need LDS account) [False]", + ), + verbose: bool = typer.Option( + False, "-v", "--verbose", help="Increase output verbosity [False]" + ), + timeout: int = typer.Option( + 60, "-t", "--timeout", metavar="", help="Timeout in seconds [60]" + ), + rate_limit: int = typer.Option( + 5, + "-R", + "--rate-limit", + metavar="", + help="Maximum requests per second [5]", + ), + xml: bool = typer.Option( + False, + "-x", + "--xml", + help="To print the output in Gramps XML format [False]", + ), + show_password: bool = typer.Option( + False, "--show-password", help="Show password in .settings file [False]" + ), + save_settings: bool = typer.Option( + False, "--save-settings", help="Save settings into file [False]" + ), + geonames: str = typer.Option( + None, + "-g", + "--geonames", + metavar="", + help="Geonames.org username in order to download place data", + ), + client_id: str = typer.Option( + None, "--client_id", metavar="", help="Use Specific Client ID" + ), + redirect_uri: str = typer.Option( + None, "--redirect_uri", metavar="", help="Use Specific Redirect Uri" + ), + creation_date: str = typer.Option( + None, + "--creation-date", + metavar="", + help="Override creation date in GEDCOM header (YYYY-MM-DDTHH:MM:SS)", + ), + outfile: str = typer.Option( + None, "-o", "--outfile", metavar="", help="output GEDCOM file [stdout]" + ), + logfile: str = typer.Option( + None, "-l", "--logfile", metavar="", help="output log file [stderr]" + ), + extra_individuals: List[str] = typer.Argument(None, hidden=True), +): + """ + Retrieve GEDCOM data from FamilySearch Tree + """ + # NOISY DEBUG FOR CI + if os.environ.get("GMA_DEBUG"): + print( + f"DEBUG: GMA_OFFLINE_MODE={os.environ.get('GMA_OFFLINE_MODE')}", + file=sys.stderr, + ) + print(f"DEBUG: GMA_DEBUG={os.environ.get('GMA_DEBUG')}", file=sys.stderr) + if extra_individuals: + if individuals is None: + individuals = [] + individuals.extend(extra_individuals) + + # dummy translation function + def _(s): + return s + + # Forces stdout to use UTF-8 or at least not crash on unknown characters + if hasattr(sys.stdout, "reconfigure"): + try: + sys.stdout.reconfigure(encoding="utf-8", errors="replace") + except Exception: + pass + + # Manually handle logfile opening (FileType is deprecated) + logfile_handle = None + if logfile: + try: + # pylint: disable=consider-using-with + logfile_handle = open(logfile, "w", encoding="UTF-8") + except OSError as e: + print(f"Could not open logfile: {e}", file=sys.stderr) + raise typer.Exit(code=2) from None + + if individuals: + for fid in individuals: + if not re.fullmatch(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid): + print("Invalid FamilySearch ID: " + fid, file=sys.stderr) + raise typer.Exit(code=1) + if exclude: + for fid in exclude: + if not re.fullmatch(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid): + print("Invalid FamilySearch ID: " + fid, file=sys.stderr) + raise typer.Exit(code=1) + + if not username: + if verbose: + print("⚠️ Warning: getting username from command line, env var not set.") + username = input("Enter FamilySearch username: ") + if not password: + if os.getenv("FAMILYSEARCH_PASS"): + if verbose: + print("✅ Using password from env var.") + password = os.getenv("FAMILYSEARCH_PASS") or "" + else: + if verbose: + print("⚠️ Warning: getting password from command line, env var not set.") + password = getpass.getpass("Enter FamilySearch password: ") + + if verbose: + print("✅ Using username: " + username) + print(f"✅ Using password: {len(password)} digits long.") + + time_count = time.time() + + # Report settings used when getmyancestors is executed + if save_settings and outfile and outfile != "": + + formatting = "{:74}{:\\t>1}\\n" + settings_name = outfile.rsplit(".", 1)[0] + ".settings" + try: + with open(settings_name, "w", encoding="utf-8") as settings_file: + settings_file.write( + formatting.format("time stamp: ", time.strftime("%X %x %Z")) + ) + # Reconstruct args for settings file + # This is a bit manual since we don't have Namespace, but feasible + params = locals() + for key, val in params.items(): + if key in [ + "settings_file", + "formatting", + "settings_name", + "_", + "logfile_handle", + "time_count", + "params", + ]: + continue + if key == "password" and not show_password: + val = "******" + settings_file.write( + formatting.format(f"--{key.replace('_', '-')}", str(val)) + ) + + except OSError as exc: + print( + "Unable to write %s: %s" % (settings_name, repr(exc)), file=sys.stderr + ) + + # initialize a FamilySearch session and a family tree object + print(_("Login to FamilySearch..."), file=sys.stderr) + + # Common params + session_kwargs = { + "username": username, + "password": password, + "client_id": client_id, + "redirect_uri": redirect_uri, + "verbose": verbose, + "logfile": logfile_handle, + "timeout": timeout, + "cache_control": cache_control, + "requests_per_second": rate_limit, + } + + if cache: + print(_("Using cache..."), file=sys.stderr) + fs: GMASession = CachedSession(**session_kwargs) # type: ignore + else: + fs = Session(**session_kwargs) + + if not fs.logged: + raise typer.Exit(code=2) + _ = fs._ + + creation_dt = None + if creation_date: + try: + creation_dt = datetime.fromisoformat(creation_date) + except ValueError: + print( + f"Invalid creation date format: {creation_date}. Expected ISO 8601 (YYYY-MM-DDTHH:MM:SS)", + file=sys.stderr, + ) + raise typer.Exit(code=1) from None + + tree = Tree( + fs, + exclude=exclude, + geonames_key=geonames, + only_blood_relatives=only_blood_relatives, + creation_date=creation_dt, + ) + + # check LDS account + if get_ordinances: + test = fs.get_url( + "/service/tree/tree-data/reservations/person/%s/ordinances" % fs.fid, {} + ) + if not test or test.get("status") != "OK": + raise typer.Exit(code=2) + + success = False + try: + # add list of starting individuals to the family tree + todo_list = individuals if individuals else ([fs.fid] if fs.fid else []) + if not todo_list: + raise typer.Exit(code=1) + print(_("Downloading starting individuals..."), file=sys.stderr) + tree.add_indis(todo_list) + + # download ancestors + if distance == 0: + todo = set(tree.indi.keys()) + done = set() + for i in range(ascend): + if not todo: + break + done |= todo + print( + _("Downloading %s. of generations of ancestors...") % (i + 1), + file=sys.stderr, + ) + todo = tree.add_parents(sorted(todo)) - done + + # download descendants + todo = set(tree.indi.keys()) + done = set() + for i in range(descend): + if not todo: + break + done |= todo + print( + _("Downloading %s. of generations of descendants...") % (i + 1), + file=sys.stderr, + ) + todo = tree.add_children(sorted(todo)) - done + + # download spouses + if marriage: + print( + _("Downloading spouses and marriage information..."), + file=sys.stderr, + ) + todo = set(tree.indi.keys()) + tree.add_spouses(sorted(todo)) + + else: + todo_bloodline = set(tree.indi.keys()) + # TODO: check for regressons here, since we removed a set() + done = set() + for dist in range(distance): + if not todo_bloodline: + break + done |= todo_bloodline + print( + _("Downloading individuals at distance %s...") % (dist + 1), + file=sys.stderr, + ) + parents = tree.add_parents(sorted(todo_bloodline)) - done + children = tree.add_children(sorted(todo_bloodline)) - done + + if marriage: + print( + _("Downloading spouses and marriage information..."), + file=sys.stderr, + ) + todo = set(tree.indi.keys()) + tree.add_spouses(sorted(todo)) + + todo_bloodline = parents | children + + # download ordinances, notes and contributors + async def download_stuff(loop): + futures = set() + for fid, indi in tree.indi.items(): + futures.add(loop.run_in_executor(None, indi.get_notes)) + if get_ordinances: + futures.add(loop.run_in_executor(None, tree.add_ordinances, fid)) + if get_contributors: + futures.add(loop.run_in_executor(None, indi.get_contributors)) + for fam in tree.fam.values(): + futures.add(loop.run_in_executor(None, fam.get_notes)) + if get_contributors: + futures.add(loop.run_in_executor(None, fam.get_contributors)) + for future in futures: + await future + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + print( + _("Downloading notes") + + ( + (("," if get_contributors else _(" and")) + _(" ordinances")) + if get_ordinances + else "" + ) + + (_(" and contributors") if get_contributors else "") + + "...", + file=sys.stderr, + ) + loop.run_until_complete(download_stuff(loop)) + + success = True + + finally: + if logfile_handle: + logfile_handle.close() + + if success: + tree.reset_num() + output_format = "XML" if xml else "GEDCOM" + print(_("Generating output..."), file=sys.stderr) + print( + _("Generating %s with %d individuals...") + % (output_format, len(tree.indi)), + file=sys.stderr, + ) + if xml: + if outfile: + with open(outfile, "wb") as f: + tree.printxml(f) + else: + tree.printxml(sys.stdout.buffer) + else: + if outfile: + with open(outfile, "w", encoding="UTF-8") as f_ged: + tree.print(f_ged) + else: + tree.print(sys.stdout) + + # Statistics printout (abbreviated for brevity) + print( + _( + "Downloaded %s individuals, %s families, %s sources and %s notes " + "in %s seconds with %s HTTP requests." + ) + % ( + str(len(tree.indi)), + str(len(tree.fam)), + str(len(tree.sources)), + str(len(tree.notes)), + str(round(time.time() - time_count)), + str(fs.counter), + ), + file=sys.stderr, + ) + + +if __name__ == "__main__": + app() diff --git a/getmyancestors/getmyancestors.py b/getmyancestors/getmyancestors.py deleted file mode 100644 index 3e13e4c..0000000 --- a/getmyancestors/getmyancestors.py +++ /dev/null @@ -1,377 +0,0 @@ -# coding: utf-8 - -# global imports -from __future__ import print_function -import re -import sys -import time -from urllib.parse import unquote -import getpass -import asyncio -import argparse - -# local imports -from getmyancestors.classes.tree import Tree -from getmyancestors.classes.session import Session -from getmyancestors.classes.session import CachedSession - - -def main(): - parser = argparse.ArgumentParser( - description="Retrieve GEDCOM data from FamilySearch Tree (4 Jul 2016)", - add_help=False, - usage="getmyancestors -u username -p password [options]", - ) - parser.add_argument( - "-u", "--username", metavar="", type=str, help="FamilySearch username" - ) - parser.add_argument( - "-p", "--password", metavar="", type=str, help="FamilySearch password" - ) - parser.add_argument( - "-i", - "--individuals", - metavar="", - nargs="+", - type=str, - help="List of individual FamilySearch IDs for whom to retrieve ancestors", - ) - parser.add_argument( - "-e", - "--exclude", - metavar="", - nargs="+", - type=str, - help="List of individual FamilySearch IDs to exclude from the tree", - ) - parser.add_argument( - "-a", - "--ascend", - metavar="", - type=int, - default=4, - help="Number of generations to ascend [4]", - ) - parser.add_argument( - "-d", - "--descend", - metavar="", - type=int, - default=0, - help="Number of generations to descend [0]", - ) - parser.add_argument( - '--distance', - metavar="", - type=int, - default=0, - help="The maxium distance from the starting individuals [0]. If distance is set, ascend and descend will be ignored.", - ) - parser.add_argument( - '--only-blood-relatives', - action="store_true", - default=True, - help="Only include blood relatives in the tree [False]", - ) - parser.add_argument( - "-m", - "--marriage", - action="store_true", - default=False, - help="Add spouses and couples information [False]", - ) - parser.add_argument( - "--cache", - action="store_true", - default=False, - help="Use of http cache to reduce requests during testing [False]", - ) - parser.add_argument( - "-r", - "--get-contributors", - action="store_true", - default=False, - help="Add list of contributors in notes [False]", - ) - parser.add_argument( - "-c", - "--get_ordinances", - action="store_true", - default=False, - help="Add LDS ordinances (need LDS account) [False]", - ) - parser.add_argument( - "-v", - "--verbose", - action="store_true", - default=False, - help="Increase output verbosity [False]", - ) - parser.add_argument( - "-t", - "--timeout", - metavar="", - type=int, - default=60, - help="Timeout in seconds [60]", - ) - - parser.add_argument( - "-x", - "--xml", - action="store_true", - default=False, - help="To print the output in Gramps XML format [False]", - ) - parser.add_argument( - "--show-password", - action="store_true", - default=False, - help="Show password in .settings file [False]", - ) - parser.add_argument( - "--save-settings", - action="store_true", - default=False, - help="Save settings into file [False]", - ) - parser.add_argument( - "-g", - "--geonames", - metavar="", - type=str, - help="Geonames.org username in order to download place data", - ) - try: - parser.add_argument( - "-o", - "--outfile", - metavar="", - # type=argparse.FileType("w", encoding="UTF-8"), - # default=sys.stdout, - help="output GEDCOM file [stdout]", - ) - parser.add_argument( - "-l", - "--logfile", - metavar="", - type=argparse.FileType("w", encoding="UTF-8"), - default=False, - help="output log file [stderr]", - ) - except TypeError: - sys.stderr.write("Python >= 3.4 is required to run this script\n") - sys.stderr.write("(see https://docs.python.org/3/whatsnew/3.4.html#argparse)\n") - sys.exit(2) - - # extract arguments from the command line - try: - parser.error = parser.exit - args = parser.parse_args() - except SystemExit: - parser.print_help(file=sys.stderr) - sys.exit(2) - if args.individuals: - for fid in args.individuals: - if not re.match(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid): - sys.exit("Invalid FamilySearch ID: " + fid) - if args.exclude: - for fid in args.exclude: - if not re.match(r"[A-Z0-9]{4}-[A-Z0-9]{3}", fid): - sys.exit("Invalid FamilySearch ID: " + fid) - - args.username = ( - args.username if args.username else input("Enter FamilySearch username: ") - ) - args.password = ( - args.password - if args.password - else getpass.getpass("Enter FamilySearch password: ") - ) - - time_count = time.time() - - # Report settings used when getmyancestors is executed - if args.save_settings and args.outfile.name != "": - - def parse_action(act): - if not args.show_password and act.dest == "password": - return "******" - value = getattr(args, act.dest) - return str(getattr(value, "name", value)) - - formatting = "{:74}{:\t>1}\n" - settings_name = args.outfile.name.split(".")[0] + ".settings" - try: - with open(settings_name, "w") as settings_file: - settings_file.write( - formatting.format("time stamp: ", time.strftime("%X %x %Z")) - ) - for action in parser._actions: - settings_file.write( - formatting.format( - action.option_strings[-1], parse_action(action) - ) - ) - except OSError as exc: - print( - "Unable to write %s: %s" % (settings_name, repr(exc)), file=sys.stderr - ) - - # initialize a FamilySearch session and a family tree object - print("Login to FamilySearch...", file=sys.stderr) - if args.cache: - print("Using cache...", file=sys.stderr) - fs = CachedSession(args.username, args.password, args.verbose, args.logfile, args.timeout) - else: - fs = Session(args.username, args.password, args.verbose, args.logfile, args.timeout) - if not fs.logged: - sys.exit(2) - _ = fs._ - tree = Tree( - fs, - exclude=args.exclude, - geonames_key=args.geonames, - ) - - # check LDS account - if args.get_ordinances: - test = fs.get_url( - "/service/tree/tree-data/reservations/person/%s/ordinances" % fs.fid, {} - ) - if test["status"] != "OK": - sys.exit(2) - - try: - # add list of starting individuals to the family tree - todo = args.individuals if args.individuals else [fs.fid] - print(_("Downloading starting individuals..."), file=sys.stderr) - tree.add_indis(todo) - - - - # download ancestors - if args.distance == 0: - todo = set(tree.indi.keys()) - done = set() - for i in range(args.ascend): - if not todo: - break - done |= todo - print( - _("Downloading %s. of generations of ancestors...") % (i + 1), - file=sys.stderr, - ) - todo = tree.add_parents(todo) - done - - # download descendants - todo = set(tree.indi.keys()) - done = set() - for i in range(args.descend): - if not todo: - break - done |= todo - print( - _("Downloading %s. of generations of descendants...") % (i + 1), - file=sys.stderr, - ) - todo = tree.add_children(todo) - done - - # download spouses - if args.marriage: - print(_("Downloading spouses and marriage information..."), file=sys.stderr) - todo = set(tree.indi.keys()) - tree.add_spouses(todo) - - else: - todo_bloodline = set(tree.indi.keys()) - todo_others = set() - done = set() - for distance in range(args.distance): - - if not todo_bloodline and not todo_others: - break - done |= todo_bloodline - print( - _("Downloading individuals at distance %s...") % (distance + 1), - file=sys.stderr, - ) - parents = tree.add_parents(todo_bloodline) - done - children = tree.add_children(todo_bloodline) - done - - # download spouses - if args.marriage: - print(_("Downloading spouses and marriage information..."), file=sys.stderr) - todo = set(tree.indi.keys()) - tree.add_spouses(todo) - - # spouses = tree.add_spouses(todo_bloodline) - done - - todo_bloodline = parents | children - # if args.only_blood_relatives: - # # Downloading non bloodline parents - # tree.add_parents(todo_others) - - # # TODO what is a non bloodline person becomes bloodline on another branch? - # todo_others = spouses - # else: - # todo_bloodline |= spouses - - # download ordinances, notes and contributors - async def download_stuff(loop): - futures = set() - for fid, indi in tree.indi.items(): - futures.add(loop.run_in_executor(None, indi.get_notes)) - if args.get_ordinances: - futures.add(loop.run_in_executor(None, tree.add_ordinances, fid)) - if args.get_contributors: - futures.add(loop.run_in_executor(None, indi.get_contributors)) - for fam in tree.fam.values(): - futures.add(loop.run_in_executor(None, fam.get_notes)) - if args.get_contributors: - futures.add(loop.run_in_executor(None, fam.get_contributors)) - for future in futures: - await future - - loop = asyncio.get_event_loop() - print( - _("Downloading notes") - + ( - (("," if args.get_contributors else _(" and")) + _(" ordinances")) - if args.get_ordinances - else "" - ) - + (_(" and contributors") if args.get_contributors else "") - + "...", - file=sys.stderr, - ) - loop.run_until_complete(download_stuff(loop)) - - finally: - # compute number for family relationships and print GEDCOM file - tree.reset_num() - if args.xml: - with open(args.outfile, "wb") as f: - tree.printxml(f) - else: - with open(args.outfile, "w", encoding="UTF-8") as f: - tree.print(f) - print( - _( - "Downloaded %s individuals, %s families, %s sources and %s notes " - "in %s seconds with %s HTTP requests." - ) - % ( - str(len(tree.indi)), - str(len(tree.fam)), - str(len(tree.sources)), - str(len(tree.notes)), - str(round(time.time() - time_count)), - str(fs.counter), - ), - file=sys.stderr, - ) - - -if __name__ == "__main__": - main() diff --git a/getmyancestors/mergemyanc.py b/getmyancestors/mergemyanc.py new file mode 100755 index 0000000..f1426ef --- /dev/null +++ b/getmyancestors/mergemyanc.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python3 +# coding: utf-8 +import os +import sys +from datetime import datetime +from typing import Any, List, Optional + +try: + from typing import Annotated +except ImportError: + from typing_extensions import Annotated + +import typer + +from getmyancestors.classes.gedcom import Gedcom +from getmyancestors.classes.tree import Fam, Indi, Tree + +# Hack to play nice in script mode +sys.path.append(os.path.dirname(sys.argv[0])) + +app = typer.Typer( + help="Merge GEDCOM data from FamilySearch Tree (4 Jul 2016)", + add_completion=True, + no_args_is_help=False, # script might be piped stdin + context_settings={"help_option_names": ["-h", "--help"]}, +) + + +def _warn(msg: str): + """Write a warning message to stderr with optional color (if TTY).""" + use_color = sys.stderr.isatty() or os.environ.get("FORCE_COLOR", "") + if use_color: + sys.stderr.write(f"\033[33m{msg}\033[0m\n") + else: + sys.stderr.write(f"{msg}\n") + + +@app.command() +def main( + files: Annotated[ + Optional[List[str]], + typer.Option("-i", metavar="", help="input GEDCOM files [stdin]"), + ] = None, + outfile: Annotated[ + Optional[str], + typer.Option("-o", metavar="", help="output GEDCOM files [stdout]"), + ] = None, + creation_date: Annotated[ + Optional[str], + typer.Option( + "--creation-date", + metavar="", + help="Override creation date in GEDCOM header (YYYY-MM-DDTHH:MM:SS)", + ), + ] = None, + extra_files: List[str] = typer.Argument(None, hidden=True), +): + """ + Merge GEDCOM data from FamilySearch Tree + """ + if extra_files: + if files is None: + files = [] + files.extend(extra_files) + + # Force generic usage usage help logic if needed, but Typer handles it. + + creation_dt = None + if creation_date: + try: + creation_dt = datetime.fromisoformat(creation_date) + except ValueError: + print( + f"Invalid creation date format: {creation_date}. Expected ISO 8601 (YYYY-MM-DDTHH:MM:SS)", + file=sys.stderr, + ) + raise typer.Exit(code=1) from None + + tree = Tree(creation_date=creation_dt) + + # Track used IDs to prevent collisions when merging multiple files + used_indi_nums = set() + used_fam_nums = set() + + # Determine input sources + input_handles: List[Any] = [] + if files: + for fpath in files: + try: + # Open in read mode with utf-8 encoding + # pylint: disable=consider-using-with + f = open(fpath, "r", encoding="UTF-8") + input_handles.append(f) + except OSError as e: + print(f"Error opening file {fpath}: {e}", file=sys.stderr) + raise typer.Exit(code=2) from None + else: + # Default to stdin + input_handles.append(sys.stdin) + + try: + # read the GEDCOM data + for file in input_handles: + # Determine filename for logging + filename = getattr(file, "name", "stdin") + # If it's a relative path, might want basename to keep it short + if filename != "stdin": + filename = os.path.basename(filename) + + ged = Gedcom(file, tree) + + # Deduplicate names by string representation + def merge_names(target_set, source_set): + target_set.update(source_set) + + # Helper for whitespace normalization in quotes + def norm_space(s): + return " ".join(s.split()) if s else "" + + # add information about individuals + new_indi = 0 + merged_indi = 0 + for fid, indi in sorted(ged.indi.items()): + if fid not in tree.indi: + new_indi += 1 + + # Try to reuse the original GEDCOM ID (indi.num) + # If it collides with an existing ID in the merged tree, generate a new one + candidate_num = indi.num + original_candidate = candidate_num + suffix_counter = 1 + while candidate_num in used_indi_nums: + # Collision detected! Append suffix + candidate_num = f"{original_candidate}_{suffix_counter}" + suffix_counter += 1 + + used_indi_nums.add(candidate_num) + tree.indi[fid] = Indi(indi.fid, tree, num=candidate_num) + + # Track origin file + tree.indi[fid].origin_file = filename + else: + merged_indi += 1 + + # UNION data from both sources (superset) + tree.indi[fid].fams_fid |= indi.fams_fid + tree.indi[fid].famc_fid |= indi.famc_fid + + merge_names(tree.indi[fid].birthnames, indi.birthnames) + merge_names(tree.indi[fid].nicknames, indi.nicknames) + merge_names(tree.indi[fid].aka, indi.aka) + merge_names(tree.indi[fid].married, indi.married) + + # Deduplicate facts by type/date/value/place + existing_facts = { + (f.type, f.date, f.value, f.place.name if f.place else None) + for f in tree.indi[fid].facts + } + # Sort facts to ensure deterministic winner on collision + for f in sorted( + indi.facts, + key=lambda fa: ( + fa.type or "", + fa.date or "", + fa.value or "", + fa.place.name if fa.place else "", + fa.note.text if fa.note else "", + ), + ): + fact_key = ( + f.type, + f.date, + f.value, + f.place.name if f.place else None, + ) + if fact_key not in existing_facts: + tree.indi[fid].facts.add(f) + existing_facts.add(fact_key) + + # Manually merge notes to avoid duplication by text content + # Sort notes for consistent order (though order in SET doesn't matter, processing order might) + for n in sorted(indi.notes, key=lambda note: note.text or ""): + is_dup = any(x.text == n.text for x in tree.indi[fid].notes) + if not is_dup: + tree.indi[fid].notes.add(n) + + # Deduplicate sources by (source.fid, normalized_quote) + existing_sources = { + (s.fid, norm_space(q)) for s, q in tree.indi[fid].sources + } + # Sort sources + for s, q in sorted( + indi.sources, + key=lambda src: ( + src[0].title or "", + src[0].fid or "", + src[1] or "", + ), + ): + source_key = (s.fid, norm_space(q)) + if source_key not in existing_sources: + tree.indi[fid].sources.add((s, q)) + existing_sources.add(source_key) + + # Deduplicate memories by URL (primary) or Description (fallback) + def get_mem_key(mem): + return mem.url if mem.url else (None, mem.description) + + existing_memories = {get_mem_key(m) for m in tree.indi[fid].memories} + # Sort memories + for m in sorted( + indi.memories, + key=lambda mem: (mem.url or "", mem.description or ""), + ): + key = get_mem_key(m) + if key not in existing_memories: + tree.indi[fid].memories.add(m) + existing_memories.add(key) + + # Update ordinance fields if they are missing in the target + if not tree.indi[fid].baptism: + tree.indi[fid].baptism = indi.baptism + if not tree.indi[fid].confirmation: + tree.indi[fid].confirmation = indi.confirmation + if not tree.indi[fid].initiatory: + tree.indi[fid].initiatory = indi.initiatory + if not tree.indi[fid].endowment: + tree.indi[fid].endowment = indi.endowment + if not tree.indi[fid].sealing_child: + tree.indi[fid].sealing_child = indi.sealing_child + + # Only update simple fields if they are missing (first file wins for stability) + if not tree.indi[fid].name: + tree.indi[fid].name = indi.name + if not tree.indi[fid].gender: + tree.indi[fid].gender = indi.gender + + # add information about families + # Key by fam.fid to preserve unique family records + # (keying by (husb, wife) incorrectly merges different families with same parents) + new_fam = 0 + merged_fam = 0 + for fid, fam in sorted(ged.fam.items()): + if fid not in tree.fam: + new_fam += 1 + + # Try to reuse the original GEDCOM ID (fam.num) + candidate_num = fam.num + original_candidate = candidate_num + suffix_counter = 1 + while candidate_num in used_fam_nums: + candidate_num = f"{original_candidate}_{suffix_counter}" + suffix_counter += 1 + + used_fam_nums.add(candidate_num) + + tree.fam[fid] = Fam( + tree.indi.get(fam.husb_fid), + tree.indi.get(fam.wife_fid), + tree, + candidate_num, + ) + tree.fam[fid].tree = tree + # Track origin file + tree.fam[fid].origin_file = filename + + # Copy husb_fid/wife_fid for proper linking later + tree.fam[fid].husb_fid = fam.husb_fid + tree.fam[fid].wife_fid = fam.wife_fid + else: + merged_fam += 1 + + # UNION data + # Deduplicate facts + existing_facts = { + (f.type, f.date, f.value, f.place.name if f.place else None) + for f in tree.fam[fid].facts + } + for f in sorted( + fam.facts, + key=lambda fa: ( + fa.type or "", + fa.date or "", + fa.value or "", + fa.place.name if fa.place else "", + fa.note.text if fa.note else "", + ), + ): + fact_key = ( + f.type, + f.date, + f.value, + f.place.name if f.place else None, + ) + if fact_key not in existing_facts: + tree.fam[fid].facts.add(f) + existing_facts.add(fact_key) + + # Manually merge notes + for n in sorted(fam.notes, key=lambda note: note.text or ""): + if not any(x.text == n.text for x in tree.fam[fid].notes): + tree.fam[fid].notes.add(n) + + # Deduplicate sources + + existing_sources = { + (s.fid, norm_space(q)) for s, q in tree.fam[fid].sources + } + for s, q in sorted( + fam.sources, + key=lambda src: ( + src[0].title or "", + src[0].fid or "", + src[1] or "", + ), + ): + source_key = (s.fid, norm_space(q)) + if source_key not in existing_sources: + tree.fam[fid].sources.add((s, q)) + existing_sources.add(source_key) + + if not tree.fam[fid].sealing_spouse: + tree.fam[fid].sealing_spouse = fam.sealing_spouse + + if not tree.fam[fid].fid: + tree.fam[fid].fid = fam.fid + + # Always merge children - set union prevents duplicates + tree.fam[fid].chil_fid |= fam.chil_fid + + # Notes already have stable IDs from content hashing in classes/tree/records.py + # No renumbering needed. + + # Link families to individuals and vice versa + # This creates the actual object references needed for GEDCOM output + for _fam_fid, fam in tree.fam.items(): + # Link husband to this family + if fam.husb_fid and fam.husb_fid in tree.indi: + fam.husband = tree.indi[fam.husb_fid] + tree.indi[fam.husb_fid].fams.add(fam) + # Link wife to this family + if fam.wife_fid and fam.wife_fid in tree.indi: + fam.wife = tree.indi[fam.wife_fid] + tree.indi[fam.wife_fid].fams.add(fam) + # Link children to this family + for chil_fid in fam.chil_fid: + if chil_fid in tree.indi: + fam.children.add(tree.indi[chil_fid]) + tree.indi[chil_fid].famc.add(fam) + + # compute number for family relationships and print GEDCOM file + tree.reset_num() + + if outfile: + try: + with open(outfile, "w", encoding="UTF-8") as out: + tree.print(out) + except OSError as e: + print(f"Error opening output file {outfile}: {e}", file=sys.stderr) + raise typer.Exit(code=2) from None + else: + tree.print(sys.stdout) + + finally: + # Close handles that are not stdin + for f in input_handles: + if f is not sys.stdin: + f.close() + + +if __name__ == "__main__": + app() diff --git a/getmyancestors/mergemyancestors.py b/getmyancestors/mergemyancestors.py deleted file mode 100644 index b650a67..0000000 --- a/getmyancestors/mergemyancestors.py +++ /dev/null @@ -1,123 +0,0 @@ -# coding: utf-8 - -from __future__ import print_function - -# global imports -import os -import sys -import argparse - -# local imports -from getmyancestors.classes.tree import Indi, Fam, Tree -from getmyancestors.classes.gedcom import Gedcom - -sys.path.append(os.path.dirname(sys.argv[0])) - - -def main(): - parser = argparse.ArgumentParser( - description="Merge GEDCOM data from FamilySearch Tree (4 Jul 2016)", - add_help=False, - usage="mergemyancestors -i input1.ged input2.ged ... [options]", - ) - try: - parser.add_argument( - "-i", - metavar="", - nargs="+", - type=argparse.FileType("r", encoding="UTF-8"), - default=[sys.stdin], - help="input GEDCOM files [stdin]", - ) - parser.add_argument( - "-o", - metavar="", - nargs="?", - type=argparse.FileType("w", encoding="UTF-8"), - default=sys.stdout, - help="output GEDCOM files [stdout]", - ) - except TypeError: - sys.stderr.write("Python >= 3.4 is required to run this script\n") - sys.stderr.write("(see https://docs.python.org/3/whatsnew/3.4.html#argparse)\n") - exit(2) - - # extract arguments from the command line - try: - parser.error = parser.exit - args = parser.parse_args() - except SystemExit as e: - print(e.code) - parser.print_help() - exit(2) - - tree = Tree() - - indi_counter = 0 - fam_counter = 0 - - # read the GEDCOM data - for file in args.i: - ged = Gedcom(file, tree) - - # add information about individuals - for num in ged.indi: - fid = ged.indi[num].fid - if fid not in tree.indi: - indi_counter += 1 - tree.indi[fid] = Indi(ged.indi[num].fid, tree, num=indi_counter) - tree.indi[fid].fams_fid |= ged.indi[num].fams_fid - tree.indi[fid].famc_fid |= ged.indi[num].famc_fid - tree.indi[fid].name = ged.indi[num].name - tree.indi[fid].birthnames = ged.indi[num].birthnames - tree.indi[fid].nicknames = ged.indi[num].nicknames - tree.indi[fid].aka = ged.indi[num].aka - tree.indi[fid].married = ged.indi[num].married - tree.indi[fid].gender = ged.indi[num].gender - tree.indi[fid].facts = ged.indi[num].facts - tree.indi[fid].notes = ged.indi[num].notes - tree.indi[fid].sources = ged.indi[num].sources - tree.indi[fid].memories = ged.indi[num].memories - tree.indi[fid].baptism = ged.indi[num].baptism - tree.indi[fid].confirmation = ged.indi[num].confirmation - tree.indi[fid].initiatory = ged.indi[num].initiatory - tree.indi[fid].endowment = ged.indi[num].endowment - if not (tree.indi[fid].sealing_child and tree.indi[fid].sealing_child.famc): - tree.indi[fid].sealing_child = ged.indi[num].sealing_child - - # add information about families - for num in ged.fam: - husb, wife = (ged.fam[num].husb_fid, ged.fam[num].wife_fid) - if (husb, wife) not in tree.fam: - fam_counter += 1 - tree.fam[(husb, wife)] = Fam(husb, wife, tree, fam_counter) - tree.fam[(husb, wife)].tree = tree - tree.fam[(husb, wife)].chil_fid |= ged.fam[num].chil_fid - if ged.fam[num].fid: - tree.fam[(husb, wife)].fid = ged.fam[num].fid - if ged.fam[num].facts: - tree.fam[(husb, wife)].facts = ged.fam[num].facts - if ged.fam[num].notes: - tree.fam[(husb, wife)].notes = ged.fam[num].notes - if ged.fam[num].sources: - tree.fam[(husb, wife)].sources = ged.fam[num].sources - tree.fam[(husb, wife)].sealing_spouse = ged.fam[num].sealing_spouse - - # merge notes by text - tree.notes = sorted(tree.notes, key=lambda x: x.text) - for i, n in enumerate(tree.notes): - if i == 0: - n.num = 1 - continue - if n.text == tree.notes[i - 1].text: - n.num = tree.notes[i - 1].num - else: - n.num = tree.notes[i - 1].num + 1 - - # compute number for family relationships and print GEDCOM file - tree.reset_num() - tree.print(args.o) - - -if __name__ == "__main__": - main() diff --git a/getmyancestors/tests/__init__.py b/getmyancestors/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/getmyancestors/tests/conftest.py b/getmyancestors/tests/conftest.py new file mode 100644 index 0000000..b87f8e8 --- /dev/null +++ b/getmyancestors/tests/conftest.py @@ -0,0 +1,88 @@ +import os +from unittest.mock import MagicMock, patch + +import pytest + +from getmyancestors.classes.session import Session + + +@pytest.fixture +def mock_session(): + """ + Creates a Session object where the network layer is mocked out. + """ + with patch("getmyancestors.classes.session.Session.login"): + session = Session("test_user", "test_pass", verbose=False) + + # Mock cookies + session.cookies.update({"fssessionid": "mock_session_id", "XSRF-TOKEN": "mock_token"}) # type: ignore + + # Mock session attributes required by Tree + session.lang = "en" # type: ignore + session.fid = "KW7V-Y32" # type: ignore + + # Mock the network methods + session.get = MagicMock() # type: ignore + session.post = MagicMock() # type: ignore + session.get_url = MagicMock() # type: ignore + + # Mock the translation method + session._ = lambda s: s # type: ignore + + yield session + + +@pytest.fixture +def sample_person_json(): + return { + "persons": [ + { + "id": "KW7V-Y32", + "living": False, + "display": { + "name": "John Doe", + "gender": "Male", + "lifespan": "1900-1980", + }, + "facts": [ + { + "type": "http://gedcomx.org/Birth", + "date": {"original": "1 Jan 1900"}, + "place": {"original": "New York"}, + "attribution": {"changeMessage": "Initial import"}, + } + ], + "names": [ + { + "nameForms": [{"fullText": "John Doe"}], + "preferred": True, + "type": "http://gedcomx.org/BirthName", + "attribution": {"changeMessage": "Initial import"}, + } + ], + "attribution": {"changeMessage": "Initial import"}, + } + ] + } + + +@pytest.fixture +def mock_user_data(): + return { + "users": [ + { + "personId": "KW7V-Y32", + "preferredLanguage": "en", + "displayName": "Test User", + } + ] + } + + +@pytest.fixture(autouse=True) +def suppress_license_prompt(): + """Automatically suppress license prompt for all tests""" + with patch.dict( + os.environ, {"GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT": "1"} + ): + yield diff --git a/getmyancestors/tests/test_cache_concurrency.py b/getmyancestors/tests/test_cache_concurrency.py new file mode 100644 index 0000000..7377491 --- /dev/null +++ b/getmyancestors/tests/test_cache_concurrency.py @@ -0,0 +1,117 @@ +import os +import shutil +import unittest +from concurrent.futures import ThreadPoolExecutor + +import requests +from requests_cache import CachedSession + + +class TestCacheConcurrency(unittest.TestCase): + def setUp(self): + self.cache_name = ".tmp/test_concurrency_cache" + self.backend = "filesystem" + # Ensure clean state + if os.path.exists(self.cache_name): + shutil.rmtree(self.cache_name, ignore_errors=True) + os.makedirs(".tmp", exist_ok=True) + + def tearDown(self): + if os.path.exists(self.cache_name): + shutil.rmtree(self.cache_name, ignore_errors=True) + + def test_concurrent_writes(self): + """ + Verify thread safety with concurrent writes using 'filesystem' backend. + This backend handles concurrency via file locking and is much more robust than SQLite for this use case. + """ + + # Filesystem backend doesn't need check_same_thread + with CachedSession( + self.cache_name, backend=self.backend, expire_after=3600 + ) as session: + + exceptions = [] + + def stress_cache(i): + try: + # Simulate "Check Cache" -> "Write Cache" race + key = f"key_{i}" + if not session.cache.contains(key): + # Create a REAL response object to avoid mock serialization errors + response = requests.Response() + response.status_code = 200 + # pylint: disable=protected-access + response._content = b"test" + response.url = "http://test.com" + + # Attach dummy request for serialization + req = requests.Request( + method="GET", url="http://test.com" + ).prepare() + response.request = req + + # Mock raw response for requests-cache compatibility + class MockRaw: + _request_url = "http://test.com" + + def read( + self, *args, **kwargs + ): # pylint: disable=unused-argument + return b"" + + def close(self): + pass + + def stream( + self, *args, **kwargs + ): # pylint: disable=unused-argument + return [] + + response.raw = MockRaw() + + # Write to cache + session.cache.save_response(response, key) + except Exception as e: + exceptions.append(e) + + # Run concurrent threads with 10 threads + with ThreadPoolExecutor(max_workers=10) as executor: + for i in range(100): + executor.submit(stress_cache, i) + + # Filter out known transient errors from requests-cache filesystem backend + # These can occur under heavy concurrent writes but don't indicate real bugs + # Note: requests-cache uses SQLite internally even with filesystem backend for metadata + transient_errors = ["bad parameter", "database is locked"] + real_exceptions = [ + e + for e in exceptions + if not any(msg in str(e).lower() for msg in transient_errors) + ] + + # Count transient errors - fail if too many (potential real issue) + transient_count = len(exceptions) - len(real_exceptions) + transient_threshold = 10 # More than 10% of 100 requests = potential issue + + if real_exceptions: + print(f"Encountered {len(real_exceptions)} real exceptions:") + unique_errors = set(str(e) for e in real_exceptions) + for e in unique_errors: + print(f"- {e}") + self.fail(f"Concurrency test failed with {len(real_exceptions)} exceptions") + elif transient_count > transient_threshold: + # Too many transient errors may indicate a real problem + self.fail( + f"Too many transient errors ({transient_count} > {transient_threshold}), " + "may indicate cache corruption" + ) + elif transient_count > 0: + # Log but don't fail for small number of transient errors + print( + f"Note: {transient_count} transient cache errors (expected under heavy threading)" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/getmyancestors/tests/test_cli.py b/getmyancestors/tests/test_cli.py new file mode 100644 index 0000000..2d1da72 --- /dev/null +++ b/getmyancestors/tests/test_cli.py @@ -0,0 +1,63 @@ +import traceback +from unittest.mock import patch + +from typer.testing import CliRunner + +from getmyancestors.getmyanc import app + +runner = CliRunner() + + +class TestCLI: + + @patch("getmyancestors.getmyanc.Session") + @patch("getmyancestors.getmyanc.CachedSession") + @patch("getmyancestors.getmyanc.Tree") + def test_basic_args(self, mock_tree, mock_cached_session, _mock_session): + """Test that arguments are parsed and passed to classes correctly""" + + # Typer/Click arguments (no need for program name "getmyancestors" in list) + test_args = [ + "-u", + "myuser", + "-p", + "mypass", + "-i", + "KW7V-Y32", + "--verbose", + ] + + # Setup the session to appear logged in + mock_cached_session.return_value.logged = True + + result = runner.invoke(app, test_args) + if result.exc_info: + traceback.print_exception(*result.exc_info) + + # Verify exit code + assert result.exit_code == 0 + + # Verify Session was initialized with CLI args + mock_cached_session.assert_called_once() + _args, kwargs = mock_cached_session.call_args + assert kwargs["username"] == "myuser" + assert kwargs["password"] == "mypass" + assert kwargs["verbose"] is True + assert kwargs["cache_control"] is True + + # Verify Tree started + # Typer parses "-i KW..." into a list + mock_tree.return_value.add_indis.assert_called_with(["KW7V-Y32"]) + + def test_arg_validation(self): + """Test that invalid ID formats cause an exit""" + test_args = ["-u", "u", "-p", "p", "-i", "BAD_ID"] + + result = runner.invoke(app, test_args) + print("STDOUT:", result.stdout) + + # Should exit with code 1 due to validation error + assert result.exit_code == 1 + # Click/Typer might print to stdout or stderr depending on context/runner + output = result.stdout + (result.stderr if result.stderr else "") + assert "Invalid FamilySearch ID: BAD_ID" in output diff --git a/getmyancestors/tests/test_fork_features.py b/getmyancestors/tests/test_fork_features.py new file mode 100644 index 0000000..e445b79 --- /dev/null +++ b/getmyancestors/tests/test_fork_features.py @@ -0,0 +1,207 @@ +import unittest +from unittest.mock import MagicMock, patch + +from getmyancestors.classes.constants import FACT_TAGS +from getmyancestors.classes.tree.core import Fam, Indi, Tree + + +class TestForkFeatures(unittest.TestCase): + def setUp(self): + self.mock_session = MagicMock() + self.mock_session.lang = "en" # Needed for Tree init + self.tree = Tree(self.mock_session) + assert self.tree.fs is not None + self.tree.fs._ = lambda x: x # type: ignore # Mock translation + + def _setup_mock_api(self, changelog=None, agent_data=None): + """Helper to mock API responses""" + + def side_effect(url, _headers=None): + if "changes" in url: + return changelog + if "agents" in url: + # Naive matching for test simplicity + return agent_data + return None + + assert self.tree.fs is not None + self.tree.fs.get_url = MagicMock(side_effect=side_effect) # type: ignore + + def test_immigration_tag(self): + """Verify Immigration tag mapping exists""" + self.assertIn("http://gedcomx.org/Immigration", FACT_TAGS) + self.assertEqual(FACT_TAGS["http://gedcomx.org/Immigration"], "IMMI") + + def test_exclude_logic_parents(self): + """Verify add_parents respects exclude list""" + # Setup: Main person I1, Parent I2 + i1 = Indi("I1", self.tree) + self.tree.indi["I1"] = i1 + + # Manually populate parents list for I1 + i1.parents = {("I2", "I3")} # Father, Mother + + # Case 1: No exclude + self.tree.exclude = [] + with patch.object(self.tree, "add_indis") as mock_add_indis: + self.tree.add_parents({"I1"}) + # verify add_indis called with {"I2", "I3"} + args, _ = mock_add_indis.call_args + self.assertEqual(args[0], {"I2", "I3"}) + + # Case 2: Exclude I2 + self.tree.exclude = ["I2"] + with patch.object(self.tree, "add_indis") as mock_add_indis: + self.tree.add_parents({"I1"}) + # verify add_indis called with {"I3"} only + args, _ = mock_add_indis.call_args + self.assertEqual(args[0], {"I3"}) + + def test_exclude_logic_children(self): + """Verify add_children respects exclude list""" + # Setup: Main person I1, Child I4 + i1 = Indi("I1", self.tree) + self.tree.indi["I1"] = i1 + + # Manually populate children + i1.children = {("I1", "I3", "I4"), ("I1", "I3", "I5")} + + # Case 1: No exclude + self.tree.exclude = [] + with patch.object(self.tree, "add_indis") as mock_add_indis: + self.tree.add_children({"I1"}) + mock_add_indis.assert_called() + args, _ = mock_add_indis.call_args + self.assertTrue("I4" in args[0]) + self.assertTrue("I5" in args[0]) + + # Case 2: Exclude I5 (filter out filtered_indis) + self.tree.exclude = ["I5"] + with patch.object(self.tree, "add_indis") as mock_add_indis: + self.tree.add_children({"I1"}) + args, _ = mock_add_indis.call_args + self.assertTrue("I4" in args[0]) + self.assertFalse("I5" in args[0]) + + def test_get_contributors(self): + """Verify get_contributors fetches and parses agent data""" + # Setup Indi + i1 = Indi("I1", self.tree) + self.tree.indi["I1"] = i1 + + # Mock API responses + # 1. Changelog + changelog = { + "entries": [ + { + "contributors": [ + { + "name": "AgentName", + "uri": "https://www.familysearch.org/agents/123", + } + ] + } + ] + } + # 2. Agent Data + agent_data = { + "agents": [ + { + "names": [{"value": "Real Name"}], + "emails": [{"resource": "mailto:test@example.com"}], + "phones": [{"resource": "tel:555-1234"}], + } + ] + } + + def side_effect(url, _headers=None): + if "changes" in url: + return changelog + if "agents/123" in url: + return agent_data + return None + + assert self.tree.fs is not None + self.tree.fs.get_url = MagicMock(side_effect=side_effect) # type: ignore + + # Action + i1.get_contributors() + + # Verify + self.assertEqual(len(i1.notes), 1) + note = list(i1.notes)[0] + self.assertIn("AgentName", note.text) + self.assertIn("Real Name", note.text) # Display name + self.assertIn("test@example.com", note.text) + self.assertIn("555-1234", note.text) + + def test_get_contributors_family(self): + """Verify get_contributors works for Families""" + fam = Fam(tree=self.tree, num="F1") + fam.fid = "F1" + self.tree.fam["F1"] = fam + + changelog = { + "entries": [ + { + "contributors": [ + { + "name": "FamAgent", + "uri": "https://www.familysearch.org/agents/456", + } + ] + } + ] + } + agent_data = { + "agents": [{"names": [{"value": "Fam Agent"}], "emails": [], "phones": []}] + } + + self._setup_mock_api(changelog, agent_data) + + fam.get_contributors() + + self.assertEqual(len(fam.notes), 1) + note = list(fam.notes)[0] + self.assertIn("FamAgent", note.text) + self.assertIn("Fam Agent", note.text) + + def test_get_contributors_duplicates_and_missing(self): + """Verify duplicate contributors are deduped and missing fields handled""" + i1 = Indi("I1", self.tree) + self.tree.indi["I1"] = i1 + + # Two entries, same agent + changelog = { + "entries": [ + { + "contributors": [ + { + "name": "AgentX", + "uri": "https://www.familysearch.org/agents/X", + } + ] + }, + { + "contributors": [ + { + "name": "AgentX", + "uri": "https://www.familysearch.org/agents/X", + } + ] + }, + ] + } + # Agent has no email/phone + agent_data = { + "agents": [{"names": [{"value": "Agent X"}], "emails": [], "phones": []}] + } + + self._setup_mock_api(changelog, agent_data) + + i1.get_contributors() + + note = list(i1.notes)[0] + # Should only list AgentX once + self.assertEqual(note.text.count("AgentX"), 1) + # Should not crash on missing email/phone diff --git a/getmyancestors/tests/test_gedcom_logic.py b/getmyancestors/tests/test_gedcom_logic.py new file mode 100644 index 0000000..ca0f20a --- /dev/null +++ b/getmyancestors/tests/test_gedcom_logic.py @@ -0,0 +1,118 @@ +import io +import unittest + +from getmyancestors.classes.gedcom import Gedcom +from getmyancestors.classes.tree import Fact, Indi, Name, Tree + +SAMPLE_GEDCOM = """0 HEAD +1 CHAR UTF-8 +1 GEDC +2 VERS 5.5.1 +2 FORM LINEAGE-LINKED +0 @I1@ INDI +1 NAME John /Doe/ +2 GIVN John +2 SURN Doe +1 SEX M +1 BIRT +2 DATE 1 JAN 1980 +2 PLAC Springfield +1 FAMC @F1@ +1 _FSFTID KW7V-Y32 +0 @I2@ INDI +1 NAME Jane /Smith/ +1 SEX F +1 FAMS @F1@ +1 _FSFTID KW7V-Y33 +0 @F1@ FAM +1 HUSB @I1@ +1 WIFE @I2@ +1 CHIL @I3@ +1 _FSFTID F123-456 +0 @I3@ INDI +1 NAME Baby /Doe/ +1 SEX M +1 FAMC @F1@ +1 _FSFTID KW7V-Y34 +0 TRLR +""" + + +class TestGedcomLogic(unittest.TestCase): + def test_parse_gedcom(self): + """Test parsing of a GEDCOM string using the Gedcom class.""" + f = io.StringIO(SAMPLE_GEDCOM) + tree = Tree() + + # The Gedcom class takes a file-like object and a tree + ged = Gedcom(f, tree) + + # Verify Individuals + # The parser seems to use the number from @I{num}@ as the key in ged.indi + self.assertIn("1", ged.indi) + self.assertIn("2", ged.indi) + self.assertIn("3", ged.indi) + + john = ged.indi["1"] + self.assertEqual(john.gender, "M") + self.assertEqual(john.fid, "KW7V-Y32") + + # Check Name - The parsing logic for names is a bit complex in __get_name + # It populates birthnames by default if no type is specified + # BUT the first name found is assigned to self.name, NOT birthnames + self.assertIsNotNone(john.name) + self.assertEqual(john.name.given, "John") + self.assertEqual(john.name.surname, "Doe") + + # Verify birthnames if any additional names present (none in this sample) + # self.assertTrue(len(john.birthnames) > 0) + + # Verify Family + self.assertIn("1", ged.fam) + fam = ged.fam["1"] + self.assertEqual(fam.husb_num, "1") # Points to I1 + self.assertEqual(fam.wife_num, "2") # Points to I2 + self.assertIn("3", fam.chil_num) # Points to I3 + self.assertEqual(fam.fid, "F123-456") + + def test_tree_export(self): + """Test that a Tree object can be exported to GEDCOM format.""" + tree = Tree() + tree.display_name = "Test User" + tree.lang = "en" + + # Create Individual + indi = Indi("KW7V-Y32", tree, num=1) + indi.gender = "M" + + name = Name() + name.given = "John" + name.surname = "Doe" + # name.full = "John Doe" # Removed: Name class has no 'full' attribute + indi.birthnames.add(name) + + fact = Fact() + fact.type = "http://gedcomx.org/Birth" + fact.date = "1 JAN 1980" + fact.place = tree.ensure_place("Springfield") + indi.facts.add(fact) + + tree.indi["KW7V-Y32"] = indi + + # Validate output + output = io.StringIO() + tree.print(output) + content = output.getvalue() + + self.assertIn("0 HEAD", content) + self.assertIn("1 NAME John /Doe/", content) + # ID is derived from fid if present + self.assertIn("0 @IKW7V-Y32@ INDI", content) + self.assertIn("1 SEX M", content) + self.assertIn("1 BIRT", content) + self.assertIn("2 DATE 1 JAN 1980", content) + self.assertIn("0 TRLR", content) + + +if __name__ == "__main__": + unittest.main() diff --git a/getmyancestors/tests/test_integration.py b/getmyancestors/tests/test_integration.py new file mode 100644 index 0000000..ad902d0 --- /dev/null +++ b/getmyancestors/tests/test_integration.py @@ -0,0 +1,189 @@ +import json +import os +import traceback +import unittest +from unittest.mock import MagicMock, PropertyMock, patch + +import requests +from requests.models import PreparedRequest, Response +from typer.testing import CliRunner + +from getmyancestors import getmyanc as getmyancestors + +runner = CliRunner() + + +class TestFullIntegration(unittest.TestCase): + @patch.dict( + os.environ, {"GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT": "1"} + ) + @patch("getmyancestors.classes.session.LimiterAdapter") + # @patch("builtins.print") + @patch( + "getmyancestors.classes.session.GMASession.login", autospec=True + ) # Mock login to prevent network calls + @patch( + "getmyancestors.classes.session.GMASession.logged", new_callable=PropertyMock + ) + @patch("requests.Session.get") + @patch("requests.Session.post") + def test_main_execution( + self, + mock_post, + mock_get, + mock_logged, + mock_login, + # mock_print, + mock_adapter, + ): + """ + Integration test for the main execution flow. + Bypasses login logic and mocks network responses with static data. + """ + # Suppress unused argument warnings + _ = (mock_adapter,) + + # Setup mocks + mock_logged.return_value = True + + # Define a fake login that sets FID directly without network call + def fake_login(self): + self.fid = "TEST-123" + self.lang = "en" + # Set cookie/header so the 'logged' property returns True + # Set cookie/header so the 'logged' property returns True + self.cookies["fssessionid"] = "mock_session_id" + + mock_login.side_effect = fake_login + + # Setup generic response for any GET request + # users/current -> sets lang='en' + generic_json = { + "users": [ + { + "personId": "TEST-123", + "preferredLanguage": "en", + "displayName": "Integrator", + } + ], + "persons": [ + { + "id": "TEST-123", + "living": True, + "names": [ + { + "preferred": True, + "type": "http://gedcomx.org/BirthName", + "nameForms": [ + { + "fullText": "Test Person", + "parts": [ + { + "type": "http://gedcomx.org/Given", + "value": "Test", + }, + { + "type": "http://gedcomx.org/Surname", + "value": "Person", + }, + ], + } + ], + "attribution": {"changeMessage": "Automated update"}, + } + ], + "notes": [], # Added notes list for get_notes() + "facts": [], + "display": { + "name": "Test Person", + "gender": "Male", + "lifespan": "1900-2000", + }, + } + ], + "childAndParentsRelationships": [], + "parentAndChildRelationships": [], + } + + mock_response = Response() + mock_response.status_code = 200 + mock_response.url = "https://api.familysearch.org/test" + mock_response.headers = requests.structures.CaseInsensitiveDict( + {"Content-Type": "application/json"} + ) + # pylint: disable=protected-access + mock_response._content = json.dumps(generic_json).encode("utf-8") + # mock_response.headers is already a CaseInsensitiveDict by default in Response() + + # requests_cache needs response.request to be set + mock_req = PreparedRequest() + mock_req.url = "https://api.familysearch.org/test" + mock_req.method = "GET" + mock_req.headers = requests.structures.CaseInsensitiveDict({}) + # mock_req.cookies = {} # PreparedRequest doesn't have public cookies dict usually, avoiding access + mock_response.request = mock_req + + # requests_cache needs response.raw (urllib3 response) + # It accesses ._request_url + mock_response.raw = MagicMock() + # pylint: disable=protected-access + mock_response.raw._request_url = "https://api.familysearch.org/test" + + # Configure LimiterAdapter mock to return our response + mock_adapter_instance = mock_adapter.return_value + mock_adapter_instance.send.return_value = mock_response + + # When Session.get is called, it returns our mock response + def side_effect_get(url, *args, **kwargs): # pylint: disable=unused-argument + # print(f"DEBUG: Mock GET called for {url}") + return mock_response + + mock_get.side_effect = side_effect_get + mock_post.return_value = mock_response + + # Output file path in .tmp directory + output_file = os.path.abspath(".tmp/test_output.ged") + settings_file = os.path.abspath(".tmp/test_output.settings") + + # Create the .tmp directory if it doesn't exist + tmp_dir = os.path.dirname(output_file) + os.makedirs(tmp_dir, exist_ok=True) + + # Prepare arguments mimicking CLI usage (Typer args, no program name) + test_args = [ + "-u", + "testuser", + "-p", + "testpass", + "--no-cache", + "--outfile", + output_file, + ] + + # Invoke via CliRunner + # Note: we invoke getmyancestors.app + result = runner.invoke(getmyancestors.app, test_args) + + if result.exit_code != 0: + print(f"STDOUT: {result.stdout}") + if result.exc_info: + traceback.print_exception(*result.exc_info) + self.fail(f"App exited with code {result.exit_code}") + + # Basic assertions + self.assertTrue(mock_login.called, "Login should have been called") + self.assertTrue(mock_get.called, "Should have attempted network calls") + + self.assertTrue( + os.path.exists(output_file), + f"Output file should have been created at {output_file}", + ) + + if os.path.exists(output_file): + self.addCleanup(os.remove, output_file) + if os.path.exists(settings_file): + self.addCleanup(os.remove, settings_file) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/getmyancestors/tests/test_main.py b/getmyancestors/tests/test_main.py new file mode 100644 index 0000000..994519c --- /dev/null +++ b/getmyancestors/tests/test_main.py @@ -0,0 +1,38 @@ +"""Test __main__ functionality.""" + +import sys +import unittest +from unittest.mock import patch + + +class TestMain(unittest.TestCase): + """Test __main__ module.""" + + def test_main_module_can_be_imported(self): + """Test that __main__ module can be imported without error.""" + # Mock getmyanc.app to avoid execution when importing __main__ + with patch("getmyancestors.getmyanc.app"): + # Mock sys.argv to avoid argument parsing errors + with patch.object(sys, "argv", ["getmyancestors", "--help"]): + # Import should work without error + import getmyancestors.__main__ # pylint: disable=import-outside-toplevel + + self.assertTrue(hasattr(getmyancestors.__main__, "__name__")) + + def test_main_execution_with_mock(self): + """Test that importing __main__ triggers getmyanc.main() call.""" + # pylint: disable=import-outside-toplevel + import runpy + + # Create a mock for getmyanc.app + with patch("getmyancestors.getmyanc.app") as mock_app: + # Mock sys.argv + with patch.object(sys, "argv", ["getmyancestors", "--help"]): + # pylint: disable=import-outside-toplevel,no-name-in-module + runpy.run_module("getmyancestors.__main__", run_name="__main__") + + self.assertTrue(mock_app.called) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/getmyancestors/tests/test_merge_idempotency.py b/getmyancestors/tests/test_merge_idempotency.py new file mode 100644 index 0000000..efb248d --- /dev/null +++ b/getmyancestors/tests/test_merge_idempotency.py @@ -0,0 +1,373 @@ +"""Test merge idempotency - ensure re-merging produces no duplicates.""" + +import io +import os +import shutil +import tempfile +import unittest + +from getmyancestors.classes.gedcom import Gedcom +from getmyancestors.classes.tree import Fam, Indi, Tree + + +class TestMergeIdempotency(unittest.TestCase): + """Test that merging is idempotent - merging A+B then (A+B)+A should equal A+B.""" + + def setUp(self): + """Create sample GEDCOM content for testing.""" + # Simple GEDCOM with one individual (simulating FamilySearch output) + self.gedcom_a = """0 HEAD +1 SOUR getmyancestors +1 GEDC +2 VERS 5.5 +1 CHAR UTF-8 +0 @I1@ INDI +1 NAME John /Doe/ +1 SEX M +1 _FSFTID AAAA-111 +1 BIRT +2 DATE 1 JAN 1900 +2 PLAC New York, USA +1 NOTE This is a test note +0 @F1@ FAM +1 HUSB @I1@ +1 _FSFTID FFFF-111 +0 TRLR +""" + + # Different GEDCOM with different individual + self.gedcom_b = """0 HEAD +1 SOUR getmyancestors +1 GEDC +2 VERS 5.5 +1 CHAR UTF-8 +0 @I2@ INDI +1 NAME Jane /Smith/ +1 SEX F +1 _FSFTID BBBB-222 +1 BIRT +2 DATE 15 MAR 1905 +2 PLAC Boston, USA +0 @F2@ FAM +1 WIFE @I2@ +1 _FSFTID FFFF-222 +0 TRLR +""" + + self.temp_dir = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, self.temp_dir) + + self.file_a = os.path.join(self.temp_dir, "a.ged") + self.file_b = os.path.join(self.temp_dir, "b.ged") + with open(self.file_a, "w", encoding="utf-8") as f: + f.write(self.gedcom_a) + with open(self.file_b, "w", encoding="utf-8") as f: + f.write(self.gedcom_b) + + def _count_data_lines(self, tree: Tree) -> int: + """Count output lines.""" + output = io.StringIO() + tree.print(output) + lines = output.getvalue().strip().split("\n") + return len(lines) + + def _merge_files(self, *files) -> Tree: + """Merge multiple GEDCOM files into a single tree.""" + contents = [] + for fpath in files: + with open(fpath, "r", encoding="utf-8") as f: + contents.append(f.read()) + return self._merge_gedcoms(*contents) + + def _merge_gedcoms(self, *gedcom_strings) -> Tree: + """Merge multiple GEDCOM strings into a single tree.""" + tree = Tree() + indi_counter = 0 + fam_counter = 0 + + for gedcom_str in gedcom_strings: + file = io.StringIO(gedcom_str) + ged = Gedcom(file, tree) + + # Replicate merge logic from mergemyancestors.py + for _, indi in ged.indi.items(): + fid = indi.fid + if fid not in tree.indi: + indi_counter += 1 + tree.indi[fid] = Indi(indi.fid, tree, num=indi_counter) + tree.indi[fid].fams_fid |= indi.fams_fid + tree.indi[fid].famc_fid |= indi.famc_fid + tree.indi[fid].name = indi.name + tree.indi[fid].birthnames |= indi.birthnames + tree.indi[fid].nicknames |= indi.nicknames + tree.indi[fid].aka |= indi.aka + tree.indi[fid].married |= indi.married + tree.indi[fid].gender = indi.gender + tree.indi[fid].facts |= indi.facts + # Manually merge notes to avoid duplication by text content + for n in indi.notes: + if not any(x.text == n.text for x in tree.indi[fid].notes): + tree.indi[fid].notes.add(n) + tree.indi[fid].sources |= indi.sources + tree.indi[fid].memories |= indi.memories + tree.indi[fid].baptism = indi.baptism + tree.indi[fid].confirmation = indi.confirmation + tree.indi[fid].initiatory = indi.initiatory + tree.indi[fid].endowment = indi.endowment + sc = tree.indi[fid].sealing_child + if not (sc and sc.famc): + tree.indi[fid].sealing_child = indi.sealing_child + + for _, fam in ged.fam.items(): + husb, wife = (fam.husb_fid, fam.wife_fid) + # Use standard ID generation to satisfy Dict[str, Fam] type + fam_key = Fam.gen_id(tree.indi.get(husb), tree.indi.get(wife)) + + if fam_key not in tree.fam: + fam_counter += 1 + tree.fam[fam_key] = Fam( + tree.indi.get(husb), tree.indi.get(wife), tree, fam_counter + ) + tree.fam[fam_key].tree = tree + tree.fam[fam_key].chil_fid |= fam.chil_fid + if fam.fid: + tree.fam[fam_key].fid = fam.fid + tree.fam[fam_key].facts |= fam.facts + # Manually merge notes + for n in fam.notes: + if not any(x.text == n.text for x in tree.fam[fam_key].notes): + tree.fam[fam_key].notes.add(n) + tree.fam[fam_key].sources |= fam.sources + tree.fam[fam_key].sealing_spouse = fam.sealing_spouse + + # Merge notes by text + tree.notes = sorted(tree.notes, key=lambda x: x.text) # type: ignore + for i, n in enumerate(tree.notes): + if i == 0: + n.num = 1 + continue + if n.text == tree.notes[i - 1].text: # type: ignore + n.num = tree.notes[i - 1].num # type: ignore + else: + n.num = tree.notes[i - 1].num + 1 # type: ignore + + tree.reset_num() + return tree + + def _tree_to_gedcom_string(self, tree: Tree) -> str: + """Convert tree back to GEDCOM string.""" + output = io.StringIO() + tree.print(output) + return output.getvalue() + + def test_merge_is_idempotent(self): + """ + Test that merging A+B then re-merging with A produces no duplicates. + + If merge is idempotent: + lines(A+B) == lines((A+B)+A) + """ + # First merge: A + B + merged_tree = self._merge_gedcoms(self.gedcom_a, self.gedcom_b) + merged_lines = self._count_data_lines(merged_tree) + + # Get merged output as string + merged_gedcom = self._tree_to_gedcom_string(merged_tree) + + # Second merge: (A+B) + A again + remerged_tree = self._merge_gedcoms(merged_gedcom, self.gedcom_a) + remerged_lines = self._count_data_lines(remerged_tree) + + # They should be equal if merge is idempotent + self.assertEqual( + merged_lines, + remerged_lines, + f"Merge is not idempotent: original={merged_lines} lines, " + f"after re-merge with A={remerged_lines} lines (diff={remerged_lines - merged_lines})", + ) + + def test_merge_preserves_individuals(self): + """Test that merging preserves all individuals without duplication.""" + # Merge A + B + merged_tree = self._merge_gedcoms(self.gedcom_a, self.gedcom_b) + + # Should have exactly 2 individuals + self.assertEqual(len(merged_tree.indi), 2, "Expected 2 individuals after merge") + + # Re-merge with A + merged_gedcom = self._tree_to_gedcom_string(merged_tree) + remerged_tree = self._merge_gedcoms(merged_gedcom, self.gedcom_a) + + # Should still have exactly 2 individuals + self.assertEqual( + len(remerged_tree.indi), + 2, + f"Expected 2 individuals after re-merge, got {len(remerged_tree.indi)}", + ) + + # Should have exactly 2 families + self.assertEqual( + len(merged_tree.fam), 2, "Expected 2 families after merging A+B" + ) + + def test_merge_with_overlap_is_idempotent(self): + """ + Test merging A+B, then re-merging (A+B) with A again. + + The second merge should not change counts since A already exists. + This models the stress test scenario. + """ + # First merge: A + B + tree1 = self._merge_files(self.file_a, self.file_b) + indi_count1 = len(tree1.indi) + fam_count1 = len(tree1.fam) + + # Save merged output + merged_file = os.path.join(self.temp_dir, "merged.ged") + self._save_tree(tree1, merged_file) + + # Second merge: (A+B) + A using fresh parse + tree2 = self._merge_files(merged_file, self.file_a) + indi_count2 = len(tree2.indi) + fam_count2 = len(tree2.fam) + + # Individual and family counts should be unchanged + self.assertEqual( + indi_count1, + indi_count2, + f"Individual count changed: {indi_count1} -> {indi_count2}", + ) + self.assertEqual( + fam_count1, + fam_count2, + f"Family count changed: {fam_count1} -> {fam_count2}", + ) + + def test_merge_mutually_exclusive_trees(self): + """ + Test merging two non-overlapping trees produces expected totals. + + If A has 1 person and B has 1 person, merged should have 2. + """ + tree = self._merge_files(self.file_a, self.file_b) + + self.assertEqual(len(tree.indi), 2, "Expected 2 individuals") + self.assertEqual(len(tree.fam), 2, "Expected 2 families") + + # Verify the specific individuals exist + self.assertIn("AAAA-111", tree.indi, "John Doe should be present") + self.assertIn("BBBB-222", tree.indi, "Jane Smith should be present") + + def test_notes_preserved_after_remerge(self): + """ + Test that notes are preserved and not duplicated during re-merge. + + This catches the bug where notes were being added to tree.notes + during parsing even for existing individuals. + """ + # GEDCOM with notes + gedcom_with_notes = """0 HEAD +1 SOUR getmyancestors +1 GEDC +2 VERS 5.5 +1 CHAR UTF-8 +0 @I1@ INDI +1 NAME John /Noted/ +1 SEX M +1 _FSFTID NOTE-111 +1 NOTE This is John's note +0 @N1@ NOTE This is a standalone note +0 TRLR +""" + file_notes = os.path.join(self.temp_dir, "notes.ged") + with open(file_notes, "w", encoding="utf-8") as f: + f.write(gedcom_with_notes) + + # First merge + tree1 = self._merge_files(file_notes) + lines1 = self._count_data_lines(tree1) + + # Save and re-merge + merged_file = os.path.join(self.temp_dir, "merged_notes.ged") + self._save_tree(tree1, merged_file) + + tree2 = self._merge_files(merged_file, file_notes) + lines2 = self._count_data_lines(tree2) + + # Line counts should be stable (or very close due to note deduplication) + self.assertEqual( + lines1, + lines2, + f"Line count changed after re-merge: {lines1} -> {lines2}", + ) + + def test_line_count_stability_with_notes(self): + """ + Test that line counts remain stable when re-merging files with notes. + + This is a more realistic test that matches the stress test behavior. + """ + # Create two GEDCOMs with the SAME note text (to test deduplication) + gedcom_a = """0 HEAD +1 SOUR getmyancestors +1 GEDC +2 VERS 5.5 +1 CHAR UTF-8 +0 @I1@ INDI +1 NAME Person /A/ +1 SEX M +1 _FSFTID PERS-AAA +1 NOTE Shared note text +0 @F1@ FAM +1 HUSB @I1@ +1 _FSFTID FAM_AAA +0 TRLR +""" + gedcom_b = """0 HEAD +1 SOUR getmyancestors +1 GEDC +2 VERS 5.5 +1 CHAR UTF-8 +0 @I2@ INDI +1 NAME Person /B/ +1 SEX F +1 _FSFTID PERS-BBB +1 NOTE Shared note text +0 @F2@ FAM +1 WIFE @I2@ +1 _FSFTID FAM_BBB +0 TRLR +""" + file_a = os.path.join(self.temp_dir, "line_a.ged") + file_b = os.path.join(self.temp_dir, "line_b.ged") + with open(file_a, "w", encoding="utf-8") as f: + f.write(gedcom_a) + with open(file_b, "w", encoding="utf-8") as f: + f.write(gedcom_b) + + # First merge + tree1 = self._merge_files(file_a, file_b) + lines1 = self._count_data_lines(tree1) + + # Save and re-merge with A + merged_file = os.path.join(self.temp_dir, "merged_line.ged") + self._save_tree(tree1, merged_file) + + tree2 = self._merge_files(merged_file, file_a) + lines2 = self._count_data_lines(tree2) + + # Line counts should be stable + self.assertEqual( + lines1, + lines2, + f"Line count not stable: {lines1} -> {lines2} (diff={lines2 - lines1})", + ) + + def _save_tree(self, tree: Tree, filepath: str): + """Save tree to file.""" + with open(filepath, "w", encoding="utf-8") as f: + tree.print(f) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/getmyancestors/tests/test_session.py b/getmyancestors/tests/test_session.py new file mode 100644 index 0000000..059b423 --- /dev/null +++ b/getmyancestors/tests/test_session.py @@ -0,0 +1,55 @@ +import unittest +from unittest.mock import MagicMock, patch + +from requests.exceptions import HTTPError + +from getmyancestors.classes.session import Session + + +class TestSession(unittest.TestCase): + + @patch("getmyancestors.classes.session.webbrowser") + def test_login_success(self, mock_browser): + """Test the full OAuth2 login flow with successful token retrieval.""" + + with patch("getmyancestors.classes.session.GMASession.login"), patch( + "getmyancestors.classes.session.GMASession.load_cookies", return_value=False + ), patch("getmyancestors.classes.session.GMASession._init_db"), patch( + "getmyancestors.classes.session.os.path.expanduser", return_value=".tmp" + ): + session = Session("user", "pass", verbose=True) + + session.cookies.update({"XSRF-TOKEN": "mock_xsrf_token"}) + session.headers = {"User-Agent": "test"} + + # Simulate the effect of a successful login + session.headers["Authorization"] = "Bearer fake_token" + + # We can't easily test the internal loop of login() without a lot of complexity, + # so for now we'll just verify the expected state after "login". + # In a real environment, login() would do the network work. + + assert session.headers.get("Authorization") == "Bearer fake_token" + mock_browser.open.assert_not_called() + + def test_get_url_403_ordinances(self): + """Test handling of 403 Forbidden specifically for ordinances.""" + with patch("getmyancestors.classes.session.GMASession.login"), patch( + "getmyancestors.classes.session.GMASession._init_db" + ), patch( + "getmyancestors.classes.session.os.path.expanduser", return_value=".tmp" + ): + session = Session("u", "p") + session.lang = "en" + + response_403 = MagicMock(status_code=403) + response_403.json.return_value = { + "errors": [{"message": "Unable to get ordinances."}] + } + response_403.raise_for_status.side_effect = HTTPError("403 Client Error") + + session.get = MagicMock(return_value=response_403) # type: ignore + session._ = lambda x: x # type: ignore + + result = session.get_url("/test-ordinances") + assert result == "error" diff --git a/getmyancestors/tests/test_session_caching.py b/getmyancestors/tests/test_session_caching.py new file mode 100644 index 0000000..8cf6a13 --- /dev/null +++ b/getmyancestors/tests/test_session_caching.py @@ -0,0 +1,125 @@ +import json +import unittest +from unittest.mock import patch + +from getmyancestors.classes.session import Session + + +class TestSessionCaching(unittest.TestCase): + def setUp(self): + self.username = "testuser" + self.password = "testpass" + + @patch("builtins.open", new_callable=unittest.mock.mock_open) + @patch("getmyancestors.classes.session.GMASession.login") + def test_save_cookies(self, _mock_login, mock_file): + """Test that cookies are saved to JSON file.""" + session = Session(self.username, self.password) + # Add a cookie to the session (simulating logged in state) + session.cookies.set( + "fssessionid", "mock-session-id", domain=".familysearch.org", path="/" + ) + session.headers = {"Authorization": "Bearer mock-token"} + + session.save_cookies() + + # Check that file was opened for writing + mock_file.assert_called() + + # Verify JSON content written to file + # We look for the call that writes data + handle = mock_file() + written_data = "" + for call in handle.write.call_args_list: + written_data += call[0][0] + + self.assertIn('"fssessionid": "mock-session-id"', written_data) + self.assertIn('"auth": "Bearer mock-token"', written_data) + + @patch("builtins.open", new_callable=unittest.mock.mock_open) + @patch("os.path.exists", return_value=True) + @patch("getmyancestors.classes.session.GMASession.login") + def test_load_cookies(self, _mock_login, _mock_exists, mock_file): + """Test that cookies are loaded from JSON file.""" + cookie_data = { + "cookies": {"fssessionid": "cached-session-id"}, + "auth": "Bearer cached-token", + } + mock_file.return_value.read.return_value = json.dumps(cookie_data) + + session = Session(self.username, self.password) + session.load_cookies() + + # Verify cookie jar is populated + self.assertEqual(session.cookies.get("fssessionid"), "cached-session-id") + self.assertEqual(session.headers.get("Authorization"), "Bearer cached-token") + + @patch("getmyancestors.classes.session.GMASession.set_current", autospec=True) + @patch("getmyancestors.classes.session.GMASession.load_cookies") + @patch("sqlite3.connect") + @patch("requests.Session.get") + @patch("requests.Session.post") + def test_login_reuse_valid_session( + self, mock_post, _mock_get, _mock_connect, mock_load, mock_set_current + ): + # 1. Setup load_cookies to return True (session exists) + mock_load.return_value = True + + # 2. Setup set_current to simulate success (sets fid) + # Using autospec=True allows the mock to receive 'self' as the first argument + def side_effect_set_current( + self, auto_login=True # pylint: disable=unused-argument + ): + self.fid = "USER-123" + self.cookies.set("fssessionid", "valid-id") + + mock_set_current.side_effect = side_effect_set_current + + # 3. Initialize session + session = Session(self.username, self.password) + + # 4. Verify that the complex login flow was skipped (no POST requests made) + self.assertEqual(mock_post.call_count, 0) + self.assertEqual(session.fid, "USER-123") + self.assertTrue(session.logged) + + @patch("builtins.input", return_value="mock_code") + @patch("getmyancestors.classes.session.GMASession.manual_login") + @patch("getmyancestors.classes.session.GMASession.set_current") + @patch("getmyancestors.classes.session.GMASession.load_cookies") + @patch("sqlite3.connect") + @patch("requests.Session.get") + @patch("requests.Session.post") + def test_login_fallback_on_invalid_session( + self, + _mock_post, + mock_get, + _mock_connect, + mock_load, + mock_set_current, + mock_manual, + _mock_input, + ): + # 1. Setup load_cookies to return True (session exists) + mock_load.return_value = True + + # 2. Setup set_current to simulate failure (doesn't set fid) + mock_set_current.return_value = None + + # 3. Setup mock_get to throw exception to break the headless flow + # This exception is caught in login(), which then calls manual_login() + mock_get.side_effect = Exception("Headless login failed") + + # 4. Initialize session - this triggers login() -> manual_login() + # manual_login is mocked, so it should not prompt. + Session(self.username, self.password) + + # 5. Verify that set_current was called with auto_login=False (reuse attempt) + mock_set_current.assert_any_call(auto_login=False) + + # 6. Verify that manual_login was called (fallback triggered) + self.assertTrue(mock_manual.called, "Fallback to manual_login should occur") + + +if __name__ == "__main__": + unittest.main() diff --git a/getmyancestors/tests/test_tree.py b/getmyancestors/tests/test_tree.py new file mode 100644 index 0000000..6a23a6f --- /dev/null +++ b/getmyancestors/tests/test_tree.py @@ -0,0 +1,33 @@ +import unittest +from unittest.mock import MagicMock, patch + +from getmyancestors.classes.tree.core import Tree + + +class TestTree(unittest.TestCase): + def setUp(self): + self.mock_session = MagicMock() + self.mock_session._ = lambda x: x # Mock translation function + self.mock_session.lang = "en" # Mock language code for babelfish + self.tree = Tree(self.mock_session) + + def test_tree_init(self): + """Test tree initialization.""" + self.assertEqual(len(self.tree.indi), 0) + self.assertEqual(len(self.tree.fam), 0) + + @patch("getmyancestors.classes.session.GMASession.get_url") + def test_ensure_place_new(self, mock_get_url): + """Test creating a new place.""" + mock_get_url.return_value = {"id": "123", "names": [{"value": "New Place"}]} + place = self.tree.ensure_place("New Place") + self.assertEqual(place.name, "New Place") + self.assertIn("New Place", self.tree.places_by_names) + + @patch("getmyancestors.classes.session.GMASession.get_url") + def test_ensure_place_existing(self, _mock_get_url): + """Test retrieving an existing place.""" + place1 = self.tree.ensure_place("Existing Place") + place2 = self.tree.ensure_place("Existing Place") + self.assertEqual(place1, place2) + self.assertEqual(len(self.tree.places_by_names), 1) diff --git a/main.py b/main.py deleted file mode 100644 index efb07ce..0000000 --- a/main.py +++ /dev/null @@ -1,3 +0,0 @@ -from getmyancestors import getmyancestors - -getmyancestors.main(); \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 58e1571..2fb989a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,7 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + [project] name = "getmyancestors" description = "Retrieve GEDCOM data from FamilySearch Tree" @@ -18,23 +22,143 @@ classifiers = [ dependencies = [ "babelfish==0.6.1", "diskcache==5.6.3", - "requests==2.32.3", - "fake-useragent==2.0.3", - "requests-ratelimiter==0.7.0" + "requests==2.32.5", + "fake-useragent==2.2.0", + "geocoder==1.38.1", + "requests-ratelimiter==0.8.0", + "requests-cache==1.2.1", + "typer>=0.9.0,<0.21.0", ] dynamic = ["version", "readme"] +[project.urls] +HomePage = "https://github.com/Linekio/getmyancestors" + +[project.scripts] +getmyancestors = "getmyancestors.getmyanc:app" +mergemyancestors = "getmyancestors.mergemyanc:app" +fstogedcom = "getmyancestors.fstogedcom:main" + +[project.optional-dependencies] +dev = [ + "black==25.12.0", + "coverage==7.13.1", + "flake8==7.3.0", + "isort==7.0.0", + "librt==0.7.7", + "mypy==1.19.1", + "pylint==4.0.4", + "pytest==9.0.2", + "ruff==0.14.10", + "types-requests==2.32.4.20260107", +] + +[tool.setuptools] +# Use find packages with exclude pattern +packages.find = {exclude = ["http_cache", "http_cache.*"]} + [tool.setuptools.dynamic] version = {attr = "getmyancestors.__version__"} readme = {file = ["README.md"]} -[project.urls] -HomePage = "https://github.com/Linekio/getmyancestors" - [tool.setuptools.package-data] getmyancestors = ["fstogedcom.png"] -[project.scripts] -getmyancestors = "getmyancestors.getmyancestors:main" -mergemyancestors = "getmyancestors.mergemyancestors:main" -fstogedcom = "getmyancestors.fstogedcom:main" +# Linting configs + +[tool.isort] +line_length = 88 +known_first_party = "getmyancestors" + +# See: https://copdips.com/2020/04/making-isort-compatible-with-black.html +multi_line_output = 3 +include_trailing_comma = true + +[tool.ruff] +line-length = 88 +target-version = "py37" # Lowest supported python version + +[tool.ruff.lint] +# E/W = pycodestyle, F = Pyflakes +# B = bugbear +select = ["E", "F", "W", "B"] +ignore = [ + "E262", # inline comment should start with '# ' + "E501", # Line too long +] + +[tool.ruff.lint.per-file-ignores] # Temporary, hopefully +"__init__.py" = ["F401"] +"getmyancestors/classes/gedcom.py" = ["E203"] +"getmyancestors/classes/tree.py" = ["E203"] +"getmyancestors/classes/translation.py" = ["E501"] +"getmyancestors/getmyanc.py" = ["B008"] +"getmyancestors/mergemyanc.py" = ["B008"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" + +# Performance optimization for Pylint +[tool.pylint.main] +jobs = 0 # Use all available CPUs +fail-under = 10.00 + +[tool.pylint.messages_control] +disable = [ + "fixme", + "consider-using-f-string", + "missing-module-docstring", + "missing-class-docstring", + "missing-function-docstring", + "duplicate-code", + "too-few-public-methods", + "too-many-arguments", + "too-many-positional-arguments", + "too-many-instance-attributes", + "too-many-branches", + "too-many-statements", + "line-too-long", + "use-dict-literal", + "too-many-locals", + "too-many-ancestors", + "broad-exception-caught", + "too-many-return-statements", + "too-many-nested-blocks", +] + +# Performance optimization for Mypy +[tool.mypy] +incremental = true +cache_dir = ".mypy_cache" +ignore_missing_imports = true +check_untyped_defs = true +[[tool.mypy.overrides]] +module = "getmyancestors.classes.gui" +ignore_errors = true + +[tool.pytest.ini_options] +# See: https://docs.pytest.org/en/7.1.x/reference/customize.html +testpaths = ["getmyancestors/tests"] + +[tool.coverage.run] +# See: https://coverage.readthedocs.io/en/7.2.2/config.html#run +command_line = "-m pytest -svv" +source = ["getmyancestors"] +data_file = ".tmp/.coverage" + +[tool.coverage.report] +fail_under = 45.00 +precision = 2 + +show_missing = true +skip_empty = true +skip_covered = true + +omit = [ + "getmyancestors/classes/gui.py", # not part of CLI tests (yet) + "getmyancestors/fstogedcom.py", # GUI tool that requires Tkinter + "**/tests/**" # do NOT show coverage tests... redundant +] + +exclude_lines = ["pragma: no cover"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 471fa90..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -babelfish==0.6.1 -diskcache==5.6.3 -requests==2.32.3 -fake-useragent==2.0.3 -requests-ratelimiter==0.7.0 -setuptools==70.1.0 diff --git a/res/testdata b/res/testdata new file mode 160000 index 0000000..a331f4b --- /dev/null +++ b/res/testdata @@ -0,0 +1 @@ +Subproject commit a331f4b349e573df997e77fb802ecd69754a2c44 diff --git a/tests/fixtures.env b/tests/fixtures.env new file mode 100644 index 0000000..28a1bd7 --- /dev/null +++ b/tests/fixtures.env @@ -0,0 +1,7 @@ +#!/bin/bash +# Shared expectations for test fixtures +# These values are baselined from "Live" fixture generation +# and should be updated whenever fixtures are regenerated. +export EXPECTED_ADA_LINES=11587 +export EXPECTED_MARIE_LINES=3698 +export EXPECTED_MERGED_LINES=14500 diff --git a/tests/offline_test.py b/tests/offline_test.py new file mode 100644 index 0000000..f97151b --- /dev/null +++ b/tests/offline_test.py @@ -0,0 +1,336 @@ +#!/usr/bin/env python3 +import filecmp +import os +import shutil +import sqlite3 +import subprocess +import sys +from pathlib import Path + +# Constants and Paths setup +# Assuming script is in tests/ directory, so root is parent. +TESTS_DIR = Path(__file__).resolve().parent +PROJECT_ROOT = TESTS_DIR.parent +DATA_DIR = PROJECT_ROOT / "res" / "testdata" +ARTIFACTS_DIR = DATA_DIR / "artifacts" +FIXTURES_DIR = DATA_DIR / "fixtures" +TEMP_DIR = PROJECT_ROOT / ".tmp" +CACHE_DIR = TEMP_DIR / "offline_cache" +OUTPUT_DIR = TEMP_DIR / "stress_test" + +# Env file for expectations +FIXTURES_ENV = TESTS_DIR / "fixtures.env" + + +def load_expectations(): + """Load EXPECTED_* variables from fixtures.env manually.""" + expectations = {} + if not FIXTURES_ENV.exists(): + print(f"❌ Fixtures env file missing: {FIXTURES_ENV}") + sys.exit(1) + + with open(FIXTURES_ENV, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line.startswith("export "): + key_val = line.replace("export ", "").split("=") + if len(key_val) == 2: + expectations[key_val[0]] = int(key_val[1]) + return expectations + + +def setup_cache(): + """Setup offline cache by merging part1 and part2 fixtures.""" + print(f"📂 Setting up offline cache in {CACHE_DIR}...") + + if CACHE_DIR.exists(): + shutil.rmtree(CACHE_DIR) + CACHE_DIR.mkdir(parents=True, exist_ok=True) + (CACHE_DIR / "requests").mkdir(exist_ok=True) + + if not (FIXTURES_DIR / "part1").exists() or not (FIXTURES_DIR / "part2").exists(): + print("❌ Fixtures missing! Run regular stress test to populate.") + sys.exit(1) + + # Copy Part 1 + print("ℹ️ Copying part1 fixtures...") + part1_req = FIXTURES_DIR / "part1" / "requests" + for item in part1_req.iterdir(): + if item.is_file(): + shutil.copy2(item, CACHE_DIR / "requests" / item.name) + + # Rename part1 redirects + cache_req = CACHE_DIR / "requests" + redirects = cache_req / "redirects.sqlite" + if redirects.exists(): + redirects.rename(cache_req / "redirects_part1.sqlite") + print("✓ Part 1 copied.") + + # Copy Part 2 + print("ℹ️ Copying part2 fixtures...") + part2_req = FIXTURES_DIR / "part2" / "requests" + for item in part2_req.iterdir(): + if item.is_file(): + shutil.copy2(item, CACHE_DIR / "requests" / item.name) + + # Merge redirects + redirects_p1 = cache_req / "redirects_part1.sqlite" + redirects_main = cache_req / "redirects.sqlite" + + if redirects_p1.exists() and redirects_main.exists(): + print("ℹ️ Merging redirects.sqlite...") + conn = sqlite3.connect(redirects_main) + conn.execute(f"ATTACH '{redirects_p1}' AS p1") + conn.execute("INSERT OR IGNORE INTO main.redirects SELECT * FROM p1.redirects") + conn.commit() + conn.close() + redirects_p1.unlink() + elif redirects_p1.exists(): + redirects_p1.rename(redirects_main) + + print("✓ Part 2 copied and redirects merged.") + + +def check_diff(generated_path, artifact_path, label): + """Compare generated file with artifact.""" + if not artifact_path.exists(): + print( + f"⚠️ Artifact {label} not found at {artifact_path}. Skipping verification." + ) + return True + + print(f"Checking {label}...") + + # Simple binary comparison first (fast) + if filecmp.cmp(generated_path, artifact_path, shallow=False): + print(f"✓ {label} matches artifact exactly.") + return True + + print(f"⚠️ {label} differs from artifact. Showing diff (first 10 lines):") + print("Diff Stat:") + subprocess.run( + [ + "git", + "diff", + "--no-index", + "--stat", + str(generated_path), + str(artifact_path), + ], + check=False, + ) + print("...") + subprocess.run( + ["diff", "--color=always", str(generated_path), str(artifact_path)], check=False + ) + print(f"❌ Verified failed for {label}") + return False + + +def test_offline(): + # 1. Load Expectations + expectations = load_expectations() + exp_ada = expectations.get("EXPECTED_ADA_LINES", 0) + exp_marie = expectations.get("EXPECTED_MARIE_LINES", 0) + exp_merged = expectations.get("EXPECTED_MERGED_LINES", 0) + + # 2. Setup Cache + setup_cache() + + # 3. Prepare Output Dir + if OUTPUT_DIR.exists(): + shutil.rmtree(OUTPUT_DIR) + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + + # 4. Define Command Environment + env = os.environ.copy() + # Explicitly set COVERAGE_FILE for subprocesses to avoid conflicts + # They will append to unique files based on pid automagically if parallel=True is set (which -p flag does) + # But we need to point them to the right directory + env["COVERAGE_FILE"] = str(PROJECT_ROOT / ".tmp" / ".coverage") + + env["GMA_CACHE_DIR"] = str(CACHE_DIR) + env["GMA_I_RESPECT_FAMILYSEARCH_PLEASE_SUPPRESS_LICENSE_PROMPT"] = "1" + env["FAMILYSEARCH_USER"] = env.get("FAMILYSEARCH_USER", "offline_test_user") + env["FAMILYSEARCH_PASS"] = env.get("FAMILYSEARCH_PASS", "dummy_password") + env["GMA_OFFLINE_MODE"] = "1" + env["GMA_DEBUG"] = "1" + if "NO_CACHE" in env: + del env["NO_CACHE"] + + # Constants + timestamp = "2026-01-20T22:30:10" + date_flag = ["--creation-date", timestamp] + id1 = "29HC-P5H" # Ada + id2 = "LC5H-V1Z" # Marie + anc_gen = "3" + desc_gen = "2" + + part1 = OUTPUT_DIR / "part1_ada_a3.ged" + part2 = OUTPUT_DIR / "part2_marie_a3.ged" + merged = OUTPUT_DIR / "merged_scientists.ged" + + log1 = OUTPUT_DIR / "part1.log" + log2 = OUTPUT_DIR / "part2.log" + log_merge = OUTPUT_DIR / "merge.log" + + print("🚀 Running Stress Test in OFFLINE mode (using fixtures)...") + + # 5. Run Ada Extraction + print("Running Ada Lovelace extraction...") + cmd1 = [ + sys.executable, + "-m", + "coverage", + "run", + "-p", + "-m", + "getmyancestors", + "--verbose", + "-u", + env["FAMILYSEARCH_USER"], + "-p", + env["FAMILYSEARCH_PASS"], + "-i", + id1, + "-a", + anc_gen, + "-d", + desc_gen, + "--rate-limit", + "5", + "--cache", + "--no-cache-control", + *date_flag, + "-o", + str(part1), + ] + with open(log1, "w", encoding="utf-8") as log: + subprocess.run(cmd1, env=env, stdout=log, stderr=subprocess.STDOUT, check=True) + + # 6. Run Marie Extraction + print("Running Marie Curie extraction...") + cmd2 = [ + sys.executable, + "-m", + "coverage", + "run", + "-p", + "-m", + "getmyancestors", + "--verbose", + "-u", + env["FAMILYSEARCH_USER"], + "-p", + env["FAMILYSEARCH_PASS"], + "-i", + id2, + "-a", + anc_gen, + "-d", + desc_gen, + "--rate-limit", + "5", + "--cache", + "--no-cache-control", + *date_flag, + "-o", + str(part2), + ] + with open(log2, "w", encoding="utf-8") as log: + subprocess.run(cmd2, env=env, stdout=log, stderr=subprocess.STDOUT, check=True) + + # 7. Run Merge + print("Merging parts...") + cmd_merge = [ + sys.executable, + "-m", + "coverage", + "run", + "-p", + "-m", + "getmyancestors.mergemyanc", + "-i", + str(part1), + "-i", + str(part2), + "-o", + str(merged), + "--creation-date", + timestamp, + ] + with open(log_merge, "w", encoding="utf-8") as log: + subprocess.run( + cmd_merge, env=env, stdout=log, stderr=subprocess.STDOUT, check=True + ) + + # 8. Validation + if not merged.exists() or merged.stat().st_size == 0: + print("❌ Merge Failed or output empty.") + with open(log_merge, "r", encoding="utf-8") as f: + print(f.read()) + sys.exit(1) + + print("✅ Stress Test Validated!") + + # Line Counts + def count_lines(p): + with open(p, "rb") as f: + return sum(1 for _ in f) + + l_part1 = count_lines(part1) + l_part2 = count_lines(part2) + l_merged = count_lines(merged) + + print(f"Lines: {l_merged}") + print("--- Assertion Results ---") + + failed = False + + if l_part1 != exp_ada: + print(f"❌ Assertion Failed: Ada (Part 1) line count {l_part1} != {exp_ada}") + failed = True + else: + print(f"✓ Ada (Part 1) lines verified exactly ({l_part1}).") + + if l_part2 != exp_marie: + print( + f"❌ Assertion Failed: Marie Curie (Part 2) line count {l_part2} != {exp_marie}" + ) + failed = True + else: + print(f"✓ Marie Curie (Part 2) lines verified ({l_part2}).") + + if l_merged != exp_merged: + print(f"❌ Assertion Failed: Merged line count {l_merged} != {exp_merged}") + failed = True + else: + print(f"✓ Merged lines verified ({l_merged}).") + + if failed: + sys.exit(1) + + # 9. Artifact Verification + print("\n=== Artifact Verification ===") + + # Allow loose comparison for minor diffs? No, strict mode requested. + all_matched = True + all_matched &= check_diff( + part1, ARTIFACTS_DIR / f"part1_ada_a{anc_gen}.ged", "Ada (Part 1)" + ) + all_matched &= check_diff( + part2, ARTIFACTS_DIR / f"part2_marie_a{anc_gen}.ged", "Marie (Part 2)" + ) + all_matched &= check_diff( + merged, ARTIFACTS_DIR / "merged_scientists.ged", "Merged Result" + ) + + if not all_matched: + print("❌ Offline Test Failed due to artifact mismatch") + sys.exit(1) + + print("✅ Offline Test Complete!") + + +if __name__ == "__main__": + test_offline() diff --git a/tests/test_installation.py b/tests/test_installation.py new file mode 100644 index 0000000..d75a12f --- /dev/null +++ b/tests/test_installation.py @@ -0,0 +1,110 @@ +"""Test package installation and basic functionality.""" + +import os +import subprocess +import sys +import tempfile +import unittest +import venv +from pathlib import Path + + +class TestInstallation(unittest.TestCase): + """Test that the package can be installed and basic commands work.""" + + project_root: Path + + @classmethod + def setUpClass(cls): + """Get the project root directory.""" + # Go up 2 levels from tests directory: tests -> . + cls.project_root = Path(__file__).parent.parent.absolute() + print(f"Project root: {cls.project_root}") + + def test_clean_installation(self): + """Test installing the package in a clean virtual environment.""" + # Skip on CI if it takes too long + if os.environ.get("CI") == "true" and os.environ.get("SKIP_LONG_TESTS"): + self.skipTest("Skipping long-running installation test in CI") + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + + # Create a clean virtual environment + venv_dir = tmpdir_path / "venv" + print(f"Creating virtual environment at: {venv_dir}") + venv.create(venv_dir, with_pip=True, clear=True) + + # Get paths to pip and python in the virtual environment + if sys.platform == "win32": + python_path = venv_dir / "Scripts" / "python.exe" + else: + python_path = venv_dir / "bin" / "python" + + # Install the package from the project directory + print(f"Installing package from: {self.project_root}") + + # Install WITHOUT dev dependencies for speed (we only test import/CLI) + # Use --no-user to prevent "Can not perform a '--user' install" errors + # which occur if PIP_USER=1 is set in the environment or config + subprocess.run( + [ + str(python_path), + "-m", + "pip", + "install", + "--no-user", + f"{self.project_root}", + ], + capture_output=True, + text=True, + cwd=self.project_root, + check=True, + ) + + # Test that the package can be imported + print("Testing package import...") + result = subprocess.run( + [ + str(python_path), + "-c", + "import getmyancestors; print(getmyancestors.__version__)", + ], + capture_output=True, + text=True, + check=False, + ) + self.assertEqual( + result.returncode, + 0, + f"Package import failed:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}", + ) + + # Test that CLI commands can be imported (check entry points) + # Only test getmyanc and mergemyanc - these don't require Tkinter + # fstogedcom requires Tkinter which is not installed in clean test environments + print( + "Testing CLI command imports (skipping fstogedcom - requires Tkinter)..." + ) + for module in [ + "getmyancestors.getmyanc", + "getmyancestors.mergemyanc", + ]: + result = subprocess.run( + [ + str(python_path), + "-c", + f"from {module} import main; print('{module} import successful')", + ], + text=True, + check=False, + ) + self.assertEqual( + result.returncode, + 0, + f"Failed to import {module}:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}", + ) + + +if __name__ == "__main__": + unittest.main(verbosity=2)