From: Barnabas Sudy Date: Sun, 11 Aug 2024 04:12:01 +0000 (+0200) Subject: Retriveing place information from geonames X-Git-Url: https://git.nutra.tk/v1?a=commitdiff_plain;h=29c001992054c5a5bcae06b6a37977b249e5fe54;p=gamesguru%2Fgetmyancestors.git Retriveing place information from geonames --- diff --git a/getmyancestors/classes/tree.py b/getmyancestors/classes/tree.py index d99113d..a48342f 100644 --- a/getmyancestors/classes/tree.py +++ b/getmyancestors/classes/tree.py @@ -2,10 +2,16 @@ import sys import re import time import asyncio +import os from urllib.parse import unquote - +from datetime import datetime +from typing import Set, Dict, List, Tuple, Union, Optional, BinaryIO # global imports import babelfish +import geocoder +import requests +import xml.etree.cElementTree as ET +from requests_cache import CachedSession # local imports import getmyancestors @@ -17,6 +23,52 @@ from getmyancestors.classes.constants import ( ) +COUNTY = 'County' +COUNTRY = 'Country' +CITY = 'City' + +GEONAME_FEATURE_MAP = { + 'ADM1': COUNTY, # first-order administrative division a primary administrative division of a country, such as a state in the United States + 'ADM1H': COUNTY, # historical first-order administrative division a former first-order administrative division + 'ADM2': COUNTY, # second-order administrative division a subdivision of a first-order administrative division + 'ADM2H': COUNTY, # historical second-order administrative division a former second-order administrative division + 'ADM3': COUNTY, # third-order administrative division a subdivision of a second-order administrative division + 'ADM3H': COUNTY, # historical third-order administrative division a former third-order administrative division + 'ADM4': COUNTY, # fourth-order administrative division a subdivision of a third-order administrative division + 'ADM4H': COUNTY, # historical fourth-order administrative division a former fourth-order administrative division + 'ADM5': COUNTY, # fifth-order administrative division a subdivision of a fourth-order administrative division + 'ADM5H': COUNTY, # historical fifth-order administrative division a former fifth-order administrative division + 'ADMD': COUNTY, # administrative division an administrative division of a country, undifferentiated as to administrative level + 'ADMDH': COUNTY, # historical administrative division a former administrative division of a political entity, undifferentiated as to administrative level + # 'LTER': leased area a tract of land leased to another country, usually for military installations + 'PCL': COUNTRY, # political entity + 'PCLD': COUNTRY, # dependent political entity + 'PCLF': COUNTRY, # freely associated state + 'PCLH': COUNTRY, # historical political entity a former political entity + 'PCLI': COUNTRY, # independent political entity + 'PCLIX': COUNTRY, # section of independent political entity + 'PCLS': COUNTRY, # semi-independent political entity + + 'PPL': CITY, # populated place a city, town, village, or other agglomeration of buildings where people live and work + 'PPLA': CITY, # seat of a first-order administrative division seat of a first-order administrative division (PPLC takes precedence over PPLA) + 'PPLA2': CITY, # seat of a second-order administrative division + 'PPLA3': CITY, # seat of a third-order administrative division + 'PPLA4': CITY, # seat of a fourth-order administrative division + 'PPLA5': CITY, # seat of a fifth-order administrative division + 'PPLC': CITY, # capital of a political entity + 'PPLCH': CITY, # historical capital of a political entity a former capital of a political entity + 'PPLF': CITY, # farm village a populated place where the population is largely engaged in agricultural activities + 'PPLG': CITY, # seat of government of a political entity + 'PPLH': CITY, # historical populated place a populated place that no longer exists + 'PPLL': CITY, # populated locality an area similar to a locality but with a small group of dwellings or other buildings + 'PPLQ': CITY, # abandoned populated place + 'PPLR': CITY, # religious populated place a populated place whose population is largely engaged in religious occupations + 'PPLS': CITY, # populated places cities, towns, villages, or other agglomerations of buildings where people live and work + 'PPLW': CITY, # destroyed populated place a village, town or city destroyed by a natural disaster, or by war + 'PPLX': CITY, # section of populated place + +} + # getmyancestors classes and functions def cont(string): """parse a GEDCOM line adding CONT and CONT tags if necessary""" @@ -42,7 +94,6 @@ def cont(string): max_len = 248 return ("\n%s CONT " % level).join(res) + "\n" - class Note: """GEDCOM Note class :param text: the Note content @@ -52,14 +103,16 @@ class Note: counter = {} - def __init__(self, text="", tree=None, num=None, num_perfix=None): - self.num_prefix = num_perfix + def __init__(self, text="", tree=None, num=None, num_prefix=None, note_type=None): + self._handle = None + self.note_type = note_type or 'Source Note' + self.num_prefix = num_prefix if num: self.num = num else: - Note.counter[num_perfix or 'None'] = Note.counter.get(num_perfix or 'None', 0) + 1 - self.num = Note.counter[num_perfix or 'None'] - print(f'##### Creating Note: {num_perfix}, {self.num}', file=sys.stderr) + Note.counter[num_prefix or 'None'] = Note.counter.get(num_prefix or 'None', 0) + 1 + self.num = Note.counter[num_prefix or 'None'] + print(f'##### Creating Note: {num_prefix}, {self.num}', file=sys.stderr) self.text = text.strip() if tree: @@ -79,6 +132,24 @@ class Note: print(f'Linking Note: {self.id}', file=sys.stderr) file.write("%s NOTE @N%s@\n" % (level, self.id)) + + @property + def handle(self): + if not self._handle: + self._handle = '_' + os.urandom(10).hex() + + return self._handle + + def printxml(self, parent_element): + note_element = ET.SubElement( + parent_element, + 'note', + handle=self.handle, + # change='1720382308', + id=self.id, + type='Source Note' + ) + ET.SubElement(note_element, 'text').text = self.text class Source: """GEDCOM Source class @@ -113,7 +184,12 @@ class Source: if "notes" in data: notes = [ n['text'] for n in data["notes"] if n["text"] ] for idx, n in enumerate(notes): - self.notes.add(Note(n, self.tree, num="S%s-%s" % (self.id, idx))) + self.notes.add(Note( + n, + self.tree, + num="S%s-%s" % (self.id, idx), + note_type='Source Note' + )) @property def id(self): @@ -143,8 +219,18 @@ class Fact: :param tree: a tree object """ - def __init__(self, data=None, tree=None, num_prefix=None): - self.value = self.type = self.date = self.place = self.note = self.map = None + counter = {} + + def __init__(self, data=None, tree: Optional['Tree']=None, num_prefix=None): + self.value = self.type = self.date = None + self.place: Optional[Place] = None + self.note = None + self._handle: Optional[str] = None + self.num_prefix = num_prefix + + Fact.counter[num_prefix or 'None'] = Fact.counter.get(num_prefix or 'None', 0) + 1 + self.num = Fact.counter[num_prefix or 'None'] + if data: if "value" in data: self.value = data["value"] @@ -160,15 +246,51 @@ class Fact: self.date = data["date"]["original"] if "place" in data: place = data["place"] - self.place = place["original"] - if "description" in place and place["description"][1:] in tree.places: - self.map = tree.places[place["description"][1:]] + place_name = place["original"] + place_id = place["description"][1:] if "description" in place and place["description"][1:] in tree.places else None + self.place = tree.ensure_place(place_name, place_id) if "changeMessage" in data["attribution"]: - self.note = Note(data["attribution"]["changeMessage"], tree, num_perfix=num_prefix) + self.note = Note( + data["attribution"]["changeMessage"], + tree, + num_prefix='E' + num_prefix if num_prefix else None, + note_type='Event Note', + ) if self.type == "http://gedcomx.org/Death" and not ( self.date or self.place ): self.value = "Y" + if tree: + tree.facts.add(self) + + @property + def id(self): + return f'{self.num_prefix}_{self.num}' if self.num_prefix != None else self.num + + + @property + def handle(self): + if not self._handle: + self._handle = '_' + os.urandom(10).hex() + + return self._handle + + def printxml(self, parent_element): + + event_element = ET.SubElement( + parent_element, + 'event', + handle=self.handle, + # change='1720382301', + id=self.id + ) + ET.SubElement(event_element, 'type').text = FACT_TAGS.get(self.type, self.type) + if self.date: + ET.SubElement(event_element, 'datestr', val=self.date) + if self.place: + ET.SubElement(event_element, 'place', hlink=self.place.handle) + if self.note: + ET.SubElement(event_element, 'noteref', hlink=self.note.handle) def print(self, file=sys.stdout): """print Fact in GEDCOM format @@ -188,7 +310,7 @@ class Fact: if self.date: file.write(cont("2 DATE " + self.date)) if self.place: - file.write(cont("2 PLAC " + self.place)) + self.place.print(file, 2) if self.map: latitude, longitude = self.map file.write("3 MAP\n4 LATI %s\n4 LONG %s\n" % (latitude, longitude)) @@ -247,7 +369,20 @@ class Name: if z["type"] == "http://gedcomx.org/Suffix": self.suffix = z["value"] if "changeMessage" in data["attribution"]: - self.note = Note(data["attribution"]["changeMessage"], tree, num_perfix=f'NAME_{owner_fis}_{kind}') + self.note = Note( + data["attribution"]["changeMessage"], + tree, + num_prefix=f'NAME_{owner_fis}_{kind}', + note_type='Name Note', + ) + + def printxml(self, parent_element): + + person_name = ET.SubElement(parent_element, 'name', type=self.kind) + ET.SubElement(person_name, 'first').text = self.given + ET.SubElement(person_name, 'surname').text = self.surname + # TODO prefix / suffix + def print(self, file=sys.stdout, typ=None): """print Name in GEDCOM format @@ -265,6 +400,69 @@ class Name: self.note.link(file, 2) + +class Place: + """GEDCOM Place class + :param name: the place name + :param tree: a Tree object + :param num: the GEDCOM identifier + """ + + counter = 0 + + def __init__( + self, + id: str, + name: str, + type: Optional[str]=None, + parent: Optional['Place']=None, + latitude: Optional[float]=None, + longitude: Optional[float]=None): + self._handle = None + self.name = name + self.type = type + self.id = id + self.parent = parent + self.latitude = latitude + self.longitude = longitude + + @property + def handle(self): + if not self._handle: + self._handle = '_' + os.urandom(10).hex() + + return self._handle + + + def print(self, file=sys.stdout, indentation=0): + """print Place in GEDCOM format""" + file.write("%d @P%s@ PLAC %s\n" % (indentation, self.num, self.name)) + + def printxml(self, parent_element): + + + # + # + # + # + # + # + # + place_element = ET.SubElement( + parent_element, + 'placeobj', + handle=self.handle, + # change='1720382307', + id=self.id, + type=self.type or 'Unknown' + ) + # ET.SubElement(place_element, 'ptitle').text = self.name + ET.SubElement(place_element, 'pname', value=self.name) + if self.parent: + ET.SubElement(place_element, 'placeref', hlink=self.parent.handle) + if self.latitude and self.longitude: + ET.SubElement(place_element, 'coord', long=str(self.longitude), lat=str(self.latitude)) + class Ordinance: """GEDCOM Ordinance class :param data: FS Ordinance data @@ -300,7 +498,8 @@ class Indi: counter = 0 - def __init__(self, fid=None, tree=None, num=None): + def __init__(self, fid: str, tree: 'Tree', num=None): + self._handle = None if num: self.num = num else: @@ -308,27 +507,29 @@ class Indi: self.num = Indi.counter self.fid = fid self.tree = tree - self.famc_fid = set() - self.fams_fid = set() - self.famc_num = set() - self.fams_num = set() - self.famc_ids = set() - self.fams_ids = set() - self.name = None + self.famc: Set['Fam'] = set() + self.fams: Set['Fam'] = set() + # self.famc_fid = set() + # self.fams_fid = set() + # self.famc_num = set() + # self.fams_num = set() + # self.famc_ids = set() + # self.fams_ids = set() + self.name: Optional[Name] = None self.gender = None self.living = None - self.parents = set() - self.spouses = set() - self.children = set() + self.parents: Set[Tuple[str, str]] = set() # (father_id, mother_id) + self.spouses: Set[Tuple[str, str, str]] = set() # (person1, person2, relfid) + self.children: Set[Tuple[str, str, str]] = set() # (father_id, mother_id, child_id) self.baptism = self.confirmation = self.initiatory = None self.endowment = self.sealing_child = None - self.nicknames = set() - self.facts = set() - self.birthnames = set() - self.married = set() - self.aka = set() - self.notes = set() - self.sources = set() + self.nicknames: Set[Name] = set() + self.birthnames: Set[Name] = set() + self.married: Set[Name] = set() + self.aka: Set[Name] = set() + self.facts: Set[Fact] = set() + self.notes: Set[Note] = set() + self.sources: Set[Source] = set() self.memories = set() def add_data(self, data): @@ -362,7 +563,8 @@ class Indi: "=== %s ===\n%s" % (self.tree.fs._("Life Sketch"), x.get("value", "")), self.tree, - num_perfix=f'INDI_{self.fid}' + num_prefix=f'INDI_{self.fid}', + note_type='Person Note', ) ) else: @@ -397,26 +599,39 @@ class Indi: for val in x.get("titles", []) + x.get("descriptions", []) ) - self.notes.add(Note(text, self.tree, num_perfix=f'INDI_{self.fid}')) + self.notes.add( + Note( + text, + self.tree, + num_prefix=f'INDI_{self.fid}', + note_type='Person Note', + )) else: self.memories.add(Memorie(x)) - def add_fams(self, fams): + def add_fams(self, fam: 'Fam'): """add family fid (for spouse or parent)""" - self.fams_fid.add(fams) + self.fams.add(fam) - def add_famc(self, famc): + def add_famc(self, fam: 'Fam'): """add family fid (for child)""" - self.famc_fid.add(famc) + self.famc.add(fam) def get_notes(self): """retrieve individual notes""" + print(f'Getting Notes for {self.fid}', file=sys.stderr) notes = self.tree.fs.get_url("/platform/tree/persons/%s/notes" % self.fid) if notes: for n in notes["persons"][0]["notes"]: text_note = "=== %s ===\n" % n["subject"] if "subject" in n else "" text_note += n["text"] + "\n" if "text" in n else "" - self.notes.add(Note(text_note, self.tree, num_perfix=f'INDI_{self.fid}')) + self.notes.add( + Note( + text_note, + self.tree, + num_prefix=f'INDI_{self.fid}', + note_type='Person Note', + )) def get_ordinances(self): """retrieve LDS ordinances @@ -468,11 +683,74 @@ class Indi: if n.text == text: self.notes.add(n) return - self.notes.add(Note(text, self.tree, num_perfix=f'INDI_{self.fid}_CONTRIB')) + self.notes.add(Note(text, self.tree, num_prefix=f'INDI_{self.fid}_CONTRIB', note_type='Contribution Note')) @property def id(self): return self.fid or self.num + + + @property + def handle(self): + if not self._handle: + self._handle = '_' + os.urandom(10).hex() + + return self._handle + + def printxml(self, parent_element): + + # + # M + # + # József + # Cser + # + # + # + # + # + # + # + # + person = ET.SubElement(parent_element, + 'person', + handle=self.handle, + # change='1720382301', + id='I' + str(self.id)) + if self.fid: + ET.SubElement(person, 'attribute', type='_FSFTID', value=self.fid) + + if self.name: + self.name.printxml(person) + for name in self.nicknames | self.birthnames | self.aka | self.married: + name.printxml(person) + + gender = ET.SubElement(person, 'gender') + gender.text = self.gender + + if self.fams: + for fam in self.fams: + ET.SubElement(person, 'parentin', hlink=fam.handle) + + if self.famc: + for fam in self.famc: + ET.SubElement(person, 'childof', hlink=fam.handle) + + + ET.SubElement(person, 'attribute', type="_FSFTID", value=self.fid) + + + for fact in self.facts: + ET.SubElement(person, 'eventref', hlink=fact.handle, role='Primary') + + # TODO citations + # TODO notes + for note in self.notes: + ET.SubElement(person, 'noteref', hlink=note.handle) + + # + # + def print(self, file=sys.stdout): """print individual in GEDCOM format""" @@ -508,12 +786,15 @@ class Indi: if self.sealing_child: file.write("1 SLGC\n") self.sealing_child.print(file) - print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr) - for num in self.fams_ids: - file.write("1 FAMS @F%s@\n" % num) - print(f'Famc Ids: {self.famc_ids}', file=sys.stderr) - for num in self.famc_ids: - file.write("1 FAMC @F%s@\n" % num) + for fam in self.fams: + file.write("1 FAMS @F%s@\n" % fam.id) + for fam in self.famc: + file.write("1 FAMC @F%s@\n" % fam.id) + # print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr) + # for num in self.fams_ids: + # print(f'Famc Ids: {self.famc_ids}', file=sys.stderr) + # for num in self.famc_ids: + # file.write("1 FAMC @F%s@\n" % num) file.write("1 _FSFTID %s\n" % self.fid) for o in self.notes: o.link(file) @@ -533,29 +814,44 @@ class Fam: counter = 0 - def __init__(self, husb=None, wife=None, tree=None, num=None): - if num: - self.num = num - else: - Fam.counter += 1 - self.num = Fam.counter - self.husb_fid = husb if husb else None - self.wife_fid = wife if wife else None + def __init__(self, husband: Indi | None, wife: Indi | None, tree: 'Tree'): + self._handle = None + self.num = Fam.gen_id(husband, wife) + self.fid = None + self.husband = husband + self.wife = wife self.tree = tree - self.husb_num = self.wife_num = self.fid = None - self.facts = set() + self.children: Set[Indi] = set() + self.facts: Set[Fact] = set() self.sealing_spouse = None - self.chil_fid = set() - self.chil_num = set() self.notes = set() self.sources = set() - def add_child(self, child): + @property + def handle(self): + if not self._handle: + self._handle = '_' + os.urandom(10).hex() + + return self._handle + + @staticmethod + def gen_id(husband: Indi | None, wife: Indi | None) -> str: + if husband and wife: + return f'FAM_{husband.id}-{wife.id}' + elif husband: + return f'FAM_{husband.id}-UNK' + elif wife: + return f'FAM_UNK-{wife.id}' + else: + Fam.counter += 1 + return f'FAM_UNK-UNK-{Fam.counter}' + + def add_child(self, child: Indi | None): """add a child fid to the family""" - if child not in self.chil_fid: - self.chil_fid.add(child) + if child is not None: + self.children.add(child) - def add_marriage(self, fid): + def add_marriage(self, fid: str): """retrieve and add marriage information :param fid: the marriage fid """ @@ -603,7 +899,7 @@ class Fam: for n in notes["relationships"][0]["notes"]: text_note = "=== %s ===\n" % n["subject"] if "subject" in n else "" text_note += n["text"] + "\n" if "text" in n else "" - self.notes.add(Note(text_note, self.tree, num_perfix=f'FAM_{self.fid}')) + self.notes.add(Note(text_note, self.tree, num_prefix=f'FAM_{self.fid}', note_type='Marriage Note')) def get_contributors(self): """retrieve contributors""" @@ -626,33 +922,44 @@ class Fam: if n.text == text: self.notes.add(n) return - self.notes.add(Note(text, self.tree, num_perfix=f'FAM_{self.fid}_CONTRIB')) + self.notes.add(Note(text, self.tree, num_prefix=f'FAM_{self.fid}_CONTRIB', note_type='Contribution Note')) @property def id(self): - return self.fid if self.fid else self.num - - @property - def husband_id(self): - return self.husb_fid or self.husb_num + return self.num - @property - def wife_id(self): - return self.wife_fid or self.wife_num - - @property - def children_ids(self): - return self.chil_fid or self.chil_num + def printxml(self, parent_element): + # + # + # + # + # + # + # + family = ET.SubElement(parent_element, + 'family', + handle=self.handle, + # change='1720382301', + id=self.id) + ET.SubElement(family, 'rel', type='Unknown') + if self.husband: + ET.SubElement(family, 'father', hlink=self.husband.handle) + if self.wife: + ET.SubElement(family, 'mother', hlink=self.wife.handle) + for child in self.children: + ET.SubElement(family, 'childref', hlink=child.handle) + for fact in self.facts: + ET.SubElement(family, 'eventref', hlink=fact.handle, role='Primary') def print(self, file=sys.stdout): """print family information in GEDCOM format""" file.write("0 @F%s@ FAM\n" % self.id) - if self.husb_num: - file.write("1 HUSB @I%s@\n" % self.husband_id) - if self.wife_num: - file.write("1 WIFE @I%s@\n" % self.wife_id) - for child_id in self.children_ids: - file.write("1 CHIL @I%s@\n" % child_id) + if self.husband: + file.write("1 HUSB @I%s@\n" % self.husband.id) + if self.wife: + file.write("1 WIFE @I%s@\n" % self.wife.id) + for child in self.children: + file.write("1 CHIL @I%s@\n" % child.id) for o in self.facts: o.print(file) if self.sealing_spouse: @@ -673,20 +980,27 @@ class Tree: :param fs: a Session object """ - def __init__(self, fs=None, exclude=None): + def __init__(self, fs: Optional[requests.Session]=None, exclude=None, geonames_key=None): self.fs = fs - self.indi = dict() - self.fam = dict() + self.geonames_key = geonames_key + self.indi: Dict[str, Indi] = dict() + self.fam: Dict[str, Fam] = dict() self.notes = list() + self.facts: Set[Fact] = set() self.sources = dict() - self.places = dict() + self.places: List[Place] = [] + self.places_by_names: Dict[str, Place] = dict() + self.place_cache: Dict[str, Tuple[float, float]] = dict() self.display_name = self.lang = None self.exclude = exclude or [] + self.place_counter = 0 if fs: self.display_name = fs.display_name self.lang = babelfish.Language.fromalpha2(fs.lang).name - def add_indis(self, fids_in): + self.geosession = CachedSession('http_cache', backend='filesystem', expire_after=86400) + + def add_indis(self, fids_in: List[str]): """add individuals to the family tree :param fids: an iterable of fid """ @@ -719,21 +1033,17 @@ class Tree: if data: if "places" in data: for place in data["places"]: - if place["id"] not in self.places: - self.places[place["id"]] = ( - str(place["latitude"]), - str(place["longitude"]), + if place["id"] not in self.place_cache: + self.place_cache[place["id"]] = ( + place["latitude"], + place["longitude"], ) loop.run_until_complete(add_datas(loop, data)) if "childAndParentsRelationships" in data: for rel in data["childAndParentsRelationships"]: - father = ( - rel["parent1"]["resourceId"] if "parent1" in rel else None - ) - mother = ( - rel["parent2"]["resourceId"] if "parent2" in rel else None - ) - child = rel["child"]["resourceId"] if "child" in rel else None + father: str | None = rel.get("parent1", {}).get("resourceId") + mother: str | None = rel.get("parent2", {}).get("resourceId") + child: str | None = rel.get("child", {}).get("resourceId") if child in self.indi: self.indi[child].parents.add((father, mother)) if father in self.indi: @@ -756,30 +1066,111 @@ class Tree: ) new_fids = new_fids[MAX_PERSONS:] - def add_fam(self, father, mother): - """add a family to the family tree - :param father: the father fid or None - :param mother: the mother fid or None - """ - if (father, mother) not in self.fam: - self.fam[(father, mother)] = Fam(father, mother, self) - def add_trio(self, father, mother, child): + + def ensure_family(self, father: Optional['Indi'], mother: Optional['Indi']) -> Fam: + fam_id = Fam.gen_id(father, mother) + if fam_id not in self.fam: + self.fam[fam_id] = Fam(father, mother, self) + return self.fam[fam_id] + + + def place_by_geoname_id(self, id: str) -> Optional[Place]: + for place in self.places: + if place.id == id: + return place + return None + + def get_by_geonames_id(self, geonames_id: str) -> Place: + print('Fetching place hierarchy for', geonames_id, file=sys.stderr) + hierarchy = geocoder.geonames( + geonames_id, + key=self.geonames_key, + lang=['hu', 'en', 'de'], + method='hierarchy', + session=self.geosession, + ) + + if hierarchy and hierarchy.ok: + last_place = None + for item in hierarchy.geojson.get('features', []): + properties = item.get('properties', {}) + code = properties.get('code') + + if code in ['AREA', 'CONT']: + continue + + print('Properties', properties, file=sys.stderr) + id = 'GEO' + str(properties['geonames_id']) + place = self.place_by_geoname_id(id) + if place is None: + place = Place( + id, + properties.get('address'), + GEONAME_FEATURE_MAP.get(code, 'Unknown'), + last_place, + properties.get('lat'), + properties.get('lng') + ) + self.places.append(place) + last_place = place + return last_place + + @property + def _next_place_counter(self): + self.place_counter += 1 + return self.place_counter + + + def ensure_place(self, place_name: str, fid: Optional[str] = None, coord: Optional[Tuple[float, float]] = None) -> Place: + if place_name not in self.places_by_names: + place = None + if self.geonames_key: + print('Fetching place', place_name, file=sys.stderr) + geoname_record = geocoder.geonames( + place_name, + key=self.geonames_key, + session=self.geosession, + ) + if geoname_record and geoname_record.ok: + place = self.get_by_geonames_id(geoname_record.geonames_id) + if place is None: + coord = self.place_cache.get(fid) if coord is None else coord + place = Place( + 'PFSID' + fid if fid is not None else 'P' + str(self._next_place_counter), + place_name, + latitude=coord[0] if coord is not None else None, + longitude=coord[1] if coord is not None else None + ) + self.places.append(place) + self.places_by_names[place_name] = place + return self.places_by_names[place_name] + + # def add_fam(self, father, mother): + # """add a family to the family tree + # :param father: the father fid or None + # :param mother: the mother fid or None + # """ + # if (father, mother) not in self.fam: + # self.fam[(father, mother)] = Fam(father, mother, self) + + def add_trio(self, father: Indi | None, mother: Indi | None, child: Indi | None): """add a children relationship to the family tree :param father: the father fid or None :param mother: the mother fid or None :param child: the child fid or None """ - if father in self.indi: - self.indi[father].add_fams((father, mother)) - if mother in self.indi: - self.indi[mother].add_fams((father, mother)) - if child in self.indi and (father in self.indi or mother in self.indi): - self.indi[child].add_famc((father, mother)) - self.add_fam(father, mother) - self.fam[(father, mother)].add_child(child) - - def add_parents(self, fids): + fam = self.ensure_family(father, mother) + if child is not None: + fam.add_child(child) + child.add_famc(fam) + + if father is not None: + father.add_fams(fam) + if mother is not None: + mother.add_fams(fam) + + def add_parents(self, fids: Set[str]): """add parents relationships :param fids: a set of fids """ @@ -799,27 +1190,32 @@ class Tree: or not mother and father in self.indi ): - self.add_trio(father, mother, fid) + self.add_trio( + self.indi.get(father), + self.indi.get(mother), + self.indi.get(fid), + ) return set(filter(None, parents)) - def add_spouses(self, fids): + def add_spouses(self, fids: Set[str]): """add spouse relationships :param fids: a set of fid """ - async def add(loop, rels): + async def add(loop, rels: Set[Tuple[str, str, str]]): futures = set() for father, mother, relfid in rels: - if (father, mother) in self.fam: + fam_id = Fam.gen_id(self.indi[father], self.indi[mother]) + if self.fam.get(fam_id): futures.add( loop.run_in_executor( - None, self.fam[(father, mother)].add_marriage, relfid + None, self.fam[fam_id].add_marriage, relfid ) ) for future in futures: await future - rels = set() + rels: Set[Tuple[str, str, str]] = set() for fid in fids & self.indi.keys(): rels |= self.indi[fid].spouses loop = asyncio.get_event_loop() @@ -829,16 +1225,19 @@ class Tree: ) for father, mother, _ in rels: if father in self.indi and mother in self.indi: - self.indi[father].add_fams((father, mother)) - self.indi[mother].add_fams((father, mother)) - self.add_fam(father, mother) + father_indi = self.indi[father] + mother_indi = self.indi[mother] + fam = self.ensure_family(father_indi, mother_indi) + father_indi.add_fams(fam) + mother_indi.add_fams(fam) + loop.run_until_complete(add(loop, rels)) def add_children(self, fids): """add children relationships :param fids: a set of fid """ - rels = set() + rels: Set[Tuple[str, str, str]] = set() for fid in fids & self.indi.keys(): rels |= self.indi[fid].children if fid in self.indi else set() children = set() @@ -853,7 +1252,11 @@ class Tree: or not mother and father in self.indi ): - self.add_trio(father, mother, child) + self.add_trio( + self.indi.get(father), + self.indi.get(mother), + self.indi.get(child), + ) children.add(child) return children @@ -874,25 +1277,82 @@ class Tree: def reset_num(self): """reset all GEDCOM identifiers""" - for husb, wife in self.fam: - self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None - self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None - self.fam[(husb, wife)].chil_num = set( - self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid - ) - for fid in self.indi: - self.indi[fid].famc_num = set( - self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid - ) - self.indi[fid].fams_num = set( - self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid - ) - self.indi[fid].famc_ids = set( - self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid - ) - self.indi[fid].fams_ids = set( - self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid - ) + # for husb, wife in self.fam: + # self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None + # self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None + # self.fam[(husb, wife)].chil_num = set( + # self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid + # ) + # for fid in self.indi: + # self.indi[fid].famc_num = set( + # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid + # ) + # self.indi[fid].fams_num = set( + # self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid + # ) + # self.indi[fid].famc_ids = set( + # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid + # ) + # self.indi[fid].fams_ids = set( + # self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid + # ) + + def printxml(self, file: BinaryIO): + +# root = ET.Element("root") +# doc = ET.SubElement(root, "doc") + +# ET.SubElement(doc, "field1", name="blah").text = "some value1" +# ET.SubElement(doc, "field2", name="asdfasd").text = "some vlaue2" + +# tree = ET.ElementTree(root) +# tree.write("filename.xml") + +# +# +# +#
+# +# Barnabás Südy +# +#
+ + root = ET.Element("database", xmlns="http://gramps-project.org/xml/1.7.1/") + + header = ET.SubElement(root, "header") + ET.SubElement(header, "created", date=datetime.strftime(datetime.now(), "%Y-%m-%d"), version="5.2.2") + researcher = ET.SubElement(header, "researcher") + resname = ET.SubElement(researcher, "resname") + resname.text = self.display_name + + people = ET.SubElement(root, "people") + for indi in sorted(self.indi.values(), key=lambda x: x.num): + indi.printxml(people) + + families = ET.SubElement(root, "families") + for fam in sorted(self.fam.values(), key=lambda x: x.num): + fam.printxml(families) + + events = ET.SubElement(root, "events") + for fact in self.facts: + fact.printxml(events) + + notes = ET.SubElement(root, "notes") + for note in sorted(self.notes, key=lambda x: x.id): + note.printxml(notes) + + places = ET.SubElement(root, "places") + for place in self.places: + place.printxml(places) + + tree = ET.ElementTree(root) + + doctype='' + file.write(doctype.encode('utf-8')) + tree.write(file, 'utf-8') + def print(self, file=sys.stdout): """print family tree in GEDCOM format""" @@ -913,8 +1373,8 @@ class Tree: for fid in sorted(self.indi, key=lambda x: self.indi.__getitem__(x).num): self.indi[fid].print(file) - for husb, wife in sorted(self.fam, key=lambda x: self.fam.__getitem__(x).num): - self.fam[(husb, wife)].print(file) + for fam in sorted(self.fam.values(), key=lambda x: x.num): + fam.print(file) sources = sorted(self.sources.values(), key=lambda x: x.num) for s in sources: s.print(file) diff --git a/getmyancestors/getmyancestors.py b/getmyancestors/getmyancestors.py index d9404a7..bee789a 100644 --- a/getmyancestors/getmyancestors.py +++ b/getmyancestors/getmyancestors.py @@ -108,6 +108,14 @@ def main(): default=60, help="Timeout in seconds [60]", ) + + parser.add_argument( + "-x", + "--xml", + action="store_true", + default=False, + help="To print the output in Gramps XML format [False]", + ) parser.add_argument( "--show-password", action="store_true", @@ -120,13 +128,20 @@ def main(): default=False, help="Save settings into file [False]", ) + parser.add_argument( + "-g", + "--geonames", + metavar="", + type=str, + help="Geonames.org username in order to download place data", + ) try: parser.add_argument( "-o", "--outfile", metavar="", - type=argparse.FileType("w", encoding="UTF-8"), - default=sys.stdout, + # type=argparse.FileType("w", encoding="UTF-8"), + # default=sys.stdout, help="output GEDCOM file [stdout]", ) parser.add_argument( @@ -202,7 +217,11 @@ def main(): if not fs.logged: sys.exit(2) _ = fs._ - tree = Tree(fs, exclude=args.exclude) + tree = Tree( + fs, + exclude=args.exclude, + geonames_key=args.geonames, + ) # check LDS account if args.get_ordinances: @@ -320,7 +339,12 @@ def main(): finally: # compute number for family relationships and print GEDCOM file tree.reset_num() - tree.print(args.outfile) + if args.xml: + with open(args.outfile, "wb") as f: + tree.printxml(f) + else: + with open(args.outfile, "w", encoding="UTF-8") as f: + tree.print(f) print( _( "Downloaded %s individuals, %s families, %s sources and %s notes " diff --git a/getmyancestors/mergemyancestors.py b/getmyancestors/mergemyancestors.py index eb48a33..b650a67 100644 --- a/getmyancestors/mergemyancestors.py +++ b/getmyancestors/mergemyancestors.py @@ -65,9 +65,7 @@ def main(): fid = ged.indi[num].fid if fid not in tree.indi: indi_counter += 1 - tree.indi[fid] = Indi(tree=tree, num=indi_counter) - tree.indi[fid].tree = tree - tree.indi[fid].fid = ged.indi[num].fid + tree.indi[fid] = Indi(ged.indi[num].fid, tree, num=indi_counter) tree.indi[fid].fams_fid |= ged.indi[num].fams_fid tree.indi[fid].famc_fid |= ged.indi[num].famc_fid tree.indi[fid].name = ged.indi[num].name