From: Barnabas Sudy Date: Tue, 20 Aug 2024 05:19:08 +0000 (+0200) Subject: Fixed cache, date formatting and envet id X-Git-Url: https://git.nutra.tk/v2?a=commitdiff_plain;h=5016891655dbf40e0213326f81713df032959750;p=gamesguru%2Fgetmyancestors.git Fixed cache, date formatting and envet id --- diff --git a/getmyancestors/classes/constants.py b/getmyancestors/classes/constants.py index 20dab75..9b80a64 100644 --- a/getmyancestors/classes/constants.py +++ b/getmyancestors/classes/constants.py @@ -3,6 +3,14 @@ # Subject to change: see https://www.familysearch.org/developers/docs/api/tree/Persons_resource MAX_PERSONS = 200 +FACT_TAG_EVENT_TYPE = { + 'BIRT': 'Birth', + 'DEAT': 'Death', + 'BURI': 'Burial', + 'CREM': 'Cremation', + 'NATU': 'Naturalization', +} + FACT_TAGS = { "http://gedcomx.org/Birth": "BIRT", "http://gedcomx.org/Christening": "CHR", diff --git a/getmyancestors/classes/session.py b/getmyancestors/classes/session.py index 313fa90..9f9aeca 100644 --- a/getmyancestors/classes/session.py +++ b/getmyancestors/classes/session.py @@ -4,7 +4,7 @@ import time from urllib.parse import urlparse, parse_qs import requests -from requests_cache import CachedSession +from requests_cache import CachedSession as CSession from fake_useragent import UserAgent # local imports @@ -12,7 +12,7 @@ from getmyancestors.classes.translation import translations # class Session(requests.Session): -class Session(CachedSession): +class GMASession: """Create a FamilySearch session :param username and password: valid FamilySearch credentials :param verbose: True to active verbose mode @@ -21,7 +21,7 @@ class Session(CachedSession): """ def __init__(self, username, password, verbose=False, logfile=False, timeout=60): - super().__init__('http_cache', backend='filesystem', expire_after=86400) + # super().__init__('http_cache', backend='filesystem', expire_after=86400) # super().__init__() self.username = username self.password = password @@ -70,7 +70,11 @@ class Session(CachedSession): data = res.json() except ValueError: self.write_log("Invalid auth request") - continue + self.write_log(res.headers) + self.write_log(res.text) + + raise "Invalid auth request" + # continue if "loginError" in data: self.write_log(data["loginError"]) return @@ -210,3 +214,15 @@ class Session(CachedSession): if string in translations and self.lang in translations[string]: return translations[string][self.lang] return string + + +class CachedSession(GMASession, CSession): + + def __init__(self, username, password, verbose=False, logfile=False, timeout=60): + CSession.__init__(self, 'http_cache', backend='filesystem', expire_after=86400) + GMASession.__init__(self, username, password, verbose=verbose, logfile=logfile, timeout=timeout) +class Session(GMASession, requests.Session): + + def __init__(self, username, password, verbose=False, logfile=False, timeout=60): + requests.Session.__init__(self) + GMASession.__init__(self, username, password, verbose=verbose, logfile=logfile, timeout=timeout) diff --git a/getmyancestors/classes/tree.py b/getmyancestors/classes/tree.py index a48342f..9091150 100644 --- a/getmyancestors/classes/tree.py +++ b/getmyancestors/classes/tree.py @@ -3,14 +3,15 @@ import re import time import asyncio import os -from urllib.parse import unquote +from urllib.parse import unquote, unquote_plus from datetime import datetime -from typing import Set, Dict, List, Tuple, Union, Optional, BinaryIO +from typing import Set, Dict, List, Tuple, Union, Optional, BinaryIO, Any # global imports import babelfish import geocoder import requests import xml.etree.cElementTree as ET +from xml.etree.cElementTree import Element from requests_cache import CachedSession # local imports @@ -140,7 +141,7 @@ class Note: return self._handle - def printxml(self, parent_element): + def printxml(self, parent_element: Element) -> None: note_element = ET.SubElement( parent_element, 'note', @@ -167,6 +168,8 @@ class Source: Source.counter += 1 self.num = Source.counter + self._handle = None + self.tree = tree self.url = self.citation = self.title = self.fid = None self.notes = set() @@ -190,10 +193,19 @@ class Source: num="S%s-%s" % (self.id, idx), note_type='Source Note' )) + self.modified = data['attribution']['modified'] @property def id(self): - return self.fid or self.num + return 'S' + str(self.fid or self.num) + + + @property + def handle(self): + if not self._handle: + self._handle = '_' + os.urandom(10).hex() + + return self._handle def print(self, file=sys.stdout): """print Source in GEDCOM format""" @@ -212,6 +224,30 @@ class Source: """print the reference in GEDCOM format""" file.write("%s SOUR @S%s@\n" % (level, self.id)) + def printxml(self, parent_element: Element) -> None: + + # + # Palkovics Cser József, "Hungary Civil Registration, 1895-1980" + # "Hungary Civil Registration, 1895-1980", , <i>FamilySearch</i> (https://www.familysearch.org/ark:/61903/1:1:6JBQ-NKWD : Thu Mar 07 10:23:43 UTC 2024), Entry for Palkovics Cser József and Palkovics Cser István, 27 Aug 1928. + # https://familysearch.org/ark:/61903/1:1:6JBQ-NKWD + # + # + source_element = ET.SubElement( + parent_element, + 'source', + handle=self.handle, + change=str(int(self.modified / 1000)), + id=self.id + ) + if self.title: + ET.SubElement(source_element, 'stitle').text = self.title + if self.citation: + ET.SubElement(source_element, 'sauthor').text = self.citation + if self.url: + ET.SubElement(source_element, 'spubinfo').text = self.url + if self.fid: + ET.SubElement(source_element, 'srcattribute', type='REFN', value=self.fid) + class Fact: """GEDCOM Fact class @@ -223,27 +259,39 @@ class Fact: def __init__(self, data=None, tree: Optional['Tree']=None, num_prefix=None): self.value = self.type = self.date = None + self.date_type = None self.place: Optional[Place] = None self.note = None self._handle: Optional[str] = None - self.num_prefix = num_prefix - - Fact.counter[num_prefix or 'None'] = Fact.counter.get(num_prefix or 'None', 0) + 1 - self.num = Fact.counter[num_prefix or 'None'] - if data: if "value" in data: self.value = data["value"] if "type" in data: self.type = data["type"] + self.fs_type = self.type if self.type in FACT_EVEN: self.type = tree.fs._(FACT_EVEN[self.type]) elif self.type[:6] == "data:,": self.type = unquote(self.type[6:]) elif self.type not in FACT_TAGS: self.type = None + + + self.num_prefix = f'{num_prefix}_{FACT_TAGS[self.type]}' if num_prefix and self.type in FACT_TAGS else num_prefix + Fact.counter[self.num_prefix or 'None'] = Fact.counter.get(self.num_prefix or 'None', 0) + 1 + self.num = Fact.counter[self.num_prefix or 'None'] + if data: if "date" in data: - self.date = data["date"]["original"] + if 'formal' in data['date']: + self.date = data['date']['formal'].split('+')[-1].split('/')[0] + if data['date']['formal'].startswith('A+'): + self.date_type = 'about' + if data['date']['formal'].startswith('/+'): + self.date_type = 'before' + if data['date']['formal'].endswith('/'): + self.date_type = 'after' + else: + self.date = data["date"]["original"] if "place" in data: place = data["place"] place_name = place["original"] @@ -253,15 +301,17 @@ class Fact: self.note = Note( data["attribution"]["changeMessage"], tree, - num_prefix='E' + num_prefix if num_prefix else None, + num_prefix='E' + self.num_prefix if self.num_prefix else None, note_type='Event Note', ) if self.type == "http://gedcomx.org/Death" and not ( self.date or self.place ): self.value = "Y" + if tree: tree.facts.add(self) + @property def id(self): @@ -284,9 +334,20 @@ class Fact: # change='1720382301', id=self.id ) - ET.SubElement(event_element, 'type').text = FACT_TAGS.get(self.type, self.type) + + ET.SubElement(event_element, 'type').text = ( + unquote_plus(self.type[len('http://gedcomx.org/'):]) + if self.type.startswith('http://gedcomx.org/') + else self.type + ) + # FACT_TAGS.get(self.type, self.type) if self.date: - ET.SubElement(event_element, 'datestr', val=self.date) + params={ + 'val': self.date, + } + if self.date_type is not None: + params['type'] = self.date_type + ET.SubElement(event_element, 'datestr', **params) if self.place: ET.SubElement(event_element, 'place', hlink=self.place.handle) if self.note: @@ -343,18 +404,27 @@ class Memorie: file.write(cont("2 FILE " + self.url)) +NAME_MAP = { + "preferred" : 'Preeferred Name', + "nickname" : 'Nickname', + "birthname": 'Birth Name', + "aka": 'Also Known As', + "married": 'Married Name', +} + class Name: """GEDCOM Name class :param data: FS Name data :param tree: a Tree object """ - def __init__(self, data=None, tree=None, owner_fis=None, kind=None): + def __init__(self, data=None, tree=None, owner_fis=None, kind=None, alternative: bool=False): self.given = "" self.surname = "" self.prefix = None self.suffix = None self.note = None + self.alternative = alternative self.owner_fis = owner_fis self.kind = kind if data: @@ -377,8 +447,12 @@ class Name: ) def printxml(self, parent_element): - - person_name = ET.SubElement(parent_element, 'name', type=self.kind) + params = {} + if self.kind is not None: + params['type'] = NAME_MAP.get(self.kind, self.kind) + if self.alternative: + params['alt'] = '1' + person_name = ET.SubElement(parent_element, 'name', **params) ET.SubElement(person_name, 'first').text = self.given ET.SubElement(person_name, 'surname').text = self.surname # TODO prefix / suffix @@ -488,6 +562,46 @@ class Ordinance: if self.famc: file.write("2 FAMC @F%s@\n" % self.famc.num) +class Citation: + + def __init__(self, data: Dict[str, Any], source: Source): + self._handle = None + self.id = data["id"] + self.source = source + self.message = ( + data["attribution"]["changeMessage"] + if "changeMessage" in data["attribution"] + else None + ) + # TODO create citation note out of this. + self.modified = data['attribution']['modified'] + + + @property + def handle(self): + if not self._handle: + self._handle = '_' + os.urandom(10).hex() + + return self._handle + + def printxml(self, parent_element: Element): + +# +# +# 2 +# +# +# + citation_element = ET.SubElement( + parent_element, + 'citation', + handle=self.handle, + change=str(int(self.modified / 1000)), + id='C' + str(self.id) + ) + ET.SubElement(citation_element, 'confidence').text = '2' + ET.SubElement(citation_element, 'sourceref', hlink=self.source.handle) + class Indi: """GEDCOM individual class @@ -529,7 +643,8 @@ class Indi: self.aka: Set[Name] = set() self.facts: Set[Fact] = set() self.notes: Set[Note] = set() - self.sources: Set[Source] = set() + # self.sources: Set[Source] = set() + self.citations: Set[Citation] = set() self.memories = set() def add_data(self, data): @@ -537,17 +652,18 @@ class Indi: if data: self.living = data["living"] for x in data["names"]: - if x["preferred"]: - self.name = Name(x, self.tree, self.fid, "preferred") + alt = not x.get('preferred', False) + if x["type"] == "http://gedcomx.org/Nickname": + self.nicknames.add(Name(x, self.tree, self.fid, "nickname", alt)) + elif x["type"] == "http://gedcomx.org/BirthName": + self.birthnames.add(Name(x, self.tree, self.fid, "birthname", alt)) + elif x["type"] == "http://gedcomx.org/AlsoKnownAs": + self.aka.add(Name(x, self.tree, self.fid, "aka", alt)) + elif x["type"] == "http://gedcomx.org/MarriedName": + self.married.add(Name(x, self.tree, self.fid, "married", alt)) else: - if x["type"] == "http://gedcomx.org/Nickname": - self.nicknames.add(Name(x, self.tree, self.fid, "nickname")) - if x["type"] == "http://gedcomx.org/BirthName": - self.birthnames.add(Name(x, self.tree, self.fid, "birthname")) - if x["type"] == "http://gedcomx.org/AlsoKnownAs": - self.aka.add(Name(x, self.tree, self.fid, "aka")) - if x["type"] == "http://gedcomx.org/MarriedName": - self.married.add(Name(x, self.tree, self.fid, "married")) + print('Unknown name type: ' + x.get('type'), file=sys.stderr) + raise 'Unknown name type' if "gender" in data: if data["gender"]["type"] == "http://gedcomx.org/Male": self.gender = "M" @@ -576,17 +692,16 @@ class Indi: if sources: quotes = dict() for quote in sources["persons"][0]["sources"]: - quotes[quote["descriptionId"]] = ( - quote["attribution"]["changeMessage"] - if "changeMessage" in quote["attribution"] - else None - ) - for source in sources["sourceDescriptions"]: - if source["id"] not in self.tree.sources: - self.tree.sources[source["id"]] = Source(source, self.tree) - self.sources.add( - (self.tree.sources[source["id"]], quotes[source["id"]]) + source_id = quote["descriptionId"] + source_data = next( + (s for s in sources['sourceDescriptions'] if s['id'] == source_id), + None, ) + source = self.tree.ensure_source(source_data) + if source: + citation = self.tree.ensure_citation(quote, source) + self.citations.add(citation) + for evidence in data.get("evidence", []): memory_id, *_ = evidence["id"].partition("-") url = "/platform/memories/memories/%s" % memory_id @@ -743,8 +858,9 @@ class Indi: for fact in self.facts: ET.SubElement(person, 'eventref', hlink=fact.handle, role='Primary') - # TODO citations - # TODO notes + for citation in self.citations: + ET.SubElement(person, 'citationref', hlink=citation.handle) + for note in self.notes: ET.SubElement(person, 'noteref', hlink=note.handle) @@ -980,19 +1096,20 @@ class Tree: :param fs: a Session object """ - def __init__(self, fs: Optional[requests.Session]=None, exclude=None, geonames_key=None): + def __init__(self, fs: Optional[requests.Session]=None, exclude: List[str]=None, geonames_key=None): self.fs = fs self.geonames_key = geonames_key self.indi: Dict[str, Indi] = dict() self.fam: Dict[str, Fam] = dict() self.notes = list() self.facts: Set[Fact] = set() - self.sources = dict() + self.sources: Dict[str, Source] = dict() + self.citations: Dict[str, Citation] = dict() self.places: List[Place] = [] self.places_by_names: Dict[str, Place] = dict() self.place_cache: Dict[str, Tuple[float, float]] = dict() self.display_name = self.lang = None - self.exclude = exclude or [] + self.exclude: List[str] = exclude or [] self.place_counter = 0 if fs: self.display_name = fs.display_name @@ -1066,8 +1183,17 @@ class Tree: ) new_fids = new_fids[MAX_PERSONS:] - + def ensure_source(self, source_data: Dict[str, Any]) -> Source: + if source_data["id"] not in self.sources: + self.sources[source_data["id"]] = Source(source_data, self) + return self.sources.get(source_data["id"]) + def ensure_citation(self, data: Dict[str, Any], source: Source) -> Citation: + citation_id = data["id"] + if citation_id not in self.citations: + self.citations[citation_id] = Citation(data, source) + return self.citations[citation_id] + def ensure_family(self, father: Optional['Indi'], mother: Optional['Indi']) -> Fam: fam_id = Fam.gen_id(father, mother) if fam_id not in self.fam: @@ -1205,6 +1331,8 @@ class Tree: async def add(loop, rels: Set[Tuple[str, str, str]]): futures = set() for father, mother, relfid in rels: + if father in self.exclude or mother in self.exclude: + continue fam_id = Fam.gen_id(self.indi[father], self.indi[mother]) if self.fam.get(fam_id): futures.add( @@ -1347,6 +1475,14 @@ class Tree: for place in self.places: place.printxml(places) + sources = ET.SubElement(root, "sources") + for source in self.sources.values(): + source.printxml(sources) + + citations = ET.SubElement(root, "citations") + for citation in self.citations.values(): + citation.printxml(citations) + tree = ET.ElementTree(root) doctype='' diff --git a/getmyancestors/getmyancestors.py b/getmyancestors/getmyancestors.py index bee789a..3e13e4c 100644 --- a/getmyancestors/getmyancestors.py +++ b/getmyancestors/getmyancestors.py @@ -13,6 +13,7 @@ import argparse # local imports from getmyancestors.classes.tree import Tree from getmyancestors.classes.session import Session +from getmyancestors.classes.session import CachedSession def main(): @@ -79,6 +80,12 @@ def main(): default=False, help="Add spouses and couples information [False]", ) + parser.add_argument( + "--cache", + action="store_true", + default=False, + help="Use of http cache to reduce requests during testing [False]", + ) parser.add_argument( "-r", "--get-contributors", @@ -213,7 +220,11 @@ def main(): # initialize a FamilySearch session and a family tree object print("Login to FamilySearch...", file=sys.stderr) - fs = Session(args.username, args.password, args.verbose, args.logfile, args.timeout) + if args.cache: + print("Using cache...", file=sys.stderr) + fs = CachedSession(args.username, args.password, args.verbose, args.logfile, args.timeout) + else: + fs = Session(args.username, args.password, args.verbose, args.logfile, args.timeout) if not fs.logged: sys.exit(2) _ = fs._