]> Nutra Git (v1) - gamesguru/getmyancestors.git/commitdiff
Retrieving place information from geonames
authorBarnabas Sudy <barnabas.sudy@gmail.com>
Sun, 11 Aug 2024 04:12:01 +0000 (06:12 +0200)
committerBarnabas Sudy <barnabas.sudy@gmail.com>
Sun, 11 Aug 2024 04:12:01 +0000 (06:12 +0200)
getmyancestors/classes/tree.py
getmyancestors/getmyancestors.py
getmyancestors/mergemyancestors.py

index d99113df64264bb5b1b00f9a60eb898a22409c16..a48342f27f497a90a5a21200f988313deed8fa04 100644 (file)
@@ -2,10 +2,16 @@ import sys
 import re
 import time
 import asyncio
+import os
 from urllib.parse import unquote
-
+from datetime import datetime
+from typing import Set, Dict, List, Tuple, Union, Optional, BinaryIO
 # global imports
 import babelfish
+import geocoder
+import requests
+import xml.etree.cElementTree as ET
+from requests_cache import CachedSession
 
 # local imports
 import getmyancestors
@@ -17,6 +23,52 @@ from getmyancestors.classes.constants import (
 )
 
 
+COUNTY = 'County'
+COUNTRY = 'Country'
+CITY = 'City'
+
+GEONAME_FEATURE_MAP = {
+    'ADM1': COUNTY, #  first-order administrative division     a primary administrative division of a country, such as a state in the United States
+    'ADM1H': COUNTY, #  historical first-order administrative division a former first-order administrative division
+    'ADM2': COUNTY, #  second-order administrative division    a subdivision of a first-order administrative division
+    'ADM2H': COUNTY, # historical second-order administrative division a former second-order administrative division
+    'ADM3': COUNTY, #  third-order administrative division     a subdivision of a second-order administrative division
+    'ADM3H': COUNTY, # historical third-order administrative division  a former third-order administrative division
+    'ADM4': COUNTY, #  fourth-order administrative division    a subdivision of a third-order administrative division
+    'ADM4H': COUNTY, # historical fourth-order administrative division a former fourth-order administrative division
+    'ADM5': COUNTY, #  fifth-order administrative division     a subdivision of a fourth-order administrative division
+    'ADM5H': COUNTY, # historical fifth-order administrative division  a former fifth-order administrative division
+    'ADMD': COUNTY, #  administrative division an administrative division of a country, undifferentiated as to administrative level
+    'ADMDH': COUNTY, # historical administrative division      a former administrative division of a political entity, undifferentiated as to administrative level
+    # 'LTER':  leased area     a tract of land leased to another country, usually for military installations
+    'PCL': COUNTRY, # political entity 
+    'PCLD': COUNTRY, # dependent political entity      
+    'PCLF': COUNTRY, # freely associated state 
+    'PCLH': COUNTRY, # historical political entity     a former political entity
+    'PCLI': COUNTRY, # independent political entity    
+    'PCLIX': COUNTRY, # section of independent political entity        
+    'PCLS': COUNTRY, # semi-independent political entity
+
+    'PPL': CITY, # populated place     a city, town, village, or other agglomeration of buildings where people live and work
+    'PPLA': CITY, # seat of a first-order administrative division      seat of a first-order administrative division (PPLC takes precedence over PPLA)
+    'PPLA2': CITY, # seat of a second-order administrative division    
+    'PPLA3': CITY, # seat of a third-order administrative division     
+    'PPLA4': CITY, # seat of a fourth-order administrative division    
+    'PPLA5': CITY, # seat of a fifth-order administrative division     
+    'PPLC': CITY, # capital of a political entity      
+    'PPLCH': CITY, # historical capital of a political entity  a former capital of a political entity
+    'PPLF': CITY, # farm village       a populated place where the population is largely engaged in agricultural activities
+    'PPLG': CITY, # seat of government of a political entity   
+    'PPLH': CITY, # historical populated place a populated place that no longer exists
+    'PPLL': CITY, # populated locality an area similar to a locality but with a small group of dwellings or other buildings
+    'PPLQ': CITY, # abandoned populated place  
+    'PPLR': CITY, # religious populated place  a populated place whose population is largely engaged in religious occupations
+    'PPLS': CITY, # populated places   cities, towns, villages, or other agglomerations of buildings where people live and work
+    'PPLW': CITY, # destroyed populated place  a village, town or city destroyed by a natural disaster, or by war
+    'PPLX': CITY, # section of populated place
+
+}
+
 # getmyancestors classes and functions
 def cont(string):
     """parse a GEDCOM line adding CONT and CONT tags if necessary"""
@@ -42,7 +94,6 @@ def cont(string):
         max_len = 248
     return ("\n%s CONT " % level).join(res) + "\n"
 
-
 class Note:
     """GEDCOM Note class
     :param text: the Note content
@@ -52,14 +103,16 @@ class Note:
 
     counter = {}
 
-    def __init__(self, text="", tree=None, num=None, num_perfix=None):
-        self.num_prefix = num_perfix
+    def __init__(self, text="", tree=None, num=None, num_prefix=None, note_type=None):
+        self._handle = None
+        self.note_type = note_type or 'Source Note'
+        self.num_prefix = num_prefix
         if num:
             self.num = num
         else:
-            Note.counter[num_perfix or 'None'] = Note.counter.get(num_perfix or 'None', 0) + 1
-            self.num = Note.counter[num_perfix or 'None']
-        print(f'##### Creating Note: {num_perfix}, {self.num}', file=sys.stderr)
+            Note.counter[num_prefix or 'None'] = Note.counter.get(num_prefix or 'None', 0) + 1
+            self.num = Note.counter[num_prefix or 'None']
+        print(f'##### Creating Note: {num_prefix}, {self.num}', file=sys.stderr)
         self.text = text.strip()
 
         if tree:
@@ -79,6 +132,24 @@ class Note:
         print(f'Linking Note: {self.id}', file=sys.stderr)
         file.write("%s NOTE @N%s@\n" % (level, self.id))
 
+    
+    @property
+    def handle(self):
+        if not self._handle:
+            self._handle = '_' + os.urandom(10).hex()
+
+        return self._handle
+
+    def printxml(self, parent_element):
+        """append this note as a <note> child of parent_element, typed with self.note_type"""
+        note_element = ET.SubElement(
+            parent_element,
+            'note',
+            handle=self.handle,
+            id=self.id,
+            type=self.note_type,
+        )
+        ET.SubElement(note_element, 'text').text = self.text
 
 class Source:
     """GEDCOM Source class
@@ -113,7 +184,12 @@ class Source:
             if "notes" in data:
                 notes = [ n['text'] for n in data["notes"] if n["text"] ]
                 for idx, n in enumerate(notes):
-                    self.notes.add(Note(n, self.tree, num="S%s-%s" % (self.id, idx)))
+                    self.notes.add(Note(
+                        n,
+                        self.tree,
+                        num="S%s-%s" % (self.id, idx),
+                        note_type='Source Note'
+                    ))
 
     @property
     def id(self):
@@ -143,8 +219,18 @@ class Fact:
     :param tree: a tree object
     """
 
-    def __init__(self, data=None, tree=None, num_prefix=None):
-        self.value = self.type = self.date = self.place = self.note = self.map = None
+    counter = {}
+
+    def __init__(self, data=None, tree: Optional['Tree']=None, num_prefix=None):
+        self.value = self.type = self.date = None
+        self.place: Optional[Place] = None
+        self.note = None
+        self._handle: Optional[str] = None
+        self.num_prefix = num_prefix
+
+        Fact.counter[num_prefix or 'None'] = Fact.counter.get(num_prefix or 'None', 0) + 1
+        self.num = Fact.counter[num_prefix or 'None']
+        
         if data:
             if "value" in data:
                 self.value = data["value"]
@@ -160,15 +246,51 @@ class Fact:
                 self.date = data["date"]["original"]
             if "place" in data:
                 place = data["place"]
-                self.place = place["original"]
-                if "description" in place and place["description"][1:] in tree.places:
-                    self.map = tree.places[place["description"][1:]]
+                place_name = place["original"]
+                place_id = place["description"][1:] if "description" in place and place["description"][1:] in tree.places else None
+                self.place = tree.ensure_place(place_name, place_id)
             if "changeMessage" in data["attribution"]:
-                self.note = Note(data["attribution"]["changeMessage"], tree, num_perfix=num_prefix)
+                self.note = Note(
+                    data["attribution"]["changeMessage"], 
+                    tree,
+                    num_prefix='E' + num_prefix if num_prefix else None,
+                    note_type='Event Note',
+                )
             if self.type == "http://gedcomx.org/Death" and not (
                 self.date or self.place
             ):
                 self.value = "Y"
+        if tree:
+            tree.facts.add(self)
+
+    @property
+    def id(self):
+        """identifier: '<num_prefix>_<num>' when a prefix was given, else the bare counter value"""
+        return f'{self.num_prefix}_{self.num}' if self.num_prefix is not None else self.num
+
+    @property
+    def handle(self):
+        if not self._handle:
+            self._handle = '_' + os.urandom(10).hex()
+
+        return self._handle
+
+    def printxml(self, parent_element):
+        """append this fact as an <event> child of parent_element"""
+        event_element = ET.SubElement(
+            parent_element,
+            'event',
+            handle=self.handle,
+            # id is a bare int when num_prefix is None; attribute values must be str
+            id=str(self.id),
+        )
+        ET.SubElement(event_element, 'type').text = FACT_TAGS.get(self.type, self.type)
+        if self.date:
+            ET.SubElement(event_element, 'datestr', val=self.date)
+        if self.place:
+            ET.SubElement(event_element, 'place', hlink=self.place.handle)
+        if self.note:
+            ET.SubElement(event_element, 'noteref', hlink=self.note.handle)
 
     def print(self, file=sys.stdout):
         """print Fact in GEDCOM format
@@ -188,7 +310,7 @@ class Fact:
         if self.date:
             file.write(cont("2 DATE " + self.date))
         if self.place:
-            file.write(cont("2 PLAC " + self.place))
+            self.place.print(file, 2)
         if self.map:
             latitude, longitude = self.map
             file.write("3 MAP\n4 LATI %s\n4 LONG %s\n" % (latitude, longitude))
@@ -247,7 +369,20 @@ class Name:
                     if z["type"] == "http://gedcomx.org/Suffix":
                         self.suffix = z["value"]
             if "changeMessage" in data["attribution"]:
-                self.note = Note(data["attribution"]["changeMessage"], tree, num_perfix=f'NAME_{owner_fis}_{kind}')
+                self.note = Note(
+                    data["attribution"]["changeMessage"],
+                    tree,
+                    num_prefix=f'NAME_{owner_fis}_{kind}',
+                    note_type='Name Note',
+                )
+
+    def printxml(self, parent_element):
+
+        person_name = ET.SubElement(parent_element, 'name', type=self.kind)
+        ET.SubElement(person_name, 'first').text = self.given
+        ET.SubElement(person_name, 'surname').text = self.surname
+        # TODO prefix / suffix
+
 
     def print(self, file=sys.stdout, typ=None):
         """print Name in GEDCOM format
@@ -265,6 +400,69 @@ class Name:
             self.note.link(file, 2)
 
 
+
+class Place:
+    """GEDCOM Place class
+    :param id: the place identifier written to GEDCOM and XML output
+    :param name: the place name
+    :param type: the place type (e.g. Country, County, City), if known
+    :param parent: the enclosing Place, if any
+    :param latitude: the latitude, if known
+    :param longitude: the longitude, if known
+    """
+
+    counter = 0
+
+    def __init__(
+            self,
+            id: str,
+            name: str,
+            type: Optional[str]=None,
+            parent: Optional['Place']=None,
+            latitude: Optional[float]=None,
+            longitude: Optional[float]=None):
+        self._handle = None
+        self.name = name
+        self.type = type
+        self.id = id
+        self.parent = parent
+        self.latitude = latitude
+        self.longitude = longitude
+
+    @property
+    def handle(self):
+        """random handle, created on first access and reused afterwards"""
+        if not self._handle:
+            self._handle = '_' + os.urandom(10).hex()
+
+        return self._handle
+
+    def print(self, file=sys.stdout, indentation=0):
+        """print Place in GEDCOM format, using indentation as the line's level number"""
+        # Place sets no ``num`` attribute; ``id`` is its identifier
+        file.write("%d @P%s@ PLAC %s\n" % (indentation, self.id, self.name))
+
+    def printxml(self, parent_element):
+        """append this place as a <placeobj> child of parent_element, e.g.
+        <placeobj handle="_fac310962e15149e8244c2ccade" id="P0001" type="County">
+          <pname value="Fejér"/>
+          <placeref hlink="_fac310617a8744e1d62f3d0dafe"/>
+        </placeobj>
+        """
+        place_element = ET.SubElement(
+            parent_element,
+            'placeobj',
+            handle=self.handle,
+            id=self.id,
+            type=self.type or 'Unknown'
+        )
+        ET.SubElement(place_element, 'pname', value=self.name)
+        if self.parent:
+            ET.SubElement(place_element, 'placeref', hlink=self.parent.handle)
+        # test against None so that a 0.0 coordinate is still written
+        if self.latitude is not None and self.longitude is not None:
+            ET.SubElement(place_element, 'coord', long=str(self.longitude), lat=str(self.latitude))
+
 class Ordinance:
     """GEDCOM Ordinance class
     :param data: FS Ordinance data
@@ -300,7 +498,8 @@ class Indi:
 
     counter = 0
 
-    def __init__(self, fid=None, tree=None, num=None):
+    def __init__(self, fid: str, tree: 'Tree', num=None):
+        self._handle = None
         if num:
             self.num = num
         else:
@@ -308,27 +507,29 @@ class Indi:
             self.num = Indi.counter
         self.fid = fid
         self.tree = tree
-        self.famc_fid = set()
-        self.fams_fid = set()
-        self.famc_num = set()
-        self.fams_num = set()
-        self.famc_ids = set()
-        self.fams_ids = set()
-        self.name = None
+        self.famc: Set['Fam'] = set()
+        self.fams: Set['Fam'] = set()
+        # self.famc_fid = set()
+        # self.fams_fid = set()
+        # self.famc_num = set()
+        # self.fams_num = set()
+        # self.famc_ids = set()
+        # self.fams_ids = set()
+        self.name: Optional[Name] = None
         self.gender = None
         self.living = None
-        self.parents = set()
-        self.spouses = set()
-        self.children = set()
+        self.parents: Set[Tuple[str, str]] = set() # (father_id, mother_id)
+        self.spouses: Set[Tuple[str, str, str]]  = set() # (person1, person2, relfid)
+        self.children: Set[Tuple[str, str, str]] = set() # (father_id, mother_id, child_id)
         self.baptism = self.confirmation = self.initiatory = None
         self.endowment = self.sealing_child = None
-        self.nicknames = set()
-        self.facts = set()
-        self.birthnames = set()
-        self.married = set()
-        self.aka = set()
-        self.notes = set()
-        self.sources = set()
+        self.nicknames: Set[Name] = set()
+        self.birthnames: Set[Name] = set()
+        self.married: Set[Name] = set()
+        self.aka: Set[Name] = set()
+        self.facts: Set[Fact] = set()
+        self.notes: Set[Note] = set()
+        self.sources: Set[Source] = set()
         self.memories = set()
 
     def add_data(self, data):
@@ -362,7 +563,8 @@ class Indi:
                                 "=== %s ===\n%s"
                                 % (self.tree.fs._("Life Sketch"), x.get("value", "")),
                                 self.tree,
-                                num_perfix=f'INDI_{self.fid}'
+                                num_prefix=f'INDI_{self.fid}',
+                                note_type='Person Note',
                             )
                         )
                     else:
@@ -397,26 +599,39 @@ class Indi:
                                 for val in x.get("titles", [])
                                 + x.get("descriptions", [])
                             )
-                            self.notes.add(Note(text, self.tree, num_perfix=f'INDI_{self.fid}'))
+                            self.notes.add(
+                                Note(
+                                    text,
+                                    self.tree,
+                                    num_prefix=f'INDI_{self.fid}',
+                                    note_type='Person Note',
+                                ))
                         else:
                             self.memories.add(Memorie(x))
 
-    def add_fams(self, fams):
+    def add_fams(self, fam: 'Fam'):
         """add family fid (for spouse or parent)"""
-        self.fams_fid.add(fams)
+        self.fams.add(fam)
 
-    def add_famc(self, famc):
+    def add_famc(self, fam: 'Fam'):
         """add family fid (for child)"""
-        self.famc_fid.add(famc)
+        self.famc.add(fam)
 
     def get_notes(self):
         """retrieve individual notes"""
+        print(f'Getting Notes for {self.fid}', file=sys.stderr)
         notes = self.tree.fs.get_url("/platform/tree/persons/%s/notes" % self.fid)
         if notes:
             for n in notes["persons"][0]["notes"]:
                 text_note = "=== %s ===\n" % n["subject"] if "subject" in n else ""
                 text_note += n["text"] + "\n" if "text" in n else ""
-                self.notes.add(Note(text_note, self.tree, num_perfix=f'INDI_{self.fid}'))
+                self.notes.add(
+                    Note(
+                        text_note,
+                        self.tree,
+                        num_prefix=f'INDI_{self.fid}',
+                        note_type='Person Note',
+                    ))
 
     def get_ordinances(self):
         """retrieve LDS ordinances
@@ -468,11 +683,74 @@ class Indi:
                 if n.text == text:
                     self.notes.add(n)
                     return
-            self.notes.add(Note(text, self.tree, num_perfix=f'INDI_{self.fid}_CONTRIB'))
+            self.notes.add(Note(text, self.tree, num_prefix=f'INDI_{self.fid}_CONTRIB', note_type='Contribution Note'))
 
     @property
     def id(self):
         return self.fid or self.num
+    
+
+    @property
+    def handle(self):
+        if not self._handle:
+            self._handle = '_' + os.urandom(10).hex()
+
+        return self._handle
+
+    def printxml(self, parent_element):
+        """append this individual as a <person> child of parent_element
+        Writes the _FSFTID attribute, the names, the gender, the family links
+        (parentin / childof), the event references and the note references.
+        :param parent_element: the XML element receiving the <person> child
+        """
+
+        # <person handle="_fa593c2779e5ed1c947416cba9e" change="1720382301" id="IL43B-D2H">
+        #     <gender>M</gender>
+        #     <name type="Birth Name">
+        #         <first>József</first>
+        #         <surname>Cser</surname>
+        #         <noteref hlink="_fa593c2779f7c527e3afe4623b9"/>
+        #     </name>
+        #     <eventref hlink="_fa593c277a0712aa4241bbf47db" role="Primary"/>
+        #     <attribute type="_FSFTID" value="L43B-D2H"/>
+        #     <childof hlink="_fa593c277af212e6c1f9f44bc4a"/>
+        #     <parentin hlink="_fa593c277af72c83e0e3fbf6ed2"/>
+        #     <citationref hlink="_fa593c277b7715371c26d1b0a81"/>
+        # </person>
+        person = ET.SubElement(parent_element,
+                'person',
+                handle=self.handle,
+                # change='1720382301',
+                id='I' + str(self.id))
+        # written once, and only when a fid exists: ElementTree cannot
+        # serialize a None attribute value
+        if self.fid:
+            ET.SubElement(person, 'attribute', type='_FSFTID', value=self.fid)
+
+        if self.name:
+            self.name.printxml(person)
+        for name in self.nicknames | self.birthnames | self.aka | self.married:
+            name.printxml(person)
+
+        gender = ET.SubElement(person, 'gender')
+        gender.text = self.gender
+
+        for fam in self.fams:
+            ET.SubElement(person, 'parentin', hlink=fam.handle)
+
+        for fam in self.famc:
+            ET.SubElement(person, 'childof', hlink=fam.handle)
+
+        for fact in self.facts:
+            ET.SubElement(person, 'eventref', hlink=fact.handle, role='Primary')
+
+        # TODO citations
+        for note in self.notes:
+            ET.SubElement(person, 'noteref', hlink=note.handle)
+
+    #   <noteref hlink="_fac4a686369713d9cd55159ada9"/>
+    #   <citationref hlink="_fac4a72a01b1681293ea1ee8d9"/>
+
 
     def print(self, file=sys.stdout):
         """print individual in GEDCOM format"""
@@ -508,12 +786,15 @@ class Indi:
         if self.sealing_child:
             file.write("1 SLGC\n")
             self.sealing_child.print(file)
-        print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr)
-        for num in self.fams_ids:
-            file.write("1 FAMS @F%s@\n" % num)
-        print(f'Famc Ids: {self.famc_ids}', file=sys.stderr)
-        for num in self.famc_ids:
-            file.write("1 FAMC @F%s@\n" % num)
+        for fam in self.fams:
+            file.write("1 FAMS @F%s@\n" % fam.id)
+        for fam in self.famc:
+            file.write("1 FAMC @F%s@\n" % fam.id)
+        # print(f'Fams Ids: {self.fams_ids}, {self.fams_fid}, {self.fams_num}', file=sys.stderr)
+        # for num in self.fams_ids:
+        # print(f'Famc Ids: {self.famc_ids}', file=sys.stderr)
+        # for num in self.famc_ids:
+            # file.write("1 FAMC @F%s@\n" % num)
         file.write("1 _FSFTID %s\n" % self.fid)
         for o in self.notes:
             o.link(file)
@@ -533,29 +814,44 @@ class Fam:
 
     counter = 0
 
-    def __init__(self, husb=None, wife=None, tree=None, num=None):
-        if num:
-            self.num = num
-        else:
-            Fam.counter += 1
-            self.num = Fam.counter
-        self.husb_fid = husb if husb else None
-        self.wife_fid = wife if wife else None
+    def __init__(self, husband: Indi | None, wife: Indi | None, tree: 'Tree'):
+        self._handle = None
+        self.num = Fam.gen_id(husband, wife)
+        self.fid = None
+        self.husband = husband
+        self.wife = wife
         self.tree = tree
-        self.husb_num = self.wife_num = self.fid = None
-        self.facts = set()
+        self.children: Set[Indi] = set()
+        self.facts: Set[Fact] = set()
         self.sealing_spouse = None
-        self.chil_fid = set()
-        self.chil_num = set()
         self.notes = set()
         self.sources = set()
 
-    def add_child(self, child):
+    @property
+    def handle(self):
+        if not self._handle:
+            self._handle = '_' + os.urandom(10).hex()
+
+        return self._handle
+    
+    @staticmethod
+    def gen_id(husband: Indi | None, wife: Indi | None) -> str:
+        if husband and wife:
+            return f'FAM_{husband.id}-{wife.id}'
+        elif husband:
+            return f'FAM_{husband.id}-UNK'
+        elif wife:
+            return f'FAM_UNK-{wife.id}'
+        else:
+            Fam.counter += 1
+            return f'FAM_UNK-UNK-{Fam.counter}'
+
+    def add_child(self, child: Indi | None):
         """add a child fid to the family"""
-        if child not in self.chil_fid:
-            self.chil_fid.add(child)
+        if child is not None:
+            self.children.add(child)
 
-    def add_marriage(self, fid):
+    def add_marriage(self, fid: str):
         """retrieve and add marriage information
         :param fid: the marriage fid
         """
@@ -603,7 +899,7 @@ class Fam:
                 for n in notes["relationships"][0]["notes"]:
                     text_note = "=== %s ===\n" % n["subject"] if "subject" in n else ""
                     text_note += n["text"] + "\n" if "text" in n else ""
-                    self.notes.add(Note(text_note, self.tree, num_perfix=f'FAM_{self.fid}'))
+                    self.notes.add(Note(text_note, self.tree, num_prefix=f'FAM_{self.fid}', note_type='Marriage Note'))
 
     def get_contributors(self):
         """retrieve contributors"""
@@ -626,33 +922,44 @@ class Fam:
                     if n.text == text:
                         self.notes.add(n)
                         return
-                self.notes.add(Note(text, self.tree, num_perfix=f'FAM_{self.fid}_CONTRIB'))
+                self.notes.add(Note(text, self.tree, num_prefix=f'FAM_{self.fid}_CONTRIB', note_type='Contribution Note'))
 
     @property
     def id(self):
-        return self.fid if self.fid else self.num
-    
-    @property
-    def husband_id(self):
-        return self.husb_fid or self.husb_num
+        return self.num
     
-    @property
-    def wife_id(self):
-        return self.wife_fid or self.wife_num
-    
-    @property
-    def children_ids(self):
-        return self.chil_fid or self.chil_num
+    def printxml(self, parent_element):
+        # <family handle="_fa593c277af212e6c1f9f44bc4a" change="1720382301" id="F9MKP-K92">
+        #   <rel type="Unknown"/>
+        #   <father hlink="_fa593c277f14dc6db9ab19cbe09"/>
+        #   <mother hlink="_fa593c277cd4af15983d7064c59"/>
+        #   <childref hlink="_fa593c279e1466787c923487b98"/>
+        #   <attribute type="_FSFTID" value="9MKP-K92"/>
+        # </family>
+        family = ET.SubElement(parent_element, 
+                'family', 
+                handle=self.handle, 
+                # change='1720382301', 
+                id=self.id)
+        ET.SubElement(family, 'rel', type='Unknown')
+        if self.husband:
+            ET.SubElement(family, 'father', hlink=self.husband.handle)
+        if self.wife:
+            ET.SubElement(family, 'mother', hlink=self.wife.handle)
+        for child in self.children:
+            ET.SubElement(family, 'childref', hlink=child.handle)
+        for fact in self.facts:
+            ET.SubElement(family, 'eventref', hlink=fact.handle, role='Primary')
 
     def print(self, file=sys.stdout):
         """print family information in GEDCOM format"""
         file.write("0 @F%s@ FAM\n" % self.id)
-        if self.husb_num:
-            file.write("1 HUSB @I%s@\n" % self.husband_id)
-        if self.wife_num:
-            file.write("1 WIFE @I%s@\n" % self.wife_id)
-        for child_id in self.children_ids:
-            file.write("1 CHIL @I%s@\n" % child_id)
+        if self.husband:
+            file.write("1 HUSB @I%s@\n" % self.husband.id)
+        if self.wife:
+            file.write("1 WIFE @I%s@\n" % self.wife.id)
+        for child in self.children:
+            file.write("1 CHIL @I%s@\n" % child.id)
         for o in self.facts:
             o.print(file)
         if self.sealing_spouse:
@@ -673,20 +980,27 @@ class Tree:
     :param fs: a Session object
     """
 
-    def __init__(self, fs=None, exclude=None):
+    def __init__(self, fs: Optional[requests.Session]=None, exclude=None, geonames_key=None):
         self.fs = fs
-        self.indi = dict()
-        self.fam = dict()
+        self.geonames_key = geonames_key
+        self.indi: Dict[str, Indi] = dict()
+        self.fam: Dict[str, Fam] = dict()
         self.notes = list()
+        self.facts: Set[Fact] = set()
         self.sources = dict()
-        self.places = dict()
+        self.places: List[Place] = []
+        self.places_by_names: Dict[str, Place] = dict()
+        self.place_cache: Dict[str, Tuple[float, float]] = dict()
         self.display_name = self.lang = None
         self.exclude = exclude or []
+        self.place_counter = 0
         if fs:
             self.display_name = fs.display_name
             self.lang = babelfish.Language.fromalpha2(fs.lang).name
 
-    def add_indis(self, fids_in):
+        self.geosession = CachedSession('http_cache', backend='filesystem', expire_after=86400)
+
+    def add_indis(self, fids_in: List[str]):
         """add individuals to the family tree
         :param fids: an iterable of fid
         """
@@ -719,21 +1033,17 @@ class Tree:
             if data:
                 if "places" in data:
                     for place in data["places"]:
-                        if place["id"] not in self.places:
-                            self.places[place["id"]] = (
-                                str(place["latitude"]),
-                                str(place["longitude"]),
+                        if place["id"] not in self.place_cache:
+                            self.place_cache[place["id"]] = (
+                                place["latitude"],
+                                place["longitude"],
                             )
                 loop.run_until_complete(add_datas(loop, data))
                 if "childAndParentsRelationships" in data:
                     for rel in data["childAndParentsRelationships"]:
-                        father = (
-                            rel["parent1"]["resourceId"] if "parent1" in rel else None
-                        )
-                        mother = (
-                            rel["parent2"]["resourceId"] if "parent2" in rel else None
-                        )
-                        child = rel["child"]["resourceId"] if "child" in rel else None
+                        father: str | None = rel.get("parent1", {}).get("resourceId")
+                        mother: str | None = rel.get("parent2", {}).get("resourceId")
+                        child: str | None = rel.get("child", {}).get("resourceId")
                         if child in self.indi:
                             self.indi[child].parents.add((father, mother))
                         if father in self.indi:
@@ -756,30 +1066,111 @@ class Tree:
                                 )
             new_fids = new_fids[MAX_PERSONS:]
 
-    def add_fam(self, father, mother):
-        """add a family to the family tree
-        :param father: the father fid or None
-        :param mother: the mother fid or None
-        """
-        if (father, mother) not in self.fam:
-            self.fam[(father, mother)] = Fam(father, mother, self)
 
-    def add_trio(self, father, mother, child):
+    
+    def ensure_family(self, father: Optional['Indi'], mother: Optional['Indi']) -> Fam:
+        fam_id = Fam.gen_id(father, mother)
+        if fam_id not in self.fam:
+            self.fam[fam_id] = Fam(father, mother, self)
+        return self.fam[fam_id]
+
+
+    def place_by_geoname_id(self, id: str) -> Optional[Place]:
+        for place in self.places:
+            if place.id == id:
+                return place
+        return None
+
+    def get_by_geonames_id(self, geonames_id: str) -> Place:
+        print('Fetching place hierarchy for', geonames_id, file=sys.stderr)
+        hierarchy = geocoder.geonames(
+            geonames_id,
+            key=self.geonames_key,
+            lang=['hu', 'en', 'de'],
+            method='hierarchy',
+            session=self.geosession,
+        )
+
+        if hierarchy and hierarchy.ok:
+            last_place = None
+            for item in hierarchy.geojson.get('features', []):
+                properties = item.get('properties', {})
+                code = properties.get('code')
+                
+                if code in ['AREA', 'CONT']:
+                    continue
+                
+                print('Properties', properties, file=sys.stderr)
+                id = 'GEO' + str(properties['geonames_id'])
+                place = self.place_by_geoname_id(id)
+                if place is None:
+                    place = Place(
+                        id,
+                        properties.get('address'),
+                        GEONAME_FEATURE_MAP.get(code, 'Unknown'),
+                        last_place,
+                        properties.get('lat'),
+                        properties.get('lng')
+                    )
+                    self.places.append(place)
+                last_place = place
+            return last_place
+
+    @property        
+    def _next_place_counter(self):
+        self.place_counter += 1
+        return self.place_counter
+
+        
+    def ensure_place(self, place_name: str, fid: Optional[str] = None, coord: Optional[Tuple[float, float]] = None) -> Place:
+        if place_name not in self.places_by_names:
+            place = None
+            if self.geonames_key:
+                print('Fetching place', place_name, file=sys.stderr)
+                geoname_record = geocoder.geonames(
+                    place_name,
+                    key=self.geonames_key,
+                    session=self.geosession,
+                )
+                if geoname_record and geoname_record.ok:
+                    place = self.get_by_geonames_id(geoname_record.geonames_id)
+            if place is None:
+                coord = self.place_cache.get(fid) if coord is None else coord
+                place = Place(
+                    'PFSID' + fid if fid is not None else 'P' + str(self._next_place_counter),
+                    place_name,
+                    latitude=coord[0] if coord is not None else None,
+                    longitude=coord[1] if coord is not None else None
+                )
+                self.places.append(place)
+            self.places_by_names[place_name] = place
+        return self.places_by_names[place_name]
+
+    # def add_fam(self, father, mother):
+    #     """add a family to the family tree
+    #     :param father: the father fid or None
+    #     :param mother: the mother fid or None
+    #     """
+    #     if (father, mother) not in self.fam:
+    #         self.fam[(father, mother)] = Fam(father, mother, self)
+
+    def add_trio(self, father: Indi | None, mother: Indi | None, child: Indi | None):
         """add a children relationship to the family tree
         :param father: the father fid or None
         :param mother: the mother fid or None
         :param child: the child fid or None
         """
-        if father in self.indi:
-            self.indi[father].add_fams((father, mother))
-        if mother in self.indi:
-            self.indi[mother].add_fams((father, mother))
-        if child in self.indi and (father in self.indi or mother in self.indi):
-            self.indi[child].add_famc((father, mother))
-            self.add_fam(father, mother)
-            self.fam[(father, mother)].add_child(child)
-
-    def add_parents(self, fids):
+        fam = self.ensure_family(father, mother)
+        if child is not None:
+            fam.add_child(child)
+            child.add_famc(fam)
+        
+        if father is not None:
+            father.add_fams(fam)
+        if mother is not None:
+            mother.add_fams(fam)
+
+    def add_parents(self, fids: Set[str]):
         """add parents relationships
         :param fids: a set of fids
         """
@@ -799,27 +1190,32 @@ class Tree:
                     or not mother
                     and father in self.indi
                 ):
-                    self.add_trio(father, mother, fid)
+                    self.add_trio(
+                        self.indi.get(father), 
+                        self.indi.get(mother), 
+                        self.indi.get(fid),
+                    )
         return set(filter(None, parents))
 
-    def add_spouses(self, fids):
+    def add_spouses(self, fids: Set[str]):
         """add spouse relationships
         :param fids: a set of fid
         """
 
-        async def add(loop, rels):
+        async def add(loop, rels: Set[Tuple[str, str, str]]):
             futures = set()
             for father, mother, relfid in rels:
-                if (father, mother) in self.fam:
+                fam_id = Fam.gen_id(self.indi[father], self.indi[mother])
+                if self.fam.get(fam_id):
                     futures.add(
                         loop.run_in_executor(
-                            None, self.fam[(father, mother)].add_marriage, relfid
+                            None, self.fam[fam_id].add_marriage, relfid
                         )
                     )
             for future in futures:
                 await future
 
-        rels = set()
+        rels: Set[Tuple[str, str, str]] = set()
         for fid in fids & self.indi.keys():
             rels |= self.indi[fid].spouses
         loop = asyncio.get_event_loop()
@@ -829,16 +1225,19 @@ class Tree:
             )
             for father, mother, _ in rels:
                 if father in self.indi and mother in self.indi:
-                    self.indi[father].add_fams((father, mother))
-                    self.indi[mother].add_fams((father, mother))
-                    self.add_fam(father, mother)
+                    father_indi = self.indi[father]
+                    mother_indi = self.indi[mother]
+                    fam = self.ensure_family(father_indi, mother_indi)
+                    father_indi.add_fams(fam)
+                    mother_indi.add_fams(fam)
+
             loop.run_until_complete(add(loop, rels))
 
     def add_children(self, fids):
         """add children relationships
         :param fids: a set of fid
         """
-        rels = set()
+        rels: Set[Tuple[str, str, str]] = set()
         for fid in fids & self.indi.keys():
             rels |= self.indi[fid].children if fid in self.indi else set()
         children = set()
@@ -853,7 +1252,11 @@ class Tree:
                     or not mother
                     and father in self.indi
                 ):
-                    self.add_trio(father, mother, child)
+                    self.add_trio(
+                        self.indi.get(father),
+                        self.indi.get(mother),
+                        self.indi.get(child),
+                    )
                     children.add(child)
         return children
 
@@ -874,25 +1277,82 @@ class Tree:
 
     def reset_num(self):
         """reset all GEDCOM identifiers"""
-        for husb, wife in self.fam:
-            self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None
-            self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None
-            self.fam[(husb, wife)].chil_num = set(
-                self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid
-            )
-        for fid in self.indi:
-            self.indi[fid].famc_num = set(
-                self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid
-            )
-            self.indi[fid].fams_num = set(
-                self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid
-            )            
-            self.indi[fid].famc_ids = set(
-                self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid
-            )
-            self.indi[fid].fams_ids = set(
-                self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid
-            )
+        # for husb, wife in self.fam:
+        #     self.fam[(husb, wife)].husb_num = self.indi[husb].num if husb else None
+        #     self.fam[(husb, wife)].wife_num = self.indi[wife].num if wife else None
+        #     self.fam[(husb, wife)].chil_num = set(
+        #         self.indi[chil].num for chil in self.fam[(husb, wife)].chil_fid
+        #     )
+        # for fid in self.indi:
+        #     self.indi[fid].famc_num = set(
+        #         self.fam[(husb, wife)].num for husb, wife in self.indi[fid].famc_fid
+        #     )
+        #     self.indi[fid].fams_num = set(
+        #         self.fam[(husb, wife)].num for husb, wife in self.indi[fid].fams_fid
+        #     )            
+        #     self.indi[fid].famc_ids = set(
+        #         self.fam[(husb, wife)].id for husb, wife in self.indi[fid].famc_fid
+        #     )
+        #     self.indi[fid].fams_ids = set(
+        #         self.fam[(husb, wife)].id for husb, wife in self.indi[fid].fams_fid
+        #     )
+
+    def printxml(self, file: BinaryIO):
+
+#         root = ET.Element("root")
+#         doc = ET.SubElement(root, "doc")
+
+#         ET.SubElement(doc, "field1", name="blah").text = "some value1"
+#         ET.SubElement(doc, "field2", name="asdfasd").text = "some vlaue2"
+
+#         tree = ET.ElementTree(root)
+#         tree.write("filename.xml")
+
+#         <?xml version="1.0" encoding="UTF-8"?>
+# <!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN"
+# "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">
+# <database xmlns="http://gramps-project.org/xml/1.7.1/">
+#   <header
+#     <created date="2024-07-07" version="5.2.2"/>
+#     <researcher>
+#       <resname>Barnabás Südy</resname>
+#     </researcher>
+#   </header>
+
+        root = ET.Element("database", xmlns="http://gramps-project.org/xml/1.7.1/")
+
+        header = ET.SubElement(root, "header")
+        ET.SubElement(header, "created", date=datetime.strftime(datetime.now(), "%Y-%m-%d"), version="5.2.2")
+        researcher = ET.SubElement(header, "researcher")
+        resname = ET.SubElement(researcher, "resname")
+        resname.text = self.display_name
+
+        people = ET.SubElement(root, "people")
+        for indi in sorted(self.indi.values(), key=lambda x: x.num):
+            indi.printxml(people)
+
+        families = ET.SubElement(root, "families")
+        for fam in sorted(self.fam.values(), key=lambda x: x.num):
+            fam.printxml(families)
+
+        events = ET.SubElement(root, "events")
+        for fact in self.facts:
+            fact.printxml(events)
+
+        notes = ET.SubElement(root, "notes")
+        for note in sorted(self.notes, key=lambda x: x.id):
+            note.printxml(notes)
+
+        places = ET.SubElement(root, "places")
+        for place in self.places:
+            place.printxml(places)
+
+        tree = ET.ElementTree(root)
+
+        doctype='<!DOCTYPE database PUBLIC "-//Gramps//DTD Gramps XML 1.7.1//EN" "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">'
+        file.write(doctype.encode('utf-8'))
+        tree.write(file, 'utf-8')
+        
 
     def print(self, file=sys.stdout):
         """print family tree in GEDCOM format"""
@@ -913,8 +1373,8 @@ class Tree:
 
         for fid in sorted(self.indi, key=lambda x: self.indi.__getitem__(x).num):
             self.indi[fid].print(file)
-        for husb, wife in sorted(self.fam, key=lambda x: self.fam.__getitem__(x).num):
-            self.fam[(husb, wife)].print(file)
+        for fam in sorted(self.fam.values(), key=lambda x: x.num):
+            fam.print(file)
         sources = sorted(self.sources.values(), key=lambda x: x.num)
         for s in sources:
             s.print(file)
index d9404a73a98c13f00a0579ad93719e988e3c2cc8..bee789adb4c180ab82487b11cec5b942e5d8af14 100644 (file)
@@ -108,6 +108,14 @@ def main():
         default=60,
         help="Timeout in seconds [60]",
     )
+
+    parser.add_argument(
+        "-x",
+        "--xml",
+        action="store_true",
+        default=False,
+        help="To print the output in Gramps XML format [False]",
+    )
     parser.add_argument(
         "--show-password",
         action="store_true",
@@ -120,13 +128,20 @@ def main():
         default=False,
         help="Save settings into file [False]",
     )
+    parser.add_argument(
+        "-g",
+        "--geonames",
+        metavar="<STR>",
+        type=str,
+        help="Geonames.org username in order to download place data",
+    )
     try:
         parser.add_argument(
             "-o",
             "--outfile",
             metavar="<FILE>",
-            type=argparse.FileType("w", encoding="UTF-8"),
-            default=sys.stdout,
+            type=argparse.FileType("w", encoding="UTF-8"),
+            default=sys.stdout,
             help="output GEDCOM file [stdout]",
         )
         parser.add_argument(
@@ -202,7 +217,11 @@ def main():
     if not fs.logged:
         sys.exit(2)
     _ = fs._
-    tree = Tree(fs, exclude=args.exclude)
+    tree = Tree(
+        fs, 
+        exclude=args.exclude,
+        geonames_key=args.geonames,
+    )
 
     # check LDS account
     if args.get_ordinances:
@@ -320,7 +339,12 @@ def main():
     finally:
         # compute number for family relationships and print GEDCOM file
         tree.reset_num()
-        tree.print(args.outfile)
+        if args.xml:
+            with open(args.outfile, "wb") as f:
+                tree.printxml(f)
+        else:
+            with open(args.outfile, "w", encoding="UTF-8") as f:
+                tree.print(f)
         print(
             _(
                 "Downloaded %s individuals, %s families, %s sources and %s notes "
index eb48a3300da07c1dc9fdedc21aa2e30f4f688f76..b650a67a267728d06f675c4a56cbb7330f8cbc0e 100644 (file)
@@ -65,9 +65,7 @@ def main():
             fid = ged.indi[num].fid
             if fid not in tree.indi:
                 indi_counter += 1
-                tree.indi[fid] = Indi(tree=tree, num=indi_counter)
-                tree.indi[fid].tree = tree
-                tree.indi[fid].fid = ged.indi[num].fid
+                tree.indi[fid] = Indi(ged.indi[num].fid, tree, num=indi_counter)
             tree.indi[fid].fams_fid |= ged.indi[num].fams_fid
             tree.indi[fid].famc_fid |= ged.indi[num].famc_fid
             tree.indi[fid].name = ged.indi[num].name