Nutra Git (v1) - gamesguru/getmyancestors.git/commitdiff
add fam notes,
author linek <benoitfontaine.ba@gmail.com>
Fri, 14 Jul 2017 19:01:21 +0000 (21:01 +0200)
committer linek <benoitfontaine.ba@gmail.com>
Fri, 14 Jul 2017 19:01:21 +0000 (21:01 +0200)
update mergemyancestors

getmyancestors.py
mergemyancestors.py

index 001982a859d68a84fd8d7c7910e4e8e9f78e5879..9a83ed106484497141cb373555ef373c0042de3e 100755 (executable)
@@ -32,7 +32,7 @@ except ImportError:
     sys.stderr.write('(run this in your terminal: "python3 -m pip install requests" or "python3 -m pip install --user requests")\n')
     exit(2)
 
-list_notes = set() 
+list_notes = set()
 list_sources = set()
 
 
@@ -212,7 +212,7 @@ class Note:
 
     counter = 0
 
-    def __init__(self, text, num=None):
+    def __init__(self, text='', num=None):
         if num:
             self.num = num
         else:
@@ -233,7 +233,7 @@ class Source:
 
     counter = 0
 
-    def __init__(self, data, num=None):
+    def __init__(self, data=None, num=None):
         if num:
             self.num = num
         else:
@@ -241,20 +241,28 @@ class Source:
             self.num = Source.counter
         list_sources.add(self)
 
-        self.url = self.citation = self.title = None
+        self.url = self.citation = self.title = self.fid = None
         self.notes = set()
 
-        self.id = data['id']
-        if data['about']:
-            self.url = data['about']
-        if data['citations']:
-            self.citation = data['citations'][0]['value']
-        if data['titles']:
-            self.title = data['titles'][0]['value']
-        if data['notes']:
-            for n in data['notes']:
-                if n['text']:
-                    self.notes.add(Note(n['text']))
+        if data:  # data is omitted when a Source is created from its number only
+            self.fid = data['id']
+            if data['about']:
+                self.url = data['about']
+            if data['citations']:
+                self.citation = data['citations'][0]['value']
+            if data['titles']:
+                self.title = data['titles'][0]['value']
+            if data['notes']:
+                for n in data['notes']:
+                    if n['text']:
+                        self.notes.add(Note(n['text']))  # Note() accepts (text, num) only
+
+    def add_source(data=None):  # called on the class as Source.add_source(json), hence no self
+        if data:
+            for s in list_sources:  # reuse the Source already registered under this id
+                if s.fid == data['id']:
+                    return s
+            return Source(data)
 
     def print(self, file=sys.stdout):
         file.write('0 @S' + str(self.num) + '@ SOUR \n')
@@ -266,6 +274,7 @@ class Source:
             file.write('1 PUBL ' + self.url + '\n')
         for n in self.notes:
             n.link(file, 1)
+        file.write(('1 _FSFTID ' + self.fid + '\n') if self.fid else '')  # fid stays None for a Source built without data
 
     def link(self, file=sys.stdout, level=1):
         file.write(str(level) + ' SOUR @S' + str(self.num) + '@\n')
@@ -274,14 +283,16 @@ class Source:
 class Fact:
 
     def __init__(self, data=None):
-        self.value = data['value']
+        self.value = ''
         self.date = self.place = self.note = None
-        if 'date' in data:
-            self.date = data['date']['original']
-        if 'place' in data:
-            self.place = data['place']['original']
-        if 'changeMessage' in data['attribution']:
-            self.note = Note(data['attribution']['changeMessage'])
+        if data:
+            self.value = data['value']
+            if 'date' in data:
+                self.date = data['date']['original']
+            if 'place' in data:
+                self.place = data['place']['original']
+            if 'changeMessage' in data['attribution']:
+                self.note = Note(data['attribution']['changeMessage'])
 
 
 class Name:
@@ -292,18 +303,19 @@ class Name:
         self.prefix = None
         self.suffix = None
         self.note = None
-        if 'parts' in data['nameForms'][0]:
-            for z in data['nameForms'][0]['parts']:
-                if z['type'] == u'http://gedcomx.org/Given':
-                    self.given = z['value']
-                if z['type'] == u'http://gedcomx.org/Surname':
-                    self.surname = z['value']
-                if z['type'] == u'http://gedcomx.org/Prefix':
-                    self.prefix = z['value']
-                if z['type'] == u'http://gedcomx.org/Suffix':
-                    self.suffix = z['value']
-        if 'changeMessage' in data['attribution']:
-            self.note = Note(data['attribution']['changeMessage'])
+        if data:
+            if 'parts' in data['nameForms'][0]:
+                for z in data['nameForms'][0]['parts']:
+                    if z['type'] == u'http://gedcomx.org/Given':
+                        self.given = z['value']
+                    if z['type'] == u'http://gedcomx.org/Surname':
+                        self.surname = z['value']
+                    if z['type'] == u'http://gedcomx.org/Prefix':
+                        self.prefix = z['value']
+                    if z['type'] == u'http://gedcomx.org/Suffix':
+                        self.suffix = z['value']
+            if 'changeMessage' in data['attribution']:
+                self.note = Note(data['attribution']['changeMessage'])
 
     def print(self, file=sys.stdout, type=None):
         file.write('1 NAME ' + self.given + ' /' + self.surname + '/')
@@ -395,19 +407,11 @@ class Indi:
                 if 'sources' in x:
                     for y in x['sources']:
                         json = fs.get_url(y['links']['description']['href'])['sourceDescriptions'][0]
-                        source = None
-                        for s in list_sources:
-                            if s.id == json['id']:
-                                source = s
-                                break
-                        if source:
-                            self.sources.add(source)
+                        if 'changeMessage' in y['attribution']:
+                            self.sources.add((Source.add_source(json), y['attribution']['changeMessage']))
                         else:
-                            if 'changeMessage' in y['attribution']:
-                                self.sources.add((Source(json), y['attribution']['changeMessage']))
-                            else:
-                                self.sources.add((Source(json),))
-            self.parents = None
+                            self.sources.add((Source(json),))
+        self.parents = None
         self.children = None
         self.spouses = None
 
@@ -457,7 +461,8 @@ class Indi:
     # print individual information in GEDCOM format
     def print(self, file=sys.stdout):
         file.write('0 @I' + str(self.num) + '@ INDI\n')
-        self.name.print(file)
+        if self.name:
+            self.name.print(file)
         for o in self.nicknames:
             file.write('2 NICK ' + o.given + ' /' + o .surname + '/\n')
         for o in self.birthnames:
@@ -494,6 +499,10 @@ class Indi:
                 file.write('2 PLAC ' + self.buriplac + '\n')
         for o in self.physical_descriptions:
             file.write('1 DSCR ' + o.value + '\n')
+            if o.date:
+                file.write('2 DATE ' + o.date + '\n')
+            if o.place:
+                file.write('2 PLAC ' + o.place + '\n')
             if o.note:
                 o.note.link(file, 2)
         for num in self.fams_num:
@@ -533,6 +542,7 @@ class Fam:
         self.husb_num = self.wife_num = self.fid = self.marrdate = self.marrplac = None
         self.chil_fid = set()
         self.chil_num = set()
+        self.notes = set()
         self.sources = set()
 
     # add a child to the family
@@ -552,21 +562,17 @@ class Fam:
                 self.marrplac = x['place']['original'] if 'place' in x and 'original' in x['place'] else None
             else:
                 self.marrdate = self.marrplac = None
+            notes = fs.get_url(data['relationships'][0]['links']['notes']['href']) if data else None  # data can be falsy here; the sources check below guards for it too
+            if notes:
+                for n in notes['relationships'][0]['notes']:
+                    self.notes.add(Note('===' + n['subject'] + '===\n' + n['text'] + '\n'))
             if data and 'sources' in data['relationships'][0]:
                 for y in data['relationships'][0]['sources']:
                     json = fs.get_url(y['links']['description']['href'])['sourceDescriptions'][0]
-                    source = None
-                    for s in list_sources:
-                        if s.id == json['id']:
-                            source = s
-                            break
-                    if source:
-                        self.sources.add(source)
+                    if 'changeMessage' in y['attribution']:
+                        self.sources.add((Source.add_source(json), y['attribution']['changeMessage']))
                     else:
-                        if 'changeMessage' in y['attribution']:
-                            self.sources.add((Source(json), y['attribution']['changeMessage']))
-                        else:
-                            self.sources.add((Source(json),))
+                        self.sources.add((Source(json),))
 
     # print family information in GEDCOM format
     def print(self, file=sys.stdout):
@@ -585,11 +591,14 @@ class Fam:
                 file.write('2 PLAC ' + self.marrplac + '\n')
         if self.fid:
             file.write('1 _FSFTID ' + self.fid + '\n')
+        for o in self.notes:
+            o.link(file)
         for o in self.sources:
             o[0].link(file, 1)
             if len(o) > 1:
                 file.write('2 PAGE ' + o[1] + '\n')
 
+
 # family tree class
 class Tree:
     def __init__(self, fs=None):
@@ -669,10 +678,18 @@ class Tree:
             self.indi[fid].print(file)
         for husb, wife in sorted(self.fam, key=lambda x: self.fam.__getitem__(x).num):
             self.fam[(husb, wife)].print(file)
-        for n in list_notes:
+        notes = sorted(list_notes, key=lambda x: x.num)  # sorting puts equal nums next to each other
+        for i, n in enumerate(notes):
+            if i > 0:
+                if n.num == notes[i - 1].num:  # print one record per note number
+                    continue
             n.print(file)
-        for s in list_sources:
-            s.print(file)
+        sources = sorted(list_sources, key=lambda x: x.num)  # sorting puts equal nums next to each other
+        for i, s in enumerate(sources):
+            if i > 0:
+                if s.num == sources[i - 1].num:  # print one record per source number
+                    continue
+            s.print(file)
         file.write('0 TRLR\n')
 
 
@@ -744,4 +763,3 @@ if __name__ == '__main__':
     # compute number for family relationships and print GEDCOM file
     tree.reset_num()
     tree.print(args.o)
\ No newline at end of file
index 4719732999d6433c5cb797cb3384e077edf1e5de..dd4e476d71a3d1f9acca228926189905d2f94c49 100755 (executable)
 from __future__ import print_function
 
 # global import
-import os, sys, argparse
+import os
+import sys
+import argparse
 
 # local import
+from getmyancestors import Indi, Fam, Tree, Name, Note, Fact, Source, list_notes, list_sources
+
 sys.path.append(os.path.dirname(sys.argv[0]))
-from getmyancestors import Indi
-from getmyancestors import Fam
-from getmyancestors import Tree
+
 
 class Gedcom:
 
@@ -42,19 +44,31 @@ class Gedcom:
         self.flag = False
         self.indi = dict()
         self.fam = dict()
+        self.note = dict()
+        self.sour = dict()
         self.__parse()
         self.__add_id()
 
     def __parse(self):
         while self.__get_line():
             if self.tag == 'INDI':
-                self.num = int(self.pointer[2:len(self.pointer)-1])
-                self.indi[self.num] = Indi(num = self.num)
+                self.num = int(self.pointer[2:len(self.pointer) - 1])
+                self.indi[self.num] = Indi(num=self.num)
                 self.__get_indi()
             elif self.tag == 'FAM':
-                self.num = int(self.pointer[2:len(self.pointer)-1])
-                self.fam[self.num] = Fam(num = self.num)
+                self.num = int(self.pointer[2:len(self.pointer) - 1])
+                self.fam[self.num] = Fam(num=self.num)
                 self.__get_fam()
+            elif self.tag == 'NOTE':
+                self.num = int(self.pointer[2:len(self.pointer) - 1])
+                if self.num not in self.note:
+                    self.note[self.num] = Note(num=self.num)
+                self.__get_note()
+            elif self.tag == 'SOUR':
+                self.num = int(self.pointer[2:len(self.pointer) - 1])
+                if self.num not in self.sour:
+                    self.sour[self.num] = Source(num=self.num)
+                self.__get_source()
             else:
                 continue
 
@@ -71,7 +85,7 @@ class Gedcom:
         if words[1][0] == '@':
             self.pointer = words[1]
             self.tag = words[2]
-            self.data = None
+            self.data = ' '.join(words[3:])
         else:
             self.pointer = None
             self.tag = words[1]
@@ -81,9 +95,7 @@ class Gedcom:
     def __get_indi(self):
         while self.f and self.__get_line() and self.level > 0:
             if self.tag == 'NAME':
-                name = self.data.split('/')
-                self.indi[self.num].given = name[0].strip()
-                self.indi[self.num].surname = name[1].strip()
+                self.__get_name()
             elif self.tag == 'SEX':
                 self.indi[self.num].gender = self.data
             elif self.tag == 'BIRT':
@@ -94,26 +106,120 @@ class Gedcom:
                 self.__get_deat()
             elif self.tag == 'BURI':
                 self.__get_buri()
+            elif self.tag == 'DSCR' or self.tag == 'OCCU':
+                self.__get_fact()
             elif self.tag == 'FAMS':
-                self.indi[self.num].fams_num.add(int(self.data[2:len(self.data)-1]))
+                self.indi[self.num].fams_num.add(int(self.data[2:len(self.data) - 1]))
             elif self.tag == 'FAMC':
-                self.indi[self.num].famc_num.add(int(self.data[2:len(self.data)-1]))
+                self.indi[self.num].famc_num.add(int(self.data[2:len(self.data) - 1]))
             elif self.tag == '_FSFTID':
                 self.indi[self.num].fid = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:len(self.data) - 1])
+                self.note[num] = self.note[num] if num in self.note else Note(num=num)  # reuse: several records may point to one note
+                self.indi[self.num].notes.add(self.note[num])
+            elif self.tag == 'SOUR':
+                num = int(self.data[2:len(self.data) - 1])
+                self.sour[num] = self.sour[num] if num in self.sour else Source(num=num)  # reuse: several records may cite one source
+                self.indi[self.num].sources.add((self.sour[num],))  # sources are kept as tuples whose first item is the Source
         self.flag = True
 
     def __get_fam(self):
         while self.__get_line() and self.level > 0:
             if self.tag == 'HUSB':
-                self.fam[self.num].husb_num = int(self.data[2:len(self.data)-1])
+                self.fam[self.num].husb_num = int(self.data[2:len(self.data) - 1])
             elif self.tag == 'WIFE':
-                self.fam[self.num].wife_num = int(self.data[2:len(self.data)-1])
+                self.fam[self.num].wife_num = int(self.data[2:len(self.data) - 1])
             elif self.tag == 'CHIL':
-                self.fam[self.num].chil_num.add(int(self.data[2:len(self.data)-1]))
+                self.fam[self.num].chil_num.add(int(self.data[2:len(self.data) - 1]))
             elif self.tag == 'MARR':
                 self.__get_marr()
             elif self.tag == '_FSFTID':
                 self.fam[self.num].fid = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:len(self.data) - 1])
+                self.note[num] = self.note[num] if num in self.note else Note(num=num)  # reuse: several records may point to one note
+                self.fam[self.num].notes.add(self.note[num])
+            elif self.tag == 'SOUR':
+                num = int(self.data[2:len(self.data) - 1])
+                self.sour[num] = self.sour[num] if num in self.sour else Source(num=num)  # reuse: several records may cite one source
+                self.fam[self.num].sources.add((self.sour[num],))  # sources are kept as tuples whose first item is the Source
+        self.flag = True
+
+    def __get_note(self):
+        self.note[self.num].text = self.data
+        while self.__get_line() and self.level > 0:
+            if self.tag == 'CONT':
+                self.note[self.num].text += '\n' + self.data
+        self.flag = True
+
+    def __get_source(self):  # read the sub-records of the SOUR record numbered self.num
+        while self.__get_line() and self.level > 0:
+            if self.tag == 'TITL':
+                self.sour[self.num].title = self.data
+            elif self.tag == 'AUTH':
+                self.sour[self.num].citation = self.data
+            elif self.tag == 'PUBL':  # PUBL carries the source url
+                self.sour[self.num].url = self.data
+            elif self.tag == '_FSFTID':
+                self.sour[self.num].fid = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:len(self.data) - 1])
+                self.note[num] = self.note[num] if num in self.note else Note(num=num)  # reuse an already registered note
+                self.sour[self.num].notes.add(self.note[num])
+        self.flag = True
+
+    def __get_fact(self):  # read a DSCR or OCCU fact and its DATE/PLAC/NOTE sub-records
+        fact = Fact()
+        fact.value = self.data
+        if self.tag == 'DSCR':
+            self.indi[self.num].physical_descriptions.add(fact)
+        elif self.tag == 'OCCU':
+            self.indi[self.num].occupations.add(fact)
+        while self.__get_line() and self.level > 1:
+            if self.tag == 'DATE':
+                fact.date = self.data
+            elif self.tag == 'PLAC':
+                fact.place = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:len(self.data) - 1])
+                self.note[num] = self.note[num] if num in self.note else Note(num=num)  # reuse an already registered note
+                fact.note = self.note[num]
+        self.flag = True
+
+    def __get_name(self):  # read a NAME record ("given /surname/ suffix") and its sub-records
+        parts = self.data.split('/') + ['', '']  # pad so a name written without slashes still has three parts
+        name = Name()
+        added = False
+        name.given = parts[0].strip()
+        name.surname = parts[1].strip()
+        if parts[2].strip():
+            name.suffix = parts[2].strip()
+        if not self.indi[self.num].name:  # the first NAME of an individual becomes the main name
+            self.indi[self.num].name = name
+            added = True
+        while self.__get_line() and self.level > 1:
+            if self.tag == 'NPFX':
+                name.prefix = self.data
+            elif self.tag == 'TYPE':
+                if self.data == 'aka':
+                    self.indi[self.num].aka.add(name)
+                    added = True
+                elif self.data == 'married':
+                    self.indi[self.num].married.add(name)
+                    added = True
+            elif self.tag == 'NICK':
+                nick = Name()
+                parts = self.data.split('/') + ['']  # pad so a nickname without slashes has a surname slot
+                nick.given = parts[0].strip()  # NICK is written as "given /surname/", so trim the separator space
+                nick.surname = parts[1].strip()
+                self.indi[self.num].nicknames.add(nick)
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:len(self.data) - 1])
+                self.note[num] = self.note[num] if num in self.note else Note(num=num)  # reuse an already registered note
+                name.note = self.note[num]
+        if not added:  # a later NAME without an aka/married TYPE is kept as a birth name
+            self.indi[self.num].birthnames.add(name)
         self.flag = True
 
     def __get_birt(self):
@@ -169,14 +275,13 @@ class Gedcom:
                 self.indi[num].famc_fid.add((self.fam[famc].husb_fid, self.fam[famc].wife_fid))
             for fams in self.indi[num].fams_num:
                 self.indi[num].fams_fid.add((self.fam[fams].husb_fid, self.fam[fams].wife_fid))
-            
-        
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Merge GEDCOM data from FamilySearch Tree (4 Jul 2016)', add_help=False, usage='mergemyancestors.py -i input1.ged input2.ged ... [options]')
     try:
-        parser.add_argument('-i', metavar = '<FILE>', nargs = '+', type = argparse.FileType('r', encoding='UTF-8'), default = sys.stdin, help = 'input GEDCOM files [stdin]')
-        parser.add_argument('-o', metavar = '<FILE>', nargs = '?', type = argparse.FileType('w', encoding='UTF-8'), default = sys.stdout, help = 'output GEDCOM files [stdout]')
+        parser.add_argument('-i', metavar='<FILE>', nargs='+', type=argparse.FileType('r', encoding='UTF-8'), default=sys.stdin, help='input GEDCOM files [stdin]')
+        parser.add_argument('-o', metavar='<FILE>', nargs='?', type=argparse.FileType('w', encoding='UTF-8'), default=sys.stdout, help='output GEDCOM files [stdout]')
     except TypeError:
         sys.stderr.write('Python >= 3.4 is required to run this script\n')
         sys.stderr.write('(see https://docs.python.org/3/whatsnew/3.4.html#argparse)\n')
@@ -194,6 +299,8 @@ if __name__ == '__main__':
 
     indi_counter = 0
     fam_counter = 0
+    note_counter = 0
+    temp_note = None
 
     # read the GEDCOM data
     for file in args.i:
@@ -204,12 +311,15 @@ if __name__ == '__main__':
             fid = ged.indi[num].fid
             if fid not in tree.indi:
                 indi_counter += 1
-                tree.indi[fid] = Indi(num = indi_counter)
+                tree.indi[fid] = Indi(num=indi_counter)
                 tree.indi[fid].fid = ged.indi[num].fid
             tree.indi[fid].fams_fid |= ged.indi[num].fams_fid
             tree.indi[fid].famc_fid |= ged.indi[num].famc_fid
-            tree.indi[fid].given = ged.indi[num].given
-            tree.indi[fid].surname = ged.indi[num].surname
+            tree.indi[fid].name = ged.indi[num].name
+            tree.indi[fid].birthnames = ged.indi[num].birthnames
+            tree.indi[fid].nicknames = ged.indi[num].nicknames
+            tree.indi[fid].aka = ged.indi[num].aka
+            tree.indi[fid].married = ged.indi[num].married
             tree.indi[fid].gender = ged.indi[num].gender
             tree.indi[fid].birtdate = ged.indi[num].birtdate
             tree.indi[fid].birtplac = ged.indi[num].birtplac
@@ -219,6 +329,10 @@ if __name__ == '__main__':
             tree.indi[fid].deatplac = ged.indi[num].deatplac
             tree.indi[fid].buridate = ged.indi[num].buridate
             tree.indi[fid].buriplac = ged.indi[num].buriplac
+            tree.indi[fid].physical_descriptions = ged.indi[num].physical_descriptions
+            tree.indi[fid].occupations = ged.indi[num].occupations
+            tree.indi[fid].notes = ged.indi[num].notes
+            tree.indi[fid].sources = ged.indi[num].sources
 
         # add informations about families
         for num in ged.fam:
@@ -230,6 +344,30 @@ if __name__ == '__main__':
             tree.fam[(husb, wife)].fid = ged.fam[num].fid
             tree.fam[(husb, wife)].marrdate = ged.fam[num].marrdate
             tree.fam[(husb, wife)].marrplac = ged.fam[num].marrplac
+            tree.fam[(husb, wife)].notes = ged.fam[num].notes
+            tree.fam[(husb, wife)].sources = ged.fam[num].sources
+
+    # merge notes by text
+    list_notes = sorted(list_notes, key=lambda x: x.text)
+    for i, n in enumerate(list_notes):
+        if i == 0:
+            n.num = 1
+            continue
+        if n.text == list_notes[i - 1].text:
+            n.num = list_notes[i - 1].num
+        else:
+            n.num = list_notes[i - 1].num + 1
+
+    # merge sources by fid
+    list_sources = sorted(list_sources, key=lambda x: x.fid or '')  # fid may be None, which cannot be ordered against str
+    for i, n in enumerate(list_sources):
+        if i == 0:
+            n.num = 1
+            continue
+        if n.fid and n.fid == list_sources[i - 1].fid:  # sources without a fid are never merged with each other
+            n.num = list_sources[i - 1].num
+        else:
+            n.num = list_sources[i - 1].num + 1
 
     # compute number for family relationships and print GEDCOM file
     tree.reset_num()