From 88e3b62640a0a106cb375aab2571d6da54dc7fb3 Mon Sep 17 00:00:00 2001 From: linek Date: Fri, 14 Jul 2017 21:01:21 +0200 Subject: [PATCH] add fam notes, update mergemyancestors --- getmyancestors.py | 138 ++++++++++++++++++-------------- mergemyancestors.py | 186 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 240 insertions(+), 84 deletions(-) diff --git a/getmyancestors.py b/getmyancestors.py index 001982a..9a83ed1 100755 --- a/getmyancestors.py +++ b/getmyancestors.py @@ -32,7 +32,7 @@ except ImportError: sys.stderr.write('(run this in your terminal: "python3 -m pip install requests" or "python3 -m pip install --user requests")\n') exit(2) -list_notes = set() +list_notes = set() list_sources = set() @@ -212,7 +212,7 @@ class Note: counter = 0 - def __init__(self, text, num=None): + def __init__(self, text='', num=None): if num: self.num = num else: @@ -233,7 +233,7 @@ class Source: counter = 0 - def __init__(self, data, num=None): + def __init__(self, data=None, num=None): if num: self.num = num else: @@ -241,20 +241,28 @@ class Source: self.num = Source.counter list_sources.add(self) - self.url = self.citation = self.title = None + self.url = self.citation = self.title = self.fid = None self.notes = set() - self.id = data['id'] - if data['about']: - self.url = data['about'] - if data['citations']: - self.citation = data['citations'][0]['value'] - if data['titles']: - self.title = data['titles'][0]['value'] - if data['notes']: - for n in data['notes']: - if n['text']: - self.notes.add(Note(n['text'])) + if data: + self.fid = data['id'] + if data['about']: + self.url = data['about'] + if data['citations']: + self.citation = data['citations'][0]['value'] + if data['titles']: + self.title = data['titles'][0]['value'] + if data['notes']: + for n in data['notes']: + if n['text']: + self.notes.add(Note(n['text'], fid=n['id'])) + + def add_source(data=None): + if data: + for s in list_sources: + if s.fid == data[fid]: + return s + return Source(data) def print(self, file=sys.stdout): file.write('0 @S' + str(self.num) + '@ SOUR \n') @@ -266,6 +274,7 @@ class Source: file.write('1 PUBL ' + self.url + '\n') for n in self.notes: n.link(file, 1) + file.write('1 _FSFTID ' + self.fid + '\n') def link(self, file=sys.stdout, level=1): file.write(str(level) + ' SOUR @S' + str(self.num) + '@\n') @@ -274,14 +283,16 @@ class Source: class Fact: def __init__(self, data=None): - self.value = data['value'] + self.value = '' self.date = self.place = self.note = None - if 'date' in data: - self.date = data['date']['original'] - if 'place' in data: - self.place = data['place']['original'] - if 'changeMessage' in data['attribution']: - self.note = Note(data['attribution']['changeMessage']) + if data: + self.value = data['value'] + if 'date' in data: + self.date = data['date']['original'] + if 'place' in data: + self.place = data['place']['original'] + if 'changeMessage' in data['attribution']: + self.note = Note(data['attribution']['changeMessage']) class Name: @@ -292,18 +303,19 @@ class Name: self.prefix = None self.suffix = None self.note = None - if 'parts' in data['nameForms'][0]: - for z in data['nameForms'][0]['parts']: - if z['type'] == u'http://gedcomx.org/Given': - self.given = z['value'] - if z['type'] == u'http://gedcomx.org/Surname': - self.surname = z['value'] - if z['type'] == u'http://gedcomx.org/Prefix': - self.prefix = z['value'] - if z['type'] == u'http://gedcomx.org/Suffix': - self.suffix = z['value'] - if 'changeMessage' in data['attribution']: - self.note = Note(data['attribution']['changeMessage']) + if data: + if 'parts' in data['nameForms'][0]: + for z in data['nameForms'][0]['parts']: + if z['type'] == u'http://gedcomx.org/Given': + self.given = z['value'] + if z['type'] == u'http://gedcomx.org/Surname': + self.surname = z['value'] + if z['type'] == u'http://gedcomx.org/Prefix': + self.prefix = z['value'] + if z['type'] == u'http://gedcomx.org/Suffix': + self.suffix = z['value'] + if 'changeMessage' in data['attribution']: + self.note = Note(data['attribution']['changeMessage']) def print(self, file=sys.stdout, type=None): file.write('1 NAME ' + self.given + ' /' + self.surname + '/') @@ -395,19 +407,11 @@ class Indi: if 'sources' in x: for y in x['sources']: json = fs.get_url(y['links']['description']['href'])['sourceDescriptions'][0] - source = None - for s in list_sources: - if s.id == json['id']: - source = s - break - if source: - self.sources.add(source) + if 'changeMessage' in y['attribution']: + self.sources.add((Source.add_source(json), y['attribution']['changeMessage'])) else: - if 'changeMessage' in y['attribution']: - self.sources.add((Source(json), y['attribution']['changeMessage'])) - else: - self.sources.add((Source(json),)) - self.parents = None + self.sources.add((Source(json),)) + self.parents = None self.children = None self.spouses = None @@ -457,7 +461,8 @@ class Indi: # print individual information in GEDCOM format def print(self, file=sys.stdout): file.write('0 @I' + str(self.num) + '@ INDI\n') - self.name.print(file) + if self.name: + self.name.print(file) for o in self.nicknames: file.write('2 NICK ' + o.given + ' /' + o .surname + '/\n') for o in self.birthnames: @@ -494,6 +499,10 @@ class Indi: file.write('2 PLAC ' + self.buriplac + '\n') for o in self.physical_descriptions: file.write('1 DSCR ' + o.value + '\n') + if o.date: + file.write('2 DATE ' + o.date + '\n') + if o.place: + file.write('2 PLAC ' + o.place + '\n') if o.note: o.note.link(file, 2) for num in self.fams_num: @@ -533,6 +542,7 @@ class Fam: self.husb_num = self.wife_num = self.fid = self.marrdate = self.marrplac = None self.chil_fid = set() self.chil_num = set() + self.notes = set() self.sources = set() # add a child to the family @@ -552,21 +562,17 @@ class Fam: self.marrplac = x['place']['original'] if 'place' in x and 'original' in x['place'] else None else: self.marrdate = self.marrplac = None + notes = fs.get_url(data['relationships'][0]['links']['notes']['href']) + if notes: + for n in notes['relationships'][0]['notes']: + self.notes.add(Note('===' + n['subject'] + '===\n' + n['text'] + '\n')) if data and 'sources' in data['relationships'][0]: for y in data['relationships'][0]['sources']: json = fs.get_url(y['links']['description']['href'])['sourceDescriptions'][0] - source = None - for s in list_sources: - if s.id == json['id']: - source = s - break - if source: - self.sources.add(source) + if 'changeMessage' in y['attribution']: + self.sources.add((Source.add_source(json), y['attribution']['changeMessage'])) else: - if 'changeMessage' in y['attribution']: - self.sources.add((Source(json), y['attribution']['changeMessage'])) - else: - self.sources.add((Source(json),)) + self.sources.add((Source(json),)) # print family information in GEDCOM format def print(self, file=sys.stdout): @@ -585,11 +591,14 @@ class Fam: file.write('2 PLAC ' + self.marrplac + '\n') if self.fid: file.write('1 _FSFTID ' + self.fid + '\n') + for o in self.notes: + o.link(file) for o in self.sources: o[0].link(file, 1) if len(o) > 1: file.write('2 PAGE ' + o[1] + '\n') + # family tree class class Tree: def __init__(self, fs=None): @@ -669,10 +678,20 @@ class Tree: self.indi[fid].print(file) for husb, wife in sorted(self.fam, key=lambda x: self.fam.__getitem__(x).num): self.fam[(husb, wife)].print(file) - for n in list_notes: + notes = sorted(list_notes, key=lambda x: x.num) + for i, n in enumerate(notes): + if i > 0: + if n.num == notes[i - 1].num: + continue n.print(file) for s in list_sources: s.print(file) + sources = sorted(list_sources, key=lambda x: x.num) + for i, s in enumerate(sources): + if i > 0: + if s.num == sources[i - 1].num: + continue + s.print(file) file.write('0 TRLR\n') @@ -744,4 +763,3 @@ if __name__ == '__main__': # compute number for family relationships and print GEDCOM file tree.reset_num() tree.print(args.o) - \ No newline at end of file diff --git a/mergemyancestors.py b/mergemyancestors.py index 4719732..dd4e476 100755 --- a/mergemyancestors.py +++ b/mergemyancestors.py @@ -22,13 +22,15 @@ from __future__ import print_function # global import -import os, sys, argparse +import os +import sys +import argparse # local import +from getmyancestors import Indi, Fam, Tree, Name, Note, Fact, Source, list_notes, list_sources + sys.path.append(os.path.dirname(sys.argv[0])) -from getmyancestors import Indi -from getmyancestors import Fam -from getmyancestors import Tree + class Gedcom: @@ -42,19 +44,31 @@ class Gedcom: self.flag = False self.indi = dict() self.fam = dict() + self.note = dict() + self.sour = dict() self.__parse() self.__add_id() def __parse(self): while self.__get_line(): if self.tag == 'INDI': - self.num = int(self.pointer[2:len(self.pointer)-1]) - self.indi[self.num] = Indi(num = self.num) + self.num = int(self.pointer[2:len(self.pointer) - 1]) + self.indi[self.num] = Indi(num=self.num) self.__get_indi() elif self.tag == 'FAM': - self.num = int(self.pointer[2:len(self.pointer)-1]) - self.fam[self.num] = Fam(num = self.num) + self.num = int(self.pointer[2:len(self.pointer) - 1]) + self.fam[self.num] = Fam(num=self.num) self.__get_fam() + elif self.tag == 'NOTE': + self.num = int(self.pointer[2:len(self.pointer) - 1]) + if self.num not in self.note: + self.note[self.num] = Note(num=self.num) + self.__get_note() + elif self.tag == 'SOUR': + self.num = int(self.pointer[2:len(self.pointer) - 1]) + if self.num not in self.sour: + self.sour[self.num] = Source(num=self.num) + self.__get_source() else: continue @@ -71,7 +85,7 @@ class Gedcom: if words[1][0] == '@': self.pointer = words[1] self.tag = words[2] - self.data = None + self.data = ' '.join(words[3:]) else: self.pointer = None self.tag = words[1] @@ -81,9 +95,7 @@ class Gedcom: def __get_indi(self): while self.f and self.__get_line() and self.level > 0: if self.tag == 'NAME': - name = self.data.split('/') - self.indi[self.num].given = name[0].strip() - self.indi[self.num].surname = name[1].strip() + self.__get_name() elif self.tag == 'SEX': self.indi[self.num].gender = self.data elif self.tag == 'BIRT': @@ -94,26 +106,120 @@ class Gedcom: self.__get_deat() elif self.tag == 'BURI': self.__get_buri() + elif self.tag == 'DSCR' or self.tag == 'OCCU': + self.__get_fact() elif self.tag == 'FAMS': - self.indi[self.num].fams_num.add(int(self.data[2:len(self.data)-1])) + self.indi[self.num].fams_num.add(int(self.data[2:len(self.data) - 1])) elif self.tag == 'FAMC': - self.indi[self.num].famc_num.add(int(self.data[2:len(self.data)-1])) + self.indi[self.num].famc_num.add(int(self.data[2:len(self.data) - 1])) elif self.tag == '_FSFTID': self.indi[self.num].fid = self.data + elif self.tag == 'NOTE': + num = int(self.data[2:len(self.data) - 1]) + self.note[num] = Note(num=num) + self.indi[self.num].notes.add(self.note[num]) + elif self.tag == 'SOUR': + num = int(self.data[2:len(self.data) - 1]) + self.sour[num] = Source(num=num) + self.indi[self.num].notes.add(self.note[num]) self.flag = True def __get_fam(self): while self.__get_line() and self.level > 0: if self.tag == 'HUSB': - self.fam[self.num].husb_num = int(self.data[2:len(self.data)-1]) + self.fam[self.num].husb_num = int(self.data[2:len(self.data) - 1]) elif self.tag == 'WIFE': - self.fam[self.num].wife_num = int(self.data[2:len(self.data)-1]) + self.fam[self.num].wife_num = int(self.data[2:len(self.data) - 1]) elif self.tag == 'CHIL': - self.fam[self.num].chil_num.add(int(self.data[2:len(self.data)-1])) + self.fam[self.num].chil_num.add(int(self.data[2:len(self.data) - 1])) elif self.tag == 'MARR': self.__get_marr() elif self.tag == '_FSFTID': self.fam[self.num].fid = self.data + elif self.tag == 'NOTE': + num = int(self.data[2:len(self.data) - 1]) + self.note[num] = Note(num=num) + self.fam[self.num].notes.add(self.note[num]) + elif self.tag == 'SOUR': + num = int(self.data[2:len(self.data) - 1]) + self.sour[num] = Source(num=num) + self.fam[self.num].notes.add(self.note[num]) + self.flag = True + + def __get_note(self): + self.note[self.num].text = self.data + while self.__get_line() and self.level > 0: + if self.tag == 'CONT': + self.note[self.num].text += '\n' + self.data + self.flag = True + + def __get_source(self): + while self.__get_line() and self.level > 0: + if self.tag == 'TITL': + self.sour[self.num].title = self.data + elif self.tag == 'AUTH': + self.sour[self.num].citation = self.data + elif self.sour == 'PUBL': + self.url = self.data + elif self.tag == '_FSFTID': + self.fid = self.data + elif self.tag == 'NOTE': + num = int(self.data[2:len(self.data) - 1]) + self.note[num] = Note(num=num) + self.notes.add(self.note[num]) + self.flag = True + + def __get_fact(self): + fact = Fact() + fact.value = self.data + if self.tag == 'DSCR': + self.indi[self.num].physical_descriptions.add(fact) + elif self.tag == 'OCCU': + self.indi[self.num].occupations.add(fact) + while self.__get_line() and self.level > 1: + if self.tag == 'DATE': + fact.date = self.data + elif self.tag == 'PLAC': + fact.place = self.data + elif self.tag == 'NOTE': + num = int(self.data[2:len(self.data) - 1]) + self.note[num] = Note(num=num) + fact.note = (self.note[num]) + self.flag = True + + def __get_name(self): + parts = self.data.split('/') + name = Name() + added = False + name.given = parts[0].strip() + name.surname = parts[1].strip() + if parts[2]: + name.suffix = parts[2] + if not self.indi[self.num].name: + self.indi[self.num].name = name + added = True + while self.__get_line() and self.level > 1: + if self.tag == 'NPFX': + name.prefix = self.data + elif self.tag == 'TYPE': + if self.data == 'aka': + self.indi[self.num].aka.add(name) + added = True + elif self.data == 'married': + self.indi[self.num].married.add(name) + added = True + elif self.tag == 'NICK': + nick = Name() + parts = self.data.split('/') + nick.given = parts[0] + nick.surname = parts[1] + self.indi[self.num].nicknames.add(nick) + elif self.tag == 'NOTE': + num = int(self.data[2:len(self.data) - 1]) + self.note[num] = Note(num=num) + name.note = self.note[num] + if not added: + self.indi[self.num].birthnames.add(name) self.flag = True def __get_birt(self): @@ -169,14 +275,13 @@ class Gedcom: self.indi[num].famc_fid.add((self.fam[famc].husb_fid, self.fam[famc].wife_fid)) for fams in self.indi[num].fams_num: self.indi[num].fams_fid.add((self.fam[fams].husb_fid, self.fam[fams].wife_fid)) - - + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Merge GEDCOM data from FamilySearch Tree (4 Jul 2016)', add_help=False, usage='mergemyancestors.py -i input1.ged input2.ged ... [options]') try: - parser.add_argument('-i', metavar = '', nargs = '+', type = argparse.FileType('r', encoding='UTF-8'), default = sys.stdin, help = 'input GEDCOM files [stdin]') - parser.add_argument('-o', metavar = '', nargs = '?', type = argparse.FileType('w', encoding='UTF-8'), default = sys.stdout, help = 'output GEDCOM files [stdout]') + parser.add_argument('-i', metavar='', nargs='+', type=argparse.FileType('r', encoding='UTF-8'), default=sys.stdin, help='input GEDCOM files [stdin]') + parser.add_argument('-o', metavar='', nargs='?', type=argparse.FileType('w', encoding='UTF-8'), default=sys.stdout, help='output GEDCOM files [stdout]') except TypeError: sys.stderr.write('Python >= 3.4 is required to run this script\n') sys.stderr.write('(see https://docs.python.org/3/whatsnew/3.4.html#argparse)\n') @@ -194,6 +299,8 @@ if __name__ == '__main__': indi_counter = 0 fam_counter = 0 + note_counter = 0 + temp_note = None # read the GEDCOM data for file in args.i: @@ -204,12 +311,15 @@ if __name__ == '__main__': fid = ged.indi[num].fid if fid not in tree.indi: indi_counter += 1 - tree.indi[fid] = Indi(num = indi_counter) + tree.indi[fid] = Indi(num=indi_counter) tree.indi[fid].fid = ged.indi[num].fid tree.indi[fid].fams_fid |= ged.indi[num].fams_fid tree.indi[fid].famc_fid |= ged.indi[num].famc_fid - tree.indi[fid].given = ged.indi[num].given - tree.indi[fid].surname = ged.indi[num].surname + tree.indi[fid].name = ged.indi[num].name + tree.indi[fid].birthnames = ged.indi[num].birthnames + tree.indi[fid].nicknames = ged.indi[num].nicknames + tree.indi[fid].aka = ged.indi[num].aka + tree.indi[fid].married = ged.indi[num].married tree.indi[fid].gender = ged.indi[num].gender tree.indi[fid].birtdate = ged.indi[num].birtdate tree.indi[fid].birtplac = ged.indi[num].birtplac @@ -219,6 +329,10 @@ if __name__ == '__main__': tree.indi[fid].deatplac = ged.indi[num].deatplac tree.indi[fid].buridate = ged.indi[num].buridate tree.indi[fid].buriplac = ged.indi[num].buriplac + tree.indi[fid].physical_descriptions = ged.indi[num].physical_descriptions + tree.indi[fid].occupations = ged.indi[num].occupations + tree.indi[fid].notes = ged.indi[num].notes + tree.indi[fid].sources = ged.indi[num].sources # add informations about families for num in ged.fam: @@ -230,6 +344,30 @@ if __name__ == '__main__': tree.fam[(husb, wife)].fid = ged.fam[num].fid tree.fam[(husb, wife)].marrdate = ged.fam[num].marrdate tree.fam[(husb, wife)].marrplac = ged.fam[num].marrplac + tree.fam[(husb, wife)].notes = ged.fam[num].notes + tree.fam[(husb, wife)].sources = ged.fam[num].sources + + # merge notes by text + list_notes = sorted(list_notes, key=lambda x: x.text) + for i, n in enumerate(list_notes): + if i == 0: + n.num = 1 + continue + if n.text == list_notes[i - 1].text: + n.num = list_notes[i - 1].num + else: + n.num = list_notes[i - 1].num + 1 + + # merge notes by fid + list_sources = sorted(list_sources, key=lambda x: x.fid) + for i, n in enumerate(list_sources): + if i == 0: + n.num = 1 + continue + if n.fid == list_sources[i - 1].fid: + n.num = list_sources[i - 1].num + else: + n.num = list_sources[i - 1].num + 1 # compute number for family relationships and print GEDCOM file tree.reset_num() -- 2.52.0