sys.stderr.write('(run this in your terminal: "python3 -m pip install requests" or "python3 -m pip install --user requests")\n')
exit(2)
-list_notes = set()
+list_notes = set()
list_sources = set()
counter = 0
- def __init__(self, text, num=None):
+ def __init__(self, text='', num=None):
if num:
self.num = num
else:
counter = 0
- def __init__(self, data, num=None):
+ def __init__(self, data=None, num=None):
if num:
self.num = num
else:
self.num = Source.counter
list_sources.add(self)
- self.url = self.citation = self.title = None
+ self.url = self.citation = self.title = self.fid = None
self.notes = set()
- self.id = data['id']
- if data['about']:
- self.url = data['about']
- if data['citations']:
- self.citation = data['citations'][0]['value']
- if data['titles']:
- self.title = data['titles'][0]['value']
- if data['notes']:
- for n in data['notes']:
- if n['text']:
- self.notes.add(Note(n['text']))
+ if data:
+ self.fid = data['id']
+ if data['about']:
+ self.url = data['about']
+ if data['citations']:
+ self.citation = data['citations'][0]['value']
+ if data['titles']:
+ self.title = data['titles'][0]['value']
+ if data['notes']:
+ for n in data['notes']:
+ if n['text']:
+ self.notes.add(Note(n['text'], fid=n['id']))
+
+    @staticmethod
+    def add_source(data=None):
+        """Return the registered Source matching ``data``, or create a new one.
+
+        ``data`` is a source description mapping of the kind accepted by
+        ``Source.__init__``. When a source whose ``fid`` equals ``data['id']``
+        is already present in ``list_sources`` it is returned unchanged, so a
+        description referenced several times maps to a single Source object.
+        Otherwise a new ``Source(data)`` is built and returned.
+        """
+        if data:
+            # __init__ stores data['id'] in self.fid, so that is the key to
+            # compare on; the former ``data[fid]`` raised NameError because
+            # no name ``fid`` exists in this scope.
+            for s in list_sources:
+                if s.fid == data['id']:
+                    return s
+        return Source(data)
def print(self, file=sys.stdout):
+        # Write this source to ``file`` as a GEDCOM SOUR record.
file.write('0 @S' + str(self.num) + '@ SOUR \n')
file.write('1 PUBL ' + self.url + '\n')
for n in self.notes:
n.link(file, 1)
+        # fid stays None for a source built without data (e.g. Source(num=...));
+        # concatenating None to a str would raise TypeError, so only emit the
+        # tag when an identifier is known.
+        if self.fid:
+            file.write('1 _FSFTID ' + self.fid + '\n')
def link(self, file=sys.stdout, level=1):
+        # Write a "<level> SOUR @S<num>@" reference line pointing at this source.
file.write(str(level) + ' SOUR @S' + str(self.num) + '@\n')
class Fact:
+    # Holds one fact: a value plus optional date, place and note.
def __init__(self, data=None):
+        # Without ``data`` the fact is empty (value '', everything else None),
+        # which lets callers fill the attributes in afterwards.
- self.value = data['value']
+ self.value = ''
self.date = self.place = self.note = None
- if 'date' in data:
- self.date = data['date']['original']
- if 'place' in data:
- self.place = data['place']['original']
- if 'changeMessage' in data['attribution']:
- self.note = Note(data['attribution']['changeMessage'])
+ if data:
+ self.value = data['value']
+ if 'date' in data:
+ self.date = data['date']['original']
+ if 'place' in data:
+ self.place = data['place']['original']
+        # the attribution change message, when present, becomes the fact's note
+ if 'changeMessage' in data['attribution']:
+ self.note = Note(data['attribution']['changeMessage'])
class Name:
self.prefix = None
self.suffix = None
self.note = None
- if 'parts' in data['nameForms'][0]:
- for z in data['nameForms'][0]['parts']:
- if z['type'] == u'http://gedcomx.org/Given':
- self.given = z['value']
- if z['type'] == u'http://gedcomx.org/Surname':
- self.surname = z['value']
- if z['type'] == u'http://gedcomx.org/Prefix':
- self.prefix = z['value']
- if z['type'] == u'http://gedcomx.org/Suffix':
- self.suffix = z['value']
- if 'changeMessage' in data['attribution']:
- self.note = Note(data['attribution']['changeMessage'])
+ if data:
+ if 'parts' in data['nameForms'][0]:
+ for z in data['nameForms'][0]['parts']:
+ if z['type'] == u'http://gedcomx.org/Given':
+ self.given = z['value']
+ if z['type'] == u'http://gedcomx.org/Surname':
+ self.surname = z['value']
+ if z['type'] == u'http://gedcomx.org/Prefix':
+ self.prefix = z['value']
+ if z['type'] == u'http://gedcomx.org/Suffix':
+ self.suffix = z['value']
+ if 'changeMessage' in data['attribution']:
+ self.note = Note(data['attribution']['changeMessage'])
def print(self, file=sys.stdout, type=None):
file.write('1 NAME ' + self.given + ' /' + self.surname + '/')
if 'sources' in x:
for y in x['sources']:
json = fs.get_url(y['links']['description']['href'])['sourceDescriptions'][0]
- source = None
- for s in list_sources:
- if s.id == json['id']:
- source = s
- break
- if source:
- self.sources.add(source)
+ if 'changeMessage' in y['attribution']:
+ self.sources.add((Source.add_source(json), y['attribution']['changeMessage']))
else:
- if 'changeMessage' in y['attribution']:
- self.sources.add((Source(json), y['attribution']['changeMessage']))
- else:
- self.sources.add((Source(json),))
- self.parents = None
+ self.sources.add((Source(json),))
+ self.parents = None
self.children = None
self.spouses = None
# print individual information in GEDCOM format
def print(self, file=sys.stdout):
file.write('0 @I' + str(self.num) + '@ INDI\n')
- self.name.print(file)
+ if self.name:
+ self.name.print(file)
for o in self.nicknames:
file.write('2 NICK ' + o.given + ' /' + o .surname + '/\n')
for o in self.birthnames:
file.write('2 PLAC ' + self.buriplac + '\n')
for o in self.physical_descriptions:
file.write('1 DSCR ' + o.value + '\n')
+ if o.date:
+ file.write('2 DATE ' + o.date + '\n')
+ if o.place:
+ file.write('2 PLAC ' + o.place + '\n')
if o.note:
o.note.link(file, 2)
for num in self.fams_num:
self.husb_num = self.wife_num = self.fid = self.marrdate = self.marrplac = None
self.chil_fid = set()
self.chil_num = set()
+ self.notes = set()
self.sources = set()
# add a child to the family
self.marrplac = x['place']['original'] if 'place' in x and 'original' in x['place'] else None
else:
self.marrdate = self.marrplac = None
+ notes = fs.get_url(data['relationships'][0]['links']['notes']['href'])
+ if notes:
+ for n in notes['relationships'][0]['notes']:
+ self.notes.add(Note('===' + n['subject'] + '===\n' + n['text'] + '\n'))
if data and 'sources' in data['relationships'][0]:
for y in data['relationships'][0]['sources']:
json = fs.get_url(y['links']['description']['href'])['sourceDescriptions'][0]
- source = None
- for s in list_sources:
- if s.id == json['id']:
- source = s
- break
- if source:
- self.sources.add(source)
+ if 'changeMessage' in y['attribution']:
+ self.sources.add((Source.add_source(json), y['attribution']['changeMessage']))
else:
- if 'changeMessage' in y['attribution']:
- self.sources.add((Source(json), y['attribution']['changeMessage']))
- else:
- self.sources.add((Source(json),))
+ self.sources.add((Source(json),))
# print family information in GEDCOM format
def print(self, file=sys.stdout):
file.write('2 PLAC ' + self.marrplac + '\n')
if self.fid:
file.write('1 _FSFTID ' + self.fid + '\n')
+ for o in self.notes:
+ o.link(file)
for o in self.sources:
o[0].link(file, 1)
if len(o) > 1:
file.write('2 PAGE ' + o[1] + '\n')
+
# family tree class
class Tree:
def __init__(self, fs=None):
self.indi[fid].print(file)
for husb, wife in sorted(self.fam, key=lambda x: self.fam.__getitem__(x).num):
self.fam[(husb, wife)].print(file)
- for n in list_notes:
+ notes = sorted(list_notes, key=lambda x: x.num)
+ for i, n in enumerate(notes):
+ if i > 0:
+ if n.num == notes[i - 1].num:
+ continue
n.print(file)
for s in list_sources:
s.print(file)
+ sources = sorted(list_sources, key=lambda x: x.num)
+ for i, s in enumerate(sources):
+ if i > 0:
+ if s.num == sources[i - 1].num:
+ continue
+ s.print(file)
file.write('0 TRLR\n')
# compute number for family relationships and print GEDCOM file
tree.reset_num()
tree.print(args.o)
-
\ No newline at end of file
from __future__ import print_function
# global import
-import os, sys, argparse
+import os
+import sys
+import argparse
# local import
+from getmyancestors import Indi, Fam, Tree, Name, Note, Fact, Source, list_notes, list_sources
+
sys.path.append(os.path.dirname(sys.argv[0]))
-from getmyancestors import Indi
-from getmyancestors import Fam
-from getmyancestors import Tree
+
class Gedcom:
self.flag = False
self.indi = dict()
self.fam = dict()
+ self.note = dict()
+ self.sour = dict()
self.__parse()
self.__add_id()
def __parse(self):
while self.__get_line():
if self.tag == 'INDI':
- self.num = int(self.pointer[2:len(self.pointer)-1])
- self.indi[self.num] = Indi(num = self.num)
+ self.num = int(self.pointer[2:len(self.pointer) - 1])
+ self.indi[self.num] = Indi(num=self.num)
self.__get_indi()
elif self.tag == 'FAM':
- self.num = int(self.pointer[2:len(self.pointer)-1])
- self.fam[self.num] = Fam(num = self.num)
+ self.num = int(self.pointer[2:len(self.pointer) - 1])
+ self.fam[self.num] = Fam(num=self.num)
self.__get_fam()
+ elif self.tag == 'NOTE':
+ self.num = int(self.pointer[2:len(self.pointer) - 1])
+ if self.num not in self.note:
+ self.note[self.num] = Note(num=self.num)
+ self.__get_note()
+ elif self.tag == 'SOUR':
+ self.num = int(self.pointer[2:len(self.pointer) - 1])
+ if self.num not in self.sour:
+ self.sour[self.num] = Source(num=self.num)
+ self.__get_source()
else:
continue
if words[1][0] == '@':
self.pointer = words[1]
self.tag = words[2]
- self.data = None
+ self.data = ' '.join(words[3:])
else:
self.pointer = None
self.tag = words[1]
def __get_indi(self):
+        # Read the sub-records of the current INDI record into self.indi[self.num].
while self.f and self.__get_line() and self.level > 0:
if self.tag == 'NAME':
- name = self.data.split('/')
- self.indi[self.num].given = name[0].strip()
- self.indi[self.num].surname = name[1].strip()
+                self.__get_name()
elif self.tag == 'SEX':
self.indi[self.num].gender = self.data
elif self.tag == 'BIRT':
self.__get_deat()
elif self.tag == 'BURI':
self.__get_buri()
+            elif self.tag in ('DSCR', 'OCCU'):
+                self.__get_fact()
elif self.tag == 'FAMS':
- self.indi[self.num].fams_num.add(int(self.data[2:len(self.data)-1]))
+                self.indi[self.num].fams_num.add(int(self.data[2:-1]))
elif self.tag == 'FAMC':
- self.indi[self.num].famc_num.add(int(self.data[2:len(self.data)-1]))
+                self.indi[self.num].famc_num.add(int(self.data[2:-1]))
elif self.tag == '_FSFTID':
self.indi[self.num].fid = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:-1])
+                # Reuse the note already registered under this number so that
+                # every reference (and the NOTE record itself) shares one
+                # object; overwriting it would orphan earlier references.
+                if num not in self.note:
+                    self.note[num] = Note(num=num)
+                self.indi[self.num].notes.add(self.note[num])
+            elif self.tag == 'SOUR':
+                num = int(self.data[2:-1])
+                if num not in self.sour:
+                    self.sour[num] = Source(num=num)
+                # Attach the source (not a note) to the individual; sources are
+                # kept as tuples whose first item is the Source object.
+                self.indi[self.num].sources.add((self.sour[num],))
self.flag = True
def __get_fam(self):
+        # Read the sub-records of the current FAM record into self.fam[self.num].
while self.__get_line() and self.level > 0:
if self.tag == 'HUSB':
- self.fam[self.num].husb_num = int(self.data[2:len(self.data)-1])
+                self.fam[self.num].husb_num = int(self.data[2:-1])
elif self.tag == 'WIFE':
- self.fam[self.num].wife_num = int(self.data[2:len(self.data)-1])
+                self.fam[self.num].wife_num = int(self.data[2:-1])
elif self.tag == 'CHIL':
- self.fam[self.num].chil_num.add(int(self.data[2:len(self.data)-1]))
+                self.fam[self.num].chil_num.add(int(self.data[2:-1]))
elif self.tag == 'MARR':
self.__get_marr()
elif self.tag == '_FSFTID':
self.fam[self.num].fid = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:-1])
+                # Reuse the note already registered under this number so that
+                # every reference (and the NOTE record itself) shares one
+                # object; overwriting it would orphan earlier references.
+                if num not in self.note:
+                    self.note[num] = Note(num=num)
+                self.fam[self.num].notes.add(self.note[num])
+            elif self.tag == 'SOUR':
+                num = int(self.data[2:-1])
+                if num not in self.sour:
+                    self.sour[num] = Source(num=num)
+                # Attach the source (not a note) to the family; Fam.print reads
+                # each entry as a tuple whose first item is the Source object.
+                self.fam[self.num].sources.add((self.sour[num],))
+        self.flag = True
+
+    def __get_note(self):
+        """Store the text of the current NOTE record in ``self.note[self.num]``.
+
+        The record's own data is the first line; every following CONT
+        sub-record is appended after a newline.
+        """
+        self.note[self.num].text = self.data
+        while self.__get_line() and self.level > 0:
+            if self.tag != 'CONT':
+                continue
+            self.note[self.num].text = self.note[self.num].text + '\n' + self.data
+        self.flag = True
+
+    def __get_source(self):
+        """Read the sub-records of the current SOUR record into ``self.sour[self.num]``.
+
+        TITL, AUTH, PUBL and _FSFTID fill the source's title, citation, url
+        and fid; NOTE references are added to the source's notes.
+        """
+        source = self.sour[self.num]
+        while self.__get_line() and self.level > 0:
+            if self.tag == 'TITL':
+                source.title = self.data
+            elif self.tag == 'AUTH':
+                source.citation = self.data
+            # The tag, not the source registry, must be compared here, and the
+            # values below belong on the Source rather than on the parser.
+            elif self.tag == 'PUBL':
+                source.url = self.data
+            elif self.tag == '_FSFTID':
+                source.fid = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:-1])
+                # Reuse a note already registered under this number instead of
+                # replacing it, so all references share one object.
+                if num not in self.note:
+                    self.note[num] = Note(num=num)
+                source.notes.add(self.note[num])
+        self.flag = True
+
+    def __get_fact(self):
+        """Parse the current DSCR or OCCU record and attach it to the current individual.
+
+        The record's data becomes the fact value; DATE, PLAC and NOTE
+        sub-records fill the fact's date, place and note.
+        """
+        fact = Fact()
+        fact.value = self.data
+        if self.tag == 'DSCR':
+            self.indi[self.num].physical_descriptions.add(fact)
+        elif self.tag == 'OCCU':
+            self.indi[self.num].occupations.add(fact)
+        while self.__get_line() and self.level > 1:
+            if self.tag == 'DATE':
+                fact.date = self.data
+            elif self.tag == 'PLAC':
+                fact.place = self.data
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:-1])
+                # Reuse a note already registered under this number instead of
+                # replacing it, so all references share one object.
+                if num not in self.note:
+                    self.note[num] = Note(num=num)
+                fact.note = self.note[num]
+        self.flag = True
+
+    def __get_name(self):
+        """Parse the current NAME record (``given /surname/ suffix``) and its sub-records.
+
+        The first name met for an individual becomes its main name. A name
+        carrying a TYPE of ``aka`` or ``married`` goes to the matching set, and
+        any other additional name is stored as a birth name. NPFX, NICK and
+        NOTE sub-records fill the prefix, the nicknames and the name's note.
+        """
+        # Pad the split result so a value without slashes (or without the
+        # closing slash) yields empty parts instead of raising IndexError.
+        parts = self.data.split('/') + ['', '']
+        name = Name()
+        added = False
+        name.given = parts[0].strip()
+        name.surname = parts[1].strip()
+        suffix = parts[2].strip()
+        if suffix:
+            name.suffix = suffix
+        if not self.indi[self.num].name:
+            self.indi[self.num].name = name
+            added = True
+        while self.__get_line() and self.level > 1:
+            if self.tag == 'NPFX':
+                name.prefix = self.data
+            elif self.tag == 'TYPE':
+                if self.data == 'aka':
+                    self.indi[self.num].aka.add(name)
+                    added = True
+                elif self.data == 'married':
+                    self.indi[self.num].married.add(name)
+                    added = True
+            elif self.tag == 'NICK':
+                nick = Name()
+                nick_parts = self.data.split('/') + ['']
+                # strip like the main name so separators are not kept as data
+                nick.given = nick_parts[0].strip()
+                nick.surname = nick_parts[1].strip()
+                self.indi[self.num].nicknames.add(nick)
+            elif self.tag == 'NOTE':
+                num = int(self.data[2:-1])
+                # Reuse a note already registered under this number instead of
+                # replacing it, so all references share one object.
+                if num not in self.note:
+                    self.note[num] = Note(num=num)
+                name.note = self.note[num]
+        if not added:
+            self.indi[self.num].birthnames.add(name)
self.flag = True
def __get_birt(self):
self.indi[num].famc_fid.add((self.fam[famc].husb_fid, self.fam[famc].wife_fid))
for fams in self.indi[num].fams_num:
self.indi[num].fams_fid.add((self.fam[fams].husb_fid, self.fam[fams].wife_fid))
-
-
+
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Merge GEDCOM data from FamilySearch Tree (4 Jul 2016)', add_help=False, usage='mergemyancestors.py -i input1.ged input2.ged ... [options]')
try:
- parser.add_argument('-i', metavar = '<FILE>', nargs = '+', type = argparse.FileType('r', encoding='UTF-8'), default = sys.stdin, help = 'input GEDCOM files [stdin]')
- parser.add_argument('-o', metavar = '<FILE>', nargs = '?', type = argparse.FileType('w', encoding='UTF-8'), default = sys.stdout, help = 'output GEDCOM files [stdout]')
+ parser.add_argument('-i', metavar='<FILE>', nargs='+', type=argparse.FileType('r', encoding='UTF-8'), default=sys.stdin, help='input GEDCOM files [stdin]')
+ parser.add_argument('-o', metavar='<FILE>', nargs='?', type=argparse.FileType('w', encoding='UTF-8'), default=sys.stdout, help='output GEDCOM files [stdout]')
except TypeError:
sys.stderr.write('Python >= 3.4 is required to run this script\n')
sys.stderr.write('(see https://docs.python.org/3/whatsnew/3.4.html#argparse)\n')
indi_counter = 0
fam_counter = 0
+ note_counter = 0
+ temp_note = None
# read the GEDCOM data
for file in args.i:
fid = ged.indi[num].fid
if fid not in tree.indi:
indi_counter += 1
- tree.indi[fid] = Indi(num = indi_counter)
+ tree.indi[fid] = Indi(num=indi_counter)
tree.indi[fid].fid = ged.indi[num].fid
tree.indi[fid].fams_fid |= ged.indi[num].fams_fid
tree.indi[fid].famc_fid |= ged.indi[num].famc_fid
- tree.indi[fid].given = ged.indi[num].given
- tree.indi[fid].surname = ged.indi[num].surname
+ tree.indi[fid].name = ged.indi[num].name
+ tree.indi[fid].birthnames = ged.indi[num].birthnames
+ tree.indi[fid].nicknames = ged.indi[num].nicknames
+ tree.indi[fid].aka = ged.indi[num].aka
+ tree.indi[fid].married = ged.indi[num].married
tree.indi[fid].gender = ged.indi[num].gender
tree.indi[fid].birtdate = ged.indi[num].birtdate
tree.indi[fid].birtplac = ged.indi[num].birtplac
tree.indi[fid].deatplac = ged.indi[num].deatplac
tree.indi[fid].buridate = ged.indi[num].buridate
tree.indi[fid].buriplac = ged.indi[num].buriplac
+ tree.indi[fid].physical_descriptions = ged.indi[num].physical_descriptions
+ tree.indi[fid].occupations = ged.indi[num].occupations
+ tree.indi[fid].notes = ged.indi[num].notes
+ tree.indi[fid].sources = ged.indi[num].sources
# add informations about families
for num in ged.fam:
tree.fam[(husb, wife)].fid = ged.fam[num].fid
tree.fam[(husb, wife)].marrdate = ged.fam[num].marrdate
tree.fam[(husb, wife)].marrplac = ged.fam[num].marrplac
+ tree.fam[(husb, wife)].notes = ged.fam[num].notes
+ tree.fam[(husb, wife)].sources = ged.fam[num].sources
+
+ # merge notes by text
+ list_notes = sorted(list_notes, key=lambda x: x.text)
+ for i, n in enumerate(list_notes):
+ if i == 0:
+ n.num = 1
+ continue
+ if n.text == list_notes[i - 1].text:
+ n.num = list_notes[i - 1].num
+ else:
+ n.num = list_notes[i - 1].num + 1
+
+ # merge sources by fid
+ list_sources = sorted(list_sources, key=lambda x: x.fid)
+ for i, n in enumerate(list_sources):
+ if i == 0:
+ n.num = 1
+ continue
+ if n.fid == list_sources[i - 1].fid:
+ n.num = list_sources[i - 1].num
+ else:
+ n.num = list_sources[i - 1].num + 1
# compute number for family relationships and print GEDCOM file
tree.reset_num()