Skip to content
Snippets Groups Projects
Commit 7d7f1210 authored by Donatus Herre
Browse files

extraction updated

parent f40b6df8
Branches
No related tags found
No related merge requests found
no GND identifier present for bertram-august-wilhelm
no GND identifier present for henrici-heinrich
no GND identifier present for metz-johann-albrecht-friedrich
no GND identifier present for ostrow-ostrowsky-martin
no GND identifier present for supprian-friedrich-leberecht
no GND identifier present for tribechow-johannes
......@@ -176,5 +176,8 @@
"138519285",
"141780061",
"141878886",
"143695991"
"143695991",
"104171421",
"1020385375",
"1042343306"
]
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Creator: D. Herre
GitLab: dikon/afh-gnd
Created: 2020-04-30
Last Modified: 2020-06-19
"""
from librair.parsers import Beacon
from librair.schemas import json
# Input locations (Beacon URL, local edit lists) and the JSON output path.
BEACON = "https://dikon.gitlab.io/cph-beacon/data/gnd.txt"
HREF = "edits/index1694-href.txt"
NAMES = "edits/index1694-names.txt"
OUT = "data/index1694.json"

# One href entry per line, surrounding whitespace stripped.
with open(HREF, 'r', encoding="utf-8") as f:
    href = [line.strip() for line in f]

# NOTE(review): `names` is loaded but not referenced again in this script.
# The read is kept so a missing names file still fails the run as before.
with open(NAMES, 'r', encoding="utf-8") as f:
    names = [line.strip() for line in f]

cph = Beacon(url=BEACON)

# Second field of every Beacon target; this is what href entries are
# compared against.
targets = [t[1] for t in cph.targets]

# Sets make the membership tests in the loops below constant-time instead
# of scanning a list once per entry.
target_set = set(targets)
href_set = set(href)

# Identifiers added by hand for href entries, keyed by href entry.
additions = {
    "morgenstern-nikolaus": "104171421",
    "reichhelm-karl": "1020385375",
    "sperlette-bartholomaeus-johann": "1042343306",
}

# Report every href entry that has neither a Beacon target nor a manual
# addition.
for h in href:
    if h not in target_set and h not in additions:
        print("no GND identifier present for", h)

# Collect the Beacon link of every target listed in href, in Beacon order,
# followed by the manual additions in their declared order.
gnds = [
    cph.links[i]
    for i, target in enumerate(cph.targets)
    if target[1] in href_set
]
gnds.extend(additions.values())

json.writer(gnds, OUT)
#!/usr/bin/env bash
# Run the prep module inside the project's virtualenv (./env) and capture
# its stdout and stderr in build.log.

# Stop if the virtualenv cannot be activated, so prep never runs under
# whatever interpreter happens to be on PATH.
source env/bin/activate || exit 1

python -m prep > build.log 2>&1
# Remember prep's result; otherwise the script would report the exit
# status of `deactivate` and hide a failed run.
status=$?

deactivate
exit "$status"
#!/usr/bin/env bash
# Create the project's virtualenv in ./env with python3 and install the
# packages listed in requirements.txt into it.

# Stop if the environment cannot be created or activated, so pip never
# installs into a Python outside the virtualenv.
virtualenv -p python3 env || exit 1
source env/bin/activate || exit 1

pip install -r requirements.txt
# Remember pip's result; otherwise the script would report the exit
# status of `deactivate` and hide a failed install.
status=$?

deactivate
exit "$status"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment