diff options
Diffstat (limited to 'data/noun-query.sh')
-rwxr-xr-x | data/noun-query.sh | 9 |
1 file changed, 8 insertions, 1 deletion
diff --git a/data/noun-query.sh b/data/noun-query.sh
index c9d3224..06bf202 100755
--- a/data/noun-query.sh
+++ b/data/noun-query.sh
@@ -23,6 +23,9 @@ sp = [l.strip().rstrip('}}').lstrip('{{Deutsch Substantiv Übersicht|').split('|
 # list[dict[str:str]]
 dicts = [{i.split('=')[0] : i.split('=')[1] for i in entry if len(i.split('=')) > 1} for entry in sp]
 
+with open('common-nouns', 'r', encoding='utf-8') as common:
+    commonNouns = [n.strip().lower() for n in common.readlines()]
+
 with open('nouns.csv', 'w', encoding='utf-8') as nouns:
     nouns.write('gender,nom-sin,nom-plu,akk-sin,akk-plu,dat-sin,dat-plu,gen-sin,gen-plu\n')
 
@@ -30,10 +33,14 @@ with open('nouns.csv', 'w', encoding='utf-8') as nouns:
         try:
             if not re.match(r'[A-Za-z_]', d['Nominativ Singular'][0]):
                 continue
+
+            if d['Nominativ Singular'].strip().lower() not in commonNouns:
+                continue
+
             line = ','.join([d['Genus'], d['Nominativ Singular'], d['Nominativ Plural'], d['Akkusativ Singular'], d['Akkusativ Plural'], d['Dativ Singular'], d['Dativ Plural'], d['Genitiv Singular'], d['Genitiv Plural']])
             nouns.write(line + '\n')
         except:
             pass
 """
 
-#rm noun-stage*
+rm noun-stage*
|