summary refs log tree commit diff stats
path: root/data/noun-query.sh
diff options
context:
space:
mode:
Diffstat (limited to 'data/noun-query.sh')
-rwxr-xr-x data/noun-query.sh 9
1 files changed, 8 insertions, 1 deletions
diff --git a/data/noun-query.sh b/data/noun-query.sh
index c9d3224..06bf202 100755
--- a/data/noun-query.sh
+++ b/data/noun-query.sh
@@ -23,6 +23,9 @@ sp = [l.strip().rstrip('}}').lstrip('{{Deutsch Substantiv Übersicht|').split('|
# list[dict[str:str]]
dicts = [{i.split('=')[0] : i.split('=')[1] for i in entry if len(i.split('=')) > 1} for entry in sp]
+with open('common-nouns', 'r', encoding='utf-8') as common:
+ commonNouns = [n.strip().lower() for n in common.readlines()]
+
with open('nouns.csv', 'w', encoding='utf-8') as nouns:
nouns.write('gender,nom-sin,nom-plu,akk-sin,akk-plu,dat-sin,dat-plu,gen-sin,gen-plu\n')
@@ -30,10 +33,14 @@ with open('nouns.csv', 'w', encoding='utf-8') as nouns:
try:
if not re.match(r'[A-Za-z_]', d['Nominativ Singular'][0]):
continue
+
+ if d['Nominativ Singular'].strip().lower() not in commonNouns:
+ continue
+
line = ','.join([d['Genus'], d['Nominativ Singular'], d['Nominativ Plural'], d['Akkusativ Singular'], d['Akkusativ Plural'], d['Dativ Singular'], d['Dativ Plural'], d['Genitiv Singular'], d['Genitiv Plural']])
nouns.write(line + '\n')
except:
pass
"""
-#rm noun-stage*
+rm noun-stage*