summary | refs | log | tree | commit | diff | stats
path: root/data
diff options
context:
space:
mode:
author: Eddy Pedroni <eddy@0xf7.com>, 2022-10-18 19:29:06 +0200
committer: Eddy Pedroni <eddy@0xf7.com>, 2022-10-18 19:29:06 +0200
commit: 0d07220aeceae94fc05b12c4c98bec9ee28026b4 (patch)
tree: 1d9f8a57031c3d959faf1c48881e87a1fdfae081 /data
parent: 31639b35e17732cf4c543194ec6d830da0178540 (diff)
MVP done
Diffstat (limited to 'data')
-rwxr-xr-x data/noun-query.sh 7
1 files changed, 5 insertions, 2 deletions
diff --git a/data/noun-query.sh b/data/noun-query.sh
index 948bf02..c9d3224 100755
--- a/data/noun-query.sh
+++ b/data/noun-query.sh
@@ -12,6 +12,8 @@ sed -r -z "s/\n(\|[A-Z])/\1/g" noun-stage2 > noun-stage3
grep -e "Deutsch Substantiv Übersicht" noun-stage3 > noun-stage4
python -c """
+import re
+
with open('noun-stage4', 'r', encoding='utf8') as clean:
cleanLines = clean.readlines()
@@ -26,11 +28,12 @@ with open('nouns.csv', 'w', encoding='utf-8') as nouns:
for d in dicts:
try:
+ if not re.match(r'[A-Za-z_]', d['Nominativ Singular'][0]):
+ continue
line = ','.join([d['Genus'], d['Nominativ Singular'], d['Nominativ Plural'], d['Akkusativ Singular'], d['Akkusativ Plural'], d['Dativ Singular'], d['Dativ Plural'], d['Genitiv Singular'], d['Genitiv Plural']])
nouns.write(line + '\n')
except:
pass
"""
-
-rm noun-stage*
+#rm noun-stage*