diff options
Diffstat (limited to 'data/noun-query.sh')
| -rwxr-xr-x | data/noun-query.sh | 9 | 
1 file changed, 8 insertions, 1 deletion
diff --git a/data/noun-query.sh b/data/noun-query.sh index c9d3224..06bf202 100755 --- a/data/noun-query.sh +++ b/data/noun-query.sh @@ -23,6 +23,9 @@ sp = [l.strip().rstrip('}}').lstrip('{{Deutsch Substantiv Übersicht|').split('|  # list[dict[str:str]]  dicts = [{i.split('=')[0] : i.split('=')[1] for i in entry if len(i.split('=')) > 1} for entry in sp] +with open('common-nouns', 'r', encoding='utf-8') as common: +    commonNouns = [n.strip().lower() for n in common.readlines()] +  with open('nouns.csv', 'w', encoding='utf-8') as nouns:      nouns.write('gender,nom-sin,nom-plu,akk-sin,akk-plu,dat-sin,dat-plu,gen-sin,gen-plu\n') @@ -30,10 +33,14 @@ with open('nouns.csv', 'w', encoding='utf-8') as nouns:          try:              if not re.match(r'[A-Za-z_]', d['Nominativ Singular'][0]):                  continue + +            if d['Nominativ Singular'].strip().lower() not in commonNouns: +                continue +              line = ','.join([d['Genus'], d['Nominativ Singular'], d['Nominativ Plural'], d['Akkusativ Singular'], d['Akkusativ Plural'], d['Dativ Singular'], d['Dativ Plural'], d['Genitiv Singular'], d['Genitiv Plural']])              nouns.write(line + '\n')          except:              pass  """ -#rm noun-stage* +rm noun-stage*  | 
