From f272f083af3d135e82ecdf0ce4e340ee2782364b Mon Sep 17 00:00:00 2001
From: Eddy Pedroni
Date: Wed, 15 May 2024 15:48:09 +0200
Subject: Add makefile, use latest datasets

---
Notes (ignored by git am/apply):
- Recipe lines in the Makefile hunk are indented with a hard TAB, as make
  requires.
- Blank context lines were re-derived from the @@ line counts; the one in
  the .gitignore hunk is a best guess -- TODO confirm against the repository.

 .gitignore         |  1 +
 Makefile           | 19 +++++++++++++++++++
 data/adj-query.sh  |  2 +-
 data/noun-query.sh |  2 +-
 requirements.txt   |  1 +
 5 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 Makefile
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
index 9b2a8cd..25e4e1e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 __pycache__/
+venv/
 
 data/*
 !data/*.sh
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..fb7f7aa
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,19 @@
+all: data/adjectives.json data/nouns.csv venv
+
+run: all
+	./venv/bin/python kasus.py
+
+clean:
+	rm -rf data/adjectives.json data/nouns.csv venv
+
+data/adjectives.json: data/adj-query.sh data/grab-dump.sh
+	cd data; ./adj-query.sh
+
+data/nouns.csv: data/noun-query.sh data/grab-dump.sh data/common-nouns
+	cd data; ./noun-query.sh
+
+venv: requirements.txt
+	python -m venv venv
+	./venv/bin/pip install -r requirements.txt
+
+.PHONY: all run clean
diff --git a/data/adj-query.sh b/data/adj-query.sh
index 986c435..450268d 100755
--- a/data/adj-query.sh
+++ b/data/adj-query.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/zsh
 
-dump_date="20221001"
+dump_date="latest"
 dump_file="dewiktionary-${dump_date}-pages-articles.xml"
 dump_url="https://dumps.wikimedia.org/dewiktionary/${dump_date}/${dump_file}.bz2"
 
diff --git a/data/noun-query.sh b/data/noun-query.sh
index 06bf202..9526da2 100755
--- a/data/noun-query.sh
+++ b/data/noun-query.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/zsh
 
-dump_date="20221001"
+dump_date="latest"
 dump_file="dewiktionary-${dump_date}-pages-articles.xml"
 dump_url="https://dumps.wikimedia.org/dewiktionary/${dump_date}/${dump_file}.bz2"
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3fcfb51
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+colorama
-- 
cgit v1.2.3