diff options
author | Eddy Pedroni <epedroni@pm.me> | 2024-05-15 15:48:09 +0200 |
---|---|---|
committer | Eddy Pedroni <epedroni@pm.me> | 2024-05-15 15:48:09 +0200 |
commit | f272f083af3d135e82ecdf0ce4e340ee2782364b (patch) | |
tree | 3467d8d5f362e1c95c7cc54a6781514015c913ff | |
parent | 43c615646ab31626e4f8b8fa074a669195b4306a (diff) |
Add makefile, use latest datasets
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 19 | ||||
-rwxr-xr-x | data/adj-query.sh | 2 | ||||
-rwxr-xr-x | data/noun-query.sh | 2 | ||||
-rw-r--r-- | requirements.txt | 1 |
5 files changed, 23 insertions, 2 deletions
```diff
@@ -1,4 +1,5 @@
 __pycache__/
+venv/
 data/*
 !data/*.sh
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..fb7f7aa
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,19 @@
+all: data/adjectives.json data/nouns.csv venv
+
+run: all
+	./venv/bin/python kasus.py
+
+clean:
+	rm -rf data/adjectives.json data/nouns.csv venv
+
+data/adjectives.json: data/adj-query.sh data/grab-dump.sh
+	cd data; ./adj-query.sh
+
+data/nouns.csv: data/noun-query.sh data/grab-dump.sh data/common-nouns
+	cd data; ./noun-query.sh
+
+venv: requirements.txt
+	python -m venv venv
+	./venv/bin/pip install -r requirements.txt
+
+.PHONY: all run clean
diff --git a/data/adj-query.sh b/data/adj-query.sh
index 986c435..450268d 100755
--- a/data/adj-query.sh
+++ b/data/adj-query.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/zsh
-dump_date="20221001"
+dump_date="latest"
 dump_file="dewiktionary-${dump_date}-pages-articles.xml"
 dump_url="https://dumps.wikimedia.org/dewiktionary/${dump_date}/${dump_file}.bz2"
diff --git a/data/noun-query.sh b/data/noun-query.sh
index 06bf202..9526da2 100755
--- a/data/noun-query.sh
+++ b/data/noun-query.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/zsh
-dump_date="20221001"
+dump_date="latest"
 dump_file="dewiktionary-${dump_date}-pages-articles.xml"
 dump_url="https://dumps.wikimedia.org/dewiktionary/${dump_date}/${dump_file}.bz2"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3fcfb51
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+colorama
```