From 2c8b068fe2e5241a4c96d2284eddebeb4b526c05 Mon Sep 17 00:00:00 2001
From: Eddy Pedroni
Date: Sun, 22 Sep 2024 18:12:06 +0200
Subject: Add new parser with tests, venv

---
Review note: the file contents below were revised after the original commit,
so the commit id above and the blob ids on the `index` lines still refer to
the original contents.

 .gitignore             |   2 +
 bootstrap-venv.sh      |   5 +++
 requirements.txt       |   2 +
 src/parser.py          | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parser_unittest.py |  79 +++++++++++++++++++++++++++++++++
 5 files changed, 208 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 bootstrap-venv.sh
 create mode 100644 requirements.txt
 create mode 100644 src/parser.py
 create mode 100644 src/parser_unittest.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..92afa22
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+venv/
diff --git a/bootstrap-venv.sh b/bootstrap-venv.sh
new file mode 100755
index 0000000..b105996
--- /dev/null
+++ b/bootstrap-venv.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -e
+
+python -m venv venv
+./venv/bin/pip install -r requirements.txt
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ad90bc2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+click
+pytest
diff --git a/src/parser.py b/src/parser.py
new file mode 100644
index 0000000..db05089
--- /dev/null
+++ b/src/parser.py
@@ -0,0 +1,120 @@
+import hashlib
+from pathlib import Path
+from collections import namedtuple
+from enum import Enum
+from typing import TextIO, Iterator
+
+Card = namedtuple('Card', ['id', 'front', 'back'])
+
+class ParseError(Exception):
+    """Raised when a .fcard file does not follow the expected format."""
+
+def _getCard(front_lines: list[str], back_lines: list[str]) -> Card:
+    """
+    Build a Card from the raw lines collected for its front and back.
+
+    The ID is derived from a SHA-256 digest of the card text instead of the
+    built-in hash(), which is salted per process for strings and would give
+    the same card a different ID on every run.
+    """
+    front_text = "".join(front_lines).strip()
+    back_text = "".join(back_lines).strip()
+    # The NUL separator keeps ("ab", "c") and ("a", "bc") from colliding
+    payload = f"{front_text}\0{back_text}".encode("utf-8")
+    card_id = int.from_bytes(hashlib.sha256(payload).digest()[:8], "big")
+    return Card(card_id, front_text, back_text)
+
+def _getCards(f: TextIO) -> Iterator[Card]:
+    """
+    Yield one Card per FRONT/BACK pair read from the open text file f.
+
+    Lines before the first FRONT marker are ignored. Raises ParseError if
+    FRONT appears twice in a row, if BACK appears anywhere other than after
+    a FRONT section, or if the file ends before the last card's BACK marker.
+    """
+    class State(Enum):
+        PARSE_FRONT = 1
+        PARSE_BACK = 2
+
+    # Only used in error messages; not every TextIO has a name (e.g. StringIO)
+    name = getattr(f, "name", "<input>")
+    state = None
+    front_lines = []
+    back_lines = []
+
+    for lineno, line in enumerate(f, start=1):
+        match line.strip():
+            case "FRONT":
+                # Edge case: FRONT twice in a row
+                if state == State.PARSE_FRONT:
+                    raise ParseError(f"Unexpected 'FRONT': {name}:{lineno}")
+
+                # Next card is starting, wrap up current one
+                if state == State.PARSE_BACK:
+                    yield _getCard(front_lines, back_lines)
+                    front_lines.clear()
+                    back_lines.clear()
+
+                state = State.PARSE_FRONT
+
+            case "BACK":
+                # Edge case: BACK without FRONT before it
+                if state != State.PARSE_FRONT:
+                    raise ParseError(f"Unexpected 'BACK': {name}:{lineno}")
+
+                state = State.PARSE_BACK
+
+            case _:
+                match state:
+                    case State.PARSE_FRONT:
+                        front_lines.append(line)
+                    case State.PARSE_BACK:
+                        back_lines.append(line)
+                    # Edge case: file does not start with FRONT, skip preamble
+                    case _:
+                        continue
+
+    # Edge case: file ended inside a FRONT section (no BACK marker)
+    if state == State.PARSE_FRONT:
+        raise ParseError(f"Unexpected end of file: {name}")
+
+    # Edge case: file was empty or had no FRONT marker
+    if state is None:
+        return
+
+    yield _getCard(front_lines, back_lines)
+
+def parse(path: Path) -> list[Card]:
+    """
+    Parse a .fcard file and return a list of Card instances.
+
+    If path is not an existing file, a warning is printed and an empty list
+    is returned. A malformed file raises ParseError.
+
+    The parser expects .fcard files in the following format:
+
+    FRONT
+    This is the front of the first card.
+
+    BACK
+    This is the back of the first card.
+
+    FRONT
+    This is another card.
+
+    Multiple lines on the front are allowed.
+
+    BACK
+    Multiple lines on the back?
+
+    Also allowed.
+
+    FRONT
+    ...
+    """
+    if not path.is_file():
+        print(f"[Warning] Not a file: {path}")
+        return []
+
+    with open(path, encoding="utf-8") as f:
+        return list(_getCards(f))
diff --git a/src/parser_unittest.py b/src/parser_unittest.py
new file mode 100644
index 0000000..02638e6
--- /dev/null
+++ b/src/parser_unittest.py
@@ -0,0 +1,79 @@
+import pytest
+import parser
+from pathlib import Path
+
+# Happy path
+def test_validFile(tmp_path):
+    file_contents = """
+
+FRONT
+
+
+Foo
+
+Bar
+
+BACK
+
+
+Fizz
+
+Buzz
+
+
+
+FRONT
+
+Another card
+
+BACK
+
+Another back
+
+
+    """
+    expected = [
+        ("Foo\n\nBar", "Fizz\n\nBuzz"),
+        ("Another card", "Another back")
+    ]
+
+    path = tmp_path / "valid_file.fcard"
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(file_contents)
+
+    cards = parser.parse(path)
+
+    assert expected == [(c.front, c.back) for c in cards]
+
+    # Cards have unique IDs
+    assert len({c.id for c in cards}) == len(cards)
+
+# Edge cases
+def test_emptyFile(tmp_path):
+    path = tmp_path / "empty.fcard"
+    with open(path, "w", encoding="utf-8") as f:
+        f.write("")
+
+    cards = parser.parse(path)
+    assert cards == []
+
+def test_missingFile(tmp_path):
+    cards = parser.parse(tmp_path / "missing_file.fcard")
+    assert cards == []
+
+def checkException(tmp_path, file_contents):
+    path = tmp_path / "invalid_file.fcard"
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(file_contents)
+
+    with pytest.raises(parser.ParseError):
+        parser.parse(path)
+
+def test_doesNotStartWithFront(tmp_path):
+    checkException(tmp_path, "BACK\noops")
+
+def test_frontTwiceInARow(tmp_path):
+    checkException(tmp_path, "FRONT\noops\nFRONT\nbad")
+
+def test_doesNotEndWithBack(tmp_path):
+    checkException(tmp_path, "FRONT\ntest\nBACK\ntest\nFRONT\noops")
-- 
cgit v1.2.3