summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/parser.py 95
-rw-r--r-- src/parser_unittest.py 79
2 files changed, 174 insertions, 0 deletions
diff --git a/src/parser.py b/src/parser.py
new file mode 100644
index 0000000..db05089
--- /dev/null
+++ b/src/parser.py
@@ -0,0 +1,95 @@
import hashlib
from collections import namedtuple
from enum import Enum
from pathlib import Path
from typing import Iterator, TextIO
+
# Immutable record for one flashcard: a content-derived id plus the text of
# both sides.
Card = namedtuple('Card', ['id', 'front', 'back'])

def _getCard(front_lines: list[str], back_lines: list[str]) -> Card:
    """Build a Card from the raw text collected for each side.

    Args:
        front_lines: Text fragments making up the front, in file order.
        back_lines: Text fragments making up the back, in file order.

    Returns:
        A Card whose ``front`` and ``back`` are the joined fragments with
        surrounding whitespace stripped, and whose ``id`` is a signed 64-bit
        integer derived from that text.
    """
    front_text = "".join(front_lines).strip()
    back_text = "".join(back_lines).strip()

    # The built-in hash() of a str is salted per interpreter process, so it
    # would give the same card a different id on every run. A SHA-256 digest
    # depends only on the text. The NUL separator keeps ("ab", "c") and
    # ("a", "bc") from hashing the same concatenation.
    payload = f"{front_text}\x00{back_text}".encode("utf-8", "surrogatepass")
    digest = hashlib.sha256(payload).digest()
    card_id = int.from_bytes(digest[:8], "big", signed=True)
    return Card(card_id, front_text, back_text)
+
+def _getCards(f: TextIO) -> Iterator[Card]:
+ class State(Enum):
+ PARSE_FRONT = 1,
+ PARSE_BACK = 2
+
+ state = None
+ front_lines = []
+ back_lines = []
+
+ for i, line in enumerate(f):
+ match line.strip():
+ case "FRONT":
+ # Edge case: FRONT twice in a row
+ if state == State.PARSE_FRONT:
+ raise Exception(f"Unexpected 'FRONT': {f}:{i}")
+
+ # Next card is starting, wrap up current one
+ if state == State.PARSE_BACK:
+ yield _getCard(front_lines, back_lines)
+ front_lines.clear()
+ back_lines.clear()
+
+ state = State.PARSE_FRONT
+
+ case "BACK":
+ # Edge case: BACK without FRONT before it
+ if state != State.PARSE_FRONT:
+ raise Exception(f"Unexpected 'BACK': {f}:{i}")
+
+ state = State.PARSE_BACK
+
+ case _:
+ match state:
+ case State.PARSE_FRONT:
+ front_lines += line
+ case State.PARSE_BACK:
+ back_lines += line
+ # Edge case: file does not start with FRONT, flush preamble
+ case _:
+ continue
+
+ # Edge case: file did not end with contents of BACK
+ if state == State.PARSE_FRONT:
+ raise Exception(f"Unexpected end of file")
+
+ # Edge case: file was empty
+ if state is None:
+ return
+
+ yield _getCard(front_lines, back_lines)
+
def parse(path: Path) -> list[Card]:
    """
    Parse a .fcard file and return a list of Card instances.

    The parser expects .fcard files in the following format:

        FRONT
        This is the front of the first card.

        BACK
        This is the back of the first card.

        FRONT
        This is another card.

        Multiple lines on the front are allowed.

        BACK
        Multiple lines on the back?

        Also allowed.

        FRONT
        ...

    Args:
        path: Location of the .fcard file.

    Returns:
        The cards in file order. If *path* is not an existing regular file,
        a warning is printed and an empty list is returned.

    Raises:
        Exception: Propagated from the section parser when the file's
            FRONT/BACK markers are malformed.
    """
    if not path.is_file():
        print(f"[Warning] Not a file: {path}")
        return []

    # Pin the encoding: without it open() falls back to the locale's
    # preferred encoding, so the same file could decode differently (or fail)
    # from one machine to the next.
    with open(path, "r", encoding="utf-8") as f:
        return list(_getCards(f))
diff --git a/src/parser_unittest.py b/src/parser_unittest.py
new file mode 100644
index 0000000..02638e6
--- /dev/null
+++ b/src/parser_unittest.py
@@ -0,0 +1,79 @@
+import pytest
+import parser
+from pathlib import Path
+
+# Happy path
def test_validFile(tmp_path):
    """A well-formed file yields its cards in order, stripped, with distinct ids."""
    file_contents = """

FRONT


Foo

Bar

BACK


Fizz

Buzz



FRONT

Another card

BACK

Another back


    """
    expected = [
        ("Foo\n\nBar", "Fizz\n\nBuzz"),
        ("Another card", "Another back"),
    ]

    card_file = tmp_path / "valid_file.fcard"
    card_file.write_text(file_contents)

    parsed = parser.parse(card_file)
    sides = [(card.front, card.back) for card in parsed]
    assert expected == sides

    # Cards have unique IDs
    distinct_ids = {card.id for card in parsed}
    assert len(distinct_ids) == len(parsed)
+
+# Edge cases
def test_emptyFile(tmp_path):
    """An empty file parses to an empty card list."""
    empty_file = tmp_path / "empty.fcard"
    empty_file.write_text("")

    assert parser.parse(empty_file) == []
+
def test_missingFile(tmp_path):
    """A path that does not exist yields no cards."""
    missing = tmp_path / "missing_file.fcard"
    assert parser.parse(missing) == []
+
def checkException(tmp_path, file_contents):
    """Write *file_contents* to a .fcard file and assert that parsing it raises.

    Args:
        tmp_path: Directory in which to create the file (callers pass
            pytest's ``tmp_path`` fixture).
        file_contents: Text of a deliberately malformed flashcard file.
    """
    path = tmp_path / "invalid_file.fcard"
    with open(path, "w") as f:
        f.write(file_contents)

    # Only the fact that parsing fails is checked, not the exact exception
    # type or message.
    with pytest.raises(Exception):
        parser.parse(path)
+
def test_doesNotStartWithFront(tmp_path):
    """A BACK marker with no FRONT before it is rejected."""
    contents = "BACK\noops"
    checkException(tmp_path, contents)
+
def test_frontTwiceInARow(tmp_path):
    """Two FRONT markers without a BACK between them are rejected."""
    contents = "FRONT\noops\nFRONT\nbad"
    checkException(tmp_path, contents)
+
def test_doesNotEndWithBack(tmp_path):
    """A file whose last card has a FRONT but no BACK is rejected."""
    contents = "FRONT\ntest\nBACK\ntest\nFRONT\noops"
    checkException(tmp_path, contents)