diff options
Diffstat (limited to 'src/parser.py')
-rw-r--r-- | src/parser.py | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/src/parser.py b/src/parser.py new file mode 100644 index 0000000..db05089 --- /dev/null +++ b/src/parser.py @@ -0,0 +1,95 @@ +from pathlib import Path +from collections import namedtuple +from enum import Enum +from typing import TextIO, Iterator + +Card = namedtuple('Card', ['id', 'front', 'back']) + +def _getCard(front_lines: list[str], back_lines: list[str]) -> Card: + front_text = "".join(front_lines).strip() + back_text = "".join(back_lines).strip() + id = hash(front_text + back_text) + return Card(id, front_text, back_text) + +def _getCards(f: TextIO) -> Iterator[Card]: + class State(Enum): + PARSE_FRONT = 1, + PARSE_BACK = 2 + + state = None + front_lines = [] + back_lines = [] + + for i, line in enumerate(f): + match line.strip(): + case "FRONT": + # Edge case: FRONT twice in a row + if state == State.PARSE_FRONT: + raise Exception(f"Unexpected 'FRONT': {f}:{i}") + + # Next card is starting, wrap up current one + if state == State.PARSE_BACK: + yield _getCard(front_lines, back_lines) + front_lines.clear() + back_lines.clear() + + state = State.PARSE_FRONT + + case "BACK": + # Edge case: BACK without FRONT before it + if state != State.PARSE_FRONT: + raise Exception(f"Unexpected 'BACK': {f}:{i}") + + state = State.PARSE_BACK + + case _: + match state: + case State.PARSE_FRONT: + front_lines += line + case State.PARSE_BACK: + back_lines += line + # Edge case: file does not start with FRONT, flush preamble + case _: + continue + + # Edge case: file did not end with contents of BACK + if state == State.PARSE_FRONT: + raise Exception(f"Unexpected end of file") + + # Edge case: file was empty + if state is None: + return + + yield _getCard(front_lines, back_lines) + +def parse(path: Path) -> list[Card]: + """ + Parse a .fcard file and return a list of Card instances. + + The parser expects .fcard files in the following format: + + FRONT + This is the front of the first card. + + BACK + This is the back of the first card. + + FRONT + This is another card. + + Multiple lines on the front are allowed. + + BACK + Multiple lines on the back? + + Also allowed. + + FRONT + ... + """ + if not path.is_file(): + print(f"[Warning] Not a file: {path}") + return [] + + with open(path, "r") as f: + return [card for card in _getCards(f)] |