diff options
author | Eddy Pedroni <epedroni@pm.me> | 2024-09-26 10:02:15 +0200 |
---|---|---|
committer | Eddy Pedroni <epedroni@pm.me> | 2024-09-26 10:02:15 +0200 |
commit | e65bef9c22244fc9bcd22a37d335f5f76ba16ff5 (patch) | |
tree | 9af6fa41bfee6fc03c3ab30cf1b23a82bdf8f2e7 /flashcards-project/src/flashcards/parser.py | |
parent | ce76b00d7b2ccac6843732f92becfabb753864a0 (diff) |
Create separate packages for library and CLI
Diffstat (limited to 'flashcards-project/src/flashcards/parser.py')
-rw-r--r-- | flashcards-project/src/flashcards/parser.py | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/flashcards-project/src/flashcards/parser.py b/flashcards-project/src/flashcards/parser.py new file mode 100644 index 0000000..38abdcc --- /dev/null +++ b/flashcards-project/src/flashcards/parser.py @@ -0,0 +1,108 @@ +""" +Load .fcard files into dictionaries. + +The parser expects .fcard files in the following format: + +FRONT +This is the front of the first card. + +BACK +This is the back of the first card. + +FRONT +This is another card. + +Multiple lines on the front are allowed. + +BACK +Multiple lines on the back? + +Also allowed. + +FRONT +... + +The cards are represented in dictionary entries of the form: + +id: card.Card +""" +from pathlib import Path +from enum import Enum +from typing import TextIO, Iterator + +from .card import Card, getId + +def _getCard(front_lines: list[str], back_lines: list[str]) -> tuple[str, Card]: + front_text = "".join(front_lines).strip() + back_text = "".join(back_lines).strip() + card = Card(front_text, back_text) + id = getId(card) + return id, card + +def _getCards(f: TextIO) -> Iterator[tuple[str, Card]]: + class State(Enum): + PARSE_FRONT = 1, + PARSE_BACK = 2 + + state = None + front_lines = [] + back_lines = [] + + for i, line in enumerate(f): + match line.strip(): + case "FRONT": + # Edge case: FRONT twice in a row + if state == State.PARSE_FRONT: + raise Exception(f"Unexpected 'FRONT': {f}:{i}") + + # Next card is starting, wrap up current one + if state == State.PARSE_BACK: + yield _getCard(front_lines, back_lines) + front_lines.clear() + back_lines.clear() + + state = State.PARSE_FRONT + + case "BACK": + # Edge case: BACK without FRONT before it + if state != State.PARSE_FRONT: + raise Exception(f"Unexpected 'BACK': {f}:{i}") + + state = State.PARSE_BACK + + case _: + match state: + case State.PARSE_FRONT: + front_lines += line + case State.PARSE_BACK: + back_lines += line + # Edge case: file does not start with FRONT, flush preamble + case _: + continue + + # Edge case: file did not end with contents of BACK + if state == State.PARSE_FRONT: + raise Exception(f"Unexpected end of file") + + # Edge case: file was empty + if state is None: + return + + yield _getCard(front_lines, back_lines) + +def parseFile(path: str) -> dict[str, Card]: + """ + Parse a .fcard file and return a dictionary of Card instances indexed by ID. + """ + with open(path, "r") as f: + return {id : card for id, card in _getCards(f)} + +def parseFiles(paths: list[str]) -> dict[str, Card]: + """ + Parse a list of .fcard files and return a dictionary of Card instances indexed by ID. + """ + cards = {} + for p in paths: + cards |= parseFile(p) + return cards + |