summaryrefslogtreecommitdiffstats
path: root/flashcards-project/src/flashcards/parser.py
diff options
context:
space:
mode:
authorEddy Pedroni <epedroni@pm.me>2024-09-26 10:02:15 +0200
committerEddy Pedroni <epedroni@pm.me>2024-09-26 10:02:15 +0200
commite65bef9c22244fc9bcd22a37d335f5f76ba16ff5 (patch)
tree9af6fa41bfee6fc03c3ab30cf1b23a82bdf8f2e7 /flashcards-project/src/flashcards/parser.py
parentce76b00d7b2ccac6843732f92becfabb753864a0 (diff)
Create separate packages for library and CLI
Diffstat (limited to 'flashcards-project/src/flashcards/parser.py')
-rw-r--r--flashcards-project/src/flashcards/parser.py108
1 files changed, 108 insertions, 0 deletions
diff --git a/flashcards-project/src/flashcards/parser.py b/flashcards-project/src/flashcards/parser.py
new file mode 100644
index 0000000..38abdcc
--- /dev/null
+++ b/flashcards-project/src/flashcards/parser.py
@@ -0,0 +1,108 @@
+"""
+Load .fcard files into dictionaries.
+
+The parser expects .fcard files in the following format:
+
+FRONT
+This is the front of the first card.
+
+BACK
+This is the back of the first card.
+
+FRONT
+This is another card.
+
+Multiple lines on the front are allowed.
+
+BACK
+Multiple lines on the back?
+
+Also allowed.
+
+FRONT
+...
+
+The cards are represented in dictionary entries of the form:
+
+id: card.Card
+"""
+from pathlib import Path
+from enum import Enum
+from typing import TextIO, Iterator
+
+from .card import Card, getId
+
def _getCard(front_lines: list[str], back_lines: list[str]) -> tuple[str, Card]:
    """
    Build a Card from the text fragments collected for each of its sides.

    The fragments of each side are concatenated without a separator and the
    result is stripped of leading and trailing whitespace before the Card is
    constructed.

    Returns a tuple (ID, Card), where the ID is whatever getId computes for
    the new card.
    """
    sides = ["".join(fragments).strip() for fragments in (front_lines, back_lines)]
    card = Card(*sides)
    return getId(card), card
+
+def _getCards(f: TextIO) -> Iterator[tuple[str, Card]]:
+ class State(Enum):
+ PARSE_FRONT = 1,
+ PARSE_BACK = 2
+
+ state = None
+ front_lines = []
+ back_lines = []
+
+ for i, line in enumerate(f):
+ match line.strip():
+ case "FRONT":
+ # Edge case: FRONT twice in a row
+ if state == State.PARSE_FRONT:
+ raise Exception(f"Unexpected 'FRONT': {f}:{i}")
+
+ # Next card is starting, wrap up current one
+ if state == State.PARSE_BACK:
+ yield _getCard(front_lines, back_lines)
+ front_lines.clear()
+ back_lines.clear()
+
+ state = State.PARSE_FRONT
+
+ case "BACK":
+ # Edge case: BACK without FRONT before it
+ if state != State.PARSE_FRONT:
+ raise Exception(f"Unexpected 'BACK': {f}:{i}")
+
+ state = State.PARSE_BACK
+
+ case _:
+ match state:
+ case State.PARSE_FRONT:
+ front_lines += line
+ case State.PARSE_BACK:
+ back_lines += line
+ # Edge case: file does not start with FRONT, flush preamble
+ case _:
+ continue
+
+ # Edge case: file did not end with contents of BACK
+ if state == State.PARSE_FRONT:
+ raise Exception(f"Unexpected end of file")
+
+ # Edge case: file was empty
+ if state is None:
+ return
+
+ yield _getCard(front_lines, back_lines)
+
def parseFile(path: str) -> dict[str, Card]:
    """
    Parse a .fcard file and return a dictionary of Card instances indexed by ID.

    The file is always decoded as UTF-8 so that the same deck is read the same
    way regardless of the platform's locale encoding.

    If two cards in the file produce the same ID, the later one replaces the
    earlier one.
    """
    with open(path, "r", encoding="utf-8") as f:
        # _getCards is lazy: the dictionary has to be built while the file is
        # still open
        return dict(_getCards(f))
+
def parseFiles(paths: list[str]) -> dict[str, Card]:
    """
    Parse several .fcard files and merge their cards into one dictionary
    indexed by ID.

    Files are processed in the order given; when the same ID occurs in more
    than one file, the card from the later file wins. An empty list of paths
    yields an empty dictionary.
    """
    merged = {}
    for path in paths:
        merged.update(parseFile(path))
    return merged
+