From e65bef9c22244fc9bcd22a37d335f5f76ba16ff5 Mon Sep 17 00:00:00 2001
From: Eddy Pedroni <epedroni@pm.me>
Date: Thu, 26 Sep 2024 10:02:15 +0200
Subject: Create separate packages for library and CLI

---
 flashcards-project/pyproject.toml                  |  14 +++
 flashcards-project/src/flashcards/__init__.py      |   2 +
 flashcards-project/src/flashcards/card.py          |  12 +++
 flashcards-project/src/flashcards/parser.py        | 108 +++++++++++++++++++++
 flashcards-project/src/flashcards/scheduler.py     |  62 ++++++++++++
 .../src/flashcards/scheduler_brutal.py             |  79 +++++++++++++++
 flashcards-project/src/flashcards/session.py       |  51 ++++++++++
 flashcards-project/src/flashcards/state_json.py    |  23 +++++
 8 files changed, 351 insertions(+)
 create mode 100644 flashcards-project/pyproject.toml
 create mode 100644 flashcards-project/src/flashcards/__init__.py
 create mode 100644 flashcards-project/src/flashcards/card.py
 create mode 100644 flashcards-project/src/flashcards/parser.py
 create mode 100644 flashcards-project/src/flashcards/scheduler.py
 create mode 100644 flashcards-project/src/flashcards/scheduler_brutal.py
 create mode 100644 flashcards-project/src/flashcards/session.py
 create mode 100644 flashcards-project/src/flashcards/state_json.py

(limited to 'flashcards-project')

diff --git a/flashcards-project/pyproject.toml b/flashcards-project/pyproject.toml
new file mode 100644
index 0000000..06a6931
--- /dev/null
+++ b/flashcards-project/pyproject.toml
@@ -0,0 +1,14 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "flashcards"
+authors = [
+    {name = "Eddy Pedroni", email = "epedroni@pm.me"},
+]
+description = "A library for memorising information with flashcards"
+requires-python = ">=3.12"
+dependencies = [
+]
+dynamic = ["version"]
diff --git a/flashcards-project/src/flashcards/__init__.py b/flashcards-project/src/flashcards/__init__.py
new file mode 100644
index 0000000..38c9936
--- /dev/null
+++ b/flashcards-project/src/flashcards/__init__.py
@@ -0,0 +1,2 @@
+from .session import Session
+from .scheduler import SCHEDULERS
diff --git a/flashcards-project/src/flashcards/card.py b/flashcards-project/src/flashcards/card.py
new file mode 100644
index 0000000..3278343
--- /dev/null
+++ b/flashcards-project/src/flashcards/card.py
@@ -0,0 +1,12 @@
+"""
+Defines a struct representing a single card. The struct takes the form:
+
+(front, back)
+"""
+from collections import namedtuple
+from hashlib import md5
+
+Card = namedtuple('Card', ['front', 'back'])
+
+def getId(card: Card) -> str:  # hex MD5 digest of the card's text, used as its ID
+    return md5((card.front + card.back).encode("utf-8")).hexdigest()  # NOTE: front and back are joined without a separator
diff --git a/flashcards-project/src/flashcards/parser.py b/flashcards-project/src/flashcards/parser.py
new file mode 100644
index 0000000..38abdcc
--- /dev/null
+++ b/flashcards-project/src/flashcards/parser.py
@@ -0,0 +1,108 @@
+"""
+Load .fcard files into dictionaries.
+
+The parser expects .fcard files in the following format:
+
+FRONT
+This is the front of the first card.
+
+BACK
+This is the back of the first card.
+
+FRONT
+This is another card.
+
+Multiple lines on the front are allowed.
+
+BACK
+Multiple lines on the back?
+
+Also allowed.
+
+FRONT
+...
+
+The cards are represented in dictionary entries of the form:
+
+id: card.Card
+"""
+from pathlib import Path
+from enum import Enum
+from typing import TextIO, Iterator
+
+from .card import Card, getId
+
+def _getCard(front_lines: list[str], back_lines: list[str]) -> tuple[str, Card]:
+    # Concatenate the collected fragments of each side and trim surrounding whitespace
+    sides = ("".join(fragments).strip() for fragments in (front_lines, back_lines))
+    parsed = Card(*sides)
+    # The ID is derived from the card's own text
+    return getId(parsed), parsed
+
+def _getCards(f: TextIO) -> Iterator[tuple[str, Card]]:
+    """Yield an (id, Card) pair per FRONT/BACK section of f; raise Exception on malformed input."""
+    class State(Enum):
+        PARSE_FRONT = 1  # no trailing comma, which would turn the value into a tuple
+        PARSE_BACK = 2
+
+    state = None
+    front_lines, back_lines = [], []
+    source = getattr(f, "name", f)  # report the file name rather than the stream's repr
+
+    for i, line in enumerate(f, start=1):  # 1-based, as editors number lines
+        match line.strip():
+            case "FRONT":
+                # Edge case: FRONT twice in a row
+                if state == State.PARSE_FRONT:
+                    raise Exception(f"Unexpected 'FRONT': {source}:{i}")
+
+                # Next card is starting, wrap up current one
+                if state == State.PARSE_BACK:
+                    yield _getCard(front_lines, back_lines)
+                    front_lines.clear()
+                    back_lines.clear()
+
+                state = State.PARSE_FRONT
+
+            case "BACK":
+                # Edge case: BACK without FRONT before it
+                if state != State.PARSE_FRONT:
+                    raise Exception(f"Unexpected 'BACK': {source}:{i}")
+
+                state = State.PARSE_BACK
+            case _:
+                match state:
+                    case State.PARSE_FRONT:
+                        front_lines.append(line)  # append the whole line, not its characters
+                    case State.PARSE_BACK:
+                        back_lines.append(line)
+                    # Edge case: file does not start with FRONT, flush preamble
+                    case _:
+                        continue
+
+    # Edge case: file did not end with contents of BACK
+    if state == State.PARSE_FRONT:
+        raise Exception("Unexpected end of file")
+
+    # Edge case: file was empty
+    if state is None:
+        return
+
+    yield _getCard(front_lines, back_lines)
+
+def parseFile(path: str) -> dict[str, Card]:
+    """
+    Parse a .fcard file (read as UTF-8) and return a dictionary of Card instances indexed by ID.
+    """
+    with open(path, "r", encoding="utf-8") as f:  # explicit encoding keeps card IDs identical across platforms
+        return dict(_getCards(f))
+
+def parseFiles(paths: list[str]) -> dict[str, Card]:
+    """
+    Parse each .fcard file in order and merge the results into one dictionary of Card instances indexed by ID.
+    """
+    merged = {}
+    for path in paths:
+        merged.update(parseFile(path))
+    return merged
+
diff --git a/flashcards-project/src/flashcards/scheduler.py b/flashcards-project/src/flashcards/scheduler.py
new file mode 100644
index 0000000..a9d9470
--- /dev/null
+++ b/flashcards-project/src/flashcards/scheduler.py
@@ -0,0 +1,62 @@
+from typing import Protocol
+from abc import abstractmethod
+
+from .card import Card
+
+class Scheduler(Protocol):
+    """
+    Schedulers must implement this interface to be usable in a session.
+    """
+    @abstractmethod
+    def __init__(self, cards: dict[str, Card], state: dict):
+        """
+        Create a new instance of the scheduler from a dictionary of
+        Cards indexed by ID and a scheduler-specific state as a dict.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def practice(self, size: int) -> list[str]:
+        """
+        Return a list of card IDs of the requested size, if possible.
+        This list is intended for practice.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def test(self, size: int) -> list[str]:
+        """
+        Return a list of card IDs of the requested size, if possible.
+        This list is intended to test the player's knowledge.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def update(self, results: dict[str, int]) -> None:
+        """
+        Takes a dictionary of card IDs and integers, where the integer
+        is 0 if the player failed to guess the other side of the card,
+        or 1 if the player succeeded.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def getState(self) -> dict:
+        """
+        Return the scheduler's state for storage.
+        """
+        raise NotImplementedError
+
+SCHEDULERS = ["brutal"]
+
+def getSchedulerClass(name: str) -> type[Scheduler]:
+    """
+    Returns the class object (not an instance) for the requested scheduler; raises Exception if the name is unknown.
+    """
+    match name:
+        case "brutal":
+            from .scheduler_brutal import SchedulerBrutal
+            return SchedulerBrutal
+        case _:
+            raise Exception(f"Unknown scheduler: {name}")
+
diff --git a/flashcards-project/src/flashcards/scheduler_brutal.py b/flashcards-project/src/flashcards/scheduler_brutal.py
new file mode 100644
index 0000000..ebbc0ff
--- /dev/null
+++ b/flashcards-project/src/flashcards/scheduler_brutal.py
@@ -0,0 +1,79 @@
+from random import shuffle
+
+from .scheduler import Scheduler
+from .card import Card
+
+HISTORY_DEPTH = 8
+
+class SchedulerBrutal(Scheduler):
+    """
+    Scheduler that keeps, for every card, a fixed-length window of recent
+    results, with None marking updates in which the card was not shown.
+
+    Cards with the lowest exposure are scheduled first, ties being broken by
+    the lowest consolidation, which means the player will often see totally
+    new cards in test sessions.
+    """
+    def __init__(self, cards: dict[str, Card], state: dict):
+        self._cards = cards
+        self._state = {}
+
+        # Rebuild the state so that it covers exactly the current card collection
+        for card_id in self._cards:
+            window = state.get(card_id, [None] * HISTORY_DEPTH)
+            missing = HISTORY_DEPTH - len(window)
+            # Normalise windows that were stored with a different history depth
+            if missing < 0:
+                window = window[-HISTORY_DEPTH:]
+            elif missing > 0:
+                window = ([None] * missing) + window
+
+            self._state[card_id] = window
+
+    def practice(self, size: int) -> list[str]:
+        return self._schedule(size)
+
+    def test(self, size: int) -> list[str]:
+        return self._schedule(size)
+
+    def update(self, results: dict[str, int]) -> None:
+        # Slide every window by one: drop the oldest entry, append the new result (None if not shown)
+        self._state = {card_id: window[1:] + [results.get(card_id)]
+                       for card_id, window in self._state.items()}
+
+    def getState(self) -> dict:
+        return self._state
+
+    @staticmethod
+    def _consolidationIndex(history: list, weights: range) -> float:
+        """
+        Weighted average of the non-None results in history, or 0.0 if their total weight is not positive
+        """
+        shown = [(result, weight) for result, weight in zip(history, weights) if result is not None]
+        weight_sum = sum(weight for _, weight in shown)
+        score = sum(result * weight for result, weight in shown)
+        return score / weight_sum if weight_sum > 0 else 0.0
+
+    @staticmethod
+    def _exposureIndex(history: list) -> float:
+        """
+        Sum of the 1-based positions of the non-None entries, so that later entries count more
+        """
+        return sum(position for position, result in enumerate(history, start=1) if result is not None)
+
+    def _schedule(self, size: int) -> list[str]:
+        weights = range(10, 10 + HISTORY_DEPTH)
+
+        def consolidation(card_id: str) -> float:
+            return SchedulerBrutal._consolidationIndex(self._state[card_id], weights)
+
+        def exposure(card_id: str) -> float:
+            return SchedulerBrutal._exposureIndex(self._state[card_id])
+
+        # Two stable sorts: exposure ends up the primary key, consolidation breaks ties
+        by_consolidation = sorted(self._cards, key=consolidation)
+        ranked = sorted(by_consolidation, key=exposure)
+        # Keep the least exposed and least consolidated cards, in random order
+        selection = ranked[0:size]
+        shuffle(selection)
+        return selection
diff --git a/flashcards-project/src/flashcards/session.py b/flashcards-project/src/flashcards/session.py
new file mode 100644
index 0000000..da444dd
--- /dev/null
+++ b/flashcards-project/src/flashcards/session.py
@@ -0,0 +1,51 @@
+from typing import Iterator, Callable
+
+from .card import Card
+from .scheduler import getSchedulerClass
+from .parser import parseFiles
+from .state_json import load, save
+
+class Session:
+    """
+    Represents a play session. During a session, multiple practice and test runs
+    can be made with the same scheduler.
+    """
+    def __init__(self, scheduler_name: str, card_files: list[str], state_file: str):
+        self._cards = parseFiles(card_files)
+        self._state_file = state_file
+        self._scheduler = getSchedulerClass(scheduler_name)(self._cards, load(state_file))
+
+    def practice(self, size: int) -> Iterator[Card]:
+        """
+        Yields cards for a practice run of the requested size.
+
+        Practice runs do not affect the scheduler state.
+        """
+        ids = self._scheduler.practice(size)
+        for id in ids:
+            yield self._cards[id]
+
+    def test(self, size: int) -> Iterator[tuple[Card, Callable[[bool], None]]]:
+        """
+        Yields cards for a test run of the requested size.
+
+        A function is yielded with each card that takes a single boolean argument.
+        The UI is expected to call the function for each card to indicate whether
+        the user correctly guessed the card (True) or not (False).
+
+        Multiple subsequent calls to the same function overwrite past results.
+
+        When the generator is exhausted, the scheduler state is updated with the
+        collected results and saved to the state file.
+        """
+        ids = self._scheduler.test(size)
+        results = {}
+
+        for card_id in ids:
+            def result(correct: bool, _id: str = card_id) -> None:
+                results[_id] = int(correct)  # _id is bound per card, not late-bound
+            yield self._cards[card_id], result
+
+        self._scheduler.update(results)
+        save(self._state_file, self._scheduler.getState())
+
diff --git a/flashcards-project/src/flashcards/state_json.py b/flashcards-project/src/flashcards/state_json.py
new file mode 100644
index 0000000..673d904
--- /dev/null
+++ b/flashcards-project/src/flashcards/state_json.py
@@ -0,0 +1,23 @@
+"""
+Helper functions to store scheduler state as json
+"""
+import json
+from pathlib import Path
+
+def save(file: str, state: dict) -> None:
+    """
+    Serialise the state dictionary as JSON into the given file, replacing its contents
+    """
+    with open(file, "w") as out:
+        json.dump(state, out)
+
+def load(file: str) -> dict:
+    """
+    Load the state from the specified file. Returns an empty dictionary
+    silently if the file cannot be read or does not contain valid JSON.
+    """
+    try:
+        with open(file, "r") as f:
+            return json.load(f)
+    except (OSError, ValueError):  # ValueError covers json.JSONDecodeError and text decoding errors
+        return {}
-- 
cgit v1.2.3