From e65bef9c22244fc9bcd22a37d335f5f76ba16ff5 Mon Sep 17 00:00:00 2001 From: Eddy Pedroni Date: Thu, 26 Sep 2024 10:02:15 +0200 Subject: Create separate packages for library and CLI --- flashcards-project/pyproject.toml | 14 +++ flashcards-project/src/flashcards/__init__.py | 2 + flashcards-project/src/flashcards/card.py | 12 +++ flashcards-project/src/flashcards/parser.py | 108 +++++++++++++++++++++ flashcards-project/src/flashcards/scheduler.py | 62 ++++++++++++ .../src/flashcards/scheduler_brutal.py | 79 +++++++++++++++ flashcards-project/src/flashcards/session.py | 51 ++++++++++ flashcards-project/src/flashcards/state_json.py | 23 +++++ 8 files changed, 351 insertions(+) create mode 100644 flashcards-project/pyproject.toml create mode 100644 flashcards-project/src/flashcards/__init__.py create mode 100644 flashcards-project/src/flashcards/card.py create mode 100644 flashcards-project/src/flashcards/parser.py create mode 100644 flashcards-project/src/flashcards/scheduler.py create mode 100644 flashcards-project/src/flashcards/scheduler_brutal.py create mode 100644 flashcards-project/src/flashcards/session.py create mode 100644 flashcards-project/src/flashcards/state_json.py (limited to 'flashcards-project') diff --git a/flashcards-project/pyproject.toml b/flashcards-project/pyproject.toml new file mode 100644 index 0000000..06a6931 --- /dev/null +++ b/flashcards-project/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "flashcards" +authors = [ + {name = "Eddy Pedroni", email = "epedroni@pm.me"}, +] +description = "A library for memorising information with flashcards" +requires-python = ">=3.12" +dependencies = [ +] +dynamic = ["version"] diff --git a/flashcards-project/src/flashcards/__init__.py b/flashcards-project/src/flashcards/__init__.py new file mode 100644 index 0000000..38c9936 --- /dev/null +++ b/flashcards-project/src/flashcards/__init__.py @@ -0,0 +1,2 @@ +from 
# Field order matters: cards are built positionally as Card(front, back).
Card = namedtuple("Card", "front back")


def getId(card: Card) -> str:
    """
    Return the identifier of a card.

    The identifier is the hexadecimal MD5 digest of the front text directly
    followed by the back text, encoded as UTF-8.
    """
    text = card.front + card.back
    digest = md5(text.encode("utf-8"))
    return digest.hexdigest()
+ +The cards are represented in dictionary entries of the form: + +id: card.Card +""" +from pathlib import Path +from enum import Enum +from typing import TextIO, Iterator + +from .card import Card, getId + +def _getCard(front_lines: list[str], back_lines: list[str]) -> tuple[str, Card]: + front_text = "".join(front_lines).strip() + back_text = "".join(back_lines).strip() + card = Card(front_text, back_text) + id = getId(card) + return id, card + +def _getCards(f: TextIO) -> Iterator[tuple[str, Card]]: + class State(Enum): + PARSE_FRONT = 1, + PARSE_BACK = 2 + + state = None + front_lines = [] + back_lines = [] + + for i, line in enumerate(f): + match line.strip(): + case "FRONT": + # Edge case: FRONT twice in a row + if state == State.PARSE_FRONT: + raise Exception(f"Unexpected 'FRONT': {f}:{i}") + + # Next card is starting, wrap up current one + if state == State.PARSE_BACK: + yield _getCard(front_lines, back_lines) + front_lines.clear() + back_lines.clear() + + state = State.PARSE_FRONT + + case "BACK": + # Edge case: BACK without FRONT before it + if state != State.PARSE_FRONT: + raise Exception(f"Unexpected 'BACK': {f}:{i}") + + state = State.PARSE_BACK + + case _: + match state: + case State.PARSE_FRONT: + front_lines += line + case State.PARSE_BACK: + back_lines += line + # Edge case: file does not start with FRONT, flush preamble + case _: + continue + + # Edge case: file did not end with contents of BACK + if state == State.PARSE_FRONT: + raise Exception(f"Unexpected end of file") + + # Edge case: file was empty + if state is None: + return + + yield _getCard(front_lines, back_lines) + +def parseFile(path: str) -> dict[str, Card]: + """ + Parse a .fcard file and return a dictionary of Card instances indexed by ID. + """ + with open(path, "r") as f: + return {id : card for id, card in _getCards(f)} + +def parseFiles(paths: list[str]) -> dict[str, Card]: + """ + Parse a list of .fcard files and return a dictionary of Card instances indexed by ID. 
+ """ + cards = {} + for p in paths: + cards |= parseFile(p) + return cards + diff --git a/flashcards-project/src/flashcards/scheduler.py b/flashcards-project/src/flashcards/scheduler.py new file mode 100644 index 0000000..a9d9470 --- /dev/null +++ b/flashcards-project/src/flashcards/scheduler.py @@ -0,0 +1,62 @@ +from typing import Protocol +from abc import abstractmethod + +from .card import Card + +class Scheduler(Protocol): + """ + Schedulers must implement this interface to be usable in a session. + """ + @abstractmethod + def __init__(self, cards: dict[str, Card], state: dict): + """ + Create a new instance of the scheduler from a dictionary of + Cards indexed by ID and a scheduler-specific state as a dict. + """ + raise NotImplementedError + + @abstractmethod + def practice(self, size: int) -> list[str]: + """ + Return a list of card IDs of the requested size, if possible. + This list is intended for practice. + """ + raise NotImplementedError + + @abstractmethod + def test(self, size: int) -> list[str]: + """ + Return a list of card IDs of the requested size, if possible. + This list is intended to test the player's knowledge. + """ + raise NotImplementedError + + @abstractmethod + def update(self, results: dict[str, int]) -> None: + """ + Takes a dictionary of card IDs and integers, where the integer + is 0 if the player failed to guess the other side of the card, + of 1 if the player succeeded. + """ + raise NotImplementedError + + @abstractmethod + def getState(self) -> dict: + """ + Return the scheduler's state for storage. + """ + raise NotImplementedError + +SCHEDULERS = ["brutal"] + +def getSchedulerClass(name: str) -> Scheduler: + """ + Returns the class object for the requested scheduler, if one exists. 
+ """ + match name: + case "brutal": + from .scheduler_brutal import SchedulerBrutal + return SchedulerBrutal + case _: + raise Exception(f"Unknown scheduler: {name}") + diff --git a/flashcards-project/src/flashcards/scheduler_brutal.py b/flashcards-project/src/flashcards/scheduler_brutal.py new file mode 100644 index 0000000..ebbc0ff --- /dev/null +++ b/flashcards-project/src/flashcards/scheduler_brutal.py @@ -0,0 +1,79 @@ +from random import shuffle + +from .scheduler import Scheduler +from .card import Card + +HISTORY_DEPTH = 8 + +class SchedulerBrutal(Scheduler): + """ + The brutal scheduler tracks how well the player has consolidated each card + and also how often the card has been shown. + + Using this information, it prioritizes cards that have been shown less + frequently and recently, which means the player will often see totally new + cards in test sessions. + """ + def __init__(self, cards: dict[str, Card], state: dict): + self._cards = cards + self._state = {} + + # Synchronise state with current card collection + for id, card in self._cards.items(): + history = state.get(id, [None] * HISTORY_DEPTH) + + # Adjust history if depth has changed + if len(history) > HISTORY_DEPTH: + history = history[-HISTORY_DEPTH:] + elif len(history) < HISTORY_DEPTH: + history = ([None] * (HISTORY_DEPTH - len(history))) + history + + self._state[id] = history + + def practice(self, size: int) -> list[str]: + return self._schedule(size) + + def test(self, size: int) -> list[str]: + return self._schedule(size) + + def update(self, results: dict[str, int]) -> None: + # Add card result to sliding window, or None if card was not shown + self._state = {id: history[1:] + [results.get(id, None)] + for id, history in self._state.items()} + + def getState(self) -> dict: + return self._state + + @staticmethod + def _consolidationIndex(history: list, weights: range) -> float: + """ + Consolidation index is a measure of how well the player has guessed the card recently + """ + 
class Session:
    """
    Represents a play session. During a session, multiple practice and test runs
    can be made with the same scheduler.
    """
    def __init__(self, scheduler_name: str, card_files: list[str], state_file: str):
        """
        Load the cards from card_files, then build the scheduler registered
        under scheduler_name from those cards and the state stored in
        state_file.
        """
        self._cards = parseFiles(card_files)
        self._state_file = state_file
        scheduler_class = getSchedulerClass(scheduler_name)
        self._scheduler = scheduler_class(self._cards, load(state_file))

    def practice(self, size: int) -> Iterator[Card]:
        """
        Yields cards for a practice run of the requested size.

        Practice runs do not affect the scheduler state.
        """
        for card_id in self._scheduler.practice(size):
            yield self._cards[card_id]

    def test(self, size: int) -> Iterator[tuple[Card, Callable]]:
        """
        Yields cards for a test run of the requested size.

        A function is yielded with each card that takes a single boolean
        argument. The UI is expected to call the function for each card to
        indicate whether the user correctly guessed the card (True) or not
        (False).

        Multiple subsequent calls to the same function overwrite past results.

        The scheduler state is updated with the collected results and saved
        only once the iterator has been fully consumed.
        """
        # Ask for a *test* selection; the original requested a practice one.
        card_ids = self._scheduler.test(size)
        results: dict[str, int] = {}

        def recorder(card_id: str) -> Callable[[bool], None]:
            # Bind card_id per card. A closure over the loop variable would
            # make every callback write to whichever card is current when it
            # is finally called.
            def record(correct: bool) -> None:
                results[card_id] = int(correct)
            return record

        for card_id in card_ids:
            yield self._cards[card_id], recorder(card_id)

        self._scheduler.update(results)
        save(self._state_file, self._scheduler.getState())