sshworkshop/namepicker/xkcd_passwords.py

#!/usr/bin/env python3
"""
XKCD “correct-horse-battery-staple” password generator.

Features
--------
* Uses the google-10000-english word list by default; the official EFF large
  word list (7776 words, ~12.9 bits/word) is kept as a commented-out alternative.
* Caches the list in the user's cache directory.
* Fully type‑annotated, no external dependencies beyond the standard library.
* Optional extra digit / symbol and customizable separator.
"""

import hashlib
import os
import random
import sys
import urllib.request
from pathlib import Path
from typing import List

# ----------------------------------------------------------------------
# 1️⃣  Load / cache the word list
# ----------------------------------------------------------------------
# Source of the word list. The EFF large word list URL is kept as a
# commented-out alternative.
WORDLIST_URL = (
    # "https://www.eff.org/files/2016/07/18/eff_large_wordlist.txt"
    "https://github.com/first20hours/google-10000-english/raw/refs/heads/master/google-10000-english.txt"
)
# Per the XDG Base Directory spec, an unset *or empty* XDG_CACHE_HOME means
# "$HOME/.cache". Using `or` covers both cases; an empty value would otherwise
# become Path("") and place the cache in the current working directory.
CACHE_DIR = Path(os.getenv("XDG_CACHE_HOME") or Path.home() / ".cache") / "xkcd_passgen"
# Created at import time so the cached word list can be written into it.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# WORDLIST_PATH = CACHE_DIR / "eff_large_wordlist.txt"
WORDLIST_PATH = CACHE_DIR / "google-10000-english.txt"


def _download_wordlist() -> None:
    """Download the word list from ``WORDLIST_URL`` into ``WORDLIST_PATH``.

    The response is first written to a temporary sibling file and only moved
    to ``WORDLIST_PATH`` (via ``os.replace``) once it has been read completely.
    A failed or interrupted download therefore never leaves a truncated file
    at the cache location, where it would be picked up as a valid word list
    on later runs.

    Any exception raised by the request or by file I/O propagates to the
    caller.
    """
    print("Downloading word list …", file=sys.stderr)
    # Per-process temp name so concurrent first runs do not clobber each other.
    tmp_path = WORDLIST_PATH.with_name(f"{WORDLIST_PATH.name}.{os.getpid()}.part")
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        with urllib.request.urlopen(WORDLIST_URL, timeout=30) as resp, open(
            tmp_path, "wb"
        ) as out:
            out.write(resp.read())
        os.replace(tmp_path, WORDLIST_PATH)
    finally:
        # No-op after a successful replace; removes the partial file otherwise.
        tmp_path.unlink(missing_ok=True)


def _load_wordlist() -> List[str]:
    """Return the words from the cached word list, downloading it if missing.

    Two line shapes are accepted:

    * ``NUMBER WORD`` separated by any whitespace (the EFF dice lists use a
      tab): the second field is kept.
    * a single bare word: kept only if it is longer than four characters.

    Blank lines and lines with three or more fields are ignored.

    Raises
    ------
    RuntimeError
        If the file yields no usable words.
    """
    if not WORDLIST_PATH.is_file():
        _download_wordlist()
    words: List[str] = []
    with open(WORDLIST_PATH, "r", encoding="utf-8") as f:
        for line in f:
            # Split on any whitespace rather than testing for a literal space,
            # so tab-separated "NUMBER<TAB>WORD" lines are parsed correctly
            # instead of being stored whole as a single "word".
            fields = line.split()
            if len(fields) == 2:
                words.append(fields[1])
            elif len(fields) == 1 and len(fields[0]) > 4:
                words.append(fields[0])
    if not words:
        raise RuntimeError("Failed to load any words from the word list.")
    return words


# Load once at import time. If the cached file is missing, _load_wordlist()
# downloads it first, so the very first import needs network access; later
# imports only read the cached file.
WORDLIST = _load_wordlist()


# ----------------------------------------------------------------------
# 2️⃣  Core password generator
# ----------------------------------------------------------------------
def generate_password(
    num_words: int = 4,
    separator: str = " ",
    capitalize: bool = False,
    add_digit: bool = False,
    add_symbol: bool = False,
    rng: random.Random | None = None,
) -> str:
    """
    Return a password consisting of *num_words* random words.

    Parameters
    ----------
    num_words:
        Number of words to concatenate (default 4 → ~52 bits of entropy).
    separator:
        String placed between words (default space). Use ``''`` for a
        “passphrase” without spaces.
    capitalize:
        Capitalise the first word (adds ~1 bit of entropy).
    add_digit:
        Append a random decimal digit (adds ~3.3 bits).
    add_symbol:
        Append a random symbol from ``!@#$%^&*()-_=+[]{};:,.?`` (adds ~5 bits).

    Returns
    -------
    str
        The generated password.
    """
    rng = rng or random.SystemRandom()  # cryptographically strong RNG

    chosen = [rng.choice(WORDLIST) for _ in range(num_words)]

    if capitalize:
        chosen = [c.capitalize() for c in chosen]
        # chosen[0] = chosen[0].capitalize()

    password = separator.join(chosen)

    if add_digit:
        password += str(rng.randint(0, 9))

    if add_symbol:
        symbols = "!@#$%^&*()-_=+[]{};:,.?"
        password += rng.choice(symbols)

    return password


# ----------------------------------------------------------------------
# 3️⃣  Helper to compute entropy (optional but nice to have)
# ----------------------------------------------------------------------
def password_entropy(num_words: int, add_digit: bool, add_symbol: bool) -> float:
    """
    Approximate entropy in bits for the given configuration.

    The EFF list has 7776 words → log₂(7776) ≈ 12.9 bits per word.
    """
    bits_per_word = 12.9
    entropy = num_words * bits_per_word
    if add_digit:
        entropy += 3.32  # log₂(10)
    if add_symbol:
        entropy += 5.0   # log₂(≈32 symbols)
    return entropy


# ----------------------------------------------------------------------
# 4️⃣  Command‑line interface (optional)
# ----------------------------------------------------------------------
def _cli() -> None:
    """Parse command-line options, generate one password and print it.

    Options map one-to-one onto the keyword arguments of
    ``generate_password``. A negative ``--num-words`` is rejected with a
    usage error (exit status 2) instead of silently printing a password
    that contains no words.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate XKCD-style passphrases."
    )
    parser.add_argument(
        "-n", "--num-words", type=int, default=4,
        help="Number of words (default: 4)."
    )
    parser.add_argument(
        "-s", "--separator", default=" ",
        help="String placed between words (default: space)."
    )
    parser.add_argument(
        "-c", "--capitalize", action="store_true",
        # generate_password capitalises all words, not only the first one.
        help="Capitalize every word."
    )
    parser.add_argument(
        "-d", "--digit", action="store_true",
        help="Append a random digit."
    )
    parser.add_argument(
        "-S", "--symbol", action="store_true",
        help="Append a random symbol."
    )
    args = parser.parse_args()

    if args.num_words < 0:
        # parser.error() prints the usage message and exits with status 2.
        parser.error("--num-words must not be negative")

    pwd = generate_password(
        num_words=args.num_words,
        separator=args.separator,
        capitalize=args.capitalize,
        add_digit=args.digit,
        add_symbol=args.symbol,
    )
    print(pwd)

if __name__ == "__main__":
    _cli()