#!/usr/bin/env python3
"""
XKCD “correct-horse-battery-staple” password generator.

Features
--------
* Draws words from the google-10000-english word list (the EFF large word
  list is kept below as a commented-out alternative).
* Caches the list in the user's cache directory.
* Fully type-annotated, no external dependencies beyond the standard library.
* Optional extra digit / symbol and customizable separator.
"""

import hashlib
import os
import random
import sys
import urllib.request
from pathlib import Path
from typing import List

# ----------------------------------------------------------------------
# 1️⃣ Load / cache the word list
# ----------------------------------------------------------------------
WORDLIST_URL = (
    # "https://www.eff.org/files/2016/07/18/eff_large_wordlist.txt"
    "https://github.com/first20hours/google-10000-english/raw/refs/heads/master/google-10000-english.txt"
)

# An empty XDG_CACHE_HOME is treated like an unset one; otherwise Path("")
# would silently place the cache in the current working directory.
CACHE_DIR = Path(os.getenv("XDG_CACHE_HOME") or Path.home() / ".cache") / "xkcd_passgen"
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# WORDLIST_PATH = CACHE_DIR / "eff_large_wordlist.txt"
WORDLIST_PATH = CACHE_DIR / "google-10000-english.txt"


def _download_wordlist() -> None:
    """Download the word list at ``WORDLIST_URL`` into ``WORDLIST_PATH``.

    The payload is first written to a sibling ``.part`` file and then moved
    into place, so an interrupted download never leaves a truncated file at
    ``WORDLIST_PATH`` that later runs would mistake for a valid cache.

    Raises
    ------
    OSError
        If the download or the write to the cache directory fails
        (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    print("Downloading word list …", file=sys.stderr)
    partial = WORDLIST_PATH.with_name(WORDLIST_PATH.name + ".part")
    try:
        # Fetch everything before touching the disk; the timeout keeps a
        # stalled connection from hanging the import forever.
        with urllib.request.urlopen(WORDLIST_URL, timeout=30) as resp:
            payload = resp.read()
        partial.write_bytes(payload)
        os.replace(partial, WORDLIST_PATH)
    finally:
        # After a successful replace the .part file is gone; after a failure
        # this removes whatever was left behind.
        partial.unlink(missing_ok=True)


def _load_wordlist() -> List[str]:
    """Return the words from the cached list, downloading it if necessary.

    Two line formats are accepted:

    * ``<index> <word>`` separated by any whitespace (spaces or tabs), as in
      diceware-style lists - the second field is kept;
    * a bare word on its own line - kept only when it is longer than four
      characters.

    Lines in any other shape (blank, or more than two fields) are skipped.

    Raises
    ------
    RuntimeError
        If no usable word was found in the file.
    """
    if not WORDLIST_PATH.is_file():
        _download_wordlist()

    words: List[str] = []
    with open(WORDLIST_PATH, "r", encoding="utf-8") as f:
        for line in f:
            # split() handles both spaces and tabs, so tab-separated
            # "12345<TAB>word" lines are parsed instead of being stored whole.
            parts = line.split()
            if len(parts) == 2:
                words.append(parts[1])
            elif len(parts) == 1 and len(parts[0]) > 4:
                words.append(parts[0])
    if not words:
        raise RuntimeError("Failed to load any words from the word list.")
    return words


# Load once at import time – cheap after the first run
WORDLIST = _load_wordlist()

# ----------------------------------------------------------------------
# 2️⃣ Core password generator # ---------------------------------------------------------------------- def generate_password( num_words: int = 4, separator: str = " ", capitalize: bool = False, add_digit: bool = False, add_symbol: bool = False, rng: random.Random | None = None, ) -> str: """ Return a password consisting of *num_words* random words. Parameters ---------- num_words: Number of words to concatenate (default 4 → ~52 bits of entropy). separator: String placed between words (default space). Use ``''`` for a “passphrase” without spaces. capitalize: Capitalise the first word (adds ~1 bit of entropy). add_digit: Append a random decimal digit (adds ~3.3 bits). add_symbol: Append a random symbol from ``!@#$%^&*()-_=+[]{};:,.?`` (adds ~5 bits). Returns ------- str The generated password. """ rng = rng or random.SystemRandom() # cryptographically strong RNG chosen = [rng.choice(WORDLIST) for _ in range(num_words)] if capitalize: chosen = [c.capitalize() for c in chosen] # chosen[0] = chosen[0].capitalize() password = separator.join(chosen) if add_digit: password += str(rng.randint(0, 9)) if add_symbol: symbols = "!@#$%^&*()-_=+[]{};:,.?" password += rng.choice(symbols) return password # ---------------------------------------------------------------------- # 3️⃣ Helper to compute entropy (optional but nice to have) # ---------------------------------------------------------------------- def password_entropy(num_words: int, add_digit: bool, add_symbol: bool) -> float: """ Approximate entropy in bits for the given configuration. The EFF list has 7776 words → log₂(7776) ≈ 12.9 bits per word. 
""" bits_per_word = 12.9 entropy = num_words * bits_per_word if add_digit: entropy += 3.32 # log₂(10) if add_symbol: entropy += 5.0 # log₂(≈32 symbols) return entropy # ---------------------------------------------------------------------- # 4️⃣ Command‑line interface (optional) # ---------------------------------------------------------------------- def _cli() -> None: import argparse parser = argparse.ArgumentParser( description="Generate XKCD-style passphrases." ) parser.add_argument( "-n", "--num-words", type=int, default=4, help="Number of words (default: 4)." ) parser.add_argument( "-s", "--separator", default=" ", help="String placed between words (default: space)." ) parser.add_argument( "-c", "--capitalize", action="store_true", help="Capitalize the first word." ) parser.add_argument( "-d", "--digit", action="store_true", help="Append a random digit." ) parser.add_argument( "-S", "--symbol", action="store_true", help="Append a random symbol." ) args = parser.parse_args() pwd = generate_password( num_words=args.num_words, separator=args.separator, capitalize=args.capitalize, add_digit=args.digit, add_symbol=args.symbol, ) print(pwd) # print(f"≈ {password_entropy(args.num_words, args.digit, args.symbol):.1f} bits of entropy") if __name__ == "__main__": _cli()