Files
sshworkshop/namepicker/xkcd_passwords.py
2025-11-04 21:10:31 +01:00

183 lines
5.6 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
XKCD “correct-horse-battery-staple” password generator.
Features
--------
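* Uses a downloaded word list: currently google-10000-english, keeping only
  words longer than four characters. The official EFF large word list
  (7776 words, ~12.9 bits/word) is kept as a commented-out alternative.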
* Caches the list in the user's cache directory.
* Fully type-annotated, no external dependencies beyond the standard library.
* Optional extra digit / symbol and customizable separator.
"""
import hashlib
import os
import random
import sys
import urllib.request
from pathlib import Path
from typing import List
# ----------------------------------------------------------------------
# 1⃣ Load / cache the word list
# ----------------------------------------------------------------------
# URL the word list is downloaded from. The EFF list is kept as a
# commented-out alternative source.
WORDLIST_URL = (
    # "https://www.eff.org/files/2016/07/18/eff_large_wordlist.txt"
    "https://github.com/first20hours/google-10000-english/raw/refs/heads/master/google-10000-english.txt"
)

# Per-user cache directory. The XDG Base Directory spec treats an *empty*
# XDG_CACHE_HOME like an unset one, but ``os.getenv(name, default)`` only
# falls back when the variable is missing -- an empty value would silently
# place the cache in a directory relative to the current working directory.
# ``or`` covers both cases (and only evaluates ``Path.home()`` when needed).
CACHE_DIR = Path(os.getenv("XDG_CACHE_HOME") or Path.home() / ".cache") / "xkcd_passgen"
# Created at import time so later writes into the cache cannot fail on a
# missing directory.
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Location of the cached copy of the word list.
# WORDLIST_PATH = CACHE_DIR / "eff_large_wordlist.txt"
WORDLIST_PATH = CACHE_DIR / "google-10000-english.txt"
def _download_wordlist() -> None:
    """Download the word list at ``WORDLIST_URL`` into ``WORDLIST_PATH``.

    The response is streamed in chunks into a temporary ``.part`` file next
    to the target and renamed onto ``WORDLIST_PATH`` only once the download
    has finished. Any file found at ``WORDLIST_PATH`` is later treated as a
    valid cache, so a truncated file from an interrupted download must never
    appear under that name.

    Exceptions raised by ``urlopen`` or by the file operations propagate to
    the caller; the temporary file is removed in that case.
    """
    print("Downloading word list …", file=sys.stderr)
    # Harmless if the directory already exists; protects against the cache
    # directory having been removed since import.
    WORDLIST_PATH.parent.mkdir(parents=True, exist_ok=True)
    partial = WORDLIST_PATH.with_name(WORDLIST_PATH.name + ".part")
    try:
        # The timeout applies to each blocking socket operation, so a stalled
        # connection fails instead of hanging forever.
        with urllib.request.urlopen(WORDLIST_URL, timeout=30) as resp, open(
            partial, "wb"
        ) as out:
            while chunk := resp.read(64 * 1024):
                out.write(chunk)
        # Rename within the same directory: the cache file appears complete
        # or not at all.
        partial.replace(WORDLIST_PATH)
    finally:
        # No-op after a successful rename; cleans up after a failed download.
        partial.unlink(missing_ok=True)
def _load_wordlist() -> List[str]:
    """Return the usable words from the cached word list.

    The list is downloaded first when ``WORDLIST_PATH`` does not exist yet.
    Two line layouts are accepted:

    * ``<index> <word>`` -- two whitespace-separated fields; the second
      field is taken as the word, whatever its length.
    * ``<word>`` -- a bare word, kept only if it is longer than four
      characters.

    Blank lines and lines with three or more fields are ignored.

    Raises
    ------
    RuntimeError
        If no word could be extracted from the file.
    """
    if not WORDLIST_PATH.is_file():
        _download_wordlist()

    words: List[str] = []
    with open(WORDLIST_PATH, "r", encoding="utf-8") as f:
        for line in f:
            # split() without arguments splits on any run of whitespace and
            # drops leading/trailing whitespace, so "<index>\t<word>" lines
            # are parsed too. Testing for a literal " " would miss the tab
            # and store the whole line, index included, as a "word".
            parts = line.split()
            if len(parts) == 2:
                words.append(parts[1])
            elif len(parts) == 1 and len(parts[0]) > 4:
                # Bare-word lists: drop very short words.
                words.append(parts[0])
    if not words:
        raise RuntimeError("Failed to load any words from the word list.")
    return words
# Load once at import time: reading the cached file is cheap after the
# first run; a missing cache file triggers a download first.
WORDLIST = _load_wordlist()
# ----------------------------------------------------------------------
# 2⃣ Core password generator
# ----------------------------------------------------------------------
def generate_password(
num_words: int = 4,
separator: str = " ",
capitalize: bool = False,
add_digit: bool = False,
add_symbol: bool = False,
rng: random.Random | None = None,
) -> str:
"""
Return a password consisting of *num_words* random words.
Parameters
----------
num_words:
Number of words to concatenate (default4 → ~52bits of entropy).
separator:
String placed between words (default space). Use ``''`` for a
“passphrase” without spaces.
capitalize:
Capitalise the first word (adds ~1bit of entropy).
add_digit:
Append a random decimal digit (adds ~3.3bits).
add_symbol:
Append a random symbol from ``!@#$%^&*()-_=+[]{};:,.?`` (adds ~5bits).
Returns
-------
str
The generated password.
"""
rng = rng or random.SystemRandom() # cryptographically strong RNG
chosen = [rng.choice(WORDLIST) for _ in range(num_words)]
if capitalize:
chosen = [c.capitalize() for c in chosen]
# chosen[0] = chosen[0].capitalize()
password = separator.join(chosen)
if add_digit:
password += str(rng.randint(0, 9))
if add_symbol:
symbols = "!@#$%^&*()-_=+[]{};:,.?"
password += rng.choice(symbols)
return password
# ----------------------------------------------------------------------
# 3⃣ Helper to compute entropy (optional but nice to have)
# ----------------------------------------------------------------------
def password_entropy(num_words: int, add_digit: bool, add_symbol: bool) -> float:
"""
Approximate entropy in bits for the given configuration.
The EFF list has 7776 words → log₂(7776) ≈ 12.9bits per word.
"""
bits_per_word = 12.9
entropy = num_words * bits_per_word
if add_digit:
entropy += 3.32 # log₂(10)
if add_symbol:
entropy += 5.0 # log₂(≈32 symbols)
return entropy
# ----------------------------------------------------------------------
# 4⃣ Commandline interface (optional)
# ----------------------------------------------------------------------
def _cli() -> None:
    """Parse the command-line options, generate one password and print it.

    The options map one-to-one onto the parameters of ``generate_password``.
    A word count below 1 is rejected through ``parser.error`` (usage message
    on stderr, exit status 2) rather than printing an empty password.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate XKCD-style passphrases."
    )
    parser.add_argument(
        "-n", "--num-words", type=int, default=4,
        help="Number of words (default: 4)."
    )
    parser.add_argument(
        "-s", "--separator", default=" ",
        help="String placed between words (default: space)."
    )
    parser.add_argument(
        "-c", "--capitalize", action="store_true",
        # generate_password capitalises all words, not only the first one.
        help="Capitalize every word."
    )
    parser.add_argument(
        "-d", "--digit", action="store_true",
        help="Append a random digit."
    )
    parser.add_argument(
        "-S", "--symbol", action="store_true",
        help="Append a random symbol."
    )
    args = parser.parse_args()
    if args.num_words < 1:
        parser.error("--num-words must be at least 1")

    pwd = generate_password(
        num_words=args.num_words,
        separator=args.separator,
        capitalize=args.capitalize,
        add_digit=args.digit,
        add_symbol=args.symbol,
    )
    print(pwd)
    # print(f"≈ {password_entropy(args.num_words, args.digit, args.symbol):.1f} bits of entropy")
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    _cli()