Infinite Iterators
These iterators produce values indefinitely. Always bound them with islice(), an explicit break, or zip() against a finite iterable; consuming one directly (for example with list()) never terminates.
from itertools import count, cycle, repeat, islice
# count(start=0, step=1) - endless arithmetic progression
counter = count(10, 5)  # 10, 15, 20, 25, ...
first_values = [*islice(counter, 5)]  # islice bounds the endless stream
print(first_values)  # [10, 15, 20, 25, 30]

# Practical: hand out sequential unique IDs on demand
id_gen = count(1)
users = [
    dict(id=next(id_gen), name=person)
    for person in ("Alice", "Bob", "Charlie")
]
print(users)
# [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}, {'id': 3, 'name': 'Charlie'}]
# cycle(iterable) - replays the iterable's items over and over, forever
color_names = ["red", "green", "blue"]
colors = cycle(color_names)
palette = [*islice(colors, 7)]
print(palette)  # ['red', 'green', 'blue', 'red', 'green', 'blue', 'red']

# Round-robin assignment: each task takes the next worker in the rotation
workers = ["Alice", "Bob", "Charlie"]
tasks = ["T1", "T2", "T3", "T4", "T5", "T6"]
worker_rotation = cycle(workers)
assigned = [(task, next(worker_rotation)) for task in tasks]
print(assigned)
# [('T1', 'Alice'), ('T2', 'Bob'), ('T3', 'Charlie'), ('T4', 'Alice'), ...]
# repeat(object, times=None) - yield the same object `times` times
# (or forever when `times` is omitted)
zeros = list(repeat(0, 5))
print(zeros)  # [0, 0, 0, 0, 0]

# map with repeat: pair a varying argument with a constant one.
# map() stops at its shortest input, so the endless repeat(2) is bounded
# by range(5).
squares = list(map(pow, range(5), repeat(2)))
print(squares)  # [0, 1, 4, 9, 16] (x**2)

# starmap is the equivalent when the argument tuples are already zipped up
from itertools import starmap
print(list(starmap(pow, zip(range(5), repeat(2)))))  # [0, 1, 4, 9, 16] (x**2)
Slicing and Combining
# NOTE: itertools has no `chain_from_iterable` name (importing it raises
# ImportError); the flattening helper is the classmethod chain.from_iterable.
from itertools import (
    islice, chain,
    zip_longest, takewhile, dropwhile, compress
)
import itertools

# islice(iterable, stop) or islice(iterable, start, stop, step)
gen = (x**2 for x in range(1000))
first_five = list(islice(gen, 5))  # [0, 1, 4, 9, 16]
# islice consumes the generator, so this continues after the five items taken
# above: it skips the next 10 squares (5**2 .. 14**2) and then takes 5.
skip_ten = list(islice(gen, 10, 15))  # [225, 256, 289, 324, 361]

# chain(*iterables) - concatenate iterables lazily
a = [1, 2, 3]
b = [4, 5, 6]
c = [7, 8, 9]
all_items = list(chain(a, b, c))  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# chain.from_iterable - flatten one level
nested = [[1, 2], [3, 4], [5, 6]]
flat = list(itertools.chain.from_iterable(nested))  # [1, 2, 3, 4, 5, 6]
# zip_longest - pair items up to the LONGEST input, padding with fillvalue
a = [1, 2, 3]
b = [10, 20]
padded_pairs = zip_longest(a, b, fillvalue=0)
print(list(padded_pairs))  # [(1, 10), (2, 20), (3, 0)]


def _below_six(value):
    """Predicate shared by the takewhile and dropwhile examples."""
    return value < 6


# takewhile - emit items until the predicate first fails, then stop for good
nums = [1, 3, 5, 7, 2, 4, 8]  # deliberately unsorted
below_6 = [*takewhile(_below_six, nums)]
print(below_6)  # [1, 3, 5] (stops at 7, even though 2 and 4 come later)

# dropwhile - discard items until the predicate first fails, then emit the rest
rest = [*dropwhile(_below_six, nums)]
print(rest)  # [7, 2, 4, 8]

# compress - keep the data items whose matching selector entry is truthy
data = ['A', 'B', 'C', 'D', 'E']
selector = [True, False, True, True, False]
selected = [*compress(data, selector)]
print(selected)  # ['A', 'C', 'D']
Combinatorics
from itertools import product, permutations, combinations, combinations_with_replacement
# product - Cartesian product: every pairing of one item from each iterable
colors = ["red", "blue"]
sizes = ["S", "M", "L"]
for c, s in product(colors, sizes):
    print(f"{c}-{s}", end=" ")
print()  # terminate the space-separated line so later output starts fresh
# red-S red-M red-L blue-S blue-M blue-L

# product with repeat=N - equivalent to N nested loops over the same iterable
print(list(product([0, 1], repeat=3)))
# [(0,0,0),(0,0,1),(0,1,0),(0,1,1),(1,0,0),(1,0,1),(1,1,0),(1,1,1)]
# permutations - every ordered arrangement (order matters)
letters = ['A', 'B', 'C']
full_orderings = list(permutations(letters))
print(full_orderings)  # all 6 orderings (3! = 6)
ordered_pairs = list(permutations(letters, 2))
print(ordered_pairs)  # length-2 permutations (3 * 2 = 6 items)

# combinations - unordered selections; ('A', 'B') and ('B', 'A') count once
letter_pairs = list(combinations(letters, 2))
print(letter_pairs)
# [('A', 'B'), ('A', 'C'), ('B', 'C')] -- no ('B', 'A'), etc.
number_pairs = list(combinations([1, 2, 3, 4], 2))
print(number_pairs)
# [(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)] -- C(4,2)=6 items

# combinations_with_replacement - an item may be chosen more than once
pairs_with_repeats = list(combinations_with_replacement(['a', 'b', 'c'], 2))
print(pairs_with_repeats)
# [('a','a'),('a','b'),('a','c'),('b','b'),('b','c'),('c','c')]
# Practical: test all pairs from a list (each unordered pair exactly once)
servers = ["server1", "server2", "server3"]
print("Testing latency between:")
# Descriptive loop names; reusing `a` / `b` here would rebind the module-level
# lists of the same name used by other examples in this file.
for src, dst in combinations(servers, 2):
    print(f" {src} <-> {dst}")
# server1 <-> server2
# server1 <-> server3
# server2 <-> server3

# Count without generating
from math import comb, perm, factorial
print(f"C(10, 3) = {comb(10, 3)}")  # 120 - how many without listing all
groupby and filterfalse
from itertools import groupby, filterfalse
# groupby - groups CONSECUTIVE elements with the same key
# MUST sort first (by the same key) if you want all same-key items together
data = [
    {"name": "Alice", "dept": "Eng"},
    {"name": "Bob", "dept": "HR"},
    {"name": "Charlie", "dept": "Eng"},
    {"name": "Dave", "dept": "HR"},
    {"name": "Eve", "dept": "Eng"},
]


def dept_key(record):
    """Return the department of an employee record.

    One shared key function for both sorted() and groupby(), so the sort
    order and the grouping criterion cannot drift apart.
    """
    return record["dept"]


# Sort by department first
sorted_data = sorted(data, key=dept_key)
# Then group
for dept, employees in groupby(sorted_data, key=dept_key):
    # `employees` is a one-shot iterator that is only valid until groupby
    # advances to the next group, so materialize what is needed right away.
    names = [e["name"] for e in employees]
    print(f"{dept}: {names}")
# Eng: ['Alice', 'Charlie', 'Eve']
# HR: ['Bob', 'Dave']
# groupby on consecutive identical values (no key: items are compared directly)
text_data = "AAABBBCCDDDDEE"
for char, group in groupby(text_data):
    # `group` is an iterator over the run, so it must be consumed to be counted
    print(f"{char}: {len(list(group))}", end=" ")
print()  # terminate the space-separated line so later output starts fresh
# A: 3 B: 3 C: 2 D: 4 E: 2
# Run-length encoding
def rle(s):
    """Run-length encode an iterable.

    Args:
        s: Any iterable (typically a string); adjacent equal items form a run.

    Returns:
        A list of ``(item, run_length)`` tuples, one per run, in input order.
        An empty input gives an empty list.
    """
    # groupby with no key splits the input into runs of equal adjacent items;
    # each run is an iterator, so it is materialized to measure its length.
    return [(char, len(list(group))) for char, group in groupby(s)]


print(rle("AAABBBCC"))  # [('A', 3), ('B', 3), ('C', 2)]
# filterfalse - the mirror image of filter(): keeps items whose predicate is falsy
numbers = list(range(1, 11))
odd = [*filterfalse(lambda n: not n % 2, numbers)]
print(odd)  # [1, 3, 5, 7, 9] (the items for which the evenness test is False)
# Partition into two groups
def partition(pred, iterable):
    """Split ``iterable`` into two lazy iterators according to ``pred``.

    Args:
        pred: One-argument callable; the truthiness of its result picks the side.
        iterable: Any iterable, including one-shot iterators and generators.

    Returns:
        A ``(falses, trues)`` tuple of iterators: first the items for which
        ``pred`` is falsy, then the items for which it is truthy.
    """
    from itertools import tee, filterfalse
    # tee() yields two independent iterators over the same underlying data, so
    # a one-shot input can still feed both filters.
    t1, t2 = tee(iterable)
    return filterfalse(pred, t1), filter(pred, t2)


def is_even(x):
    """Return True when ``x`` is divisible by 2."""
    return x % 2 == 0


odds, evens = partition(is_even, range(10))
print(list(odds))  # [1, 3, 5, 7, 9]
print(list(evens))  # [0, 2, 4, 6, 8]
accumulate
from itertools import accumulate
import operator
nums = [1, 2, 3, 4, 5]

# With no function supplied, accumulate adds: a running sum
running_sum = [*accumulate(nums)]
print(running_sum)  # [1, 3, 6, 10, 15]

# Running product
running_product = [*accumulate(nums, operator.mul)]
print(running_product)  # [1, 2, 6, 24, 120]

# Running maximum
data = [3, 1, 4, 1, 5, 9, 2, 6]
running_max = [*accumulate(data, max)]
print(running_max)  # [3, 3, 4, 4, 5, 9, 9, 9]

# Seeded with an initial value (Python 3.8+); the seed is emitted first
seeded_sum = [*accumulate(nums, initial=100)]
print(seeded_sum)  # [100, 101, 103, 106, 110, 115]

# Running balance (bank account), starting from an empty account
transactions = [1000, -200, -150, 500, -75]
balances = [*accumulate(transactions, initial=0)]
print(balances)  # [0, 1000, 800, 650, 1150, 1075]

# Cumulative sum for percentages
weights = [0.2, 0.3, 0.15, 0.35]
cumulative = [*accumulate(weights)]
print(cumulative)  # [0.2, 0.5, 0.65, 1.0]
Iterator Pipelines
from itertools import chain, islice, filterfalse
import itertools
# Build efficient data pipelines without loading everything into memory
# Pipeline: read multiple sources, filter lines, parse, convert
def pipeline_example(*sources):
    """Run a lazy parse pipeline over ``name,age,dept`` text lines.

    Args:
        *sources: Iterables of text lines, processed in the order given.
            When none are passed, two small built-in sample sources are used.

    Returns:
        A list of ``{"name": str, "age": int, "dept": str}`` dicts, one per
        line that is neither blank nor a ``#`` comment.

    Raises:
        ValueError: If a record's second field is not a valid integer.
        IndexError: If a record has fewer than three comma-separated fields.
    """
    if not sources:
        # Simulate multiple data sources
        sources = (
            ["Alice,30,Eng", "Bob,25,HR"],
            ["Charlie,35,Eng", "", "Dave,28,Finance", "# comment"],
        )
    # Chain all sources into one lazy stream
    all_lines = chain.from_iterable(sources)
    # Strip once up front: this keeps a trailing newline out of the last field
    # and lets the comment test also catch '#' lines with leading whitespace.
    stripped = (line.strip() for line in all_lines)
    # Filter empty and comment lines
    valid = (line for line in stripped if line and not line.startswith("#"))
    # Parse each line
    parsed = (line.split(",") for line in valid)
    # Convert types
    records = (
        {"name": parts[0], "age": int(parts[1]), "dept": parts[2]}
        for parts in parsed
    )
    # Nothing above has executed yet; list() pulls every line through all stages
    return list(records)


print(pipeline_example())
# Batching with islice
def batched(iterable, n):
    """Yield successive lists of up to ``n`` items from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        n: Maximum batch size; must be at least 1.

    Yields:
        Lists of ``n`` items each; the final list may be shorter.

    Raises:
        ValueError: If ``n`` is less than 1 (raised when iteration starts,
            because this is a generator function).
    """
    # Without this check n == 0 would silently yield nothing and drop all data.
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    # islice pulls at most n items from the shared iterator; an empty list
    # means the input is exhausted and ends the loop.
    while (batch := list(islice(it, n))):
        yield batch


data = range(10)
for batch in batched(data, 3):
    print(batch)
# [0, 1, 2]
# [3, 4, 5]
# [6, 7, 8]
# [9]
# Note: Python 3.12 added itertools.batched() natively (it yields tuples, not lists)