Creating Sets
Sets are unordered collections of unique, hashable elements. Duplicate values are automatically removed.
# Literal syntax - curly braces
fruits = {"apple", "banana", "cherry"}
numbers = {1, 2, 3, 4, 5}

# Duplicates are removed automatically
deduped = {1, 2, 2, 3, 3, 3}
print(deduped)  # {1, 2, 3}

# set() constructor - from any iterable
from_list = set([1, 2, 3, 2, 1])  # {1, 2, 3}
from_string = set("hello")  # {'h', 'e', 'l', 'o'} (unique chars)
from_range = set(range(5))  # {0, 1, 2, 3, 4}

# IMPORTANT: empty set must use set(), not {}
empty_set = set()  # correct: set
empty_dict = {}  # this is an empty DICT, not a set
print(type(empty_set))  # <class 'set'>
print(type(empty_dict))  # <class 'dict'>

# Sets are unordered - no guaranteed order
s = {5, 1, 3, 2, 4}
print(s)  # might print {1, 2, 3, 4, 5} or any order

# Membership test - hash table lookup, O(1) on average
print("banana" in fruits)  # True
print("grape" not in fruits)  # True

# Size
print(len(fruits))  # 3

# Add and remove
fruits.add("mango")
fruits.discard("grape")  # safe: no error if not in set
fruits.remove("apple")  # raises KeyError if not found
popped = fruits.pop()  # remove and return arbitrary element

# Iteration - a set has no defined order, so sort first for stable output
for fruit in sorted(fruits):
    print(fruit)
Set Operations
Python supports the standard mathematical set operations using both operators and methods.
# Two overlapping sets shared by the examples that follow
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7}

# Union - everything found in a, in b, or in both
combined = a | b
print(combined)  # {1, 2, 3, 4, 5, 6, 7}
print(a.union(b))  # same result; the method form accepts any iterable

# Intersection - only the elements a and b share
shared = a & b
print(shared)  # {3, 4, 5}
print(a.intersection(b))  # same result

# Difference - operand order matters
a_only = a - b
print(a_only)  # {1, 2} (in a, not in b)
print(a.difference(b))  # same result
b_only = b - a
print(b_only)  # {6, 7} (in b, not in a)

# Symmetric difference - in exactly one of the two sets
one_side_only = a ^ b
print(one_side_only)  # {1, 2, 6, 7}
print(a.symmetric_difference(b))  # same result

# Subset and superset checks
small = {3, 4}
print(small.issubset(a))  # True - every element of small is in a
print(small <= a)  # True - operator spelling of issubset
print(a.issuperset(small))  # True - a contains every element of small
print(a >= small)  # True - operator spelling of issuperset
print(small < a)  # True - proper subset: contained in a and not equal to a

# Disjoint - the two sets have nothing in common
print({1, 2}.isdisjoint({3, 4}))  # True
print({1, 2}.isdisjoint({2, 3}))  # False - both contain 2

# In-place operators mutate the left-hand set instead of building a new one
c = {1, 2, 3}
c |= {3, 4, 5}  # union update -> {1, 2, 3, 4, 5}
c &= {2, 3, 4}  # intersection update -> {2, 3, 4}
c -= {3}  # difference update -> {2, 4}
c ^= {2, 5}  # symmetric difference update -> {4, 5}
print(c)  # {4, 5}
| Operation | Operator | Method | Result |
|---|---|---|---|
| Union | a \| b | a.union(b) | All elements from both |
| Intersection | a & b | a.intersection(b) | Elements in both |
| Difference | a - b | a.difference(b) | In a but not b |
| Sym. Difference | a ^ b | a.symmetric_difference(b) | In one but not both |
Set Methods
s = {1, 2, 3}

# Methods that change the set in place
s.add(4)  # one element at a time -> {1, 2, 3, 4}
s.update([5, 6], {7})  # merge in any number of iterables -> {1, ..., 7}
s.remove(7)  # strict removal: KeyError when the element is absent
s.discard(99)  # lenient removal: silently ignores an absent element
item = s.pop()  # takes out and returns an arbitrary element

# Methods that leave the set untouched and return a new one
# (see set-operations.py for union/intersection/difference)
copy = s.copy()

# Comparison tests
print({1, 2} <= {1, 2, 3})  # True (subset)
print({1, 2} < {1, 2, 3})  # True (proper subset)
print({1, 2, 3} >= {1, 2})  # True (superset)
print({1, 2}.isdisjoint({3}))  # True (no overlap)

# sorted() turns the set into an ordered list for predictable display
print(sorted(s))

# Intersection across many sets: unpack the list into set.intersection
sets = [{1, 2, 3}, {2, 3, 4}, {2, 3, 5}]
common = set.intersection(*sets)  # {2, 3}
print(common)
frozenset
A frozenset is an immutable set. Because it is hashable, it can be used as a dictionary key or stored inside another set.
# Built from any iterable; duplicates collapse just as they do for set
fs = frozenset([1, 2, 3, 2, 1])
print(fs)  # frozenset({1, 2, 3})

# There are no mutating methods at all
# fs.add(4)  # AttributeError
# fs.remove(1)  # AttributeError

# Every read-only set operation is available and yields a frozenset
a = frozenset({1, 2, 3})
b = frozenset({2, 3, 4})
print(a | b)  # frozenset({1, 2, 3, 4})
print(a & b)  # frozenset({2, 3})
print(a - b)  # frozenset({1})

# Hashable, so a frozenset can serve as a dict key
permissions = {
    frozenset({"read"}): "read-only",
    frozenset({"read", "write"}): "editor",
    frozenset({"read", "write", "admin"}): "admin",
}
user_perms = frozenset({"read", "write"})
print(permissions[user_perms])  # editor

# Hashable, so frozensets can be members of a set (a set of sets)
unique_groups = {frozenset({1, 2}), frozenset({3, 4}), frozenset({1, 2})}
print(len(unique_groups))  # 2 (the repeated {1, 2} group is dropped)
Set Comprehensions
# General form: {expression for item in iterable}
squares = {n**2 for n in range(1, 6)}
print(squares)  # {1, 4, 9, 16, 25} (unordered)

# Adding a filter clause keeps only the items that pass the test
even_squares = {n**2 for n in range(10) if n % 2 == 0}
print(even_squares)  # {0, 4, 16, 36, 64}

# Collect the distinct first letters of some words
words = ["apple", "banana", "cherry", "avocado"]
first_chars = {word[0] for word in words}
print(first_chars)  # {'a', 'b', 'c'}

# Case-insensitive deduplication: names are lowercased, order is not kept
data = ["Alice", "bob", "Alice", "BOB", "charlie"]
unique = {entry.lower() for entry in data}
print(unique)  # {'alice', 'bob', 'charlie'}
Practical Use Cases
# 1. Remove duplicates from a list (the resulting order is arbitrary)
emails = [
    "alice@example.com",
    "bob@example.com",
    "alice@example.com",
]
distinct_addresses = set(emails)
unique_emails = list(distinct_addresses)
print(unique_emails)
# 2. Fast membership check (O(1) on average vs O(n) for a list)
VALID_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp"}


def is_valid_image(filename):
    """Return True if *filename* ends in one of VALID_EXTENSIONS.

    The extension is the text after the last dot, compared
    case-insensitively. A name with no dot at all (e.g. "jpg") has no
    extension and is rejected.
    """
    # rpartition returns an empty separator when the name contains no dot,
    # so a bare "jpg" is not mistaken for the extension ".jpg".
    _stem, dot, ext = filename.rpartition(".")
    return bool(dot) and f".{ext.lower()}" in VALID_EXTENSIONS


print(is_valid_image("photo.jpg"))  # True
print(is_valid_image("virus.exe"))  # False
# 3. Compare the membership of two collections
group_a = {"Alice", "Bob", "Charlie", "Dave"}
group_b = {"Bob", "Eve", "Charlie", "Frank"}

# Intersection gives the people who belong to both groups
both_groups = group_a & group_b
print(f"In both: {both_groups}")  # {'Bob', 'Charlie'}

# Difference in each direction gives the people exclusive to one group
only_a = group_a - group_b
only_b = group_b - group_a
print(f"Only A: {only_a}")  # {'Alice', 'Dave'}
print(f"Only B: {only_b}")  # {'Eve', 'Frank'}

# 4. Find missing items: whatever is required but was not provided
required = {"name", "email", "age", "city"}
provided = {"name", "email"}
missing = required - provided
print(f"Missing fields: {missing}")  # {'age', 'city'}
# 5. Deduplicate while preserving order
# The set gives a fast "already seen?" test; the list records each value
# the first time it appears. This loop works on any Python version.
data = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3]
seen = set()
unique = []
for x in data:
    if x not in seen:
        seen.add(x)
        unique.append(x)
print(unique)  # [3, 1, 4, 5, 9, 2, 6] - order preserved

# Shorter alternative using dict (Python 3.7+, where dicts are guaranteed
# to keep insertion order)
unique2 = list(dict.fromkeys(data))
print(unique2)  # [3, 1, 4, 5, 9, 2, 6]