open() and Modes
The open() function opens a file and returns a file object. Always use the with statement to ensure the file is closed.
# open(file, mode='r', encoding=None)
# Always specify encoding for text files
# 'r' - read (default): file must exist
with open("data.txt", "r", encoding="utf-8") as f:
content = f.read()
# 'w' - write: creates or overwrites the file
with open("output.txt", "w", encoding="utf-8") as f:
f.write("Hello, World!\n")
# 'a' - append: creates or adds to end of file
with open("log.txt", "a", encoding="utf-8") as f:
f.write("New entry\n")
# 'x' - exclusive creation: fails if file exists
try:
with open("new_file.txt", "x", encoding="utf-8") as f:
f.write("Created fresh")
except FileExistsError:
print("File already exists")
# 'r+' - read and write (file must exist)
with open("data.txt", "r+", encoding="utf-8") as f:
content = f.read()
f.seek(0) # go back to start
f.write("NEW: " + content)
# Binary modes: add 'b' to any mode
# 'rb', 'wb', 'ab', 'rb+'
with open("image.png", "rb") as f:
header = f.read(8) # read first 8 bytes
| Mode | Operation | File must exist? | Truncates? |
|---|---|---|---|
| r | Read | Yes | No |
| w | Write | No (creates) | Yes |
| a | Append | No (creates) | No |
| x | Exclusive create | No (fails if exists) | No |
| r+ | Read+write | Yes | No |
| w+ | Write+read | No (creates) | Yes |
| + b | Binary mode | - | - |
Reading Files
from pathlib import Path
# Create a small three-line test file used by the examples below
Path("sample.txt").write_text("Line 1\nLine 2\nLine 3\n", encoding="utf-8")

# read() - entire file as one string
with open("sample.txt", encoding="utf-8") as f:
    content = f.read()
print(repr(content))  # 'Line 1\nLine 2\nLine 3\n'

# readlines() - list of lines (each keeps its trailing \n)
with open("sample.txt", encoding="utf-8") as f:
    lines = f.readlines()
print(lines)  # ['Line 1\n', 'Line 2\n', 'Line 3\n']

# readline() - one line at a time
with open("sample.txt", encoding="utf-8") as f:
    first = f.readline()   # 'Line 1\n'
    second = f.readline()  # 'Line 2\n'

# Iterate directly - most memory-efficient for large files
with open("sample.txt", encoding="utf-8") as f:
    for line in f:
        # strip() drops the trailing \n (and any other surrounding whitespace)
        print(line.strip())

# Read and strip lines, skipping lines that are empty after stripping.
# Each line is stripped once; empty results are filtered out.
with open("sample.txt", encoding="utf-8") as f:
    lines = [text for text in map(str.strip, f) if text]

# Read with error handling: fall back to an empty string on failure
try:
    with open("missing.txt", encoding="utf-8") as f:
        data = f.read()
except FileNotFoundError:
    data = ""
except PermissionError as e:
    print(f"No permission: {e}")
    data = ""

# Read with seek / tell (file position)
with open("sample.txt", encoding="utf-8") as f:
    print(f.tell())     # 0 - position at start
    chunk = f.read(7)   # read 7 characters
    print(chunk)        # 'Line 1\n'
    print(f.tell())     # 7
    f.seek(0)           # back to start
    print(f.read(6))    # 'Line 1'
Writing Files
# write() - write a string (no newline is added for you)
with open("output.txt", "w", encoding="utf-8") as f:
    f.write("Hello, World!\n")
    f.write("Second line\n")

# writelines() - write a list of strings (no auto newlines)
lines = ["Alice\n", "Bob\n", "Charlie\n"]
with open("names.txt", "w", encoding="utf-8") as f:
    f.writelines(lines)

# Append to file
import datetime

with open("log.txt", "a", encoding="utf-8") as f:
    timestamp = datetime.datetime.now().isoformat()
    f.write(f"[{timestamp}] Application started\n")

# Write formatted data
data = [
    {"name": "Alice", "score": 95},
    {"name": "Bob", "score": 87},
]
with open("results.txt", "w", encoding="utf-8") as f:
    # Pad the header with the same :<8 width as the rows so columns line up.
    f.write(f"{'Name':<8} Score\n")
    f.write("-" * 16 + "\n")
    for row in data:
        f.write(f"{row['name']:<8} {row['score']}\n")

# print() can write to a file via its file= argument
with open("debug.txt", "w", encoding="utf-8") as f:
    print("Debug output:", file=f)
    print("Value:", 42, file=f)
# Atomic write - write to temp file, then rename (safe)
import os
import tempfile
def atomic_write(filepath, content, encoding="utf-8"):
    """Write content to filepath atomically.

    The text is written to a temporary file in the same directory as
    *filepath* and then moved over the target with os.replace(), so readers
    see either the old file or the complete new one.

    Args:
        filepath: Destination path; an existing file is replaced.
        content: Text to write.
        encoding: Text encoding used for the temporary file.

    Raises:
        OSError: If the temporary file cannot be created, written or renamed.
        TypeError: If *content* is not a str.

    If anything fails, the temporary file is removed before the exception
    propagates, and the original target is left untouched.
    """
    # Same directory as the target so os.replace() stays on one filesystem.
    dir_name = os.path.dirname(os.path.abspath(filepath))
    tmp_name = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", encoding=encoding,
            dir=dir_name, delete=False, suffix=".tmp",
        ) as tmp:
            tmp_name = tmp.name
            tmp.write(content)
            # Push the data to disk before the rename makes it visible.
            tmp.flush()
            os.fsync(tmp.fileno())
        os.replace(tmp_name, filepath)  # atomic on most OS
    except BaseException:
        # delete=False means nobody else cleans up the temp file on failure.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except FileNotFoundError:
                pass
        raise
Python's default encoding depends on the OS (UTF-8 on Linux/macOS, often cp1252 on Windows). Always pass encoding="utf-8" explicitly to open() to ensure consistent behavior across platforms. This prevents UnicodeDecodeError on Windows when reading files created on macOS/Linux.
Binary Files
# Binary mode - no encoding, bytes not str
import struct
# Write binary data
with open("data.bin", "wb") as f:
    f.write(b"\x89PNG\r\n\x1a\n")             # write bytes literal
    f.write(bytes([72, 101, 108, 108, 111]))  # bytes from list of ints (b"Hello")

# Read binary data
with open("data.bin", "rb") as f:
    header = f.read(4)  # first 4 bytes
    print(header)       # b'\x89PNG'
    rest = f.read()     # rest of file, from the current position to the end
# Check file signature (magic bytes)
def is_png(filepath):
    """Return True if the file at *filepath* starts with the PNG signature.

    Only the first 8 bytes are read; the rest of the file is not inspected.
    A file shorter than 8 bytes yields False.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
    """
    with open(filepath, "rb") as f:
        return f.read(8) == b"\x89PNG\r\n\x1a\n"
# Read in chunks (large files)
def read_chunks(filepath, chunk_size=8192):
    """Yield the contents of *filepath* as successive bytes chunks.

    Args:
        filepath: Path of the file to read (opened in binary mode).
        chunk_size: Maximum number of bytes per chunk; the last chunk may be
            shorter.

    Yields:
        Non-empty bytes objects, in file order. An empty file yields nothing.

    The file is opened when iteration starts, not when the function is called.
    """
    with open(filepath, "rb") as f:
        # read() returns b"" at end of file, which ends the loop.
        while chunk := f.read(chunk_size):
            yield chunk
# Copy a file in binary mode
def copy_file(src, dst, chunk_size=8192):
    """Copy the bytes of *src* to *dst*, reading *chunk_size* bytes at a time.

    Args:
        src: Path of the file to read.
        dst: Path of the file to write; created or overwritten.
        chunk_size: Number of bytes read per iteration.

    Raises:
        OSError: If either file cannot be opened, read or written.
    """
    with open(src, "rb") as src_f, open(dst, "wb") as dst_f:
        # Read from the handle opened above; the source is opened only once.
        while chunk := src_f.read(chunk_size):
            dst_f.write(chunk)
# struct - pack/unpack binary data (like C structs)
# Format: < = little-endian, I = unsigned int, f = float
# NOTE(review): the original call was cut off right after the opening quote
# of the format string; the format and values below are a reconstruction
# based on the format comment above - confirm against the intended example.
packed = struct.pack("<If", 42, 1.5)          # 4-byte unsigned int + 4-byte float
print(packed)                                 # b'*\x00\x00\x00\x00\x00\xc0?'
count, ratio = struct.unpack("<If", packed)   # unpack always returns a tuple
print(count, ratio)                           # 42 1.5
pathlib
pathlib.Path is the modern, object-oriented way to work with file paths. It handles OS differences automatically.
from pathlib import Path
# Create paths
p = Path("/home/alice/documents/report.pdf")

# Path components
print(p.name)    # report.pdf
print(p.stem)    # report
print(p.suffix)  # .pdf
print(p.parent)  # /home/alice/documents
print(p.parts)   # ('/', 'home', 'alice', 'documents', 'report.pdf')

# Path construction with / operator
base = Path("/home/alice")
file = base / "projects" / "main.py"
print(file)  # /home/alice/projects/main.py

# Current directory and home
cwd = Path.cwd()    # current working directory
home = Path.home()  # user home directory

# Check existence and type
p2 = Path("sample.txt")
print(p2.exists())   # True/False
print(p2.is_file())  # True if regular file
print(p2.is_dir())   # True if directory

# Read and write (shorthand methods)
path = Path("hello.txt")
path.write_text("Hello, World!", encoding="utf-8")
content = path.read_text(encoding="utf-8")
print(content)  # Hello, World!
path.write_bytes(b"binary data")
data = path.read_bytes()

# Directory operations
(Path("mydir") / "subdir").mkdir(parents=True, exist_ok=True)

# List directory contents
for f in Path(".").iterdir():
    print(f.name)

# Glob patterns
py_files = list(Path(".").glob("*.py"))  # current dir
# rglob() already searches recursively, so the pattern needs no "**/" prefix.
all_py = list(Path(".").rglob("*.py"))   # recursive

# File info (raises FileNotFoundError if the file does not exist)
info = p2.stat()
print(info.st_size)   # file size in bytes
print(info.st_mtime)  # modification time

# Rename and delete
# rename() returns a new Path and leaves p2 unchanged, so rebind p2;
# otherwise unlink() would target the old name and leave renamed.txt behind.
p2 = p2.rename("renamed.txt")
p2.unlink(missing_ok=True)  # delete file (no error if missing)
CSV and JSON Files
import csv
import json
# --- CSV ---
# Open CSV files with newline="" for both reading and writing so the csv
# module handles line endings itself (including newlines inside quoted fields).

# Write CSV
rows = [
    ["name", "age", "city"],
    ["Alice", 30, "London"],
    ["Bob", 25, "Paris"],
    ["Charlie", 35, "Berlin"],
]
with open("people.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(rows)

# Read CSV
with open("people.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)  # first row is header
    for row in reader:
        print(row)  # ['Alice', '30', 'London'] (all strings)

# DictReader - each row as a dict keyed by the header row
with open("people.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    people = list(reader)  # read all rows before the file is closed
for person in people:
    print(f"{person['name']} is {person['age']} from {person['city']}")

# DictWriter - write dicts as CSV
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(people)
# --- JSON ---
data = {
    "users": [
        {"name": "Alice", "age": 30, "active": True},
        {"name": "Bob", "age": 25, "active": False},
    ]
}

# Write JSON to file
# ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX escapes.
with open("users.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# Read JSON from file
with open("users.json", "r", encoding="utf-8") as f:
    loaded = json.load(f)
print(loaded["users"][0]["name"])  # Alice

# Pretty print for debugging
print(json.dumps(data, indent=2))