Python File Handling

Read and write files in Python: open(), the with statement, all file modes, binary files, pathlib, and CSV/JSON operations.

Beginner 11 min read 10 examples

open() and Modes

The open() function opens a file and returns a file object. Always use the with statement to ensure the file is closed.

Python
# open(file, mode='r', encoding=None)
# Always specify encoding for text files

# 'r' - read (default): file must exist
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()

# 'w' - write: creates or overwrites the file
with open("output.txt", "w", encoding="utf-8") as f:
    f.write("Hello, World!\n")

# 'a' - append: creates or adds to end of file
with open("log.txt", "a", encoding="utf-8") as f:
    f.write("New entry\n")

# 'x' - exclusive creation: fails if file exists
try:
    with open("new_file.txt", "x", encoding="utf-8") as f:
        f.write("Created fresh")
except FileExistsError:
    print("File already exists")

# 'r+' - read and write (file must exist)
with open("data.txt", "r+", encoding="utf-8") as f:
    content = f.read()
    f.seek(0)           # go back to start
    f.write("NEW: " + content)

# Binary modes: add 'b' to any mode
# 'rb', 'wb', 'ab', 'rb+'
with open("image.png", "rb") as f:
    header = f.read(8)  # read first 8 bytes
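| Mode | Operation | File must exist? | Truncates? |
|------|-----------|------------------|------------|
| r | Read | Yes | No |
| w | Write | No (creates) | Yes |
| a | Append | No (creates) | No |
| x | Exclusive create | No (fails if exists) | No |
| r+ | Read+write | Yes | No |
| w+ | Write+read | No (creates) | Yes |
| + b | Binary mode | - | - |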

Reading Files

Python
from pathlib import Path

# Create test file
Path("sample.txt").write_text("Line 1\nLine 2\nLine 3\n", encoding="utf-8")

# read() - entire file as one string
with open("sample.txt", encoding="utf-8") as f:
    content = f.read()
    print(repr(content))    # 'Line 1\nLine 2\nLine 3\n'

# readlines() - list of lines (includes \n)
with open("sample.txt", encoding="utf-8") as f:
    lines = f.readlines()
    print(lines)    # ['Line 1\n', 'Line 2\n', 'Line 3\n']

# readline() - one line at a time
with open("sample.txt", encoding="utf-8") as f:
    first  = f.readline()   # 'Line 1\n'
    second = f.readline()   # 'Line 2\n'

# Iterate directly - most memory-efficient for large files
with open("sample.txt", encoding="utf-8") as f:
    for line in f:
        print(line.strip())     # removes trailing \n

# Read and strip lines
with open("sample.txt", encoding="utf-8") as f:
    lines = [line.strip() for line in f if line.strip()]

# Read with error handling
try:
    with open("missing.txt", encoding="utf-8") as f:
        data = f.read()
except FileNotFoundError:
    data = ""
except PermissionError as e:
    print(f"No permission: {e}")
    data = ""

# Read with seek / tell (file position)
with open("sample.txt", encoding="utf-8") as f:
    print(f.tell())     # 0 - position at start
    chunk = f.read(7)   # read 7 characters
    print(chunk)        # 'Line 1\n'
    print(f.tell())     # 7
    f.seek(0)           # back to start
    print(f.read(6))    # 'Line 1'

Writing Files

Python
# write() - write a string
with open("output.txt", "w", encoding="utf-8") as f:
    f.write("Hello, World!\n")
    f.write("Second line\n")

# writelines() - write a list of strings (no auto newlines)
lines = ["Alice\n", "Bob\n", "Charlie\n"]
with open("names.txt", "w", encoding="utf-8") as f:
    f.writelines(lines)

# Append to file
import datetime

# Build the entry before opening so the file is held open only for the write.
timestamp = datetime.datetime.now().isoformat()
with open("log.txt", "a", encoding="utf-8") as f:
    f.write(f"[{timestamp}] Application started\n")

# Write formatted data
data = [
    {"name": "Alice", "score": 95},
    {"name": "Bob",   "score": 87},
]
with open("results.txt", "w", encoding="utf-8") as f:
    f.write("Name     Score\n")
    f.write("-" * 16 + "\n")
    for row in data:
        f.write(f"{row['name']:<8} {row['score']}\n")

# print() can write to a file
with open("debug.txt", "w", encoding="utf-8") as f:
    print("Debug output:", file=f)
    print("Value:", 42, file=f)

# Atomic write - write to temp file, then rename (safe)
import os
import tempfile

def atomic_write(filepath, content, encoding="utf-8"):
    """Write content to filepath atomically.

    The text is written to a temporary file in the same directory as
    ``filepath`` and then moved over the target with ``os.replace``, so
    readers see either the old file or the complete new one.

    Args:
        filepath: Destination path (str or path-like).
        content: Text to write.
        encoding: Text encoding used for the temporary file.

    Raises:
        OSError: If the temporary file cannot be created, written or moved.
        TypeError: If ``content`` is not a string.

    On any failure the temporary file is removed and the original
    ``filepath`` (if it existed) is left untouched.
    """
    # Same directory as the target: os.replace is only atomic within one
    # filesystem.
    dir_name = os.path.dirname(os.path.abspath(filepath))
    tmp_name = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", encoding=encoding,
            dir=dir_name, delete=False, suffix=".tmp"
        ) as tmp:
            tmp_name = tmp.name
            tmp.write(content)
            # Push the data to disk before the rename so a crash cannot
            # leave a renamed-but-empty file behind.
            tmp.flush()
            os.fsync(tmp.fileno())
        os.replace(tmp_name, filepath)  # atomic on most OS
    except BaseException:
        # delete=False means nobody else cleans up after a failed write.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except FileNotFoundError:
                pass
        raise
Always specify encoding="utf-8"

Python's default encoding depends on the OS (UTF-8 on Linux/macOS, often cp1252 on Windows). Always pass encoding="utf-8" explicitly to open() to ensure consistent behavior across platforms. This prevents UnicodeDecodeError on Windows when reading files created on macOS/Linux.

Binary Files

Python
# Binary mode - no encoding, bytes not str
import struct

# Write binary data
with open("data.bin", "wb") as f:
    f.write(b"\x89PNG\r\n\x1a\n")      # write bytes literal
    f.write(bytes([72, 101, 108, 108, 111]))  # bytes from list of ints

# Read binary data
with open("data.bin", "rb") as f:
    header = f.read(4)                  # first 4 bytes
    print(header)                       # b'\x89PNG'
    rest   = f.read()                   # rest of file

# Check file signature (magic bytes)
def is_png(filepath):
    """Return True if the file begins with the 8-byte PNG signature."""
    png_signature = b"\x89PNG\r\n\x1a\n"
    with open(filepath, "rb") as stream:
        leading = stream.read(8)
    return leading == png_signature

# Read in chunks (large files)
def read_chunks(filepath, chunk_size=8192):
    """Yield the file's contents as successive bytes objects.

    Each chunk holds at most ``chunk_size`` bytes; iteration stops at the
    first empty read (end of file).
    """
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:
                return
            yield block

# Copy a file in binary mode
def copy_file(src, dst, chunk_size=8192):
    """Copy src to dst in binary mode, one chunk at a time.

    Args:
        src: Path of the file to read.
        dst: Path of the file to create or overwrite.
        chunk_size: Maximum number of bytes held in memory per read.

    Only the file contents are copied, not metadata. In production code,
    shutil.copyfile does the same job.
    """
    with open(src, "rb") as src_f, open(dst, "wb") as dst_f:
        # Read from the handle we already hold instead of opening src again.
        while chunk := src_f.read(chunk_size):
            dst_f.write(chunk)

# struct - pack/unpack binary data (like C structs)
# Format: < = little-endian, I = unsigned int, f = float
packed = struct.pack("<If", 42, 2.5)            # 8 bytes: 4-byte uint + 4-byte float
number, value = struct.unpack("<If", packed)    # (42, 2.5)

pathlib

pathlib.Path is the modern, object-oriented way to work with file paths. It handles OS differences automatically.

Python
from pathlib import Path

# Create paths
p = Path("/home/alice/documents/report.pdf")

# Path components
print(p.name)       # report.pdf
print(p.stem)       # report
print(p.suffix)     # .pdf
print(p.parent)     # /home/alice/documents
print(p.parts)      # ('/', 'home', 'alice', 'documents', 'report.pdf')

# Path construction with / operator
base    = Path("/home/alice")
file    = base / "projects" / "main.py"
print(file)         # /home/alice/projects/main.py

# Current directory and home
cwd  = Path.cwd()   # current working directory
home = Path.home()  # user home directory

# Check existence and type
p2 = Path("sample.txt")
print(p2.exists())      # True/False
print(p2.is_file())     # True if regular file
print(p2.is_dir())      # True if directory

# Read and write (shorthand methods)
path = Path("hello.txt")
path.write_text("Hello, World!", encoding="utf-8")
content = path.read_text(encoding="utf-8")
print(content)      # Hello, World!

path.write_bytes(b"binary data")
data = path.read_bytes()

# Directory operations
(Path("mydir") / "subdir").mkdir(parents=True, exist_ok=True)

# List directory contents
for f in Path(".").iterdir():
    print(f.name)

# Glob patterns
py_files = list(Path(".").glob("*.py"))             # current dir
# rglob() already searches recursively (it behaves like glob("**/<pattern>")),
# so the pattern itself must not repeat the "**/" prefix.
all_py   = list(Path(".").rglob("*.py"))             # recursive

# File info
print(p2.stat().st_size)        # file size in bytes
print(p2.stat().st_mtime)       # modification time

# Rename and delete
# rename() returns a Path for the new name (Python 3.8+); p2 itself still
# refers to the old, now non-existent path, so delete via the returned Path.
renamed = p2.rename("renamed.txt")
renamed.unlink(missing_ok=True)     # delete file (no error if missing)

CSV and JSON Files

Python
import csv
import json

# --- CSV ---

# Write CSV
rows = [
    ["name",    "age", "city"],
    ["Alice",   30,    "London"],
    ["Bob",     25,    "Paris"],
    ["Charlie", 35,    "Berlin"],
]
with open("people.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(rows)

# Read CSV
# Pass newline="" when reading too: the csv module then handles line endings
# itself, so quoted fields containing embedded newlines are parsed correctly.
with open("people.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)           # first row is header
    for row in reader:
        print(row)                  # ['Alice', '30', 'London'] (all strings)

# DictReader - each row as a dict
with open("people.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    people = list(reader)

for person in people:
    print(f"{person['name']} is {person['age']} from {person['city']}")

# DictWriter - write dicts as CSV
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(people)

# --- JSON ---

data = {
    "users": [
        {"name": "Alice", "age": 30, "active": True},
        {"name": "Bob",   "age": 25, "active": False},
    ]
}

# Write JSON to file
with open("users.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# Read JSON from file
with open("users.json", "r", encoding="utf-8") as f:
    loaded = json.load(f)

print(loaded["users"][0]["name"])   # Alice

# Pretty print for debugging
print(json.dumps(data, indent=2))

Frequently Asked Questions

The with statement ensures the file is closed when the block exits - even if an exception occurs. Without it, if your code raises an exception before reaching f.close(), the file handle is leaked. On Windows, an open file cannot be deleted or renamed. Always use with open(...) as f: - it is the standard Pythonic approach.

read() reads the entire file into one string (or bytes). readline() reads one line at a time (including the trailing newline). readlines() reads all lines and returns a list. For large files, iterating directly over the file object (for line in f:) is most memory-efficient - it reads one line at a time without loading everything into memory.

pathlib.Path (Python 3.4+) provides an object-oriented interface for file system paths. It is more readable than os.path string manipulation: Path("a") / "b" / "c.txt" vs os.path.join("a", "b", "c.txt"). It also handles platform differences (Windows backslash vs Unix forward slash) automatically. Modern Python code should use pathlib over os.path.

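Use a try/except block: try: with open("file.txt", encoding="utf-8") as f: content = f.read() except FileNotFoundError: content = "". The same approach works with pathlib: call Path("file.txt").read_text(encoding="utf-8") inside the try and handle FileNotFoundError in the same way. Avoid the pattern of checking existence before opening (if os.path.exists(...): open(...), or p.read_text() if p.exists() else "") - there is a race condition between the check and the open, because the file can be deleted or replaced in between.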