Python Strings

String Basics

Strings are sequences of Unicode characters. They are immutable - once created, characters cannot be changed in place.

Python

# Single and double quotes - no difference
s1 = 'Hello, World!'
s2 = "Hello, World!"
print(s1 == s2)     # True

# Use the other quote type to avoid escaping
s3 = "it's a great day"    # single quote inside double quotes
s4 = 'say "hello" to her'  # double quote inside single quotes

# Escape sequences
print("Tab:\there")        # Tab:    here
print("Newline:\nhere")    # Newline:
                            # here
print("Backslash: \\")     # Backslash: \
print("Quote: \"hi\"")     # Quote: "hi"
print("Unicode: \u2764")   # Unicode: heart symbol  (\uXXXX = code point in hex)

# String is a sequence - supports len(), indexing, iteration
s = "Python"
print(len(s))       # 6
print(s[0])         # P
print(s[-1])        # n  (negative index from end)
for char in s:
    print(char, end=" ")    # P y t h o n

# Strings are immutable
# s[0] = "J"  # TypeError: 'str' object does not support item assignment

# Concatenation creates a new string
first = "Hello"
second = "World"
result = first + ", " + second + "!"
print(result)       # Hello, World!

# Repetition
print("ha" * 3)     # hahaha
print("-" * 40)     # ----------------------------------------

f-strings

f-strings (formatted string literals) are the modern way to embed expressions inside strings. Prefix the string with f and use {expression} placeholders.

Python

name = "Alice"
age  = 30
pi   = 3.14159265

# Basic f-string
print(f"Hello, {name}!")                    # Hello, Alice!
print(f"Age: {age}, Pi: {pi}")              # Age: 30, Pi: 3.14159265

# Expressions inside braces
print(f"In 5 years: {age + 5}")             # In 5 years: 35
print(f"Uppercase: {name.upper()}")         # Uppercase: ALICE
print(f"Length: {len(name)}")               # Length: 5

# Format specification: {value:format_spec}
print(f"Pi to 2 dp: {pi:.2f}")              # Pi to 2 dp: 3.14
print(f"Pi to 4 dp: {pi:.4f}")              # Pi to 4 dp: 3.1416
print(f"Percentage: {0.754:.1%}")           # Percentage: 75.4%
print(f"Scientific: {1234567:.2e}")         # Scientific: 1.23e+06

# Integer formatting
n = 1234567
print(f"With commas: {n:,}")                # With commas: 1,234,567
print(f"Padded:  {n:>15,}")                 # right-aligned in 15 chars
print(f"Binary:  {255:08b}")                # Binary:  11111111
print(f"Hex:     {255:#x}")                 # Hex:     0xff

# String alignment
print(f"{'left':<10}|")     # left      |
print(f"{'center':^10}|")   #   center  |
print(f"{'right':>10}|")    #      right|

# Nested replacement field: the precision in the format spec is itself a {placeholder}
precision = 3
print(f"Pi: {pi:.{precision}f}")    # Pi: 3.142

# Self-documenting expressions (Python 3.8+) - the = suffix
x = 42
print(f"{x=}")              # x=42  (variable name + value)
print(f"{pi=:.3f}")         # pi=3.142

# Dictionary and attribute access
person = {"name": "Bob", "city": "London"}
print(f"Name: {person['name']}, City: {person['city']}")

# Multiline f-string
message = (
    f"Name:  {name}\n"
    f"Age:   {age}\n"
    f"Score: {9.5:.1f}"
)
print(message)

Output

Hello, Alice!
Age: 30, Pi: 3.14159265
In 5 years: 35
Pi to 2 dp: 3.14
With commas: 1,234,567
x=42

String Methods

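Strings have dozens of built-in methods. Methods that transform a string return a new string - they never modify the original. Other methods return a different type: for example, find() returns an int and split() returns a list.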

Python

s = "  Hello, World!  "

# Case methods
print(s.upper())        # "  HELLO, WORLD!  "
print(s.lower())        # "  hello, world!  "
print(s.title())        # "  Hello, World!  "
print(s.swapcase())     # "  hELLO, wORLD!  "
print(s.capitalize())   # "  hello, world!  "  (uppercases only the first character - here a space - and lowercases the rest)

# Whitespace
print(s.strip())        # "Hello, World!"   - remove both ends
print(s.lstrip())       # "Hello, World!  " - remove left
print(s.rstrip())       # "  Hello, World!" - remove right
print(s.strip(" !H"))   # "ello, World"     - remove specific chars

# Search and test
s2 = "Hello, World!"
print(s2.find("World"))     # 7   (index of first occurrence, -1 if not found)
print(s2.find("xyz"))       # -1
print(s2.index("World"))    # 7   (raises ValueError if not found)
print(s2.count("l"))        # 3   (count occurrences)
print(s2.startswith("Hello"))  # True
print(s2.endswith("!"))        # True
print("123".isdigit())         # True
print("abc".isalpha())         # True
print("abc123".isalnum())      # True
print("  ".isspace())          # True

# Replace and split/join
print(s2.replace("World", "Python"))    # Hello, Python!
print(s2.replace("l", "L", 2))         # HeLLo, World!  (replace first 2)

csv = "apple,banana,cherry"
parts = csv.split(",")      # ['apple', 'banana', 'cherry']
print(parts)
print(",".join(parts))      # apple,banana,cherry  (join with separator)
print(" | ".join(parts))    # apple | banana | cherry

# Split on whitespace (default - splits on any whitespace, removes empties)
print("  hello   world  ".split())     # ['hello', 'world']
print("a,b,,c".split(","))             # ['a', 'b', '', 'c']  (preserves empties)

# Alignment and padding
print("hello".center(11))          # "   hello   "
print("hello".ljust(10, "-"))      # "hello-----"
print("hello".rjust(10, "-"))      # "-----hello"
print("42".zfill(6))               # "000042"   (pad with zeros)

# Split a sentence into words, and join words back into a sentence
print("hello world".split())                   # ['hello', 'world']
words = ["Python", "is", "great"]
print(" ".join(words))                         # Python is great

# Partition - splits into exactly 3 parts at first occurrence
before, sep, after = "key=value".partition("=")
print(before, sep, after)                      # key = value

Method	Returns	Example
`upper()`	Uppercase string	`"hi".upper()` -> `"HI"`
`lower()`	Lowercase string	`"HI".lower()` -> `"hi"`
`strip()`	Trimmed string	`" hi ".strip()` -> `"hi"`
`split(sep)`	List of strings	`"a,b".split(",")` -> `["a","b"]`
`join(iter)`	Joined string	`",".join(["a","b"])` -> `"a,b"`
`replace(old, new)`	New string	`"ab".replace("a","x")` -> `"xb"`
`find(sub)`	int (index or -1)	`"hello".find("ll")` -> `2`
`startswith(pre)`	bool	`"hello".startswith("he")` -> `True`
`format(**kwargs)`	Formatted string	`"{n}".format(n=5)` -> `"5"`

Slicing and Indexing

Strings support the slice notation s[start:stop:step]. All three parts are optional and can be negative.

Python

s = "Hello, Python!"
#    0123456789...

# Single character indexing
print(s[0])     # H  - first character
print(s[7])     # P
print(s[-1])    # !  - last character
print(s[-7])    # P  - 7 from the end

# Slicing: s[start:stop]  (stop is exclusive)
print(s[0:5])   # Hello   - chars 0,1,2,3,4
print(s[7:13])  # Python
print(s[:5])    # Hello   - omit start = 0
print(s[7:])    # Python! - omit stop  = end
print(s[:])     # Hello, Python!  - full copy

# Negative slicing
print(s[-7:])   # Python!  - last 7 characters
print(s[:-1])   # Hello, Python  - all except last

# Step: s[start:stop:step]
print(s[::2])   # Hlo yhn - every 2nd character
print(s[1::2])  # el,Pto! - every 2nd starting from 1
print(s[::-1])  # !nohtyP ,olleH  - reverse the string

# Common patterns
def reverse(s):
    """Return a new sequence holding the items of ``s`` in reverse order."""
    # A slice with step -1 walks the sequence from the last item to the first.
    backwards = slice(None, None, -1)
    return s[backwards]

def first_n(s, n):
    """Return the leading ``n`` items of ``s``.

    Slicing clamps to the sequence length, so asking for more items than
    exist returns the whole sequence. A negative ``n`` drops that many
    items from the end instead.
    """
    prefix = s[:n]
    return prefix

def last_n(s, n):
    """Return the trailing ``n`` items of ``s``.

    Slicing clamps to the sequence length, so asking for more items than
    exist returns the whole sequence. A negative ``n`` drops that many
    items from the start instead.
    """
    # -0 == 0, so a bare s[-n:] with n == 0 would be s[0:] and return the
    # whole sequence; return an empty slice of the same type instead.
    if n == 0:
        return s[:0]
    return s[-n:]

# Slice out a part and replace (strings are immutable - creates new string)
original = "Hello, Java!"
modified = original[:7] + "Python" + original[11:]
print(modified)     # Hello, Python!

# String as sequence
print(list("abc"))    # ['a', 'b', 'c']
print(tuple("abc"))   # ('a', 'b', 'c')

# Membership
print("Py" in s)      # True
print("py" in s)      # False - case sensitive

Multiline Strings

Python

# Triple-quoted strings span multiple lines
poem = """Roses are red,
Violets are blue,
Python is great,
And so are you."""

print(poem)
# Roses are red,
# Violets are blue,
# ...

# Single quotes work too
# NOTE(review): despite the name, this literal contains no HTML tags - the
# markup may have been lost; confirm the intended content.
html = '''
    Title
    Content here
'''

# Implicit string concatenation (adjacent literals are joined)
long_msg = (
    "This is a very long message that we want to "
    "split across multiple lines in source code "
    "without adding actual newlines to the string."
)
print(long_msg)    # One continuous line

# Explicit line continuation with backslash (less preferred)
msg2 = "Part one " \
       "part two " \
       "part three"

# textwrap.dedent removes common leading whitespace
import textwrap

sql = """
    SELECT *
    FROM users
    WHERE active = 1
    ORDER BY name;
"""
print(textwrap.dedent(sql).strip())
# SELECT *
# FROM users
# WHERE active = 1
# ORDER BY name;

# Newline handling
lines = "line1\nline2\nline3"
print(lines.splitlines())   # ['line1', 'line2', 'line3']
print(lines.split("\n"))    # same result here

Raw Strings

Raw strings (prefix r) treat backslashes as literal characters. They are most commonly used for regular expressions and Windows file paths.

Python

# Normal string - backslash is escape character
path1 = "C:\\Users\\Alice\\Documents"   # must double every backslash
print(path1)    # C:\Users\Alice\Documents

# Raw string - backslash is literal
path2 = r"C:\Users\Alice\Documents"    # clean and readable
print(path2)    # C:\Users\Alice\Documents

# Why raw strings matter for regular expressions
import re

# Without raw string - \b means backspace, not word boundary
pattern1 = "\\bword\\b"   # need to escape for regex

# With raw string - clean regex
pattern2 = r"\bword\b"

text = "a word here"
print(re.findall(pattern2, text))   # ['word']

# Other useful raw string escapes preserved literally
print(r"\n")    # \n  (two characters, not newline)
print(r"\t")    # \t  (two characters, not tab)
print(r"\\")    # \\  (two backslashes)

# Raw strings cannot end with an odd number of backslashes
# r"path\"  # SyntaxError - use "path\\" or r"path" "\\" instead
#           # (r"path\\" is valid, but it contains TWO backslashes)

# Raw f-strings combine both features
folder = "projects"
file   = "readme.txt"
path3  = rf"C:\Users\Alice\{folder}\{file}"
print(path3)    # C:\Users\Alice\projects\readme.txt

Bytes and Encoding

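Python strings are sequences of Unicode code points (CPython stores each string internally using 1, 2, or 4 bytes per character, depending on its contents). When communicating with files, networks, or APIs, you need to encode strings to bytes and decode bytes back to strings.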

Python

# str -> bytes (encode)
s = "Hello, World!"
b = s.encode("utf-8")       # b'Hello, World!'
print(type(b))              # <class 'bytes'>
print(b)                    # b'Hello, World!'

# bytes -> str (decode)
s2 = b.decode("utf-8")
print(s2)                   # Hello, World!
print(type(s2))             # <class 'str'>

# bytes literal - prefix b
data = b"raw bytes"
print(data[0])              # 114  (integer, not character)
print(chr(data[0]))         # r    (convert to character)

# Unicode characters - written as \u escapes so the source stays plain ASCII
emoji = "Hello \u2764 Python"
print(emoji)                # Hello heart Python

# Different encodings
# The accent is spelled as the escape \u0301 (COMBINING ACUTE ACCENT) so the
# decomposed form stays visible and survives tools that normalize text to NFC.
s3 = "cafe\u0301"           # cafe + combining accent (NFD)
print(s3)                   # cafe + accent mark
print(len(s3))              # 5    (5 code points)
print(len(s3.encode("utf-8")))  # 6  (6 bytes - accent needs 2)

# ASCII encoding - fails on non-ASCII
try:
    "caf\xe9".encode("ascii")   # cafe with e-acute
except UnicodeEncodeError as e:
    print(f"Error: {e}")

# Safe encoding with error handling
s4 = "caf\xe9"
print(s4.encode("ascii", errors="replace"))  # b'caf?'
print(s4.encode("ascii", errors="ignore"))   # b'caf'
print(s4.encode("ascii", errors="xmlcharrefreplace"))  # b'caf&#233;'

# Reading files with explicit encoding
# Always specify encoding to avoid platform-dependent behavior
# with open("file.txt", "r", encoding="utf-8") as f:
#     content = f.read()   # returns str

# with open("file.bin", "rb") as f:
#     raw = f.read()        # returns bytes

Always specify encoding when opening files

Python's default file encoding depends on the operating system - UTF-8 on Linux/macOS, often Windows-1252 on Windows. Always pass encoding="utf-8" explicitly to open() so your code behaves the same on all platforms: open("file.txt", encoding="utf-8").

Previous Operators

Next Lists

String Basics

f-strings

String Methods

Slicing and Indexing

Multiline Strings

Title

Raw Strings

Bytes and Encoding

Frequently Asked Questions

What is the difference between single and double quotes in Python?

What is the best way to format strings in Python?

How do I check if a string contains a substring?

Are Python strings mutable?

Related Tutorials

Python Data Types

Python Operators

Python Comprehensions