String Basics
Strings are sequences of Unicode characters. They are immutable - once created, characters cannot be changed in place.
# String basics: quoting, escape sequences, sequence operations, immutability,
# concatenation and repetition.
# Single and double quotes - no difference
s1 = 'Hello, World!'
s2 = "Hello, World!"
print(s1 == s2) # True
# Use the other quote type to avoid escaping
s3 = "it's a great day" # single quote inside double quotes
s4 = 'say "hello" to her' # double quote inside single quotes
# Escape sequences
print("Tab:\there") # Tab:    here
print("Newline:\nhere") # Newline:
# here
print("Backslash: \\") # Backslash: \
print("Quote: \"hi\"") # Quote: "hi"
print("Unicode: \u2764") # Unicode: ❤ (\uXXXX inserts a code point by number)
# String is a sequence - supports len(), indexing, iteration
s = "Python"
print(len(s)) # 6
print(s[0]) # P
print(s[-1]) # n (negative index from end)
for char in s:
    print(char, end=" ") # P y t h o n
print() # end the line that the loop above left open
# Strings are immutable
# s[0] = "J" # TypeError: 'str' object does not support item assignment
# Concatenation creates a new string
first = "Hello"
second = "World"
result = first + ", " + second + "!"
print(result) # Hello, World!
# Repetition
print("ha" * 3) # hahaha
print("-" * 40) # ----------------------------------------
f-strings
f-strings (formatted string literals) are the modern way to embed expressions inside strings. Prefix the string with f and use {expression} placeholders.
# f-string examples: embedding values, expressions and format specs in literals.
name = "Alice"
age = 30
pi = 3.14159265
# Basic f-string
print(f"Hello, {name}!") # Hello, Alice!
print(f"Age: {age}, Pi: {pi}") # Age: 30, Pi: 3.14159265
# Expressions inside braces
print(f"In 5 years: {age + 5}") # In 5 years: 35
print(f"Uppercase: {name.upper()}") # Uppercase: ALICE
print(f"Length: {len(name)}") # Length: 5
# Format specification: {value:format_spec}
print(f"Pi to 2 dp: {pi:.2f}") # Pi to 2 dp: 3.14
print(f"Pi to 4 dp: {pi:.4f}") # Pi to 4 dp: 3.1416
print(f"Percentage: {0.754:.1%}") # Percentage: 75.4%
print(f"Scientific: {1234567:.2e}") # Scientific: 1.23e+06
# Integer formatting
n = 1234567
print(f"With commas: {n:,}") # With commas: 1,234,567
print(f"Padded: {n:>15,}") # Padded:       1,234,567 (right-aligned in 15 chars)
print(f"Binary: {255:08b}") # Binary: 11111111 (08b zero-pads to 8 digits; 255 already fills them)
print(f"Hex: {255:#x}") # Hex: 0xff
# String alignment (each field is padded to 10 characters)
print(f"{'left':<10}|") # left      |
print(f"{'center':^10}|") #   center  |
print(f"{'right':>10}|") #      right|
# Nested replacement field: the format spec itself may contain {expressions},
# so the precision below comes from a variable
precision = 3
print(f"Pi: {pi:.{precision}f}") # Pi: 3.142
# Self-documenting expressions (Python 3.8+) - the = suffix
x = 42
print(f"{x=}") # x=42 (variable name + value)
print(f"{pi=:.3f}") # pi=3.142
# Dictionary and attribute access (use the other quote type for the key)
person = {"name": "Bob", "city": "London"}
print(f"Name: {person['name']}, City: {person['city']}") # Name: Bob, City: London
# Multiline f-string: adjacent literals inside parentheses are joined into one string
message = (
f"Name: {name}\n"
f"Age: {age}\n"
f"Score: {9.5:.1f}"
)
print(message)
Selected output:
Hello, Alice!
Age: 30, Pi: 3.14159265
In 5 years: 35
Pi to 2 dp: 3.14
With commas: 1,234,567
x=42
String Methods
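Strings have dozens of built-in methods. Methods that transform text return a new string - they never modify the original. Other methods return a different type, such as an int (find), a bool (startswith) or a list (split).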
# String method examples: case, whitespace, search/test, replace, split/join, padding.
s = " Hello, World! "
# Case methods
print(s.upper()) # " HELLO, WORLD! "
print(s.lower()) # " hello, world! "
print(s.title()) # " Hello, World! "
print(s.swapcase()) # " hELLO, wORLD! "
print(s.capitalize()) # " hello, world! " - only the FIRST character is uppercased; here that is a space, so every letter ends up lowercase
# Whitespace
print(s.strip()) # "Hello, World!" - remove both ends
print(s.lstrip()) # "Hello, World! " - remove left
print(s.rstrip()) # " Hello, World!" - remove right
print(s.strip(" !H")) # "ello, World" - remove any of the given chars from both ends
# Search and test
s2 = "Hello, World!"
print(s2.find("World")) # 7 (index of first occurrence, -1 if not found)
print(s2.find("xyz")) # -1
print(s2.index("World")) # 7 (raises ValueError if not found)
print(s2.count("l")) # 3 (count occurrences)
print(s2.startswith("Hello")) # True
print(s2.endswith("!")) # True
print("123".isdigit()) # True
print("abc".isalpha()) # True
print("abc123".isalnum()) # True
print(" ".isspace()) # True
# Replace and split/join
print(s2.replace("World", "Python")) # Hello, Python!
print(s2.replace("l", "L", 2)) # HeLLo, World! (replace first 2)
csv = "apple,banana,cherry"
parts = csv.split(",") # ['apple', 'banana', 'cherry']
print(parts)
print(",".join(parts)) # apple,banana,cherry (join with separator)
print(" | ".join(parts)) # apple | banana | cherry
# Split on whitespace (default - splits on any whitespace, removes empties)
print(" hello world ".split()) # ['hello', 'world']
print("a,b,,c".split(",")) # ['a', 'b', '', 'c'] (preserves empties)
# Alignment and padding
print("hello".center(11)) # "   hello   " (3 spaces each side)
print("hello".ljust(10, "-")) # "hello-----"
print("hello".rjust(10, "-")) # "-----hello"
print("42".zfill(6)) # "000042" (pad with zeros)
# Split into words and join them back
print("hello world".split()) # ['hello', 'world']
words = ["Python", "is", "great"]
print(" ".join(words)) # Python is great
# Partition - splits into exactly 3 parts at first occurrence
before, sep, after = "key=value".partition("=")
print(before, sep, after) # key = value (print separates the three parts with spaces)
| Method | Returns | Example |
|---|---|---|
| `upper()` | Uppercase string | `"hi".upper()` -> `"HI"` |
| `lower()` | Lowercase string | `"HI".lower()` -> `"hi"` |
| `strip()` | Trimmed string | `" hi ".strip()` -> `"hi"` |
| `split(sep)` | List of strings | `"a,b".split(",")` -> `["a","b"]` |
| `join(iter)` | Joined string | `",".join(["a","b"])` -> `"a,b"` |
| `replace(old, new)` | New string | `"ab".replace("a","x")` -> `"xb"` |
| `find(sub)` | int (index or -1) | `"hello".find("ll")` -> `2` |
| `startswith(pre)` | bool | `"hello".startswith("he")` -> `True` |
| `format(**kwargs)` | Formatted string | `"{n}".format(n=5)` -> `"5"` |
Slicing and Indexing
Strings support the slice notation s[start:stop:step]. All three parts are optional and can be negative.
# Indexing and slicing examples on a 14-character string.
s = "Hello, Python!"
#    0123456789...   (index of each character above)
# Single character indexing
print(s[0]) # H - first character
print(s[7]) # P
print(s[-1]) # ! - last character
print(s[-7]) # P - 7 from the end
# Slicing: s[start:stop] (stop is exclusive)
print(s[0:5]) # Hello - chars 0,1,2,3,4
print(s[7:13]) # Python
print(s[:5]) # Hello - omit start = 0
print(s[7:]) # Python! - omit stop = end
print(s[:]) # Hello, Python! - full copy
# Negative slicing
print(s[-7:]) # Python! - last 7 characters
print(s[:-1]) # Hello, Python - all except last
# Step: s[start:stop:step]
print(s[::2]) # Hlo yhn - every 2nd character
print(s[1::2]) # el,Pto! - every 2nd starting from 1
print(s[::-1]) # !nohtyP ,olleH - reverse the string
# Common patterns
def reverse(s):
    """Return *s* with its items in reverse order (slice with step -1)."""
    return s[::-1]
def first_n(s, n):
    """Return the first *n* items of *s*; the whole of *s* if it is shorter than *n*."""
    return s[:n]
def last_n(s, n):
    """Return the last *n* items of *s*; the whole of *s* if it is shorter than *n*.

    ``n == 0`` yields an empty result. A bare ``s[-n:]`` would not: ``-0`` is
    ``0``, so ``s[-0:]`` is ``s[0:]`` - the entire string.
    """
    if n == 0:
        return s[:0]  # empty slice keeps the result the same type as s
    return s[-n:]
# Slice out a part and replace (strings are immutable - creates new string)
original = "Hello, Java!"
# original[:7] is "Hello, " and original[11:] is "!", so "Java" (indexes 7-10) is dropped
modified = original[:7] + "Python" + original[11:]
print(modified) # Hello, Python!
# String as sequence
print(list("abc")) # ['a', 'b', 'c']
print(tuple("abc")) # ('a', 'b', 'c')
# Membership (s is still "Hello, Python!" from the start of this section)
print("Py" in s) # True
print("py" in s) # False - case sensitive
Multiline Strings
# Multiline string examples: triple quotes, implicit concatenation,
# backslash continuation, textwrap.dedent and line splitting.
# Triple-quoted strings span multiple lines
poem = """Roses are red,
Violets are blue,
Python is great,
And so are you."""
print(poem)
# Roses are red,
# Violets are blue,
# ...
# Single quotes work too
# NOTE(review): the markup tags in this literal appear to have been stripped
# when the page was extracted - confirm against the original before relying on it.
html = '''
Title
Content here
'''
# Implicit string concatenation (adjacent literals are joined)
long_msg = (
    "This is a very long message that we want to "
    "split across multiple lines in source code "
    "without adding actual newlines to the string."
)
print(long_msg) # One continuous line
# Explicit line continuation with backslash (less preferred)
msg2 = "Part one " \
    "part two " \
    "part three"
# textwrap.dedent removes common leading whitespace
import textwrap
# The literal is indented to line up with surrounding code; dedent strips the
# four spaces shared by every line, and strip() drops the blank first/last lines.
sql = """
    SELECT *
    FROM users
    WHERE active = 1
    ORDER BY name;
"""
print(textwrap.dedent(sql).strip())
# SELECT *
# FROM users
# WHERE active = 1
# ORDER BY name;
# Newline handling
lines = "line1\nline2\nline3"
print(lines.splitlines()) # ['line1', 'line2', 'line3']
print(lines.split("\n")) # same result here
Raw Strings
Raw strings (prefix r) treat backslashes as literal characters. They are most commonly used for regular expressions and Windows file paths.
# Raw string examples: Windows paths, regular expressions, raw f-strings.
# Normal string - backslash is escape character
path1 = "C:\\Users\\Alice\\Documents" # must double every backslash
print(path1) # C:\Users\Alice\Documents
# Raw string - backslash is literal
path2 = r"C:\Users\Alice\Documents" # clean and readable
print(path2) # C:\Users\Alice\Documents
# Why raw strings matter for regular expressions
import re
# Without raw string - \b means backspace, not word boundary
pattern1 = "\\bword\\b" # need to escape for regex
# With raw string - clean regex
pattern2 = r"\bword\b"
text = "a word here"
print(re.findall(pattern2, text)) # ['word']
# Other useful raw string escapes preserved literally
print(r"\n") # \n (two characters, not newline)
print(r"\t") # \t (two characters, not tab)
print(r"\\") # \\ (two backslashes)
# Raw strings cannot end with an odd number of backslashes
# r"path\" # SyntaxError - use "path\\" instead (r"path\\" is valid but ends in TWO backslashes)
# Raw f-strings combine both features
folder = "projects"
file = "readme.txt"
# The backslashes stay literal while {folder} and {file} are still substituted
path3 = rf"C:\Users\Alice\{folder}\{file}"
print(path3) # C:\Users\Alice\projects\readme.txt
Bytes and Encoding
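Python strings are sequences of Unicode code points. (Internally CPython stores each string with 1, 2 or 4 bytes per character, depending on the widest character it contains - not always UTF-32.) When communicating with files, networks, or APIs, you need to encode strings to bytes and decode bytes back to strings.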
# Bytes and encoding examples: encode/decode, bytes literals, combining
# characters, and error handlers for characters the target encoding lacks.
# str -> bytes (encode)
s = "Hello, World!"
b = s.encode("utf-8") # b'Hello, World!'
print(type(b)) # <class 'bytes'>
print(b) # b'Hello, World!'
# bytes -> str (decode)
s2 = b.decode("utf-8")
print(s2) # Hello, World!
print(type(s2)) # <class 'str'>
# bytes literal - prefix b
data = b"raw bytes"
print(data[0]) # 114 (integer, not character)
print(chr(data[0])) # r (convert to character)
# Unicode characters
emoji = "Hello \u2764 Python"
print(emoji) # Hello ❤ Python
# Code points vs. bytes
# The accent is written as an explicit escape so the literal is unambiguously
# the decomposed (NFD) form: "e" followed by U+0301 COMBINING ACUTE ACCENT.
s3 = "cafe\u0301" # cafe + combining accent (NFD)
print(s3) # cafe + accent mark (rendered on the e)
print(len(s3)) # 5 (5 code points)
print(len(s3.encode("utf-8"))) # 6 (6 bytes - accent needs 2)
# ASCII encoding - fails on non-ASCII
try:
    "caf\xe9".encode("ascii") # cafe with e-acute
except UnicodeEncodeError as e:
    print(f"Error: {e}")
# Safe encoding with error handling
s4 = "caf\xe9"
print(s4.encode("ascii", errors="replace")) # b'caf?'
print(s4.encode("ascii", errors="ignore")) # b'caf'
print(s4.encode("ascii", errors="xmlcharrefreplace")) # b'caf&#233;'
# Reading files with explicit encoding
# Always specify encoding to avoid platform-dependent behavior
# with open("file.txt", "r", encoding="utf-8") as f:
#     content = f.read() # returns str
# with open("file.bin", "rb") as f:
#     raw = f.read() # returns bytes
Python's default file encoding depends on the operating system - UTF-8 on Linux/macOS, often Windows-1252 on Windows. Always pass encoding="utf-8" explicitly to open() so your code behaves the same on all platforms: open("file.txt", encoding="utf-8").