Regex Basics
A regular expression is a pattern for matching text. In JavaScript, create one with a literal /pattern/flags or with new RegExp(pattern, flags).
// Regex literal - preferred for fixed patterns
const re1 = /hello/;
const re2 = /hello/gi; // with flags (g = global, i = ignoreCase)
// RegExp constructor - when pattern is dynamic
// Note: the string is parsed as regex syntax, so metacharacters inside `word`
// (., *, +, ?, parentheses, ...) keep their special meaning unless escaped first
const word = 'hello';
const re3 = new RegExp(word, 'gi');
const re4 = new RegExp('^\\d+$'); // escape backslashes in strings ('\\d' in the string becomes \d in the pattern)
// Basic matching - literal characters
/cat/.test('The cat sat'); // true
/cat/.test('The dog ran'); // false
// Special characters need escaping with \
/\./.test('3.14'); // matches literal dot (. alone matches any char)
/\$/.test('$100'); // matches literal dollar sign
/\(/.test('(a+b)'); // matches literal open paren
// . matches any character except newline
/c.t/.test('cat'); // true
/c.t/.test('cot'); // true
/c.t/.test('ct'); // false (needs exactly one character between c and t)
// ^ and $ - anchors
/^hello/.test('hello world'); // true (starts with hello)
/world$/.test('hello world'); // true (ends with world)
/^hello$/.test('hello'); // true (exactly 'hello')
/^hello$/.test('say hello'); // false ('say ' comes before hello, so ^ cannot match there)
Flags
| Flag | Name | Effect |
|---|---|---|
| g | global | Find all matches, not just the first |
| i | ignoreCase | Case-insensitive matching |
| m | multiline | ^ and $ match start/end of each line |
| s | dotAll | . matches newlines too |
| u | unicode | Full Unicode mode (required for Unicode property escapes) |
| d | indices | Add start/end indices to match results |
// i - case insensitive
/hello/i.test('Hello World'); // true
/hello/i.test('HELLO'); // true
// g - global (find all matches)
'cat bat rat'.match(/[a-z]at/g); // ['cat', 'bat', 'rat']
'cat bat rat'.match(/[a-z]at/); // ['cat'] (no g = only first match)
// m - multiline: ^ and $ match per line, not just start/end of string
const text = 'line one\nline two\nline three';
text.match(/^line/gm); // ['line', 'line', 'line'] - all three
text.match(/^line/); // ['line'] - without g and m, only the match at the very start of the string
// s - dotAll: . matches newline characters too
/hello.world/.test('hello\nworld'); // false (. doesn't match \n by default)
/hello.world/s.test('hello\nworld'); // true (dotAll)
// Combining flags
// g collects every address; i lets the lowercase-only classes match 'BOB@test.org'
const emailRe = /[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}/gi;
const found = 'Contact: alice@example.com or BOB@test.org'.match(emailRe);
// ['alice@example.com', 'BOB@test.org']
Character Classes and Quantifiers
// Character classes
/[aeiou]/.test('hello'); // true - matches any vowel
/[^aeiou]/.test('hello'); // true - [^] negates: any non-vowel
/[a-z]/.test('hello'); // true - lowercase letter
/[A-Z]/.test('Hello'); // true - uppercase letter
/[0-9]/.test('abc123'); // true - digit
// Shorthand character classes
// \d - digit [0-9]
// \D - non-digit [^0-9]
// \w - word character [a-zA-Z0-9_]
// \W - non-word character [^a-zA-Z0-9_]
// \s - whitespace (space, tab, newline, and other Unicode whitespace)
// \S - non-whitespace
/\d+/.test('abc123'); // true (one or more digits)
/^\d+$/.test('12345'); // true (only digits)
/^\d+$/.test('123a5'); // false (contains 'a')
/\s/.test('hello world'); // true (space)
/\w+/.test('hello_123'); // true (word characters)
// Quantifiers
// * - 0 or more
// + - 1 or more
// ? - 0 or 1 (optional)
// {n} - exactly n times
// {n,} - n or more times
// {n,m} - between n and m times
/ca*t/.test('ct'); // true (0 a's)
/ca*t/.test('caat'); // true (2 a's)
/ca+t/.test('ct'); // false (+ requires at least 1)
/ca+t/.test('cat'); // true
/colou?r/.test('color'); // true (u is optional)
/colou?r/.test('colour'); // true
// The patterns below are unanchored, so they also succeed when the digits sit inside
// a longer run; add ^ and $ to constrain the whole string
/\d{4}/.test('2024'); // true (exactly 4 digits)
/\d{2,4}/.test('123'); // true (2 to 4 digits)
/\d{4,}/.test('12345'); // true (4 or more)
// Lazy (non-greedy) quantifiers - match as little as possible
const html = '<b>bold</b> and <i>italic</i>';
html.match(/<.+>/)[0]; // '<b>bold</b> and <i>italic</i>' (greedy - as much as possible)
html.match(/<.+?>/)[0]; // '<b>' (lazy - as little as possible)
test() and exec()
// test() - returns boolean (use it when only a yes/no answer is needed)
const emailRe = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
emailRe.test('alice@example.com'); // true
emailRe.test('not-an-email'); // false
// exec() - returns a match array or null (includes capture groups)
const dateRe = /(\d{4})-(\d{2})-(\d{2})/;
// Without the g flag only the first date in the string is reported
const match = dateRe.exec('Today is 2024-06-15 and tomorrow is 2024-06-16');
if (match) {
console.log(match[0]); // '2024-06-15' (full match)
console.log(match[1]); // '2024' (group 1)
console.log(match[2]); // '06' (group 2)
console.log(match[3]); // '15' (group 3)
console.log(match.index); // 9 (position of match)
}
// exec() with g flag - iterate all matches
const re = /\d+/g;
const input = 'scores: 95, 87, 91';
let m;
// Each call resumes the search at re.lastIndex; exec() returns null once nothing is left
while ((m = re.exec(input)) !== null) {
console.log(`Found ${m[0]} at index ${m.index}`);
}
// Found 95 at index 8
// Found 87 at index 12
// Found 91 at index 16
// CAUTION: exec() with g stores state in re.lastIndex (reset to 0 only after a failed match)
// Reuse the same /regex/g variable carefully - or use matchAll() instead
String match, matchAll, replace, search, split
const text = 'Prices: $19.99 and $5.50 and $299.00';
// match() with g - array of all matched strings (no groups)
const prices = text.match(/\$[\d.]+/g);
// ['$19.99', '$5.50', '$299.00']
// matchAll() with g - iterator of full result objects (with groups)
const moneyRe = /\$(?<dollars>\d+)\.(?<cents>\d{2})/g;
for (const m of text.matchAll(moneyRe)) {
// Values shown are for the first iteration
console.log(m[0]); // '$19.99'
console.log(m.groups.dollars); // '19'
console.log(m.groups.cents); // '99'
console.log(m.index); // 8 (position of the match in text)
}
// Collect all matches to array
const allMatches = [...text.matchAll(moneyRe)];
// replace() - replace first match (without g) or all (with g)
'hello world'.replace(/o/, '0'); // 'hell0 world' (first only)
'hello world'.replace(/o/g, '0'); // 'hell0 w0rld' (all)
// replace() with function - compute replacement dynamically
// The callback receives the matched text and returns its replacement
const formatted = text.replace(/\$[\d.]+/g, match => {
return '**' + match + '**'; // bold each price
});
// replace() with capture groups
'2024-06-15'.replace(/(\d{4})-(\d{2})-(\d{2})/, '$3/$2/$1');
// '15/06/2024' - $1 $2 $3 are the groups
// replaceAll() - simpler than replace+g for fixed strings
'cat cat cat'.replaceAll('cat', 'dog'); // 'dog dog dog'
// Note: replaceAll() with a regex requires the g flag
// search() - returns index of first match or -1
text.search(/\$[\d.]+/); // 8 (index of first price; 'Prices: ' is 8 characters long)
text.search(/xyz/); // -1
// split() - split on regex pattern
'one two three'.split(/\s+/); // ['one', 'two', 'three'] (any whitespace run)
Groups and Backreferences
// Capturing group (...) - saves the matched text
const m = /(\d{4})-(\d{2})-(\d{2})/.exec('2024-06-15');
console.log(m[1], m[2], m[3]); // '2024' '06' '15'
// Non-capturing group (?:...) - groups without saving (keeps capture numbering uncluttered)
/(?:Mr|Ms|Dr)\.?\s+(\w+)/.exec('Dr. Alice Smith');
// [0]: 'Dr. Alice' - full match
// [1]: 'Alice' - only the name is captured (Dr/Ms/Mr group is non-capturing)
// Alternation | inside groups
/^(cat|dog|bird)$/.test('cat'); // true
/^(cat|dog|bird)$/.test('fish'); // false
// Backreferences - \1 refers to the first captured group
// Match repeated words
/\b(\w+)\s+\1\b/.test('hello hello'); // true - same word twice
/\b(\w+)\s+\1\b/.test('hello world'); // false
// Backreference in replace
// Swap first and last name
'Smith, Alice'.replace(/(\w+),\s*(\w+)/, '$2 $1');
// 'Alice Smith'
// Groups in matchAll
const tagRe = /<(\w+)>(.*?)<\/\1>/gs; // \1 = backreference to tag name
const html = '<p>Hello</p><span>World</span>';
// The leading comma skips element 0 (the full match) when destructuring each result
for (const [, tag, content] of html.matchAll(tagRe)) {
console.log(tag, ':', content); // prints 'p : Hello', then 'span : World'
}
Lookahead and Lookbehind
Zero-width assertions - they check what is around the match without including it in the matched text.
// Positive lookahead (?=...) - match only if followed by pattern
'100px 200em 300rem'.match(/\d+(?=px)/g); // ['100'] - only digits before px
// Negative lookahead (?!...) - match only if NOT followed by pattern
'cat catch catfish'.match(/cat(?!ch|fish)/g); // ['cat'] - standalone cat only
// Positive lookbehind (?<=...) - match only if preceded by pattern
'$100 €200 £300'.match(/(?<=\$)\d+/g); // ['100'] - only after dollar sign
// Negative lookbehind (?<!...) - match only if NOT preceded by pattern
// Here the two assertions together force each match to be a complete run of digits
'100px 200em 300'.match(/(?<!\d)\d+(?!\d)/g); // ['100', '200', '300']
// Practical: password strength check
/**
 * Checks a password against a basic strength policy: at least 8 characters,
 * with at least one uppercase letter, one lowercase letter, one digit and one
 * of the special characters !@#$%^&*.
 *
 * @param {string} pw - Candidate password.
 * @returns {boolean} true when every rule is satisfied; false otherwise
 *   (including when pw is not a string).
 */
function checkPassword(pw) {
  if (typeof pw !== 'string') {
    return false;
  }
  // Each lookahead asserts one requirement from the start of the string without
  // consuming anything, so the order of the characters does not matter.
  // Anchoring with ^ means every assertion is evaluated once instead of being
  // retried from each position. The s flag lets . cross newlines so the result
  // does not depend on line breaks inside the password.
  const strongPassword =
    /^(?=.*[A-Z])(?=.*[a-z])(?=.*\d)(?=.*[!@#$%^&*]).{8,}$/s;
  return strongPassword.test(pw);
}
// Extract amounts with specific currency
// The lookbehind requires a '$' immediately before the digits but keeps it out of the match
const prices = '$100 €200 £300 $50';
const usdPrices = prices.match(/(?<=\$)\d+/g); // ['100', '50']
// Insert comma separators in numbers (3 digits at a time)
/**
 * Formats a number with commas between groups of three integer digits.
 * Digits after a decimal point are left untouched.
 *
 * @param {number|string} n - Value to format; converted with toString().
 * @returns {string} The textual value with thousands separators inserted.
 */
function formatNumber(n) {
  // \B            - only between two digits, never at the start of a digit run
  // (?<!\.\d*)    - skip positions inside a fractional part (a '.' followed only by digits)
  // (?=(\d{3})+(?!\d)) - a whole number of 3-digit groups must remain up to the end of the run
  return n.toString().replace(/\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g, ',');
}
formatNumber(1234567); // '1,234,567'
formatNumber(1234.5678); // '1,234.5678' (fraction is not grouped)
Named Capture Groups
// Named groups: (?<name>pattern)
const dateRe = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;
const match = dateRe.exec('2024-06-15');
if (match) {
// match.groups holds one property per named group
const { year, month, day } = match.groups;
console.log(year, month, day); // '2024' '06' '15'
}
// Named groups in replace with $<name>
'2024-06-15'.replace(
/(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/,
'$<day>/$<month>/$<year>'
);
// '15/06/2024'
// Named groups in replace with a function
// Callback arguments: full match, one argument per capture group, the match offset,
// the whole input string, and finally the object of named groups
'2024-06-15'.replace(
/(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/,
(match, p1, p2, p3, offset, string, groups) => {
return `${groups.day}/${groups.month}/${groups.year}`;
}
);
// '15/06/2024'
// Named backreference: \k<name>
// Match HTML tag with matching closing tag
/<(?<tag>\w+)>.*?<\/\k<tag>>/s.test('<p>hello</p>'); // true
/<(?<tag>\w+)>.*?<\/\k<tag>>/s.test('<p>hello</div>'); // false
// matchAll with named groups
const log = '2024-06-15 ERROR server crash\n2024-06-16 INFO started';
// . does not match \n here (no s flag), so msg stops at the end of each line
const logRe = /(?<date>\d{4}-\d{2}-\d{2})\s(?<level>\w+)\s(?<msg>.+)/gm;
for (const { groups } of log.matchAll(logRe)) {
console.log(`[${groups.date}] ${groups.level}: ${groups.msg}`);
}
// [2024-06-15] ERROR: server crash
// [2024-06-16] INFO: started
Practical Patterns
// Email (simplified - fully compliant email regex is very complex)
// Shape checked: something@something.tld with no whitespace or extra '@', and a
// final part of at least 2 characters. The i flag has no effect on this pattern
// because it contains no letters.
const EMAIL = /^[^\s@]+@[^\s@]+\.[^\s@]{2,}$/i;
EMAIL.test('alice@example.com'); // true
EMAIL.test('alice@'); // false
// Phone number (flexible - optional '+', optional 1-3 digit country code, then
// groups of 3, 3 and 4-6 digits; each separator may be a hyphen, whitespace or dot,
// and the first 3-digit group may be wrapped in parentheses)
const PHONE = /^\+?(?:\d{1,3}[-\s.]?)?\(?\d{3}\)?[-\s.]?\d{3}[-\s.]?\d{4,6}$/;
PHONE.test('555-123-4567'); // true
PHONE.test('+1 (555) 123-4567'); // true
// URL slug (lowercase letters, digits, hyphens)
// Hyphens may only appear between alphanumeric runs - no leading, trailing or doubled hyphens
const SLUG = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
SLUG.test('my-blog-post'); // true
SLUG.test('My Blog Post'); // false
// Hex color
// One or two groups of three hex digits after '#', i.e. #rgb or #rrggbb
const HEX_COLOR = /^#(?:[0-9a-fA-F]{3}){1,2}$/;
HEX_COLOR.test('#fff'); // true
HEX_COLOR.test('#3b82f6'); // true
HEX_COLOR.test('#gg0000'); // false
// Extract key=value pairs from a query string
/**
 * Parses a URL query string into a plain object of decoded key/value strings.
 *
 * A single leading '?' is ignored, '+' is decoded as a space and %XX escapes
 * are decoded. Parameters without '=' are skipped; when a key repeats, the
 * last value wins.
 *
 * @param {string} qs - Query string such as 'a=1&b=2' or '?a=1&b=2'.
 * @returns {Object<string, string>} Decoded parameters.
 * @throws {URIError} If a key or value contains a malformed percent escape.
 */
function parseQueryString(qs) {
  // In form-encoded query strings '+' stands for a space; a literal plus
  // arrives as %2B, so the swap must happen before percent-decoding.
  const decode = (part) => decodeURIComponent(part.replace(/\+/g, ' '));
  const query = qs.replace(/^\?/, '');
  const params = {};
  for (const [, key, value] of query.matchAll(/([^=&]+)=([^&]*)/g)) {
    params[decode(key)] = decode(value);
  }
  return params;
}
parseQueryString('name=Alice&age=30&city=Chennai');
// { name: 'Alice', age: '30', city: 'Chennai' }
// Sanitize HTML - escape special characters for safe display
/**
 * Replaces the five HTML-significant characters (& < > " ') with their
 * entity references so the text can be placed in HTML content or quoted
 * attribute values without being interpreted as markup.
 *
 * @param {string} str - Raw text to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(str) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // A single pass over the input means the '&' of an inserted entity is never
  // escaped a second time.
  return str.replace(/[&<>"']/g, (ch) => entities[ch]);
}
// Camel case to kebab-case: put '-' before every uppercase letter, then lowercase everything
// (an input that starts with a capital letter would get a leading hyphen)
'backgroundColor'.replace(/([A-Z])/g, '-$1').toLowerCase(); // 'background-color'
// Trim multiple spaces to single space
// (\s{2,} matches any run of two or more whitespace characters, including tabs and newlines)
'hello   world    foo'.replace(/\s{2,}/g, ' '); // 'hello world foo'
Use an online regex tester (like regex101.com) to visualize matches, test edge cases, and understand why a pattern works. Regex bugs are subtle - a missing anchor or wrong quantifier can silently match or reject inputs you did not intend. Always test with valid inputs, invalid inputs, and edge cases (empty string, very long input).