SQL

Note

Unless otherwise specified, these notes are written with MySQL in mind.

Important

The order of keywords matters! The keywords below are listed in the order they should be used.

General

Variables

SET @start_date = '2023-01-01'; -- semicolon is important

SELECT *
FROM users
WHERE created_at > @start_date

Dates & Times

Important

Make sure to put dates in quotes! Without quotes, 2022-12-25 will be interpreted as "the number 2,022 minus 12 minus 25"

# created_at is a timestamp, which looks like '2022-12-01 14:27:08'
WHERE DATE(created_at) <= '2022-12-25'
# or
WHERE TIME(created_at) > '12:00:00'
WHERE
    created_at >= '2022-12-25'
AND
    created_at < '2022-12-26'

INTERVAL

WHERE created_date >= NOW() - INTERVAL 1 DAY

Keywords

SELECT / AS / FROM

SELECT
    id,
    name,
    owner_name AS owner
FROM pets

SELECT DISTINCT

SELECT DISTINCT owner_name
FROM pets
SELECT DISTINCT name, owner_name
FROM pets

CASE

SELECT
    a.name,
    CASE
        WHEN a.rating <= 2 THEN 'Bad'
        WHEN a.rating = 3 THEN 'Average'
        WHEN a.rating >= 4 THEN 'Good'
    END AS quality
FROM
    albums a
UPDATE
    albums a
SET
    a.album_type = CASE
        WHEN a.track_count = 1 THEN 'Single'
        WHEN a.track_count <= 5 THEN 'EP'
        # fallback to existing value, otherwise it will be set to NULL
        ELSE a.album_type
    END

INSERT

INSERT INTO pets (name, species, age, owner_id)
VALUES ('Rover', 'dog', 3, 1)
INSERT INTO pets (name, species, age, owner_id)
VALUES ('Rover', 'dog', 3, 1), ('Spot', 'dog', 1, 1)

INSERT IGNORE

INSERT INTO pets (id, name) VALUES (1, 'Rover')

INSERT INTO pets (id, name) VALUES (1, 'Spot') # errors

INSERT IGNORE INTO pets (id, name) VALUES (1, 'Spot') # doesn't error, does nothing

REPLACE (upsert)

REPLACE INTO pets (id, name) VALUES (1, 'Rover') # inserts (if the database is empty)

REPLACE INTO pets (id, name) VALUES (1, 'Spot')

Conditional inserts

INSERT INTO pets (name, species, age, owner_id)
SELECT 'Rover', 'dog', 3, 1
WHERE NOT EXISTS (SELECT * FROM pets WHERE species = 'dog' AND owner_id = 1)

UPDATE / SET

Warning

Don't forget the WHERE clause - otherwise you'll update every row in the table!

UPDATE
    pets p
SET
    p.sound = 'meow',
    p.fluffy = 1
WHERE
    p.species = 'cat'

JOIN

SELECT
	u.id,
	u.name,
	p.name AS petName,
	p.species
FROM
	database.users u
	JOIN database.pets p ON p.owner_id = u.id

sqlJoins_7.webp

USING

SELECT users.id, pets.id, address
FROM users
JOIN pets USING (address)

WHERE

SELECT *
FROM pets
WHERE
    species = 'cat'
    OR species = 'dog'
    AND age >= 1

Boolean and null checking

SELECT *
FROM pets
WHERE
    first_name IS NOT NULL
    AND last_name IS NULL

Check if a match exists

SELECT 1
FROM pets
WHERE p.species = 'cat'
LIMIT 1

LIKE (pattern matching)

WHERE name LIKE '%alice%'
WHERE name LIKE BINARY '%Alice%'

REGEXP_LIKE (regular expressions)

WHERE REGEXP_LIKE(name, '\\w+_\\d{4}_\\d{2}_\\d{2}')

IN (list of values)

Warning

The order of the returned rows doesn't necessarily match the order of the given values!

WHERE name IN ('Alice', 'Bob')

MEMBER_OF (in JSON array)

WHERE id MEMBER OF ('[1, 2, 3]')
-- if `user.relationships` was a JSON field with something like '{ 123: 'sibling' }'
WHERE CONVERT(user.id, CHAR) MEMBER OF (JSON_KEYS(user.relationships))

BETWEEN (ranges)

WHERE id BETWEEN 100 AND 200

EXISTS and NOT EXISTS

WHERE EXISTS ( ... )
AND EXISTS ( ... )
AND NOT EXISTS ( ... )

Select rows that share a value with other rows

SELECT a.*
FROM pets a
WHERE EXISTS (
    SELECT 1
    FROM pets b
    WHERE a.owner_id = b.owner_id
    AND a.id <> b.id
)

Select rows that don't meet a condition in another table

SELECT *
FROM users u
WHERE NOT EXISTS (
    SELECT 1
    FROM pets p
    WHERE u.id = p.owner_id
    AND p.species = 'cat'
)

HAVING

List all values that match more than one row

SELECT first_name, COUNT(*)
FROM users
GROUP BY first_name
HAVING COUNT(*) > 1

GROUP BY

SELECT owner_id, COUNT(*)
FROM pets
GROUP BY owner_id

GROUP BY and ordering

SELECT owner_id, name, age
FROM pets
GROUP BY owner_id
ORDER BY age DESC
SELECT owner_id, name, MAX(age)
FROM pets
GROUP BY owner_id
ORDER BY age DESC

ORDER BY

SELECT id, name
FROM pets
ORDER BY id DESC -- or ASC

LIMIT / OFFSET

SELECT id
FROM pets
LIMIT 5
OFFSET 10

WITH / AS (subqueries)

WITH ids AS (
    SELECT
        a.id
    FROM
        albums a
    WHERE
        a.stars = 5
    LIMIT 10
)

UPDATE
    albums a
SET
    a.recommended = 1
WHERE
    a.id IN (SELECT * FROM ids)
WITH foo AS (
    ...
), bar AS (
    ...
)
SELECT
    foo.id AS foo, bar.id AS bar
FROM
    foo
    JOIN bar USING (id)

Functions

Warning

Do not put a space between the function name and parentheses!

Aggregate functions (COUNT, MAX, MIN, SUM, AVG)

Select based on an aggregate

SELECT
    MIN(birth_date) as birth_date,
    id
FROM
    pets
WHERE
    species = "dog"
GROUP BY
    species

Count number of distinct values

SELECT COUNT(DISTINCT first_name) ...

CAST

Format Notes/Example
DATE YYYY-MM-DD
DATETIME YYYY-MM-DD HH:MM:SS
DECIMAL takes two parameters (M, D) for max integer digits & number of decimal digits
TIME HH:MM:SS
CHAR fixed length string
NCHAR string with the national character set
SIGNED signed 64-bit int
UNSIGNED unsigned 64-bit int
BINARY binary string

LENGTH (string length)

SELECT *
FROM pets
WHERE length(first_name) <= 5

JSON

JSON_OBJECT

SET u.metadata = JSON_OBJECT('is_enrolled', TRUE, 'is_mobile', FALSE)

JSON_KEYS

SELECT JSON_KEYS(user.metadata)

JSON_CONTAINS_PATH

WHERE JSON_CONTAINS_PATH(p.metadata, 'one', '$.processed_date')

WHERE JSON_CONTAINS_PATH(p.metadata, 'all', '$.created_date', '$.processed_date')

JSON_EXTRACT

SELECT JSON_EXTRACT(column_name, '$.version') ...
SELECT column_name->"$.version"
DATE(JSON_UNQUOTE(JSON_EXTRACT(p.metadata, '$.created_date')))
CAST(JSON_UNQUOTE(JSON_EXTRACT(p.metadata, '$.created_date')) as datetime)

JSON_SET, JSON_INSERT, JSON_REPLACE

UPDATE
	users u
SET
	u.metadata = CASE WHEN u.metadata IS NULL
	THEN JSON_OBJECT('is_enrolled', TRUE)
	ELSE JSON_SET(u.metadata, '$.is_enrolled', TRUE)
	END
WHERE
	u.id = 12345

JSON_REMOVE

UPDATE
    users u
SET
    u.metadata = JSON_REMOVE(u.metadata, '$.is_enrolled')
    END
WHERE
    u.id = 12345

See also

Creating tables

CREATE TABLE pets (
    id int NOT NULL AUTO_INCREMENT,
    owner_id int NOT NULL,
    name varchar(255),
    FOREIGN KEY (owner_id) REFERENCES Users(id),
    PRIMARY KEY (id)
);

Transactions

START TRANSACTION;

-- destructive code goes here - don't forget a semicolon at the end
;

-- add a SELECT here to verify that the changes look good
;

ROLLBACK;
-- or to commit the changes use COMMIT;

Configuration

DESCRIBE

DESCRIBE database.users

Auto increment

ALTER TABLE 'users' AUTO_INCREMENT = 1;

SQL mode

Get

SELECT @@GLOBAL.sql_mode;

Set

SET GLOBAL sql_mode = 'NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';

Time zone

Get

SELECT @@global.time_zone;
SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP);

Set

SET @@global.time_zone = '+00:00';

Renaming columns

Renaming existing columns can cause deployed code to break. One approach to safely renaming columns:

  1. update the code to handle both the old and new names and deploy
  2. rename the column
  3. update the code to remove the old name

Another approach:

  1. add a new column with the new name and copy the data over
  2. update the code to use the new column and deploy
  3. drop the old column

PostgreSQL differences from MySQL

user_id integer;
quantity numeric(5) DEFAULT 32;
url varchar := 'http://mysite.com';
transaction_time CONSTANT timestamp with time zone := now();
myrow tablename%ROWTYPE;
myfield tablename.columnname%TYPE;
arow RECORD;
'AB' || 'CD' -> 'ABCD'

'ABC' || 123 -> 'ABC123'

Arrays

CREATE TABLE sal_emp (
    name            text,
    pay_by_quarter  integer[],
    schedule        text[][]
);

INSERT INTO sal_emp
    VALUES ('Bill',
    '{10000, 10000, 10000, 10000}',
    '{{"meeting", "lunch"}, {"training", "presentation"}}');
-- or
INSERT INTO sal_emp
    VALUES ('Bill',
    ARRAY[10000, 10000, 10000, 10000],
    ARRAY[['meeting', 'lunch'], ['training', 'presentation']]);

SELECT pay_by_quarter[3] FROM sal_emp;

-- if using slices, use them for all dimensions
SELECT schedule[1:2][1:1] FROM sal_emp WHERE name = 'Bill';

UPDATE sal_emp SET pay_by_quarter[1:2] = '{27000,27000}'
    WHERE name = 'Carol';
ARRAY[1, 2] || ARRAY[3, 4] -> {1, 2, 3, 4}

ARRAY[1, 2, 3] || 4 -> {1, 2, 3, 4}

0 || ARRAY[1, 2, 3] -> {0, 1, 2, 3}
WHERE 10000 = ANY (pay_by_quarter)