PostgreSQL—The original NoSQL

Peter Eisentraut

peter@eisentraut.org
@petereisentraut@mastodon.social

https://www.enterprisedb.com/

Introduction

Peter Eisentraut

A short history of NoSQL

PostQUEL

create person (name = text, age = int4, location = point)
create city (name = char16, location = box, budget = city_budget)

append city (name = "Berkeley",
             location = "(1, 4, -122.3,37.9, -122.0,37.9, -122.0,37.6, -122.3,37.6)")

retrieve (p.name, p.age) from p in person* sort by age using >

replace p (age = p.age + 3) from p in person* where p.name = "linda"

delete person where person.name = "linda"

destroy person
destroy city

object-relational

CREATE TABLE person (name text, age int4, location point);
CREATE TABLE emp (salary int4, manager text) INHERITS (person);

SELECT name, age, salary FROM emp*;  -- look, no joins!

object-relational

CREATE TABLE person (name text, age int4, location point);
CREATE TABLE emp (person person, salary int4, manager text);

SELECT emp.person.name, emp.person.age, salary
  FROM emp;  -- look, no joins!

object-relational

-- multiple inheritance hell included!

CREATE TABLE person (name text, age int4, location point);

CREATE TABLE emp (salary int4, manager text) INHERITS (person);

CREATE TABLE student (...) INHERITS (person);

CREATE TABLE stud_emp (...) INHERITS (emp, student);

extensibility

CREATE FUNCTION
CREATE TYPE
CREATE OPERATOR
CREATE AGGREGATE
CREATE ACCESS METHOD
…
CREATE EXTENSION

extensibility

Postgres is bloatware by design: it was built to house PhD theses.

—Joe Hellerstein

extensibility

Corollary (Eisentraut, 2024):

Postgres is built to be the foundation of tech startups.

PostGIS

CREATE EXTENSION postgis;

CREATE TABLE global_points (
  id serial PRIMARY KEY,
  name varchar(64),
  location geography(POINT,4326)
);

INSERT INTO global_points (name, location) VALUES ('Town', 'SRID=4326;POINT(-110 30)');
INSERT INTO global_points (name, location) VALUES ('Forest', 'SRID=4326;POINT(-109 29)');
INSERT INTO global_points (name, location) VALUES ('London', 'SRID=4326;POINT(0 49)');

SELECT name FROM global_points
  WHERE ST_DWithin(location, 'SRID=4326;POINT(-110 29)'::geography, 1_000_000);

PostGIS + indexing

CREATE INDEX ON locations USING gist (geom);

CREATE INDEX ON locations USING brin (geom);

CREATE INDEX ON locations USING spgist (geom);

JSON

CREATE TABLE people (
  id int,
  name varchar,
  data jsonb
);

INSERT INTO people (name, data)
  VALUES ('John Smith',
          '{"dob": "1984-09-20",'
          ' "phones": [{"type": "mobile", "number": "12345"}],'
          ' "has_dog": true }');

JSON path

SELECT name, JSON_VALUE(data, '$.phones[0].number') AS mobile_phone
  FROM people
  WHERE current_date - JSON_VALUE(data, '$.dob.date()' RETURNING date) >= 18;

JSON path native syntax

/* future/WIP */

SELECT name, people.data.phones[0].number AS mobile_phone
  FROM people
  WHERE current_date - people.data.dob.date() >= 18;

JSON + indexing

CREATE INDEX ON people USING gist (data);

JSON tables?

/* future/maybe/WIP */

CREATE JSON TABLE people;

JSON—Does it matter?

XML

CREATE TABLE people (
  id int,
  name varchar,
  data xml
);

INSERT INTO people (name, data)
  VALUES ('John Smith',
          '<person><dob>1984-09-20</dob>'
          '<phones><phone type="mobile">12345</phone></phones>'
          '<has_dog/><person>');


SELECT name, xpath_string(data, '/person/phones/phone[@type="mobile"]') AS mobile_phone
  FROM people
  WHERE current_date - xpath_string(data, '/person/dob')::date >= 18;

Why not a specialized DBMS?

Full-text search

CREATE TABLE products (
  id int,
  name varchar,
  description varchar,
  desc_vec tsvector
    GENERATED ALWAYS AS (to_tsvector('english', description)) STORED
);

SELECT name
  FROM products
  WHERE desc_vec @@ to_tsquery('shoe & brown');

Full-text search + indexing

CREATE INDEX ON products USING gist (desc_vec);

CREATE INDEX ON products USING gin (desc_vec);

pgvector

CREATE EXTENSION pgvector;

CREATE TABLE products (
  id int,
  name varchar,
  description varchar,
  desc_vec vector  -- generated by Internet magic
);

SELECT name
  FROM products
  WHERE desc_vec <-> vector('(more Internet magic)');

pgvector + indexing

CREATE INDEX ON products USING hnsw (desc_vec vector_l2_ops);

CREATE INDEX ON products USING ivfflat (desc_vec vector_l2_ops);

Graph database

Graph database + SQL = SQL/PGQ

/* future/WIP */

CREATE PROPERTY GRAPH my_graph
  VERTEX TABLES (person, company, account)
  EDGE TABLES (
    person_accounts SOURCE person DESTINATION account LABEL ownerof,
    company_accounts SOURCE company DESTINATION account LABEL ownerof,
    employees SOURCE person DESTINATION company LABEL worksfor,
    transaction SOURCE account DESTINATION account
  );

Graph database + SQL = SQL/PGQ

/* future/WIP */

SELECT gt.name, gt.amount
FROM GRAPH_TABLE (my_graph MATCH
    (p IS person)
      -[IS worksfor]->
    (c IS company)
      -[IS ownerof]->
    (a IS account)
      -[t IS transaction WHERE t.amount >= 100.00]->
    (a2 IS account)
      <-[IS ownerof]-
    (c2 IS company)
      <-[IS worksfor]-
    (p IS person)
    WHERE c.name <> c2.name
    COLUMNS (p.name, t.amount)
) AS gt;

Conclusion

PostgreSQL = relational

+ object + GIS + document + K/V + queue + time series + IoT + text search + vector + AI + …