diff --git a/.dockerignore b/.dockerignore index e72e096..12809de 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,7 @@ .git .github .gitignore +.vscode *.md *.yml diff --git a/.gitignore b/.gitignore index a9d37c5..865d4a7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +.vscode/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 969cee9..5dfd19b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 0.1.4 +* Fixed a timezone issue with unit tests +* Upgraded pgrx to 0.11.1 +* Reworked Dockerfile - multi-stage, better extension configuration, added a Makefile with 'all' and 'run' targets +* Improved README + ## 0.1.3 #### Enhancements diff --git a/Cargo.toml b/Cargo.toml index c4f3a9f..ab11471 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,16 +1,16 @@ [package] name = "ulid" publish = false -version = "0.1.3" +version = "0.1.4" edition = "2021" -rust-version = "1.70.0" +rust-version = "1.74.0" [lib] crate-type = ["cdylib"] [features] -default = ["pg15"] +default = ["pg16"] pg11 = ["pgrx-tests/pg11", "pgrx/pg11"] pg12 = ["pgrx-tests/pg12", "pgrx/pg12"] pg13 = ["pgrx-tests/pg13", "pgrx/pg13"] @@ -21,10 +21,10 @@ pg_test = [] [dependencies] inner_ulid = { package = "ulid", version = "1.0.0" } -pgrx = "=0.10.2" +pgrx = "=0.11.1" [dev-dependencies] -pgrx-tests = "=0.10.2" +pgrx-tests = "=0.11.1" [profile.dev] panic = "unwind" diff --git a/Dockerfile b/Dockerfile index 4dc0c67..943a4d0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ +# use 12, 13, 14, 15, 16 ARG PG_MAJOR -FROM postgres:${PG_MAJOR} +FROM postgres:${PG_MAJOR} as build RUN apt-get update @@ -25,27 +26,30 @@ RUN chown postgres:postgres /home/postgres USER postgres RUN \ - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --profile minimal --default-toolchain 1.70.0 && \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --profile minimal --default-toolchain 1.74.0 && \ 
rustup --version && \ rustc --version && \ cargo --version # pgrx -RUN cargo install cargo-pgrx --version 0.10.2 --locked +RUN cargo install cargo-pgrx --version 0.11.1 --locked +# init postgres dev env for target version RUN cargo pgrx init --pg${PG_MAJOR} $(which pg_config) -USER root - +# move the code COPY . . -RUN cargo pgrx install +# compile and package +RUN cargo pgrx package -RUN chown -R postgres:postgres /home/postgres -RUN chown -R postgres:postgres /usr/share/postgresql/${PG_MAJOR}/extension -RUN chown -R postgres:postgres /usr/lib/postgresql/${PG_MAJOR}/lib +# multi-stage - let's start clean +FROM postgres:${PG_MAJOR} -USER postgres +# copy & configure the extension +COPY --from=build --chown=root /home/postgres/target/release/ulid-pg${PG_MAJOR}/ / +COPY --from=build /home/postgres/docker/* /docker-entrypoint-initdb.d/ +# allow deployment without a password ENV POSTGRES_HOST_AUTH_METHOD=trust ENV USER=postgres diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8195de0 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +all: + docker build -t postgresql-ulid --build-arg="PG_MAJOR=16" . + +run: + docker run -p 5432:5432 postgresql-ulid diff --git a/README.md b/README.md index 3501ccd..b5f5a04 100644 --- a/README.md +++ b/README.md @@ -6,14 +6,18 @@ A postgres extension to support [ulid][]. 1. [Why should I use this?](#why-should-i-use-this) 2. [Why should I use ulid over uuid?](#why-should-i-use-ulid-over-uuid) 3. [Monotonicity](#monotonicity) -4. [Usage](#usage) -5. [Installation](#installation) +4. [Installation](#installation) +5. [Usage](#usage) +6. [Recommendation](#recommendation) +7. [Building](#building) ## Why should I use this? +The use of GUIDs in a database is a trade-off between performance and security. GUIDs are typically used in OLTP to get around id predictability and distributed scalability. + There are several different postgres extensions for [ulid][], but all of them have feature gaps. 
A good extension should have: -- **Generator**: A generator function to generate [ulid][] identifiers. +- **Generator**: A generator function to generate [ulid][] identifiers. It is cryptographically secure, as it uses `rand::thread_rng()`. - **Binary**: Data be stored as binary and not text. - **Type**: A postgres type `ulid` which is displayed as [ulid][] text. - **Uuid**: Support for casting between UUID and [ulid][] @@ -35,15 +39,20 @@ There are several different postgres extensions for [ulid][], but all of them ha [^1]: You can convert the [ulid][] into `uuid` or `bytea` and store it like that. [^2]: Supports casting indirectly through `bytea`. -## Why should I use ulid over uuid? +## Why should I use ULID over UUID? -The main advantages are: +The main advantages of ULID are: -* Indexes created over ULIDs are less fragmented compared to UUIDs due to the timestamp and [monotonicity][] that was encoded in the ULID when it was created. +* Indexes generated using ULIDs exhibit lower fragmentation compared to UUIDs thanks to the encoded timestamp and monotonicity. +* ULIDs are k-ordered, which means the column can be sorted in time order. * ULIDs don't use special characters, so they can be used in URLs or even HTML. * ULIDs are shorter than UUIDs as they are comprised of 26 characters compared to UUIDs' 36 characters. +* ULIDs are more secure than UUIDv7: their randomness is 80 bits as opposed to 62 bits. +* UUID v1/v2 is impractical in many environments, as it requires access to a unique, stable MAC address. +* UUID v3/v5 requires a unique seed and produces randomly distributed IDs, which can cause fragmentation in many data structures. +* UUID v4 provides no information other than randomness, which can cause fragmentation in many data structures. -This extension is approximately **30% faster** than both `pgcrypto`'s UUID and `pg_uuidv7`'s UUIDv7 when generating a million identifiers. 
+This extension is approximately **30% faster** than both `pgcrypto`'s UUID and `pg_uuidv7`'s UUIDv7 when generating a million identifiers, while leveraging a cryptographically secure random generator.
@@ -110,7 +119,9 @@ ulid=# EXPLAIN ANALYSE INSERT INTO ulid_keys(id) SELECT gen_ulid() FROM generate
-## Monotonicity +### Monotonicity + +Monotonicity guarantees k-sorted order on the same postgres instance. This extension supports [monotonicity][] through `gen_monotonic_ulid()` function. To achive this, it uses PostgreSQL's shared memory and LWLock to store last generated ULID. @@ -167,13 +178,13 @@ ulid=# EXPLAIN ANALYZE INSERT INTO users (name) SELECT 'Client 2' FROM generate_ -### Pros +#### Pros 1. Monotonic ULIDs are better for indexing, as they are sorted by default. 2. Monotonic ULIDs slightly faster than `gen_ulid()` when generating lots of ULIDs within one millisecond. Because, in this case, there is no need to generate random component of ULID. Instead it is just incremented. -### Cons +#### Cons 1. Previously generated ULID is saved in shmem and accessed via LWLock. i.e. it is exclusive for function invocation within database. Theoretically this can lead to slowdowns. @@ -187,6 +198,21 @@ ulid=# EXPLAIN ANALYZE INSERT INTO users (name) SELECT 'Client 2' FROM generate_ *...But, chances are negligible.* +## Installation + +The extension consists of 3 files + +1. **ulid--0.1.4.sql** & **ulid.control** - the extension configuration files, to deploy in SHAREDIR +2. 
**ulid.so** - the extension itself, to deploy in LIBDIR + +Edit *postgresql.conf* and add the following line: + +```conf +shared_preload_libraries = 'ulid' # (change requires restart) +``` + +> Note: None of this configuration is required if you use the custom docker image + ## Usage Use the extension in the database: @@ -195,6 +221,19 @@ Use the extension in the database: CREATE EXTENSION ulid; ``` +Test generation speed + +```SQL +-- gen +EXPLAIN ANALYSE SELECT gen_ulid() FROM generate_series(1, 1000000); +-- gen and insert +EXPLAIN ANALYSE INSERT INTO ulid_keys(id) SELECT gen_ulid() FROM generate_series(1, 1000000); + +-- same as above but monotonic +EXPLAIN ANALYSE SELECT gen_monotonic_ulid() FROM generate_series(1, 1000000); +EXPLAIN ANALYSE INSERT INTO ulid_keys(id) SELECT gen_monotonic_ulid() FROM generate_series(1, 1000000); +``` + Create a table with [ulid][] as a primary key: ```sql @@ -213,6 +252,14 @@ CREATE TABLE users ( ); ``` +Insert records + +```SQL +INSERT INTO users values (DEFAULT, 'Olivier'); +``` + + + Operate it normally with text in queries: ```sql @@ -220,22 +267,84 @@ SELECT * FROM users WHERE id = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; ``` Cast [ulid][] to timestamp: +```SQL +SELECT id::timestamp FROM users WHERE id = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; +``` -```sql -ALTER TABLE users -ADD COLUMN created_at timestamp GENERATED ALWAYS AS (id::timestamp) STORED; +or to uuid +```SQL +SELECT id::uuid FROM users WHERE id = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; ``` -Cast timestamp to [ulid][], this generates a zeroed ULID with the timestamp prefixed (TTTTTTTTTT0000000000000000): +## Recommendation -```sql --- gets all users where the ID was created on 2023-09-15, without using another column and taking advantage of the index -SELECT * FROM users WHERE id BETWEEN '2023-09-15'::timestamp::ulid AND '2023-09-16'::timestamp::ulid; +### Do not confuse ULID's internal date with the record creation date + +They are indeed quite similar at first glance, yet the dates have different connotations and, more significantly, a distinct life 
cycle. + +**I would strongly advise against** using ulid as a create_date column for the following reasons: + +* First, an index on a date column is faster than one on a time-ordered random GUID, because of the GUID's randomness. +* Accidents happen - loss of data, code mistakes, migrations - you may have to change one of these dates without impacting the other. +* You may decide to create ULIDs asynchronously or in advance, therefore dissociating generation from record creation. +* In the end they are two different things: the **id's creation date** vs the **record's creation date**. Typically, in IT we get much better results by separating concerns. + +## Building + + You may build and deploy the extension locally: + +```shell +$ cargo install cargo-pgrx --version 0.11.1 --locked +# on osx only, because we need pg_config +$ brew install postgresql ``` -## Installation +[pgrx][] is a friendly framework to deploy postgresql extensions in rust. To install a local dev environment, use + +```shell +# if postgresql is not installed +# the following command will install and configure each version +$ cargo pgrx init +``` + +or use your own running instance: +```shell +# if you need to reuse a pre-installed postgresql +# make sure postgresql/bin is in the PATH +# e.g. fish_add_path /opt/homebrew/opt/postgresql@16/bin +$ cargo pgrx init --pg16 (which pg_config) +``` + +From there, you may run the unit tests, interact with a test instance or compile the delivery package. + +```shell +# run the unit tests +$ cargo pgrx test +``` + +```shell +# interact with a test instance +$ cargo pgrx start +$ cargo pgrx connect +``` + +```shell +# compile the delivery package +$ cargo pgrx install --release +$ cargo pgrx package +``` + +Last, build a postgres distribution with built-in ulid support + +```shell +# ensure docker is up +# to build the docker image +make +# to run it +make run +``` -Use [pgrx][]. 
You can clone this repo and install this extension locally by following [this guide](https://github.com/tcdi/pgrx/blob/master/cargo-pgrx/README.md#installing-your-extension-locally). +Further details can be found by following [this guide](https://github.com/tcdi/pgrx/blob/master/cargo-pgrx/README.md#installing-your-extension-locally). You can also download relevant files from [releases](https://github.com/pksunkara/pgx_ulid/releases) page. diff --git a/docker/config-postgres.sh b/docker/config-postgres.sh new file mode 100755 index 0000000..0748ebd --- /dev/null +++ b/docker/config-postgres.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +echo "shared_preload_libraries = 'ulid'" >> ~/data/postgresql.conf diff --git a/docker/install-extensions.sql b/docker/install-extensions.sql new file mode 100644 index 0000000..3c27b46 --- /dev/null +++ b/docker/install-extensions.sql @@ -0,0 +1 @@ +CREATE EXTENSION IF NOT EXISTS ulid; \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index b5fff88..e077824 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -189,15 +189,19 @@ mod tests { #[pg_test] fn test_ulid_to_timestamp() { - let result = - Spi::get_one::<&str>(&format!("set timezone TO 'UTC'; SELECT '{TEXT}'::ulid::timestamp::text;")).unwrap(); + let result = Spi::get_one::<&str>(&format!( + "set timezone TO 'UTC'; SELECT '{TEXT}'::ulid::timestamp::text;" + )) + .unwrap(); assert_eq!(Some(TIMESTAMP), result); } #[pg_test] fn test_timestamp_to_ulid() { - let result = - Spi::get_one::<&str>(&format!("set timezone TO 'UTC'; SELECT '{TIMESTAMP}'::timestamp::ulid::text;")).unwrap(); + let result = Spi::get_one::<&str>(&format!( + "set timezone TO 'UTC'; SELECT '{TIMESTAMP}'::timestamp::ulid::text;" + )) + .unwrap(); assert_eq!(Some("01GV5PA9EQ0000000000000000"), result); }