From 2a96a8b179ca22a4689eabfccf59825073d70d8d Mon Sep 17 00:00:00 2001 From: santi1234567 <45318759+santi1234567@users.noreply.github.com> Date: Wed, 14 Feb 2024 12:28:25 -0300 Subject: [PATCH 01/11] create env example --- .env.example | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..143012a --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +LOG_LEVEL="info" # debug, info, warn, error +DB_URL="postgres://user:password@localhost:5432/dbName" # URL to connect to the postgres database +WORKER_NUM=15 # Number of workers to run concurrent alchemy/EL node requests +ALCHEMY_URL="https://eth-mainnet.g.alchemy.com/v2/KEY" # Alchemy API URL +EL_ENDPOINT="http://localhost:8545" # Ethereum Layer 1 endpoint, can also be alchemy or infura From f08a46cf782c47425658d86eab659cb88150b4fc Mon Sep 17 00:00:00 2001 From: santi1234567 <45318759+santi1234567@users.noreply.github.com> Date: Wed, 14 Feb 2024 12:28:31 -0300 Subject: [PATCH 02/11] dockerize --- Dockerfile | 22 ++++++++++++++++++++++ docker-compose.yml | 24 ++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 Dockerfile create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3c24521 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM golang:1.21-alpine as builder +RUN apk add --update git +RUN apk add --update gcc +RUN apk add --update g++ +RUN apk add --update openssh-client +RUN apk add --update make + +RUN mkdir /app +WORKDIR /app +ADD . . 
+ +RUN go get +RUN go build -o ./build/eth_pokhar + + +FROM alpine:latest +RUN apk --no-cache add ca-certificates +WORKDIR / +COPY --from=builder /app/build/eth_pokhar ./ +COPY --from=builder /app/db/migrations ./db/migrations + +ENTRYPOINT ["sh", "-c"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..46c12b0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,24 @@ +version: "3.7" + +services: + eth-pokhar: + build: + context: ./ + dockerfile: Dockerfile + init: true + command: >- + "./eth_pokhar beacon_depositors_transactions + --log-level=${LOG_LEVEL} + --el-endpoint=${EL_ENDPOINT} + --db-url=${DB_URL} + --workers-num=${WORKER_NUM} + --alchemy-url=${ALCHEMY_URL} + && + ./eth_pokhar identify + --log-level=${LOG_LEVEL} + --el-endpoint=${EL_ENDPOINT} + --db-url=${DB_URL} + --workers-num=${WORKER_NUM} + --alchemy-url=${ALCHEMY_URL} + --recreate-table" + network_mode: "host" From 7bd12d7c6857c48e9435c68148200ddce7905d9a Mon Sep 17 00:00:00 2001 From: santi1234567 <45318759+santi1234567@users.noreply.github.com> Date: Wed, 14 Feb 2024 13:16:52 -0300 Subject: [PATCH 03/11] add db to docker --- .env.example | 5 +++++ .gitignore | 2 ++ docker-compose.yml | 13 +++++++++++++ 3 files changed, 20 insertions(+) diff --git a/.env.example b/.env.example index 143012a..1d9875e 100644 --- a/.env.example +++ b/.env.example @@ -3,3 +3,8 @@ DB_URL="postgres://user:password@localhost:5432/dbName" # URL to connect to the WORKER_NUM=15 # Number of workers to run concurrent alchemy/EL node requests ALCHEMY_URL="https://eth-mainnet.g.alchemy.com/v2/KEY" # Alchemy API URL EL_ENDPOINT="http://localhost:8545" # Ethereum Layer 1 endpoint, can also be alchemy or infura + +DATABASE_NAME=name # Your database name +DATABASE_USERNAME=user # Your database username +DATABASE_PASSWORD=pass # Your database password +LOCAL_PORT=5439 # Port where you connect to the database container diff --git a/.gitignore b/.gitignore index 97b7a6d..461174a 100644 --- 
a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ .env .vscode + +app-data diff --git a/docker-compose.yml b/docker-compose.yml index 46c12b0..f1c3498 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,3 +22,16 @@ services: --alchemy-url=${ALCHEMY_URL} --recreate-table" network_mode: "host" + depends_on: + - db + db: + image: postgres + restart: always + environment: + POSTGRES_USER: ${DATABASE_USERNAME} + POSTGRES_PASSWORD: ${DATABASE_PASSWORD} + POSTGRES_DB: ${DATABASE_NAME} + volumes: + - ./app-data/:/var/lib/postgresql/data/ + ports: + - "127.0.0.1:${LOCAL_PORT}:5432" From d8074d0299a6b1a187c8cb7642fe5c888ced8ec3 Mon Sep 17 00:00:00 2001 From: santi1234567 <45318759+santi1234567@users.noreply.github.com> Date: Wed, 14 Feb 2024 13:39:40 -0300 Subject: [PATCH 04/11] change log into info --- beacon-depositors-transactions/routines.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon-depositors-transactions/routines.go b/beacon-depositors-transactions/routines.go index 04a22e8..9cc37b9 100644 --- a/beacon-depositors-transactions/routines.go +++ b/beacon-depositors-transactions/routines.go @@ -100,7 +100,7 @@ func (b *BeaconDepositorsTransactions) downloadBeaconDeposits() { if err != nil { log.Fatalf("Error parsing block number: %s", err.Error()) } - log.Debugf("Downloaded 1000 more deposits on block %d", num) + log.Infof("Downloaded 1000 more deposits on block %d", num) params.PageKey = newPageKey firstCall = false err = b.processDepositTransfers(newTransfers, b.iConfig.Workers) From 6f43bfeb5b340963a5ac1284ac471f45aaad269f Mon Sep 17 00:00:00 2001 From: santi1234567 <45318759+santi1234567@users.noreply.github.com> Date: Wed, 14 Feb 2024 13:40:43 -0300 Subject: [PATCH 05/11] create dockerignore --- .dockerignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..7ff9721 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +app-data From 
4e34031174dfbb05ac7b7ba91cd911f03225110b Mon Sep 17 00:00:00 2001 From: santi1234567 <45318759+santi1234567@users.noreply.github.com> Date: Wed, 14 Feb 2024 17:41:03 -0300 Subject: [PATCH 06/11] update readme --- README.md | 164 ++++++++++++++++++++++++- repository-images/table_priorities.jpg | Bin 0 -> 70118 bytes 2 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 repository-images/table_priorities.jpg diff --git a/README.md b/README.md index 534adc5..66786af 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,160 @@ # eth-pokhar -Tool to identify validators <> entities in the Ethereum consensus layer +Eth-pokhar is a go tool that helps in the process of identifying the pool/entity that operates each validator in the Ethereum beacon chain. + +Identifying staking entities is tricky since this isn’t on-chain data in most cases. In the case of pools like Lido and Rocketpool, since they use smart contracts for creating their validators, the data is on-chain and can be easily identified. + +For the rest of the entities, other methods can be used like observing patterns in the depositor addresses like the ones found in this repository: [eth-deposits](https://github.com/alrevuelta/eth-deposits), and other off-chain data from contacts/data sources. When creating validators an address must deposit 32 ETH on the beaconchain contract. In most cases, entities share the same deposit address throughout multiple validators. By knowing a few of these cases, one can extrapolate the information and identify all of the validators that were generated by those addresses and thus identify the entities. + +This tool is used for tagging validators in [ethseer.io](https://ethseer.io/?network=mainnet). + +## Pre-requisites + +To use the tool, the following requirements must be met: + +- An alchemy API key (the free tier is enough). 
See [here](https://www.alchemy.com/pricing) +- Access to an Ethereum EL node + +Expect this tool to make the following number of requests to the Ethereum EL node on the first run: + +- ~1.5m [eth_getTransactionReceipt](https://docs.alchemy.com/reference/eth-gettransactionreceipt) requests +- ~1.5m [eth_call](https://docs.alchemy.com/reference/eth-call) requests + +And the following number of requests to the Alchemy API on each run: + +- ~300k [alchemy_getAssetTransfers](https://docs.alchemy.com/reference/alchemy-getassettransfers) requests + +## Available commands + +### `beacon_depositors_transactions` + +Fetches the transactions of the depositors of the beaconchain contract. + +Available options (configurable in the `.env` file): + +``` +OPTIONS: + --el-endpoint value Execution node endpoint (default: http://localhost:8545) [$EL_ENDPOINT] + --db-url value Database where to store transactions (default: postgres://user:password@localhost:5432/dbName) [$DB_URL] + --log-level value Log level: debug, warn, info, error (default: info) [$LOG_LEVEL] + --workers-num value Number of workers to process API requests (default: 10) [$WORKER_NUM] + --alchemy-url value Alchemy url (default: https://eth-mainnet.g.alchemy.com/v2/KEY) [$ALCHEMY_URL] + --help, -h show help +``` + +### `identify` + +Identify the pool in which validators are participating or the entity who operates the validators. 
+ +Available options (configurable in the `.env` file): + +``` +OPTIONS: + --el-endpoint value Execution node endpoint (default: http://localhost:8545) [$EL_ENDPOINT] + --db-url value Database where to store transactions (default: postgres://user:password@localhost:5432/dbName) [$DB_URL] + --log-level value Log level: debug, warn, info, error (default: info) [$LOG_LEVEL] + --alchemy-url value Alchemy url (default: https://eth-mainnet.g.alchemy.com/v2/KEY) [$ALCHEMY_URL] + --workers-num value Number of workers to process API requests (default: 10) [$WORKER_NUM] + --recreate-table Recreate the t_identified_validators table, meant to be used when one of the methodologies of identification changes (default: false) + --help, -h show help +``` + +## Running with Docker (recommended) + +To run the tool with docker, you can use the following commands: + +First, create a `.env` file on the root folder. You can use the `.env.example` file as a template. + +Then, run the following command to build the tool: + +```bash +docker-compose build +``` + +Finally, run the tool with the following command: + +```bash +docker-compose up -d +``` + +## Output + +The tool will create a database with the following tables: + +### `t_beacon_deposits` + +This table stores the deposits made to the beaconchain contract. It has the following columns: + +- `f_block_num`: The block number in which the deposit was made. +- `f_depositor`: The address of the depositor. +- `f_tx_hash`: The transaction hash of the deposit. +- `f_validator_pubkey`: The public key of the validator. + +### `t_beacon_depositors_transactions` + +This table stores the incoming/outgoing transactions of the depositors of the beaconchain contract. It has the following columns: + +- `f_block_num`: The block number in which the transaction was made. +- `f_value`: The value of the transaction. +- `f_from`: The address from which the transaction was made. +- `f_to`: The address to which the transaction was made. 
+- `f_tx_hash`: The transaction hash of the transaction. +- `f_depositor`: The address of the depositor to which the transaction is related. + +### `t_depositors_insert` + +This table stores the depositors that are used to identify the pool in which the validators are participating. See [Utilizing custom off-chain data](#utilizing-custom-off-chain-data) for more information. It has the following columns: + +- `f_depositor`: The address of the depositor. +- `f_pool_name`: The name of the pool in which the validators are participating. + +### `t_validators_insert` + +This table stores the validators that are used to identify the pool in which the validators are participating. See [Utilizing custom off-chain data](#utilizing-custom-off-chain-data) for more information. It has the following columns: + +- `f_validator_pubkey`: The public key of the validator. +- `f_pool_name`: The name of the pool in which the validators are participating. + +### `t_lido` + +This table stores the validators that are participating in the Lido pool. See [Lido operators](https://operatorportal.lido.fi/) for more information. It has the following columns: + +- `f_validator_pubkey`: The public key of the validator. +- `f_operator`: The name of the operator of the validator. +- `f_operator_index`: The index of the operator in the Lido pool. + +### `t_rocketpool` + +This table stores the validators that are participating in the Rocketpool pool. It has the following columns: + +- `f_validator_pubkey`: The public key of the validator. + +### `t_identified_validators` (End result) + +This table stores the validators with the pool/entity that operates them. Unidentified validators will have an `f_pool_name` value of `others`. It has the following columns: + +- `f_validator_pubkey`: The public key of the validator. +- `f_pool_name`: The name of the pool in which the validators are participating. 
+ +## Utilizing custom off-chain data + +As mentioned before, the tool can be used to identify validators by using off-chain data. For this purpose, two tables are created in the database on the first run: `t_depositors_insert` and `t_validators_insert`. + +### `t_depositors_insert` + +This table has the columns `f_depositor` and `f_pool_name`. The `identify` command will use this table to identify the pool in which the validators are participating. The `f_depositor` column is the address of the depositor and the `f_pool_name` is the name of the pool in which the validators are participating. All validators that have the same depositor address will be tagged with the `f_pool_name` value. + +### `t_validators_insert` + +This table has the columns `f_validator_pubkey` and `f_pool_name`. The `identify` command will use this table to identify the pool in which the validators are participating. The `f_validator_pubkey` column is the public key of the validator and the `f_pool_name` is the name of the pool in which the validators are participating. These values will be used to tag the validators. + +## Identification priority + +Since the end table `t_identified_validators` is the result of the identification process, validators' pool/entity will be tagged in the following order (if the validator is already tagged, the next step will override the previous tag, resulting in the last tag being the one that is stored): + +
+ +
## Database migrations @@ -9,3 +163,11 @@ More specifically, one could clean the migrations by forcing the version with `I3r
zgp=-gRenF0M1&&+XhQ1(qoHVPsy;LL-VmI8G>9&mZy8 v<9<+SQkB}LJUNjzbBoV
zRI;p$v1PfF_9YoS{LT8L_;(S+I4baN5!R;DDX~jthCQI;bXM%#7m1AVBk)-Oc!a+k
z&7JYci3N<_Yq(wgv-Xt_N5CC=@kuUvq~3u<^)inUogs=6Hx;J#W{fBj(duwW831{U
z@CK8%6j9LLLFL|Bb?}6baa|2Qzf6)FKSIxME#%~%f>v@mQ*`%&>@V)OOx{(T^K*Ik
zQ(ETdT|fM`Yu~Gqm495a_|~alQPS#LsZ0NH+%QQpIdkUh>n|x$7<3XhOFk
p^6X)CkPO>;w2pAh
zh`1Iy6km&aa7Q<`fMVgD`1kMy;4T9Y`ka1D)3nt2DST-hBi7^n+wl3eV#IV%z;lO>
z@r{_tP|={sRG=o&IEJ^{`n0Cer6CUU=rr5121ER(Z=#(k>|7?k79KhF3JH*tbE%f7
zsHW{jKe2>Ry=h)+1**7JVX)!@d@11eG$TT5K)u7
&A=ORaa%P|UHX{btSE1PdniB-_$pi=LHd1D(8MLaGHOWg3
z!sc