diff --git a/.github/codeql-config.yml b/.github/codeql-config.yml new file mode 100644 index 00000000..0468ce24 --- /dev/null +++ b/.github/codeql-config.yml @@ -0,0 +1,7 @@ +name: "CodeQL config" + +paths: + - index + - api +paths-ignore: + - api/swagger-ui diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 549b73cb..e5526229 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -32,7 +32,7 @@ jobs: strategy: fail-fast: false matrix: - language: [ 'javascript', 'python' ] + language: [ 'javascript' ] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] # Learn more: # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed @@ -46,6 +46,7 @@ jobs: uses: github/codeql-action/init@v1 with: languages: ${{ matrix.language }} + config-file: ./.github/codeql-config.yml # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. 
diff --git a/.github/workflows/deactivate.yaml b/.github/workflows/deactivate.yaml new file mode 100644 index 00000000..0f32a866 --- /dev/null +++ b/.github/workflows/deactivate.yaml @@ -0,0 +1,15 @@ +name: Deactivate + +on: + pull_request: + types: [closed] + +jobs: + bury_review_env: + name: 🪦 Review + runs-on: ubuntu-latest + steps: + - uses: SocialGouv/actions/autodevops-deactivate@v1 + with: + kube-config: ${{ secrets.KUBECONFIG }} + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/indexing-dev.yml b/.github/workflows/indexing-dev.yml index c9831f32..608cb548 100644 --- a/.github/workflows/indexing-dev.yml +++ b/.github/workflows/indexing-dev.yml @@ -5,13 +5,52 @@ on: concurrency: cancel-in-progress: true - group: indexing + group: indexing-dev jobs: index: - name: Index + name: Index Dev runs-on: ubuntu-latest steps: - - name: Echo + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Yarn cache setup + uses: c-hive/gha-yarn-cache@v2 + with: + directory: .k8s + + - name: Install kosko-charts dependencies + shell: bash + run: yarn --cwd .k8s install --frozen-lockfile --prefer-offline + + - name: Generate indexing job + shell: bash + run: yarn --cwd .k8s --silent generate --env dev jobs/indexing > indexing.yml + env: + RANCHER_PROJECT_ID: ${{ secrets.RANCHER_PROJECT_ID }} + SOCIALGOUV_BASE_DOMAIN: ${{ secrets.SOCIALGOUV_BASE_DOMAIN }} + + - name: Archive indexing job + uses: actions/upload-artifact@v2 + with: + name: indexing.yml + path: indexing.yml + + - name: Get namespace name + uses: mikefarah/yq@master + id: namespace + with: + cmd: yq eval ".metadata.namespace" indexing.yml | head -n 1 + + - name: Create kubernetes config + shell: bash + run: | + mkdir -p ~/.kube + touch ~/.kube/config + echo "${{ secrets.KUBECONFIG }}" | base64 -d > ~/.kube/config + + - name: Launch indexing Job shell: bash - run: echo "Hello, world" + run: | + kubectl apply -f indexing.yml --namespace ${{ steps.namespace.outputs.result }} diff --git 
a/.github/workflows/indexing-prod.yml b/.github/workflows/indexing-prod.yml new file mode 100644 index 00000000..13566630 --- /dev/null +++ b/.github/workflows/indexing-prod.yml @@ -0,0 +1,57 @@ +name: Indexing (prod) + +on: + workflow_dispatch: + +concurrency: + cancel-in-progress: true + group: indexing-prod + +jobs: + index: + name: Index Prod + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Yarn cache setup + uses: c-hive/gha-yarn-cache@v2 + with: + directory: .k8s + + - name: Install kosko-charts dependencies + shell: bash + run: yarn --cwd .k8s install --frozen-lockfile --prefer-offline + + - name: Generate indexing job + shell: bash + # HACK: we use preprod to run the job on the dev cluster + run: yarn --cwd .k8s --silent generate --env preprod jobs/indexing > indexing.yml + env: + RANCHER_PROJECT_ID: ${{ secrets.RANCHER_PROJECT_ID }} + SOCIALGOUV_BASE_DOMAIN: ${{ secrets.SOCIALGOUV_BASE_DOMAIN }} + + - name: Archive indexing job + uses: actions/upload-artifact@v2 + with: + name: indexing.yml + path: indexing.yml + + - name: Get namespace name + uses: mikefarah/yq@master + id: namespace + with: + cmd: yq eval ".metadata.namespace" indexing.yml | head -n 1 + + - name: Create kubernetes config + shell: bash + run: | + mkdir -p ~/.kube + touch ~/.kube/config + echo "${{ secrets.KUBECONFIG }}" | base64 -d > ~/.kube/config + + - name: Launch indexing Job + shell: bash + run: | + kubectl apply -f indexing.yml --namespace ${{ steps.namespace.outputs.result }} diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml new file mode 100644 index 00000000..3e2fbd96 --- /dev/null +++ b/.github/workflows/production.yml @@ -0,0 +1,75 @@ +name: Production + +on: + push: + tags: + - v* + +concurrency: + group: production + cancel-in-progress: true + +jobs: + ############################################################################## + ## BUILD AND REGISTER DOCKER IMAGE + 
############################################################################## + register: + name: Build & Register docker images + runs-on: ubuntu-latest + steps: + - name: Get project name + run: | + echo "project=${GITHUB_REPOSITORY#*/}" >> $GITHUB_ENV + + - name: Register API + uses: SocialGouv/actions/autodevops-build-register@v1 + with: + project: ${{ env.project }} + imageName: ${{ env.project }}/search + token: ${{ secrets.GITHUB_TOKEN }} + dockerfile: ./api/Dockerfile + dockercontext: ./api + + - name: Register Frontend (demo) + uses: SocialGouv/actions/autodevops-build-register@v1 + with: + project: ${{ env.project }} + imageName: ${{ env.project }}/front + token: ${{ secrets.GITHUB_TOKEN }} + dockerfile: ./front/Dockerfile + dockercontext: ./front + + - name: Register Indexing + uses: SocialGouv/actions/autodevops-build-register@v1 + with: + project: ${{ env.project }} + imageName: ${{ env.project }}/index + token: ${{ secrets.GITHUB_TOKEN }} + dockerfile: ./index/Dockerfile + dockercontext: ./index + + ############################################################################## + ## DEPLOY PRODUCTION APPLICATION + ############################################################################## + deploy-prod: + name: Deploy production + runs-on: ubuntu-latest + needs: [register] + environment: + name: production + url: https://recherche-entreprises.fabrique.social.gouv.fr + steps: + - name: Use k8s manifests generation + uses: SocialGouv/actions/k8s-manifests@v1 + with: + environment: "prod" + rancherId: ${{ secrets.RANCHER_PROJECT_ID }} + socialgouvBaseDomain: ${{ secrets.SOCIALGOUV_BASE_DOMAIN }} + + - name: Use autodevops deployment + uses: SocialGouv/actions/autodevops-deploy@v1 + with: + environment: "prod" + token: ${{ secrets.GITHUB_TOKEN }} + kubeconfig: ${{ secrets.KUBECONFIG }} + rancherId: ${{ secrets.RANCHER_PROJECT_ID }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..2ac85b99 
--- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,17 @@ +name: Release + +on: + workflow_dispatch: + push: + branches: [master, alpha, beta, next] + +jobs: + release: + name: Release + runs-on: ubuntu-latest + steps: + - uses: SocialGouv/actions/autodevops-release@v1 + with: + github-token: ${{ secrets.SOCIALGROOVYBOT_BOTO_PAT }} + author-name: ${{ secrets.SOCIALGROOVYBOT_NAME }} + author-email: ${{ secrets.SOCIALGROOVYBOT_EMAIL }} diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml new file mode 100644 index 00000000..ca7b02cb --- /dev/null +++ b/.github/workflows/review.yml @@ -0,0 +1,81 @@ +name: Review + +on: + push: + branches: + - "**" + tags-ignore: + - v* + +concurrency: + cancel-in-progress: true + group: ${{ github.ref }} + +jobs: + ############################################################################## + ## BUILD AND REGISTER DOCKER IMAGES + ############################################################################## + register: + name: Build & Register + runs-on: ubuntu-latest + steps: + - name: Get project name + run: | + echo "project=${GITHUB_REPOSITORY#*/}" >> $GITHUB_ENV + + - name: Register API + uses: SocialGouv/actions/autodevops-build-register@v1 + with: + project: ${{ env.project }} + imageName: ${{ env.project }}/search + token: ${{ secrets.GITHUB_TOKEN }} + dockerfile: ./api/Dockerfile + dockercontext: ./api + + - name: Register Frontend (demo) + uses: SocialGouv/actions/autodevops-build-register@v1 + with: + project: ${{ env.project }} + imageName: ${{ env.project }}/front + token: ${{ secrets.GITHUB_TOKEN }} + dockerfile: ./front/Dockerfile + dockercontext: ./front + + - name: Register Indexing + uses: SocialGouv/actions/autodevops-build-register@v1 + with: + project: ${{ env.project }} + imageName: ${{ env.project }}/index + token: ${{ secrets.GITHUB_TOKEN }} + dockerfile: ./index/Dockerfile + dockercontext: ./index + + ############################################################################## 
+ ## GENERATE KUBERNETES MANIFESTS + ############################################################################## + manifests: + name: Generate k8s manifests + runs-on: ubuntu-latest + steps: + - name: Use k8s manifests generation + uses: SocialGouv/actions/k8s-manifests@v1 + with: + environment: "dev" + rancherId: ${{ secrets.RANCHER_PROJECT_ID }} + socialgouvBaseDomain: ${{ secrets.SOCIALGOUV_BASE_DOMAIN }} + + ############################################################################## + ## DEPLOY APPLICATION OVER KUBERNETES + ############################################################################## + deploy: + name: Deploy application + runs-on: ubuntu-latest + needs: [register, manifests] + steps: + - name: Use autodevops deployment + uses: SocialGouv/actions/autodevops-deploy@v1 + with: + environment: "dev" + token: ${{ secrets.GITHUB_TOKEN }} + kubeconfig: ${{ secrets.KUBECONFIG }} + rancherId: ${{ secrets.RANCHER_PROJECT_ID }} diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index bd775d84..00000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,77 +0,0 @@ -include: - - project: SocialGouv/gitlab-ci-yml - file: /autodevops.yml - ref: v23.0.1 - -variables: - # AUTO_DEVOPS_TEST_DISABLED: "🛑" - AUTO_DEVOPS_QUALITY_DISABLED: "🛑" - AUTO_DEVOPS_KANIKO: "🛑" - # AUTO_DEVOPS_ENABLE_KAPP: "🛑" - AUTO_DEVOPS_RELEASE_AUTO: "🚀" - AUTO_DEVOPS_PRODUCTION_AUTO: "🚀" - -Install: - script: - - echo "no install" - -Test: - script: - - echo "no test" - -Build: - rules: - - when: never - -Register Kaniko image: - extends: .autodevops_register_kaniko_image - needs: [] - variables: - CONTEXT: ./index - IMAGE_NAME: recherche-entreprises-index - -Register API image: - extends: .autodevops_register_kaniko_image - needs: [] - variables: - CONTEXT: ./api - IMAGE_NAME: recherche-entreprises-api - -.indexing_job: - stage: Deploy - extends: - - .base_deploy_kosko_stage - allow_failure: true - -############## -# -# Note: these memory intensive jobs cannot run on 
public Github actions runners -# -############## - -Download and Index (dev): - extends: - - .indexing_job - rules: - - if: "$CI_COMMIT_TAG" - when: never - - when: manual - variables: - KOSKO_GENERATE_ARGS: --env dev jobs/indexing - environment: - name: ${CI_COMMIT_REF_NAME}${AUTO_DEVOPS_DEV_ENVIRONMENT_NAME} - url: https://${CI_ENVIRONMENT_SLUG}-${CI_PROJECT_NAME}.${KUBE_INGRESS_BASE_DOMAIN} - -Download and Index (prod): - extends: - - .indexing_job - rules: - - if: "$CI_COMMIT_TAG" - when: manual - variables: - # the preprod env is used for production index - # so it runs on the dev cluster - KOSKO_GENERATE_ARGS: --env preprod jobs/indexing - environment: - name: preprod${AUTO_DEVOPS_PREPROD_ENVIRONMENT_NAME} - url: https://preprod-${CI_PROJECT_NAME}.${KUBE_INGRESS_BASE_DOMAIN} diff --git a/.k8s/__tests__/__snapshots__/dev.ts.snap b/.k8s/__tests__/__snapshots__/dev.ts.snap index 6352247e..bab93c79 100644 --- a/.k8s/__tests__/__snapshots__/dev.ts.snap +++ b/.k8s/__tests__/__snapshots__/dev.ts.snap @@ -10,20 +10,21 @@ metadata: janitor/ttl: 15d field.cattle.io/creatorId: gitlab field.cattle.io/projectId: c-bd7z2:p-7ms8p - git/branch: master - git/remote: >- - https://gitlab-ci-token:[MASKED]@gitlab.factory.social.gouv.fr/SocialGouv/recherche-entreprises.git - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + git/branch: refs/heads/mybranch + git/remote: socialgouv/recherche-entreprises + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: azure-pg-admin-user: recherche-entreprises - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - name: 
recherche-entreprises-85-master-dev42 + name: recherche-entreprises-mybranch --- apiVersion: apps/v1 kind: Deployment @@ -31,18 +32,20 @@ metadata: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard name: recherche-entreprises-api - namespace: recherche-entreprises-85-master-dev42 + namespace: recherche-entreprises-mybranch spec: replicas: 1 selector: @@ -53,20 +56,21 @@ spec: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard spec: containers: - - image: >- - harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-api:0123456789abcdefghijklmnopqrstuvwxyz0123 + - image: harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-api:1.5.8 livenessProbe: 
failureThreshold: 6 httpGet: @@ -113,35 +117,39 @@ metadata: sealedsecrets.bitnami.com/cluster-wide: 'true' kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-master-dev42 + namespace: recherche-entreprises-mybranch spec: encryptedData: ELASTICSEARCH_URL: >- - AgDGWiSE5kUv0Mv4Dot8KfbSGm5XVlyi4V9yLiMI97deZctFWRd2T31MtTUz/XgT4VlFH4NUhK+dQonLEILE2P9rs4MMDaY09cc3NZiDLB9GJoBcWnWsa02QV4Xykk8UthTHQPFC4g+0ElFU4UewbIc3Zc2ZqIk5Y803TnYkRuAF1bM1ZdpWGnm+yh7+lXnbRMmFo62vhPVB8lnl9Z/RVxhs1jx6xnbIFyTjvelM4/sySiyHZ0h74fL61DAvOcNBgCTd49RRBBzcAhp6a8KL45FL1eAYyOqS8vW5uNeFu59Cu9ByRruhyTELjpPtLesjsBUHiqmfBh09esKw+mdtMmfYxIsy+JGPS50N8GiNj+n0A5XXtLQkm9oT884r4LEQip35YHyenh20NmueHX8RGMOXRT05PWnbL70NtZHqUwzFdc21vdFhXAb/MVaMgqcKG4RPkRS+qljn+pQjJ8DkY0EqgWOCfSyjB+jKJ65peSqqORq+B8zTDgpv9ZaN+xSDsfi+PckPPJqKOjaTkQW/VdfwpT6ipOUpNgznUOGy7XN9vxAQR9P79n6KlIyMsme4hN7OHX5ko0xWQZcCyfHbSbT7JHY5UpmTv+c0NtyrV755FQN75HDFRhQYHtgLnhR9PuBCZ8Gvtg2T/Nr63GsTMC5e52bj2zIRlOUTrsvxEIYKQoBPILoueEIcMz64/UiMAZ9RwH308wRHOrZtch5gclzUQWZEgI5Vx2cJQ/zr9iqBaedg2v8GqvESeKw1e7pgXoLE6pFKJtQa7cMwQgmrBkkxrEfePcRDP4yAMVQaiM8DoqIjVg== + 
AgCxK1MqqVkCOHvMjjcieUsj1uE/ATUzAfYTYVEMqcWFxTnugOaYlXB2mTuZtzdjXNvINREzDynnVRBghPIJvrJm59vPgexC5FyQptdPf2XwQAG6n0yMLWAEBM+O8ieO1D6vBu6BYuq4QYoItq+SMTE6APNX+p6wi4/LP04cR8p8bVKNxckPMfyVfQGhUSTD+2gC5ow2x1Mx67odqqP3E8eK9tm5tU6ltzOpKqJKITUQIqTQbSKFr8p77DE4IrdbPn8MpvdP+VrT+ztF4thPkpVpuYWy9rABdQKeEuNdN2AuvmPeJs/2B80iApIgtWWQjV/iG+WlLGMohvWDZVsBGuBuYGzhjJlr5w8OljwKDihb2cG1M/Ot/LM1xRuCC85wvqH0oMhnnP/3DZts5aYAN+qjAezOkPT9kjT+gu0WoQB+E5r9ITcYYHYbJw/YSxS2z86Ul0amRRW4ovAWT5c/nFDZU6i74I8YyVh8oOtrieZgNF4ajJcIr5YpjT2LICKoZR0KExrQ4V30zZJ+wZcvd8W3e/KCmKsFxloTkLq/sjEiFQoe6ffYTQhSUXZynqrdUKZsdoSleprfctlJtf3rzxt5hZIpcLulXrYk7pPPiHkTcV3Zc4zkN3BFE1A9vsgTvh6xTXlvUekJHHMXU3t8ZyZD8vvemxv0FE8G33XWQF2DEkmyc3gXMlfPDLho4fHv0h0iId0c9klFkOpmp8REFWJyuRQjq+hOHvfoCSu1ymsCwA0CMAguJoX1XEhn0aA9QIgf0VyBe/cVJwu/GfW74UBi6x5feqzFovYIUXbDP6kW ELASTICSEARCH_API_KEY: >- - AgCXfkWdcNqxUUWf9iPMYZXx2a1hU/NTseXJ3EHDYU2+9i4EB5dKGSALKprr1OmAnt04YJ0K5ALDWfHz+U8lN5wmlyZzfn1lbhccFByyCBM+k0WvKCieuMDCgRh8bkV+bSN7kYVq7ARzL8EzzUyvr7kzfoPwB3kwUSR+9yywSsr/OnC1cpS2+vAH+iJxa/NuLzPboxuOuMiKC2EvnO1GzoB82Sv36eXwrbfDDXilAXPm/zPUee12xXlf63b1ieDaOSZd75w7i+DrkALccdmMsCR11Rms93/cU1B8xsGC2L6R3n2AEaHI8vK9FsMzfHUdIbAKceoRKJW3CTNShc8imjeInj6B7t/KYTOc41oSZ2OVls6R6wCeaGtrxvQKfUR/Pbof+mTw7rBL1RMSdLtux2ZkdtHbY8k3kBqJq5jZ/8u6WLdtSuOcbiwB16dA+xpBp3LjTEtVR+8+KM+kW1xUuKIe+GV6RbfyDf3X0W5eSSIEUUgVblD0YXjbTFBT/oJBPlKvFmrPFCxwqsYDSv64+8/TeJrZWzX2XHi61Elye0iLrCVnH3PjiHrzm2URaRuu+VoaFi3TpYWGsgow29+IX4m2+z1NqOk/BMYUDCL9Gb6RzJwkNx21JFtN8eTgZF6w/FS/rBoK4/Ny75HE6L3D8L49upB7VEv328pk+PjHI68+AmzYTwTgUAA7HjvD/Y/mA6rrO2LiLljQTvqwZAIC8W1g0Eu2Ir/9sIatVp2GtxbQRvIekSD8WagNCY7ntzKHcQbnZ/ivEG74uoWiiGc= + 
AgCtqcSVac6ptXOzSGqrBps1Uz6elTOnheaAf8cqAb1G960NCqmmkx1L1FDhfSKCuhBUAp0mT/3rokX3cg/pLIqImIVlPEhq/n+ZaKPHqGt0UT9QpD3r1C4nYan/WJ1PnBlCTXYlA/dtmCakNjtf2ikvWg/6Keu1OGlHt/ymVc2guxqGl++caxPiEIQAiVHDZksc09RM2Z6jU/WnI5gzTPQjgucb2yiHBLWZsEcoCIsA2nXrp2/vQ/ATMtITKb1223jyHnJiargNnQrRVRA7Geuz5FyaqSXjqvjG1HrqVxAlVNzFPb/N3fN8C6KHCjtOuu/pOM+cqC4cFvzyzqGymcDOKGC5Ax47j5v0KsVlJZ93dfO2yD/Fi2txeuNdmQ42bvhpGbPJu/iYLXmaETvu6/qpL3UrU4i2KVyxXW0CbUzBVh1fTmCClAMDgETpWNvnRXgAuu0jbgTnWjN0zSmjia0/BJmY1Z5uct7T4Lli4Dz/vJucBthIBVgYaVX/jLqfaNWpD1TuWkTHsf4gR5HErzQp60NDej/I7TespFMqRMhrroc6VaH/Wl6wV4msRM3sYaxcHPd4kJxrOeA7cQnfqQsWs4/RG++p+TCmD18qgevVBV1z11vFu2R6KE+c1YRANE5e6EBMO8yUly1yarrx6jOo6E5kAdKqPGkNUyq3BgOvGWmkbV32slQ4hXm7zUuyVnXBmpmoryowHhqV7iW8Si6CSMKACCENVqJPrk55VaL8sw8dZfQF0aekJoEqf/UqAv/DZUTYOAg4QI+4iD4= template: metadata: annotations: sealedsecrets.bitnami.com/cluster-wide: 'true' kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' name: elastic-recherche-entreprises-read labels: - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard @@ -152,8 +160,8 @@ kind: Service metadata: labels: app: recherche-entreprises-api - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard @@ -161,10 +169,12 @@ metadata: annotations: 
kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 - namespace: recherche-entreprises-85-master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises-mybranch spec: ports: - name: http @@ -181,21 +191,23 @@ metadata: kubernetes.io/ingress.class: nginx kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard name: recherche-entreprises-api - namespace: recherche-entreprises-85-master-dev42 + namespace: recherche-entreprises-mybranch spec: rules: - - host: api-master-dev42-recherche-entreprises.dev42.fabrique.social.gouv.fr + - host: api-recherche-entreprises-mybranch.dev2.fabrique.social.gouv.fr http: paths: - backend: @@ -207,7 +219,310 @@ spec: pathType: Prefix tls: - hosts: - - api-master-dev42-recherche-entreprises.dev42.fabrique.social.gouv.fr + - api-recherche-entreprises-mybranch.dev2.fabrique.social.gouv.fr + secretName: wildcard-crt +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + 
kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-search + namespace: recherche-entreprises-mybranch +spec: + replicas: 1 + selector: + matchLabels: + app: recherche-entreprises-search + template: + metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + spec: + containers: + - image: >- + ghcr.io/socialgouv/recherche-entreprises/search:sha-0123456789abcdefghijklmnopqrstuvwxyz0123 + livenessProbe: + failureThreshold: 6 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 5 + name: recherche-entreprises-search + ports: + - containerPort: 3000 + name: http + readinessProbe: + failureThreshold: 15 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 0 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 16Mi + startupProbe: + failureThreshold: 12 + httpGet: + path: /healthz + port: http + periodSeconds: 5 + env: + - name: ELASTICSEARCH_INDEX_NAME + value: search-entreprises +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: 
recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-search + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises-mybranch +spec: + ports: + - name: http + port: 80 + targetPort: 3000 + selector: + app: recherche-entreprises-search + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: nginx + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-search + namespace: recherche-entreprises-mybranch +spec: + rules: + - host: search-recherche-entreprises-mybranch.dev2.fabrique.social.gouv.fr + http: + paths: + - backend: + service: + name: recherche-entreprises-search + port: + name: http + path: / + pathType: Prefix + tls: + - hosts: + - search-recherche-entreprises-mybranch.dev2.fabrique.social.gouv.fr + secretName: wildcard-crt +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + 
app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-front + namespace: recherche-entreprises-mybranch +spec: + replicas: 1 + selector: + matchLabels: + app: recherche-entreprises-front + template: + metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + spec: + containers: + - image: >- + ghcr.io/socialgouv/recherche-entreprises/front:sha-0123456789abcdefghijklmnopqrstuvwxyz0123 + livenessProbe: + failureThreshold: 6 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 5 + name: recherche-entreprises-front + ports: + - containerPort: 80 + name: http + readinessProbe: + failureThreshold: 15 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 0 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 16Mi + startupProbe: + failureThreshold: 12 + httpGet: + path: /healthz + port: http + periodSeconds: 5 + env: + - name: REACT_APP_API_URL + value: >- + https://search-recherche-entreprises-mybranch.dev2.fabrique.social.gouv.fr/api/v1 +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: 
recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-front + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises-mybranch +spec: + ports: + - name: http + port: 80 + targetPort: 80 + selector: + app: recherche-entreprises-front + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: nginx + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-front + namespace: recherche-entreprises-mybranch +spec: + rules: + - host: recherche-entreprises-mybranch.dev2.fabrique.social.gouv.fr + http: + paths: + - backend: + service: + name: recherche-entreprises-front + port: + name: http + path: / + pathType: Prefix + tls: + - hosts: + - recherche-entreprises-mybranch.dev2.fabrique.social.gouv.fr secretName: wildcard-crt " `; diff --git a/.k8s/__tests__/__snapshots__/indexing-dev.ts.snap b/.k8s/__tests__/__snapshots__/indexing-dev.ts.snap index afce1fe8..316ad409 100644 --- a/.k8s/__tests__/__snapshots__/indexing-dev.ts.snap +++ b/.k8s/__tests__/__snapshots__/indexing-dev.ts.snap @@ -8,33 +8,37 @@ metadata: name: elastic-recherche-entreprises-write annotations: 
sealedsecrets.bitnami.com/cluster-wide: 'true' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-master-dev42 + namespace: recherche-entreprises-mybranch spec: encryptedData: ELASTICSEARCH_URL: >- - AgDGWiSE5kUv0Mv4Dot8KfbSGm5XVlyi4V9yLiMI97deZctFWRd2T31MtTUz/XgT4VlFH4NUhK+dQonLEILE2P9rs4MMDaY09cc3NZiDLB9GJoBcWnWsa02QV4Xykk8UthTHQPFC4g+0ElFU4UewbIc3Zc2ZqIk5Y803TnYkRuAF1bM1ZdpWGnm+yh7+lXnbRMmFo62vhPVB8lnl9Z/RVxhs1jx6xnbIFyTjvelM4/sySiyHZ0h74fL61DAvOcNBgCTd49RRBBzcAhp6a8KL45FL1eAYyOqS8vW5uNeFu59Cu9ByRruhyTELjpPtLesjsBUHiqmfBh09esKw+mdtMmfYxIsy+JGPS50N8GiNj+n0A5XXtLQkm9oT884r4LEQip35YHyenh20NmueHX8RGMOXRT05PWnbL70NtZHqUwzFdc21vdFhXAb/MVaMgqcKG4RPkRS+qljn+pQjJ8DkY0EqgWOCfSyjB+jKJ65peSqqORq+B8zTDgpv9ZaN+xSDsfi+PckPPJqKOjaTkQW/VdfwpT6ipOUpNgznUOGy7XN9vxAQR9P79n6KlIyMsme4hN7OHX5ko0xWQZcCyfHbSbT7JHY5UpmTv+c0NtyrV755FQN75HDFRhQYHtgLnhR9PuBCZ8Gvtg2T/Nr63GsTMC5e52bj2zIRlOUTrsvxEIYKQoBPILoueEIcMz64/UiMAZ9RwH308wRHOrZtch5gclzUQWZEgI5Vx2cJQ/zr9iqBaedg2v8GqvESeKw1e7pgXoLE6pFKJtQa7cMwQgmrBkkxrEfePcRDP4yAMVQaiM8DoqIjVg== + 
AgCxK1MqqVkCOHvMjjcieUsj1uE/ATUzAfYTYVEMqcWFxTnugOaYlXB2mTuZtzdjXNvINREzDynnVRBghPIJvrJm59vPgexC5FyQptdPf2XwQAG6n0yMLWAEBM+O8ieO1D6vBu6BYuq4QYoItq+SMTE6APNX+p6wi4/LP04cR8p8bVKNxckPMfyVfQGhUSTD+2gC5ow2x1Mx67odqqP3E8eK9tm5tU6ltzOpKqJKITUQIqTQbSKFr8p77DE4IrdbPn8MpvdP+VrT+ztF4thPkpVpuYWy9rABdQKeEuNdN2AuvmPeJs/2B80iApIgtWWQjV/iG+WlLGMohvWDZVsBGuBuYGzhjJlr5w8OljwKDihb2cG1M/Ot/LM1xRuCC85wvqH0oMhnnP/3DZts5aYAN+qjAezOkPT9kjT+gu0WoQB+E5r9ITcYYHYbJw/YSxS2z86Ul0amRRW4ovAWT5c/nFDZU6i74I8YyVh8oOtrieZgNF4ajJcIr5YpjT2LICKoZR0KExrQ4V30zZJ+wZcvd8W3e/KCmKsFxloTkLq/sjEiFQoe6ffYTQhSUXZynqrdUKZsdoSleprfctlJtf3rzxt5hZIpcLulXrYk7pPPiHkTcV3Zc4zkN3BFE1A9vsgTvh6xTXlvUekJHHMXU3t8ZyZD8vvemxv0FE8G33XWQF2DEkmyc3gXMlfPDLho4fHv0h0iId0c9klFkOpmp8REFWJyuRQjq+hOHvfoCSu1ymsCwA0CMAguJoX1XEhn0aA9QIgf0VyBe/cVJwu/GfW74UBi6x5feqzFovYIUXbDP6kW ELASTICSEARCH_API_KEY: >- - AgDE1F8SFMKtRpPd9BKozxXC5YcwVae1SRc+ECX/9EUhLSaqxGXenqVbtE2tj6kovFcru8sSMgPQZIOGDel4QIpwGC9HozNQWGXkEf9AABjulJ82hwlyo/22XOYodqAs3FuZc0tiodEK6+GUw9KKVf6I2P3Y55AzwfZNuar319LG1H0OmkvJbaXjkQwuELjBeU0OG1hPSf3LSIX4F6hc9JlunyDgWFT+vqe7FN+0Q/BmRgbY9SJ4ZqD+TZxzLfKCmWAtGFWW2eFf5jJDmuXp5A4QK+jSd1vonJGjFn16fEgA32tOnBxaA+7TpcBqVq/oOqYvBckuXlDUrBIXkfKRK/FqjvFCxtkhpzrYRe6FFntA6MFmnYvzhsUEoIOSrn3O8Unlaae//am/sukctTkqTum4IF0316k5UwUjcEsd/1qTFM84l3mcPBeU656t1B0sAaTK5JEPSOkJvz1nk9Ei1RUXPJCVTp3i64hia5OeWgEaTWUsP1udhMhC1eGSt0tf5QtNnnDlpASebmI7oubuSiOLveXg/+RcHNXTztrbc3xSNAUIML8GTXNbA4TJXorvZKdecvVbmtsbH7u+4awna7J+SuAysKNdWdTeMsT0uMdFA0o4fez+PQVXc9+IF0DqU0T8EDeLfz62663aW1R7rQ2KEjK/2Vvy8W6EUjXIdY1DEPv1BmHpI9xRJJ8P8sTucaXBmohjPJb1lXzh+/9AmeIRhy/mDJTnCox/UOfyRrpx+FcU/Vi6TvJvV8ab7JZbufOVmo1iEPnP3Wx3rcQ= + 
AgANlBsYYlxFPxOSitnqdZ4LhP822km9ijYFc46pkpkSKoYGRHNm7gbpaPTRNWJ4u647IYI8Hl8E63GrtACnhntEIvJlHTE8ZwOw7kIA7NuPj/xma4nN3RTlazH8dmaGskd/wCD8mDZS2R3MhXxqSy6DVGbNJyRAmrgLnzCOgp5/aJkQAC3sZ9GOce0x+X7ngd+qKrOzf+CZS/hnf4/aQvEj6Caq141nl74jpuwpfjb3PWKl9aATJocZkyNw92ITRmfnGu5oE0VIf+BJe8tyBvLN7dcPQ5txBEfDh/XBX3eCwpAJMeyN6XHioJKX9BbN1rCG5lt8jAurB6WAxeAVtt5maytoX9aYAWZMiK7FQ3HubkrDHvDPBJyaeyzLWgcXeox077xiCti7BaKssVO2itM6BO9gMyvC2hwnw5LjtItj1mY25wGRmMz5B8/9vMYMZ6Xmjf3DaczAUwL3RcW3NYqjbUc03pz45uAavC2ImNXtIBIoaaXh0T0rX6ZcQaJZxpax29KydecKDoDnkkx3owpVZoZryebXgG/sW34ddaoHsLwgKwBdxVoNO+L04g46JdXoJclPA/7GVPDz0voMCHIZymrBfRPf/xdqtq5w0EdiIAf7d6GogOjaUU9ujKMJJKw//nu3RmYziVWcaAHzkgKJZahRe+k2Kb8o+YsGFPFYoR936GAnJRnLn8sEMppuP2tHrij6SN/jSRbJll0HgsA/BKzPDhwSu0tiDt81HDZdxXusre5MRNzyiESOoKGSq7hc7ABHCIHSWyhbVNU= template: metadata: annotations: sealedsecrets.bitnami.com/cluster-wide: 'true' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' name: elastic-recherche-entreprises-write labels: - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard @@ -43,43 +47,89 @@ spec: apiVersion: v1 kind: ConfigMap data: - get-data.sh: > + sqlite.sh: > #!/bin/bash - # borrowed from annuaire entreprise : + # exit when any command fails - # https://github.com/etalab/api-annuaire-entreprises/tree/master/db/init + set -e - geodir=\${DATA_DIR}/geo + # download files, convert to SQLite and export to CSV - mkdir -p $geodir + DATA_DIR=\${DATA_DIR:-\\"./data\\"} + + + mkdir -p \\"$DATA_DIR\\" || true + + + echo \\"-- Working in $(dirname \\"$0\\")\\" + + cd \\"$(dirname 
\\"$0\\")\\" || exit echo \\"-- Download datasets\\" - for d in \`seq -w 1 19\` 2A 2B \`seq 21 74\` \`seq 76 95\` 98 \\"\\"; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_$d.csv.gz + + if command -v apt-get &> /dev/null + + then + apt-get update -y + fi + + + # install sqlite3 if not exists + + if ! command -v sqlite3 &> /dev/null + + then + echo \\"sqlite3 could not be found\\" + apt-get install -y sqlite3 + fi + + # install wget if not exists + + if ! command -v wget &> /dev/null + + then + echo \\"wget could not be found\\" + apt-get install -y wget + fi + + # install unzip if not exists + + if ! command -v unzip &> /dev/null + + then + echo \\"unzip could not be found\\" + apt-get install -y unzip + fi + + + # geo siret par département + + for d in $(seq -w 1 19) 2A 2B $(seq 21 74) $(seq 76 95) 98 \\"\\"; do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_$d.csv.gz\\" done - #Cas particulier Paris + # Cas particulier Paris 75101-75120 - for d in \`seq -w 1 20\`; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_751$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_751$d.csv.gz + for d in $(seq -w 1 20); do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_751$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_751$d.csv.gz\\" done - #Cas particulier DOM + # Cas particulier DOM 971->978 - for d in \`seq -w 1 8\`; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_97$d.csv.gz --directory-prefix=$geodir - gunzip 
\${geodir}/geo_siret_97$d.csv.gz + for d in $(seq -w 1 8); do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_97$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_97$d.csv.gz\\" done @@ -87,224 +137,256 @@ data: wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/insee-sirene/StockUniteLegale_utf8.zip - --directory-prefix=$DATA_DIR + --directory-prefix=\\"$DATA_DIR\\" + + unzip \\"\${DATA_DIR}/StockUniteLegale_utf8.zip\\" -d \\"\${DATA_DIR}\\" # WEEZ data wget --progress=bar:force:noscroll -q --show-progress https://www.data.gouv.fr/fr/datasets/r/a785345a-6e8c-4961-ae0a-bc00878e4f2e - -O \${DATA_DIR}/WEEZ.csv - assemble_data.py: | - \\"\\"\\"CDTN Entreprises data assembler - - This script assembles data from different places and creates a new file that - will be used as source for our search index. - - \\"\\"\\" - import argparse - import pandas as pd - import numpy as np - from os import listdir - from os.path import isfile, join - - - def read_siren(stock_unite_legale_file): - \\"\\"\\" Read SIREN Stock Unite Legale - - Parameters - ---------- - stock_unite_legale_file: str - The location of the CSV or ZIP file - - Returns - ------- - employeurs - a Pandas dataframe containing the list of all companies that are still open - and employ people - \\"\\"\\" - trancheEffectifsUniteLegale = \\"trancheEffectifsUniteLegale\\" - categorieJuridiqueUniteLegale = \\"categorieJuridiqueUniteLegale\\" - nomenclatureActivitePrincipaleUniteLegale = \\"nomenclatureActivitePrincipaleUniteLegale\\" - categorieEntreprise = \\"categorieEntreprise\\" - activitePrincipaleUniteLegale = \\"activitePrincipaleUniteLegale\\" - - selection = [\\"siren\\", \\"sigleUniteLegale\\", \\"nomUniteLegale\\", \\"nomUsageUniteLegale\\", - 'denominationUniteLegale', \\"denominationUsuelle1UniteLegale\\", \\"denominationUsuelle2UniteLegale\\", - 
\\"denominationUsuelle3UniteLegale\\", activitePrincipaleUniteLegale, - trancheEffectifsUniteLegale, categorieJuridiqueUniteLegale, - nomenclatureActivitePrincipaleUniteLegale, categorieEntreprise] - - etatAdmin = \\"etatAdministratifUniteLegale\\" - caractereEmployeur = \\"caractereEmployeurUniteLegale\\" - - # we only select columns in use and convert to categorical dtype - # in order to decrease the dataframe memory footprint - cols = selection + [etatAdmin, caractereEmployeur] - raw = pd.read_csv(stock_unite_legale_file, usecols=cols, - dtype={ \\"siren\\": np.dtype(str), etatAdmin: \\"category\\", caractereEmployeur: \\"category\\", - trancheEffectifsUniteLegale: \\"category\\", - categorieJuridiqueUniteLegale: \\"category\\", - nomenclatureActivitePrincipaleUniteLegale: \\"category\\", - activitePrincipaleUniteLegale: \\"category\\", - categorieEntreprise: \\"category\\"}, ) - - is_ouvert = raw[etatAdmin] == \\"A\\" - is_employeur = raw[caractereEmployeur] == \\"O\\" - is_admin = raw[etatAdmin] == \\"A\\" - - employeurs = raw[is_ouvert & is_employeur & is_admin] - - return employeurs[selection] - - - def read_geo(geo_directory): - \\"\\"\\" Read GEO data - - Parameters - ---------- - geo_directory: str - The directory containing geo data for all regions - - Returns - ------- - all_geo - a Pandas dataframe containing geo information for all open companies - \\"\\"\\" - geo_files = [f for f in listdir( - geo_directory) if isfile(join(geo_directory, f))] - geo_selection = [\\"enseigne1Etablissement\\", \\"enseigne2Etablissement\\", \\"enseigne3Etablissement\\", \\"denominationUsuelleEtablissement\\", \\"activitePrincipaleEtablissement\\", - 'siren', 'siret', 'codePostalEtablissement', 'libelleCommuneEtablissement', \\"etatAdministratifEtablissement\\", \\"geo_adresse\\"] - geo = {} - for file in geo_files: - geo[file] = pd.read_csv( - geo_directory + file, dtype={\\"codePostalEtablissement\\": np.dtype(str), - \\"etatAdministratifEtablissement\\": 
\\"category\\", - \\"activitePrincipaleEtablissement\\": \\"category\\", - \\"siret\\": np.dtype(str), - \\"siren\\": np.dtype(str), - }, usecols=geo_selection - ) - - all_geo = pd.concat(geo.values(), ignore_index=True).dropna( - subset=['siret']) - - all_geo = all_geo.astype(dtype={\\"codePostalEtablissement\\": np.dtype(str), - \\"etatAdministratifEtablissement\\": \\"category\\", - \\"activitePrincipaleEtablissement\\": \\"category\\", - \\"siret\\": np.dtype(str), - \\"siren\\": np.dtype(str), - }) - - all_geo = all_geo[all_geo[\\"etatAdministratifEtablissement\\"] == \\"A\\"] - - return all_geo - - - def read_idcc(idcc_file): - \\"\\"\\" Read IDCC data - - Parameters - ---------- - idcc_file: str - The location of the CSV file containing associations between companies and their \\"convention collectives\\", (aka WEEZ) - - Returns - ------- - idccs - a Pandas dataframe containing siret / idcc associations - \\"\\"\\" - idccs = pd.read_csv(idcc_file, dtype={\\"SIRET\\": np.dtype(str)}, usecols=[\\"SIRET\\", \\"IDCC\\"]).rename( - columns={\\"SIRET\\": \\"siret\\", \\"IDCC\\": \\"idcc\\"}) - - return idccs - - - def assemble(siren, geo, idcc, output): - sirenGeo = pd.merge(siren, geo, on='siren') - merged = pd.merge(sirenGeo, idcc, how='left', on='siret') - - # add etablissement counts - etsCounts = merged.siren.value_counts().rename_axis( - 'siren').reset_index(name='etablissements') - withEts = pd.merge(merged, etsCounts, on='siren') - - # persits as CSV file - withEts.astype({'idcc': 'Int64'}).to_csv(output) - - - def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - 'siren_file', - type=str, - help=\\"Location of the StockUniteLegale CSV or ZIP file\\" - ) - parser.add_argument( - 'geo_directory', - type=str, - help=\\"Location of the directory containing all the Geo CSV files\\" - ) - parser.add_argument( - 'idcc_file', - type=str, - help=\\"Location of the siret/idcc CSV file (aka WEEZ)\\" - ) - parser.add_argument( - 
'output_file', - type=str, - help=\\"Location of the output file\\" - ) - - args = parser.parse_args() - - print(\\"Read SIREN data\\") - siren = read_siren(args.siren_file) - - print(\\"Read GEO data\\") - geo = read_geo(args.geo_directory) - - print(\\"Read IDCC data\\") - idcc = read_idcc(args.idcc_file) - - print(\\"Assemble datasets\\") - assemble(siren, geo, idcc, args.output_file) - - - if __name__ == \\"__main__\\": - main() - requirements.txt: | - numpy - pandas + -O \\"\${DATA_DIR}/WEEZ.csv\\" + + + echo \\"-- Import CSV datasets to sqlite\\" + + + sqlite3 -echo \\"\${DATA_DIR}/db.sqlite\\" \\".read import.sql\\" + + + echo \\"-- Export sqlite data to \${DATA_DIR}/assembly.csv\\" + + + sqlite3 -header -csv \\"\${DATA_DIR}/db.sqlite\\" \\".read export.sql\\" > + \\"\${DATA_DIR}/assembly.csv\\" + import.sql: |- + --- sqlite3 -echo db.sqlite \\".read import.sql\\" + --- + --- import local CSVs to sqlite database + --- + + PRAGMA synchronous = OFF; + + DROP TABLE IF EXISTS weez; + DROP TABLE IF EXISTS geo_siret; + DROP TABLE IF EXISTS stock; + + .mode csv + .import data/WEEZ.csv weez + .import data/StockUniteLegale_utf8.csv stock + .import data/geo_siret_.csv geo_siret + .import data/geo_siret_01.csv geo_siret + .import data/geo_siret_02.csv geo_siret + .import data/geo_siret_03.csv geo_siret + .import data/geo_siret_04.csv geo_siret + .import data/geo_siret_05.csv geo_siret + .import data/geo_siret_06.csv geo_siret + .import data/geo_siret_07.csv geo_siret + .import data/geo_siret_08.csv geo_siret + .import data/geo_siret_09.csv geo_siret + .import data/geo_siret_10.csv geo_siret + .import data/geo_siret_11.csv geo_siret + .import data/geo_siret_12.csv geo_siret + .import data/geo_siret_13.csv geo_siret + .import data/geo_siret_14.csv geo_siret + .import data/geo_siret_15.csv geo_siret + .import data/geo_siret_16.csv geo_siret + .import data/geo_siret_17.csv geo_siret + .import data/geo_siret_18.csv geo_siret + .import data/geo_siret_19.csv geo_siret + 
.import data/geo_siret_21.csv geo_siret + .import data/geo_siret_22.csv geo_siret + .import data/geo_siret_23.csv geo_siret + .import data/geo_siret_24.csv geo_siret + .import data/geo_siret_25.csv geo_siret + .import data/geo_siret_26.csv geo_siret + .import data/geo_siret_27.csv geo_siret + .import data/geo_siret_28.csv geo_siret + .import data/geo_siret_29.csv geo_siret + .import data/geo_siret_2A.csv geo_siret + .import data/geo_siret_2B.csv geo_siret + .import data/geo_siret_30.csv geo_siret + .import data/geo_siret_31.csv geo_siret + .import data/geo_siret_32.csv geo_siret + .import data/geo_siret_33.csv geo_siret + .import data/geo_siret_34.csv geo_siret + .import data/geo_siret_35.csv geo_siret + .import data/geo_siret_36.csv geo_siret + .import data/geo_siret_37.csv geo_siret + .import data/geo_siret_38.csv geo_siret + .import data/geo_siret_39.csv geo_siret + .import data/geo_siret_40.csv geo_siret + .import data/geo_siret_41.csv geo_siret + .import data/geo_siret_42.csv geo_siret + .import data/geo_siret_43.csv geo_siret + .import data/geo_siret_44.csv geo_siret + .import data/geo_siret_45.csv geo_siret + .import data/geo_siret_46.csv geo_siret + .import data/geo_siret_47.csv geo_siret + .import data/geo_siret_48.csv geo_siret + .import data/geo_siret_49.csv geo_siret + .import data/geo_siret_50.csv geo_siret + .import data/geo_siret_51.csv geo_siret + .import data/geo_siret_52.csv geo_siret + .import data/geo_siret_53.csv geo_siret + .import data/geo_siret_54.csv geo_siret + .import data/geo_siret_55.csv geo_siret + .import data/geo_siret_56.csv geo_siret + .import data/geo_siret_57.csv geo_siret + .import data/geo_siret_58.csv geo_siret + .import data/geo_siret_59.csv geo_siret + .import data/geo_siret_60.csv geo_siret + .import data/geo_siret_61.csv geo_siret + .import data/geo_siret_62.csv geo_siret + .import data/geo_siret_63.csv geo_siret + .import data/geo_siret_64.csv geo_siret + .import data/geo_siret_65.csv geo_siret + .import 
data/geo_siret_66.csv geo_siret + .import data/geo_siret_67.csv geo_siret + .import data/geo_siret_68.csv geo_siret + .import data/geo_siret_69.csv geo_siret + .import data/geo_siret_70.csv geo_siret + .import data/geo_siret_71.csv geo_siret + .import data/geo_siret_72.csv geo_siret + .import data/geo_siret_73.csv geo_siret + .import data/geo_siret_74.csv geo_siret + .import data/geo_siret_75101.csv geo_siret + .import data/geo_siret_75102.csv geo_siret + .import data/geo_siret_75103.csv geo_siret + .import data/geo_siret_75104.csv geo_siret + .import data/geo_siret_75105.csv geo_siret + .import data/geo_siret_75106.csv geo_siret + .import data/geo_siret_75107.csv geo_siret + .import data/geo_siret_75108.csv geo_siret + .import data/geo_siret_75109.csv geo_siret + .import data/geo_siret_75110.csv geo_siret + .import data/geo_siret_75111.csv geo_siret + .import data/geo_siret_75112.csv geo_siret + .import data/geo_siret_75113.csv geo_siret + .import data/geo_siret_75114.csv geo_siret + .import data/geo_siret_75115.csv geo_siret + .import data/geo_siret_75116.csv geo_siret + .import data/geo_siret_75117.csv geo_siret + .import data/geo_siret_75118.csv geo_siret + .import data/geo_siret_75119.csv geo_siret + .import data/geo_siret_75120.csv geo_siret + .import data/geo_siret_76.csv geo_siret + .import data/geo_siret_77.csv geo_siret + .import data/geo_siret_78.csv geo_siret + .import data/geo_siret_79.csv geo_siret + .import data/geo_siret_80.csv geo_siret + .import data/geo_siret_81.csv geo_siret + .import data/geo_siret_82.csv geo_siret + .import data/geo_siret_83.csv geo_siret + .import data/geo_siret_84.csv geo_siret + .import data/geo_siret_85.csv geo_siret + .import data/geo_siret_86.csv geo_siret + .import data/geo_siret_87.csv geo_siret + .import data/geo_siret_88.csv geo_siret + .import data/geo_siret_89.csv geo_siret + .import data/geo_siret_90.csv geo_siret + .import data/geo_siret_91.csv geo_siret + .import data/geo_siret_92.csv geo_siret + .import 
data/geo_siret_93.csv geo_siret + .import data/geo_siret_94.csv geo_siret + .import data/geo_siret_95.csv geo_siret + .import data/geo_siret_971.csv geo_siret + .import data/geo_siret_972.csv geo_siret + .import data/geo_siret_973.csv geo_siret + .import data/geo_siret_974.csv geo_siret + .import data/geo_siret_975.csv geo_siret + .import data/geo_siret_976.csv geo_siret + .import data/geo_siret_977.csv geo_siret + .import data/geo_siret_978.csv geo_siret + .import data/geo_siret_98.csv geo_siret + + CREATE INDEX 'geo_siret_idx' ON 'geo_siret' ('siret'); + CREATE INDEX 'geo_siren_idx' ON 'geo_siret' ('siren'); + CREATE INDEX 'weez_siret_idx' ON 'weez' ('SIRET'); + CREATE INDEX 'stock_siren_idx' ON 'stock' ('siren'); + + SELECT \\"weez\\", count(*) from weez; + SELECT \\"stock\\", count(*) from stock; + SELECT \\"geo_siret\\", count(*) from geo_siret; + export.sql: |+ + --- Output index data + --- sqlite3 -header -csv db.sqlite \\".read export.sql\\" > output.csv + + PRAGMA synchronous = OFF; + + SELECT + stock.siren, + stock.sigleUniteLegale, + stock.nomUniteLegale, + stock.prenom1UniteLegale, + stock.nomUsageUniteLegale, + stock.denominationUniteLegale, + stock.denominationUsuelle1UniteLegale, + stock.denominationUsuelle2UniteLegale, + stock.denominationUsuelle3UniteLegale, + stock.activitePrincipaleUniteLegale, + stock.trancheEffectifsUniteLegale, + stock.categorieJuridiqueUniteLegale, + stock.nomenclatureActivitePrincipaleUniteLegale, + stock.categorieEntreprise, + stock.etatAdministratifUniteLegale, + stock.caractereEmployeurUniteLegale, + geo_siret.siret, + geo_siret.codePostalEtablissement, + geo_siret.libelleCommuneEtablissement, + geo_siret.etatAdministratifEtablissement, + geo_siret.enseigne1Etablissement, + geo_siret.enseigne2Etablissement, + geo_siret.enseigne3Etablissement, + geo_siret.denominationUsuelleEtablissement, + geo_siret.activitePrincipaleEtablissement, + geo_siret.geo_adresse, + geo_siret.complementAdresseEtablissement, + 
geo_siret.numeroVoieEtablissement, + geo_siret.indiceRepetitionEtablissement, + geo_siret.typeVoieEtablissement, + geo_siret.libelleVoieEtablissement, + weez.IDCC as idcc, + (select count(*) FROM geo_siret where siren=stock.siren) etablissements + from stock, geo_siret + left join weez on weez.SIRET=geo_siret.siret + where stock.siren=geo_siret.siren; + + metadata: - name: config-map-files-0123456 + name: config-map-files-0123456789abcdefghijklmnopqrstuvwxyz0123 annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-master-dev42 + namespace: recherche-entreprises-mybranch --- apiVersion: batch/v1 kind: Job metadata: - name: update-index-0123456 + name: update-index-0123456789abcdefghijklmnopqrstuvwxyz0123 annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-master-dev42 + namespace: recherche-entreprises-mybranch spec: backoffLimit: 3 template: @@ -312,78 
+394,66 @@ spec: containers: - name: update-index image: >- - harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-index:0123456789abcdefghijklmnopqrstuvwxyz0123 + ghcr.io/socialgouv/recherche-entreprises/index:sha-0123456789abcdefghijklmnopqrstuvwxyz0123 volumeMounts: - name: data mountPath: /data env: - name: ASSEMBLY_FILE value: /data/assembly.csv + - name: ELASTICSEARCH_INDEX_NAME + value: search-entreprises envFrom: - secretRef: name: elastic-recherche-entreprises-write resources: limits: - cpu: '2' - memory: 18Gi + cpu: '4' + memory: 5Gi requests: - cpu: '1' - memory: 14Gi + cpu: '2' + memory: 2Gi restartPolicy: Never volumes: - name: data emptyDir: {} - configMap: - name: config-map-files-0123456 + name: config-map-files-0123456789abcdefghijklmnopqrstuvwxyz0123 defaultMode: 511 name: local-files initContainers: - args: - - '-c' - - > - - apt-get update -y && apt-get install -y wget - - - export DATA_DIR=\\"/data\\" - - - cd /data - - - echo \\"running get-data.sh...\\" - - - /mnt/scripts/get-data.sh - - - pip3 install -r /mnt/scripts/requirements.txt - - - echo \\"running assemble_data.py...\\" - - - python3 /mnt/scripts/assemble_data.py - $DATA_DIR/StockUniteLegale_utf8.zip $DATA_DIR/geo/ - $DATA_DIR/WEEZ.csv $DATA_DIR/assembly.csv + - /mnt/scripts/sqlite.sh command: - sh - image: python:3.9.4 + image: ubuntu:18.04 imagePullPolicy: Always - name: download-data + name: download-build-data + env: + - name: DATA_DIR + value: /mnt/scripts/data + resources: + limits: + cpu: '4' + memory: 2Gi + requests: + cpu: '2' + memory: 1Gi volumeMounts: - name: data - mountPath: /data + mountPath: /mnt/scripts/data - mountPath: /mnt/scripts name: local-files metadata: annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: master-dev42 - app.gitlab.com/env.name: master-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/heads/mybranch + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + 
app.github.com/sha: '0123456' labels: - application: master-dev42-recherche-entreprises - component: master-dev42-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard diff --git a/.k8s/__tests__/__snapshots__/indexing-preprod.ts.snap b/.k8s/__tests__/__snapshots__/indexing-preprod.ts.snap index da5138d9..ee2511cc 100644 --- a/.k8s/__tests__/__snapshots__/indexing-preprod.ts.snap +++ b/.k8s/__tests__/__snapshots__/indexing-preprod.ts.snap @@ -7,32 +7,36 @@ kind: SealedSecret metadata: name: elastic-recherche-entreprises-write annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod spec: encryptedData: ELASTICSEARCH_URL: >- - 
AgB82d7LnDA/6RxRlks/7cRtmWuz6Ql1p4gCwdghu3X85Ek4FqSL6ui1tIBiuM1pZcaiwM6ZizsKA0ofq5iBafUwRQOzTFc3XAMy7XltrymG/QwBRmYKS4w4Ub1DPuYpVxEUC6Jngyex7OvhCKUK7pugjG6Q8FXO6i9iyVVEpKnAcSVLaUe+olmlOrO2RMjIK3mgKX/xOFT+2FYiN5/LJob+w/+p0hPlZaMsLrLOl/i5N4LuI5ckg+FawifD2MnN057fsLbwt0m63g7ZHvXtGT66tbTcQgpWfy5kLe2m7oIbzdk+oPoh4FS8PnyU6nMC8sOkC3v/GUMK91qCas01RBoyPRTTs11yX3gbYHti5Nc3zDt36YHPhrqfRQHQ8xONYkx5SkAylnDr1JoXyfrKDwZUBvLQ6Xh3gGI7qu909LxZ2ryWd9WRslpB1+8bOLN0tV20slAesGYFC/W6e5GT0AhwWqwJ9usGLf0dM7GE+IXJegIlconcM+2x/FW3RQ5XnK5kI/coiha5pxBkK0p8pbwmLnOwH5c2QoBD5xgLCZ3wleMcbTWdSynzm/LSYkWZzL15M3dy4m8c+qXc88LCbAHTZ2UaAH/XF7pcuMvOF33ZesjgaWLumFoUvhtaG1gZN97eU2/K1xLwo+x/vt06P6vUbU2Emj9cEziTaQqx1ZPUI4Hsp5ZNpbAGRsLMhCf7G7YvxZkpxh+7GAKvW1JmhF1DYgBapCen2bxQN/XK/pNniL89T0hiKSwh/Fo8yDOnqAGova3T1Nq6fLFcRYNxtKFVlsn5zsUgyVPCtyn+cUKIFw== + AgCxK1MqqVkCOHvMjjcieUsj1uE/ATUzAfYTYVEMqcWFxTnugOaYlXB2mTuZtzdjXNvINREzDynnVRBghPIJvrJm59vPgexC5FyQptdPf2XwQAG6n0yMLWAEBM+O8ieO1D6vBu6BYuq4QYoItq+SMTE6APNX+p6wi4/LP04cR8p8bVKNxckPMfyVfQGhUSTD+2gC5ow2x1Mx67odqqP3E8eK9tm5tU6ltzOpKqJKITUQIqTQbSKFr8p77DE4IrdbPn8MpvdP+VrT+ztF4thPkpVpuYWy9rABdQKeEuNdN2AuvmPeJs/2B80iApIgtWWQjV/iG+WlLGMohvWDZVsBGuBuYGzhjJlr5w8OljwKDihb2cG1M/Ot/LM1xRuCC85wvqH0oMhnnP/3DZts5aYAN+qjAezOkPT9kjT+gu0WoQB+E5r9ITcYYHYbJw/YSxS2z86Ul0amRRW4ovAWT5c/nFDZU6i74I8YyVh8oOtrieZgNF4ajJcIr5YpjT2LICKoZR0KExrQ4V30zZJ+wZcvd8W3e/KCmKsFxloTkLq/sjEiFQoe6ffYTQhSUXZynqrdUKZsdoSleprfctlJtf3rzxt5hZIpcLulXrYk7pPPiHkTcV3Zc4zkN3BFE1A9vsgTvh6xTXlvUekJHHMXU3t8ZyZD8vvemxv0FE8G33XWQF2DEkmyc3gXMlfPDLho4fHv0h0iId0c9klFkOpmp8REFWJyuRQjq+hOHvfoCSu1ymsCwA0CMAguJoX1XEhn0aA9QIgf0VyBe/cVJwu/GfW74UBi6x5feqzFovYIUXbDP6kW ELASTICSEARCH_API_KEY: >- - 
AgCWc+7OMggEJuNUXxL8CufezAMvJaT3svLkFi9KiKost/avFQNBoXDGixl7vERoCDVvDxnPlcn7m7MIjveP5oIfewhJzYQua9WmQrSMTab1soqUIjMHWj+2A8y0qSYg4s21w/X6bc+H0/O4+ax0QNVtm/MWc30vnCOF+uVibM2WcDkSY/FGE2bE7hcmQWeDbsmnRxDaZTY10ME2ycZpv9eAMoXshCQ98k4LFT9DM0D51az4BibLJjUtsW/vMn/FNKv9/teOGiCAFE/pfoyeZ4QN7ZsoFzCSHwj++Rd0hWScX8VCvbS5pgmfioz8SHmQFrxrN994CAPlr2Rw7mxfkzSETrDgzQVmzKD+j49PZL6cz77cCh9DGE70Pco91szWbsaUQ/lcWUTlFm39Z0Xmf6uV1eCSbyPcM3ogMx95Rjb3MnGa2zrt4OPLMMA2YfyqMtrrVTcq2aJ4fGvWAmVZHH3k2QMcPiYLGHQ8gnVsjI6LYGJEM27RVj99/ppZpoPWeims00fBHMclr3/4czfQCBKcr8GFouxfcKVYGJ4gRNdH0Qn5gZIPXOOiRKUVS0ik0zuT1Xu9u9kU6Df2RVOQawiIOy14+5RrMqt0li1PuCFZjRTqbsLGogOzJQfaXKOafNjxswUqXSn4gR3pVBmm0tPJx+9iFdSrkavvGBrQeSDrNzY5UjeZwIgz9hveUUe1dxARBK8iXu0Q8P6W+VnFActyQs0gnoc4Zzb3jJNfxgDWv1LF/MK22b5G2YcZ3bk5LSsGxP1KICJ0J1MMSL8= + AgANlBsYYlxFPxOSitnqdZ4LhP822km9ijYFc46pkpkSKoYGRHNm7gbpaPTRNWJ4u647IYI8Hl8E63GrtACnhntEIvJlHTE8ZwOw7kIA7NuPj/xma4nN3RTlazH8dmaGskd/wCD8mDZS2R3MhXxqSy6DVGbNJyRAmrgLnzCOgp5/aJkQAC3sZ9GOce0x+X7ngd+qKrOzf+CZS/hnf4/aQvEj6Caq141nl74jpuwpfjb3PWKl9aATJocZkyNw92ITRmfnGu5oE0VIf+BJe8tyBvLN7dcPQ5txBEfDh/XBX3eCwpAJMeyN6XHioJKX9BbN1rCG5lt8jAurB6WAxeAVtt5maytoX9aYAWZMiK7FQ3HubkrDHvDPBJyaeyzLWgcXeox077xiCti7BaKssVO2itM6BO9gMyvC2hwnw5LjtItj1mY25wGRmMz5B8/9vMYMZ6Xmjf3DaczAUwL3RcW3NYqjbUc03pz45uAavC2ImNXtIBIoaaXh0T0rX6ZcQaJZxpax29KydecKDoDnkkx3owpVZoZryebXgG/sW34ddaoHsLwgKwBdxVoNO+L04g46JdXoJclPA/7GVPDz0voMCHIZymrBfRPf/xdqtq5w0EdiIAf7d6GogOjaUU9ujKMJJKw//nu3RmYziVWcaAHzkgKJZahRe+k2Kb8o+YsGFPFYoR936GAnJRnLn8sEMppuP2tHrij6SN/jSRbJll0HgsA/BKzPDhwSu0tiDt81HDZdxXusre5MRNzyiESOoKGSq7hc7ABHCIHSWyhbVNU= template: metadata: annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' name: elastic-recherche-entreprises-write labels: - application: 
v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard @@ -41,43 +45,89 @@ spec: apiVersion: v1 kind: ConfigMap data: - get-data.sh: > + sqlite.sh: > #!/bin/bash - # borrowed from annuaire entreprise : + # exit when any command fails - # https://github.com/etalab/api-annuaire-entreprises/tree/master/db/init + set -e - geodir=\${DATA_DIR}/geo + # download files, convert to SQLite and export to CSV - mkdir -p $geodir + DATA_DIR=\${DATA_DIR:-\\"./data\\"} + + + mkdir -p \\"$DATA_DIR\\" || true + + + echo \\"-- Working in $(dirname \\"$0\\")\\" + + cd \\"$(dirname \\"$0\\")\\" || exit echo \\"-- Download datasets\\" - for d in \`seq -w 1 19\` 2A 2B \`seq 21 74\` \`seq 76 95\` 98 \\"\\"; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_$d.csv.gz + + if command -v apt-get &> /dev/null + + then + apt-get update -y + fi + + + # install sqlite3 if not exists + + if ! command -v sqlite3 &> /dev/null + + then + echo \\"sqlite3 could not be found\\" + apt-get install -y sqlite3 + fi + + # install wget if not exists + + if ! command -v wget &> /dev/null + + then + echo \\"wget could not be found\\" + apt-get install -y wget + fi + + # install unzip if not exists + + if ! 
command -v unzip &> /dev/null + + then + echo \\"unzip could not be found\\" + apt-get install -y unzip + fi + + + # geo siret par département + + for d in $(seq -w 1 19) 2A 2B $(seq 21 74) $(seq 76 95) 98 \\"\\"; do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_$d.csv.gz\\" done - #Cas particulier Paris + # Cas particulier Paris 75101-75120 - for d in \`seq -w 1 20\`; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_751$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_751$d.csv.gz + for d in $(seq -w 1 20); do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_751$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_751$d.csv.gz\\" done - #Cas particulier DOM + # Cas particulier DOM 971->978 - for d in \`seq -w 1 8\`; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_97$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_97$d.csv.gz + for d in $(seq -w 1 8); do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_97$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_97$d.csv.gz\\" done @@ -85,303 +135,322 @@ data: wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/insee-sirene/StockUniteLegale_utf8.zip - --directory-prefix=$DATA_DIR + --directory-prefix=\\"$DATA_DIR\\" + + unzip \\"\${DATA_DIR}/StockUniteLegale_utf8.zip\\" -d \\"\${DATA_DIR}\\" # WEEZ data wget --progress=bar:force:noscroll -q --show-progress https://www.data.gouv.fr/fr/datasets/r/a785345a-6e8c-4961-ae0a-bc00878e4f2e - -O \${DATA_DIR}/WEEZ.csv - assemble_data.py: | - 
\\"\\"\\"CDTN Entreprises data assembler - - This script assembles data from different places and creates a new file that - will be used as source for our search index. - - \\"\\"\\" - import argparse - import pandas as pd - import numpy as np - from os import listdir - from os.path import isfile, join - - - def read_siren(stock_unite_legale_file): - \\"\\"\\" Read SIREN Stock Unite Legale - - Parameters - ---------- - stock_unite_legale_file: str - The location of the CSV or ZIP file - - Returns - ------- - employeurs - a Pandas dataframe containing the list of all companies that are still open - and employ people - \\"\\"\\" - trancheEffectifsUniteLegale = \\"trancheEffectifsUniteLegale\\" - categorieJuridiqueUniteLegale = \\"categorieJuridiqueUniteLegale\\" - nomenclatureActivitePrincipaleUniteLegale = \\"nomenclatureActivitePrincipaleUniteLegale\\" - categorieEntreprise = \\"categorieEntreprise\\" - activitePrincipaleUniteLegale = \\"activitePrincipaleUniteLegale\\" - - selection = [\\"siren\\", \\"sigleUniteLegale\\", \\"nomUniteLegale\\", \\"nomUsageUniteLegale\\", - 'denominationUniteLegale', \\"denominationUsuelle1UniteLegale\\", \\"denominationUsuelle2UniteLegale\\", - \\"denominationUsuelle3UniteLegale\\", activitePrincipaleUniteLegale, - trancheEffectifsUniteLegale, categorieJuridiqueUniteLegale, - nomenclatureActivitePrincipaleUniteLegale, categorieEntreprise] - - etatAdmin = \\"etatAdministratifUniteLegale\\" - caractereEmployeur = \\"caractereEmployeurUniteLegale\\" - - # we only select columns in use and convert to categorical dtype - # in order to decrease the dataframe memory footprint - cols = selection + [etatAdmin, caractereEmployeur] - raw = pd.read_csv(stock_unite_legale_file, usecols=cols, - dtype={ \\"siren\\": np.dtype(str), etatAdmin: \\"category\\", caractereEmployeur: \\"category\\", - trancheEffectifsUniteLegale: \\"category\\", - categorieJuridiqueUniteLegale: \\"category\\", - nomenclatureActivitePrincipaleUniteLegale: \\"category\\", 
- activitePrincipaleUniteLegale: \\"category\\", - categorieEntreprise: \\"category\\"}, ) - - is_ouvert = raw[etatAdmin] == \\"A\\" - is_employeur = raw[caractereEmployeur] == \\"O\\" - is_admin = raw[etatAdmin] == \\"A\\" - - employeurs = raw[is_ouvert & is_employeur & is_admin] - - return employeurs[selection] - - - def read_geo(geo_directory): - \\"\\"\\" Read GEO data - - Parameters - ---------- - geo_directory: str - The directory containing geo data for all regions - - Returns - ------- - all_geo - a Pandas dataframe containing geo information for all open companies - \\"\\"\\" - geo_files = [f for f in listdir( - geo_directory) if isfile(join(geo_directory, f))] - geo_selection = [\\"enseigne1Etablissement\\", \\"enseigne2Etablissement\\", \\"enseigne3Etablissement\\", \\"denominationUsuelleEtablissement\\", \\"activitePrincipaleEtablissement\\", - 'siren', 'siret', 'codePostalEtablissement', 'libelleCommuneEtablissement', \\"etatAdministratifEtablissement\\", \\"geo_adresse\\"] - geo = {} - for file in geo_files: - geo[file] = pd.read_csv( - geo_directory + file, dtype={\\"codePostalEtablissement\\": np.dtype(str), - \\"etatAdministratifEtablissement\\": \\"category\\", - \\"activitePrincipaleEtablissement\\": \\"category\\", - \\"siret\\": np.dtype(str), - \\"siren\\": np.dtype(str), - }, usecols=geo_selection - ) - - all_geo = pd.concat(geo.values(), ignore_index=True).dropna( - subset=['siret']) - - all_geo = all_geo.astype(dtype={\\"codePostalEtablissement\\": np.dtype(str), - \\"etatAdministratifEtablissement\\": \\"category\\", - \\"activitePrincipaleEtablissement\\": \\"category\\", - \\"siret\\": np.dtype(str), - \\"siren\\": np.dtype(str), - }) - - all_geo = all_geo[all_geo[\\"etatAdministratifEtablissement\\"] == \\"A\\"] - - return all_geo - - - def read_idcc(idcc_file): - \\"\\"\\" Read IDCC data - - Parameters - ---------- - idcc_file: str - The location of the CSV file containing associations between companies and their \\"convention 
collectives\\", (aka WEEZ) - - Returns - ------- - idccs - a Pandas dataframe containing siret / idcc associations - \\"\\"\\" - idccs = pd.read_csv(idcc_file, dtype={\\"SIRET\\": np.dtype(str)}, usecols=[\\"SIRET\\", \\"IDCC\\"]).rename( - columns={\\"SIRET\\": \\"siret\\", \\"IDCC\\": \\"idcc\\"}) - - return idccs - - - def assemble(siren, geo, idcc, output): - sirenGeo = pd.merge(siren, geo, on='siren') - merged = pd.merge(sirenGeo, idcc, how='left', on='siret') - - # add etablissement counts - etsCounts = merged.siren.value_counts().rename_axis( - 'siren').reset_index(name='etablissements') - withEts = pd.merge(merged, etsCounts, on='siren') - - # persits as CSV file - withEts.astype({'idcc': 'Int64'}).to_csv(output) - - - def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - 'siren_file', - type=str, - help=\\"Location of the StockUniteLegale CSV or ZIP file\\" - ) - parser.add_argument( - 'geo_directory', - type=str, - help=\\"Location of the directory containing all the Geo CSV files\\" - ) - parser.add_argument( - 'idcc_file', - type=str, - help=\\"Location of the siret/idcc CSV file (aka WEEZ)\\" - ) - parser.add_argument( - 'output_file', - type=str, - help=\\"Location of the output file\\" - ) - - args = parser.parse_args() - - print(\\"Read SIREN data\\") - siren = read_siren(args.siren_file) - - print(\\"Read GEO data\\") - geo = read_geo(args.geo_directory) - - print(\\"Read IDCC data\\") - idcc = read_idcc(args.idcc_file) - - print(\\"Assemble datasets\\") - assemble(siren, geo, idcc, args.output_file) - - - if __name__ == \\"__main__\\": - main() - requirements.txt: | - numpy - pandas + -O \\"\${DATA_DIR}/WEEZ.csv\\" + + + echo \\"-- Import CSV datasets to sqlite\\" + + + sqlite3 -echo \\"\${DATA_DIR}/db.sqlite\\" \\".read import.sql\\" + + + echo \\"-- Export sqlite data to \${DATA_DIR}/assembly.csv\\" + + + sqlite3 -header -csv \\"\${DATA_DIR}/db.sqlite\\" \\".read export.sql\\" > + 
\\"\${DATA_DIR}/assembly.csv\\" + import.sql: |- + --- sqlite3 -echo db.sqlite \\".read import.sql\\" + --- + --- import local CSVs to sqlite database + --- + + PRAGMA synchronous = OFF; + + DROP TABLE IF EXISTS weez; + DROP TABLE IF EXISTS geo_siret; + DROP TABLE IF EXISTS stock; + + .mode csv + .import data/WEEZ.csv weez + .import data/StockUniteLegale_utf8.csv stock + .import data/geo_siret_.csv geo_siret + .import data/geo_siret_01.csv geo_siret + .import data/geo_siret_02.csv geo_siret + .import data/geo_siret_03.csv geo_siret + .import data/geo_siret_04.csv geo_siret + .import data/geo_siret_05.csv geo_siret + .import data/geo_siret_06.csv geo_siret + .import data/geo_siret_07.csv geo_siret + .import data/geo_siret_08.csv geo_siret + .import data/geo_siret_09.csv geo_siret + .import data/geo_siret_10.csv geo_siret + .import data/geo_siret_11.csv geo_siret + .import data/geo_siret_12.csv geo_siret + .import data/geo_siret_13.csv geo_siret + .import data/geo_siret_14.csv geo_siret + .import data/geo_siret_15.csv geo_siret + .import data/geo_siret_16.csv geo_siret + .import data/geo_siret_17.csv geo_siret + .import data/geo_siret_18.csv geo_siret + .import data/geo_siret_19.csv geo_siret + .import data/geo_siret_21.csv geo_siret + .import data/geo_siret_22.csv geo_siret + .import data/geo_siret_23.csv geo_siret + .import data/geo_siret_24.csv geo_siret + .import data/geo_siret_25.csv geo_siret + .import data/geo_siret_26.csv geo_siret + .import data/geo_siret_27.csv geo_siret + .import data/geo_siret_28.csv geo_siret + .import data/geo_siret_29.csv geo_siret + .import data/geo_siret_2A.csv geo_siret + .import data/geo_siret_2B.csv geo_siret + .import data/geo_siret_30.csv geo_siret + .import data/geo_siret_31.csv geo_siret + .import data/geo_siret_32.csv geo_siret + .import data/geo_siret_33.csv geo_siret + .import data/geo_siret_34.csv geo_siret + .import data/geo_siret_35.csv geo_siret + .import data/geo_siret_36.csv geo_siret + .import data/geo_siret_37.csv 
geo_siret + .import data/geo_siret_38.csv geo_siret + .import data/geo_siret_39.csv geo_siret + .import data/geo_siret_40.csv geo_siret + .import data/geo_siret_41.csv geo_siret + .import data/geo_siret_42.csv geo_siret + .import data/geo_siret_43.csv geo_siret + .import data/geo_siret_44.csv geo_siret + .import data/geo_siret_45.csv geo_siret + .import data/geo_siret_46.csv geo_siret + .import data/geo_siret_47.csv geo_siret + .import data/geo_siret_48.csv geo_siret + .import data/geo_siret_49.csv geo_siret + .import data/geo_siret_50.csv geo_siret + .import data/geo_siret_51.csv geo_siret + .import data/geo_siret_52.csv geo_siret + .import data/geo_siret_53.csv geo_siret + .import data/geo_siret_54.csv geo_siret + .import data/geo_siret_55.csv geo_siret + .import data/geo_siret_56.csv geo_siret + .import data/geo_siret_57.csv geo_siret + .import data/geo_siret_58.csv geo_siret + .import data/geo_siret_59.csv geo_siret + .import data/geo_siret_60.csv geo_siret + .import data/geo_siret_61.csv geo_siret + .import data/geo_siret_62.csv geo_siret + .import data/geo_siret_63.csv geo_siret + .import data/geo_siret_64.csv geo_siret + .import data/geo_siret_65.csv geo_siret + .import data/geo_siret_66.csv geo_siret + .import data/geo_siret_67.csv geo_siret + .import data/geo_siret_68.csv geo_siret + .import data/geo_siret_69.csv geo_siret + .import data/geo_siret_70.csv geo_siret + .import data/geo_siret_71.csv geo_siret + .import data/geo_siret_72.csv geo_siret + .import data/geo_siret_73.csv geo_siret + .import data/geo_siret_74.csv geo_siret + .import data/geo_siret_75101.csv geo_siret + .import data/geo_siret_75102.csv geo_siret + .import data/geo_siret_75103.csv geo_siret + .import data/geo_siret_75104.csv geo_siret + .import data/geo_siret_75105.csv geo_siret + .import data/geo_siret_75106.csv geo_siret + .import data/geo_siret_75107.csv geo_siret + .import data/geo_siret_75108.csv geo_siret + .import data/geo_siret_75109.csv geo_siret + .import 
data/geo_siret_75110.csv geo_siret + .import data/geo_siret_75111.csv geo_siret + .import data/geo_siret_75112.csv geo_siret + .import data/geo_siret_75113.csv geo_siret + .import data/geo_siret_75114.csv geo_siret + .import data/geo_siret_75115.csv geo_siret + .import data/geo_siret_75116.csv geo_siret + .import data/geo_siret_75117.csv geo_siret + .import data/geo_siret_75118.csv geo_siret + .import data/geo_siret_75119.csv geo_siret + .import data/geo_siret_75120.csv geo_siret + .import data/geo_siret_76.csv geo_siret + .import data/geo_siret_77.csv geo_siret + .import data/geo_siret_78.csv geo_siret + .import data/geo_siret_79.csv geo_siret + .import data/geo_siret_80.csv geo_siret + .import data/geo_siret_81.csv geo_siret + .import data/geo_siret_82.csv geo_siret + .import data/geo_siret_83.csv geo_siret + .import data/geo_siret_84.csv geo_siret + .import data/geo_siret_85.csv geo_siret + .import data/geo_siret_86.csv geo_siret + .import data/geo_siret_87.csv geo_siret + .import data/geo_siret_88.csv geo_siret + .import data/geo_siret_89.csv geo_siret + .import data/geo_siret_90.csv geo_siret + .import data/geo_siret_91.csv geo_siret + .import data/geo_siret_92.csv geo_siret + .import data/geo_siret_93.csv geo_siret + .import data/geo_siret_94.csv geo_siret + .import data/geo_siret_95.csv geo_siret + .import data/geo_siret_971.csv geo_siret + .import data/geo_siret_972.csv geo_siret + .import data/geo_siret_973.csv geo_siret + .import data/geo_siret_974.csv geo_siret + .import data/geo_siret_975.csv geo_siret + .import data/geo_siret_976.csv geo_siret + .import data/geo_siret_977.csv geo_siret + .import data/geo_siret_978.csv geo_siret + .import data/geo_siret_98.csv geo_siret + + CREATE INDEX 'geo_siret_idx' ON 'geo_siret' ('siret'); + CREATE INDEX 'geo_siren_idx' ON 'geo_siret' ('siren'); + CREATE INDEX 'weez_siret_idx' ON 'weez' ('SIRET'); + CREATE INDEX 'stock_siren_idx' ON 'stock' ('siren'); + + SELECT \\"weez\\", count(*) from weez; + SELECT \\"stock\\", 
count(*) from stock; + SELECT \\"geo_siret\\", count(*) from geo_siret; + export.sql: |+ + --- Output index data + --- sqlite3 -header -csv db.sqlite \\".read export.sql\\" > output.csv + + PRAGMA synchronous = OFF; + + SELECT + stock.siren, + stock.sigleUniteLegale, + stock.nomUniteLegale, + stock.prenom1UniteLegale, + stock.nomUsageUniteLegale, + stock.denominationUniteLegale, + stock.denominationUsuelle1UniteLegale, + stock.denominationUsuelle2UniteLegale, + stock.denominationUsuelle3UniteLegale, + stock.activitePrincipaleUniteLegale, + stock.trancheEffectifsUniteLegale, + stock.categorieJuridiqueUniteLegale, + stock.nomenclatureActivitePrincipaleUniteLegale, + stock.categorieEntreprise, + stock.etatAdministratifUniteLegale, + stock.caractereEmployeurUniteLegale, + geo_siret.siret, + geo_siret.codePostalEtablissement, + geo_siret.libelleCommuneEtablissement, + geo_siret.etatAdministratifEtablissement, + geo_siret.enseigne1Etablissement, + geo_siret.enseigne2Etablissement, + geo_siret.enseigne3Etablissement, + geo_siret.denominationUsuelleEtablissement, + geo_siret.activitePrincipaleEtablissement, + geo_siret.geo_adresse, + geo_siret.complementAdresseEtablissement, + geo_siret.numeroVoieEtablissement, + geo_siret.indiceRepetitionEtablissement, + geo_siret.typeVoieEtablissement, + geo_siret.libelleVoieEtablissement, + weez.IDCC as idcc, + (select count(*) FROM geo_siret where siren=stock.siren) etablissements + from stock, geo_siret + left join weez on weez.SIRET=geo_siret.siret + where stock.siren=geo_siret.siren; + + metadata: - name: config-map-files-0123456 + name: config-map-files-0123456789abcdefghijklmnopqrstuvwxyz0123 annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - 
application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod --- apiVersion: batch/v1 kind: Job metadata: - name: update-index-0123456 + name: update-index-0123456789abcdefghijklmnopqrstuvwxyz0123 annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod spec: backoffLimit: 3 template: spec: containers: - name: update-index - image: >- - harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-index:1.2.3 + image: ghcr.io/socialgouv/recherche-entreprises/index:1.2.3 volumeMounts: - name: data mountPath: /data env: - name: ASSEMBLY_FILE value: /data/assembly.csv + - name: ELASTICSEARCH_INDEX_NAME + value: search-entreprises envFrom: - secretRef: name: elastic-recherche-entreprises-write resources: limits: - cpu: '2' - memory: 18Gi + cpu: '4' + memory: 5Gi requests: - cpu: '1' - memory: 14Gi + cpu: '2' + memory: 2Gi restartPolicy: Never volumes: - name: data emptyDir: {} - configMap: - name: config-map-files-0123456 + name: config-map-files-0123456789abcdefghijklmnopqrstuvwxyz0123 defaultMode: 511 name: local-files initContainers: - args: - - '-c' - - > - - apt-get update -y && apt-get install -y wget - - - export 
DATA_DIR=\\"/data\\" - - - cd /data - - - echo \\"running get-data.sh...\\" - - - /mnt/scripts/get-data.sh - - - pip3 install -r /mnt/scripts/requirements.txt - - - echo \\"running assemble_data.py...\\" - - - python3 /mnt/scripts/assemble_data.py - $DATA_DIR/StockUniteLegale_utf8.zip $DATA_DIR/geo/ - $DATA_DIR/WEEZ.csv $DATA_DIR/assembly.csv + - /mnt/scripts/sqlite.sh command: - sh - image: python:3.9.4 + image: ubuntu:18.04 imagePullPolicy: Always - name: download-data + name: download-build-data + env: + - name: DATA_DIR + value: /mnt/scripts/data + resources: + limits: + cpu: '4' + memory: 2Gi + requests: + cpu: '2' + memory: 1Gi volumeMounts: - name: data - mountPath: /data + mountPath: /mnt/scripts/data - mountPath: /mnt/scripts name: local-files metadata: annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard diff --git a/.k8s/__tests__/__snapshots__/indexing-prod.ts.snap b/.k8s/__tests__/__snapshots__/indexing-prod.ts.snap index db42ef11..b1ce71bb 100644 --- a/.k8s/__tests__/__snapshots__/indexing-prod.ts.snap +++ b/.k8s/__tests__/__snapshots__/indexing-prod.ts.snap @@ -7,32 +7,36 @@ kind: SealedSecret metadata: name: elastic-recherche-entreprises-write annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + 
app.github.com/sha: '0123456' labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod spec: encryptedData: ELASTICSEARCH_URL: >- AgA0evdMnmx3uImqSzGIHorhP2zdv0hFREYZt0FLKi9Eg389OmU1f8CKgpOJp5LPedSAghc8HEd//YeqYRvhQZhfclkw15FZJX7xxz3H75wYJMuNxnLPz8cEyruuA2NrqVoCNuf8p06aI1hfhjNlIKnPjewR14hK5tdKVmsRc1bfPOygUhslbp+aDCbyCFAVFamhaHYHYKdBKZ4B3V+pgLfQvJl2Xfnm3ChXO9Y7ptnI7IJjyzpResdQwasAppc6onvOrGAdms4wW0TZQOsrRV/3JwFHwbqwiX8DzdwkvLlK37HwV7XddiHOQsa3Z+ONI5y9Uhbw2+3ynMi0h8dVOxHmjeq4iPVBqSTWgRfqomz7MR+MSQfNwaE/D20qlt1fzrFvNezc07XxwYxogUS9QFVe8vgXbeoOFmgUA25pKUsOVgjs+ulA5uH7fMv9JhF5H93250FE+/VcHS1wtUAZw67w9GndnpEgs/kmqR9juD4qVEslrEb36OmsZi4hkgNKh8kK2GybTTaTw4I5xSuWu0mpQqZR3i5M8biT/B0YStW1EkajX02obUyQKSJk8VjLwXIFQ726kse5sMh8Ste+z1lKEIfq6jOKZZ7sUiSTwuLuGTmEqy5mdcmkyeCgp4paYOyzKHhYDNXaoZkKguZP9DuOY+44g4uch9Uj3hGvZb+4dV41FGIJ3PB0YZbbxgHF3UA6iHGkZcITwxhn4lJADU7yJyQcrziDXccKa1yiGe9BOOjGn2+WOLF0lrLsA6Uxk70sEiwZygYJQCJGN6KsHPz7Resz6aF/YTY3K+m65VeI4g== ELASTICSEARCH_API_KEY: >- - 
AgAw0rLyRSJJHgwBKDwtTfrBJtMTZ2jaQ07AZArXOsCazbaGOCfLtdtoB3tmlZw+fPYkLpCcWEDypjjt2qvoaMTQJp/LZcyXG69ccPLTj5wDzTSiYsIIewBNN6d0A71lUdbtAFgdyFPrxjYIDuj9SVtQqRjv6Fd/gmEnAsGh2szYRfeRIse8zqI/ICWuBpfidZ7lLDzJbTnCK4AYkan2zWqk4Xp5X8owyAaKc3gkG3CKDiR0Re+cAdfpQ2VJ7KX22vnn8jtu98i98vwGtG2iJngX7oLWXgYxR+ptkKdSt54lwzRCCfRgEI32Rl0JNHcY8QpYq+SVlxpMpNNLk5rp7NscR6AeU+YVQH0R+ZidEPC3Ys/ELIZ2wLd/++DKVRFQfgSoC1c0DkBy+u49Ye/5eV0m8p0FW+ch12ylVynnWvbIfhYsRQx2c9xsI7KN3ysUcktWjDech2qI4zpkTUyV+KhWqVpO2LjbJoD6TFOVgd/CoFGeNB7Yjds0ccerXk1z8lirdm2irSJ+B1szZBzGCVXGwqnPu/KNebaPdwk+Dy4MH3fFe9ktwHcSJuiwBsnx38FuFHI0qWDTk/StOzprYD16WoAihB+LCVrOvJEJTnAWf5lCs2QStRiD5gFAHMtdW/Z8Q29QgXI8s+19YZ3Wz6JmJYgUKayYR/7qZqV5eD5aVQR/BphmZiH2SJh4zJ5lFk3r6a02/OwlwqIVfCYFmm9thFHsbJr5A3698128+KRrB/WLTKV1AvmPWWcDoj7HbFi+tYobtwNQpCQWw28= + AgABUhcCHYMldKg9JPfijTDeiF5Fdo0h7NIHCv5/B2Jwnyb1LciNXJ4WmI+7jVweUhx2k0LZzIpyG7VRVMRzliDqcDH4CuNdjFmsZ/KKD1re88AVArYNblasISTx4JxeomlG903SVJgIHjdZDLHsiUYnB+BeM11UBUGlVhaCH0+mV+1alXK+xkxLbaFIh3853onWvbZTvPkc74qFCKfqJX5J9cxLrwX+qq48EwZZIspaR8LtlcZfSem1NSavHyLUxBXP8IEF2o4stt3haKbq+kO+2dPr8/o141KyNzabbbP82wXikuDk3yRqX4j0yAZhfT+X4Sk8kJKH947CV2bHmkr0ySy9+qTiQqwwHthEKWxmCVsQocX13tGC+v9hYzjor9Eu1OFeDa+pzJ21tC5gMRIVUgsTJ4+mDqiJqPgHAB/rPMSB+JaK2DZchVR2MK4mDYcjiNOrzUNG1sRiKrgikIiBIHKBgyoWlJVkqNWzUHBDl0hCp8uEkOWcmBymNvI37HqFPxfENvIeu6vXZ2K+R51bkbEYYfcvrrFmeCh+bdpsKALhmGbtVG0NeKIHR8uicixu/ewi95LVE8hwkYeAZerlBNSF4HRs2ZvNwtqnzQtiv5Dh1aVfQe8AaXojuS22JqgE+eiZhzw225ovZZ/OYv81odymXdBhovF+uurhF1jxczPc0DWdAKagw0mZYFfC7ipBILEitnp7vsrIuZZoKoZHjRhgLikhu74USV7RrutZpPQKqgm5+RWGu1v8HZzK4eDrH3JHi9TjKbcRG50= template: metadata: annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' name: elastic-recherche-entreprises-write labels: - application: 
v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard @@ -41,43 +45,89 @@ spec: apiVersion: v1 kind: ConfigMap data: - get-data.sh: > + sqlite.sh: > #!/bin/bash - # borrowed from annuaire entreprise : + # exit when any command fails - # https://github.com/etalab/api-annuaire-entreprises/tree/master/db/init + set -e - geodir=\${DATA_DIR}/geo + # download files, convert to SQLite and export to CSV - mkdir -p $geodir + DATA_DIR=\${DATA_DIR:-\\"./data\\"} + + + mkdir -p \\"$DATA_DIR\\" || true + + + echo \\"-- Working in $(dirname \\"$0\\")\\" + + cd \\"$(dirname \\"$0\\")\\" || exit echo \\"-- Download datasets\\" - for d in \`seq -w 1 19\` 2A 2B \`seq 21 74\` \`seq 76 95\` 98 \\"\\"; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_$d.csv.gz + + if command -v apt-get &> /dev/null + + then + apt-get update -y + fi + + + # install sqlite3 if not exists + + if ! command -v sqlite3 &> /dev/null + + then + echo \\"sqlite3 could not be found\\" + apt-get install -y sqlite3 + fi + + # install wget if not exists + + if ! command -v wget &> /dev/null + + then + echo \\"wget could not be found\\" + apt-get install -y wget + fi + + # install unzip if not exists + + if ! 
command -v unzip &> /dev/null + + then + echo \\"unzip could not be found\\" + apt-get install -y unzip + fi + + + # geo siret par département + + for d in $(seq -w 1 19) 2A 2B $(seq 21 74) $(seq 76 95) 98 \\"\\"; do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_$d.csv.gz\\" done - #Cas particulier Paris + # Cas particulier Paris 75101-75120 - for d in \`seq -w 1 20\`; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_751$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_751$d.csv.gz + for d in $(seq -w 1 20); do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_751$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_751$d.csv.gz\\" done - #Cas particulier DOM + # Cas particulier DOM 971->978 - for d in \`seq -w 1 8\`; do - wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_97$d.csv.gz --directory-prefix=$geodir - gunzip \${geodir}/geo_siret_97$d.csv.gz + for d in $(seq -w 1 8); do + wget --progress=bar:force:noscroll -q --show-progress \\"https://files.data.gouv.fr/geo-sirene/last/dep/geo_siret_97$d.csv.gz\\" --directory-prefix=\\"$DATA_DIR\\" + gunzip \\"\${DATA_DIR}/geo_siret_97$d.csv.gz\\" done @@ -85,303 +135,322 @@ data: wget --progress=bar:force:noscroll -q --show-progress https://files.data.gouv.fr/insee-sirene/StockUniteLegale_utf8.zip - --directory-prefix=$DATA_DIR + --directory-prefix=\\"$DATA_DIR\\" + + unzip \\"\${DATA_DIR}/StockUniteLegale_utf8.zip\\" -d \\"\${DATA_DIR}\\" # WEEZ data wget --progress=bar:force:noscroll -q --show-progress https://www.data.gouv.fr/fr/datasets/r/a785345a-6e8c-4961-ae0a-bc00878e4f2e - -O \${DATA_DIR}/WEEZ.csv - assemble_data.py: | - 
\\"\\"\\"CDTN Entreprises data assembler - - This script assembles data from different places and creates a new file that - will be used as source for our search index. - - \\"\\"\\" - import argparse - import pandas as pd - import numpy as np - from os import listdir - from os.path import isfile, join - - - def read_siren(stock_unite_legale_file): - \\"\\"\\" Read SIREN Stock Unite Legale - - Parameters - ---------- - stock_unite_legale_file: str - The location of the CSV or ZIP file - - Returns - ------- - employeurs - a Pandas dataframe containing the list of all companies that are still open - and employ people - \\"\\"\\" - trancheEffectifsUniteLegale = \\"trancheEffectifsUniteLegale\\" - categorieJuridiqueUniteLegale = \\"categorieJuridiqueUniteLegale\\" - nomenclatureActivitePrincipaleUniteLegale = \\"nomenclatureActivitePrincipaleUniteLegale\\" - categorieEntreprise = \\"categorieEntreprise\\" - activitePrincipaleUniteLegale = \\"activitePrincipaleUniteLegale\\" - - selection = [\\"siren\\", \\"sigleUniteLegale\\", \\"nomUniteLegale\\", \\"nomUsageUniteLegale\\", - 'denominationUniteLegale', \\"denominationUsuelle1UniteLegale\\", \\"denominationUsuelle2UniteLegale\\", - \\"denominationUsuelle3UniteLegale\\", activitePrincipaleUniteLegale, - trancheEffectifsUniteLegale, categorieJuridiqueUniteLegale, - nomenclatureActivitePrincipaleUniteLegale, categorieEntreprise] - - etatAdmin = \\"etatAdministratifUniteLegale\\" - caractereEmployeur = \\"caractereEmployeurUniteLegale\\" - - # we only select columns in use and convert to categorical dtype - # in order to decrease the dataframe memory footprint - cols = selection + [etatAdmin, caractereEmployeur] - raw = pd.read_csv(stock_unite_legale_file, usecols=cols, - dtype={ \\"siren\\": np.dtype(str), etatAdmin: \\"category\\", caractereEmployeur: \\"category\\", - trancheEffectifsUniteLegale: \\"category\\", - categorieJuridiqueUniteLegale: \\"category\\", - nomenclatureActivitePrincipaleUniteLegale: \\"category\\", 
- activitePrincipaleUniteLegale: \\"category\\", - categorieEntreprise: \\"category\\"}, ) - - is_ouvert = raw[etatAdmin] == \\"A\\" - is_employeur = raw[caractereEmployeur] == \\"O\\" - is_admin = raw[etatAdmin] == \\"A\\" - - employeurs = raw[is_ouvert & is_employeur & is_admin] - - return employeurs[selection] - - - def read_geo(geo_directory): - \\"\\"\\" Read GEO data - - Parameters - ---------- - geo_directory: str - The directory containing geo data for all regions - - Returns - ------- - all_geo - a Pandas dataframe containing geo information for all open companies - \\"\\"\\" - geo_files = [f for f in listdir( - geo_directory) if isfile(join(geo_directory, f))] - geo_selection = [\\"enseigne1Etablissement\\", \\"enseigne2Etablissement\\", \\"enseigne3Etablissement\\", \\"denominationUsuelleEtablissement\\", \\"activitePrincipaleEtablissement\\", - 'siren', 'siret', 'codePostalEtablissement', 'libelleCommuneEtablissement', \\"etatAdministratifEtablissement\\", \\"geo_adresse\\"] - geo = {} - for file in geo_files: - geo[file] = pd.read_csv( - geo_directory + file, dtype={\\"codePostalEtablissement\\": np.dtype(str), - \\"etatAdministratifEtablissement\\": \\"category\\", - \\"activitePrincipaleEtablissement\\": \\"category\\", - \\"siret\\": np.dtype(str), - \\"siren\\": np.dtype(str), - }, usecols=geo_selection - ) - - all_geo = pd.concat(geo.values(), ignore_index=True).dropna( - subset=['siret']) - - all_geo = all_geo.astype(dtype={\\"codePostalEtablissement\\": np.dtype(str), - \\"etatAdministratifEtablissement\\": \\"category\\", - \\"activitePrincipaleEtablissement\\": \\"category\\", - \\"siret\\": np.dtype(str), - \\"siren\\": np.dtype(str), - }) - - all_geo = all_geo[all_geo[\\"etatAdministratifEtablissement\\"] == \\"A\\"] - - return all_geo - - - def read_idcc(idcc_file): - \\"\\"\\" Read IDCC data - - Parameters - ---------- - idcc_file: str - The location of the CSV file containing associations between companies and their \\"convention 
collectives\\", (aka WEEZ) - - Returns - ------- - idccs - a Pandas dataframe containing siret / idcc associations - \\"\\"\\" - idccs = pd.read_csv(idcc_file, dtype={\\"SIRET\\": np.dtype(str)}, usecols=[\\"SIRET\\", \\"IDCC\\"]).rename( - columns={\\"SIRET\\": \\"siret\\", \\"IDCC\\": \\"idcc\\"}) - - return idccs - - - def assemble(siren, geo, idcc, output): - sirenGeo = pd.merge(siren, geo, on='siren') - merged = pd.merge(sirenGeo, idcc, how='left', on='siret') - - # add etablissement counts - etsCounts = merged.siren.value_counts().rename_axis( - 'siren').reset_index(name='etablissements') - withEts = pd.merge(merged, etsCounts, on='siren') - - # persits as CSV file - withEts.astype({'idcc': 'Int64'}).to_csv(output) - - - def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - 'siren_file', - type=str, - help=\\"Location of the StockUniteLegale CSV or ZIP file\\" - ) - parser.add_argument( - 'geo_directory', - type=str, - help=\\"Location of the directory containing all the Geo CSV files\\" - ) - parser.add_argument( - 'idcc_file', - type=str, - help=\\"Location of the siret/idcc CSV file (aka WEEZ)\\" - ) - parser.add_argument( - 'output_file', - type=str, - help=\\"Location of the output file\\" - ) - - args = parser.parse_args() - - print(\\"Read SIREN data\\") - siren = read_siren(args.siren_file) - - print(\\"Read GEO data\\") - geo = read_geo(args.geo_directory) - - print(\\"Read IDCC data\\") - idcc = read_idcc(args.idcc_file) - - print(\\"Assemble datasets\\") - assemble(siren, geo, idcc, args.output_file) - - - if __name__ == \\"__main__\\": - main() - requirements.txt: | - numpy - pandas + -O \\"\${DATA_DIR}/WEEZ.csv\\" + + + echo \\"-- Import CSV datasets to sqlite\\" + + + sqlite3 -echo \\"\${DATA_DIR}/db.sqlite\\" \\".read import.sql\\" + + + echo \\"-- Export sqlite data to \${DATA_DIR}/assembly.csv\\" + + + sqlite3 -header -csv \\"\${DATA_DIR}/db.sqlite\\" \\".read export.sql\\" > + 
\\"\${DATA_DIR}/assembly.csv\\" + import.sql: |- + --- sqlite3 -echo db.sqlite \\".read import.sql\\" + --- + --- import local CSVs to sqlite database + --- + + PRAGMA synchronous = OFF; + + DROP TABLE IF EXISTS weez; + DROP TABLE IF EXISTS geo_siret; + DROP TABLE IF EXISTS stock; + + .mode csv + .import data/WEEZ.csv weez + .import data/StockUniteLegale_utf8.csv stock + .import data/geo_siret_.csv geo_siret + .import data/geo_siret_01.csv geo_siret + .import data/geo_siret_02.csv geo_siret + .import data/geo_siret_03.csv geo_siret + .import data/geo_siret_04.csv geo_siret + .import data/geo_siret_05.csv geo_siret + .import data/geo_siret_06.csv geo_siret + .import data/geo_siret_07.csv geo_siret + .import data/geo_siret_08.csv geo_siret + .import data/geo_siret_09.csv geo_siret + .import data/geo_siret_10.csv geo_siret + .import data/geo_siret_11.csv geo_siret + .import data/geo_siret_12.csv geo_siret + .import data/geo_siret_13.csv geo_siret + .import data/geo_siret_14.csv geo_siret + .import data/geo_siret_15.csv geo_siret + .import data/geo_siret_16.csv geo_siret + .import data/geo_siret_17.csv geo_siret + .import data/geo_siret_18.csv geo_siret + .import data/geo_siret_19.csv geo_siret + .import data/geo_siret_21.csv geo_siret + .import data/geo_siret_22.csv geo_siret + .import data/geo_siret_23.csv geo_siret + .import data/geo_siret_24.csv geo_siret + .import data/geo_siret_25.csv geo_siret + .import data/geo_siret_26.csv geo_siret + .import data/geo_siret_27.csv geo_siret + .import data/geo_siret_28.csv geo_siret + .import data/geo_siret_29.csv geo_siret + .import data/geo_siret_2A.csv geo_siret + .import data/geo_siret_2B.csv geo_siret + .import data/geo_siret_30.csv geo_siret + .import data/geo_siret_31.csv geo_siret + .import data/geo_siret_32.csv geo_siret + .import data/geo_siret_33.csv geo_siret + .import data/geo_siret_34.csv geo_siret + .import data/geo_siret_35.csv geo_siret + .import data/geo_siret_36.csv geo_siret + .import data/geo_siret_37.csv 
geo_siret + .import data/geo_siret_38.csv geo_siret + .import data/geo_siret_39.csv geo_siret + .import data/geo_siret_40.csv geo_siret + .import data/geo_siret_41.csv geo_siret + .import data/geo_siret_42.csv geo_siret + .import data/geo_siret_43.csv geo_siret + .import data/geo_siret_44.csv geo_siret + .import data/geo_siret_45.csv geo_siret + .import data/geo_siret_46.csv geo_siret + .import data/geo_siret_47.csv geo_siret + .import data/geo_siret_48.csv geo_siret + .import data/geo_siret_49.csv geo_siret + .import data/geo_siret_50.csv geo_siret + .import data/geo_siret_51.csv geo_siret + .import data/geo_siret_52.csv geo_siret + .import data/geo_siret_53.csv geo_siret + .import data/geo_siret_54.csv geo_siret + .import data/geo_siret_55.csv geo_siret + .import data/geo_siret_56.csv geo_siret + .import data/geo_siret_57.csv geo_siret + .import data/geo_siret_58.csv geo_siret + .import data/geo_siret_59.csv geo_siret + .import data/geo_siret_60.csv geo_siret + .import data/geo_siret_61.csv geo_siret + .import data/geo_siret_62.csv geo_siret + .import data/geo_siret_63.csv geo_siret + .import data/geo_siret_64.csv geo_siret + .import data/geo_siret_65.csv geo_siret + .import data/geo_siret_66.csv geo_siret + .import data/geo_siret_67.csv geo_siret + .import data/geo_siret_68.csv geo_siret + .import data/geo_siret_69.csv geo_siret + .import data/geo_siret_70.csv geo_siret + .import data/geo_siret_71.csv geo_siret + .import data/geo_siret_72.csv geo_siret + .import data/geo_siret_73.csv geo_siret + .import data/geo_siret_74.csv geo_siret + .import data/geo_siret_75101.csv geo_siret + .import data/geo_siret_75102.csv geo_siret + .import data/geo_siret_75103.csv geo_siret + .import data/geo_siret_75104.csv geo_siret + .import data/geo_siret_75105.csv geo_siret + .import data/geo_siret_75106.csv geo_siret + .import data/geo_siret_75107.csv geo_siret + .import data/geo_siret_75108.csv geo_siret + .import data/geo_siret_75109.csv geo_siret + .import 
data/geo_siret_75110.csv geo_siret + .import data/geo_siret_75111.csv geo_siret + .import data/geo_siret_75112.csv geo_siret + .import data/geo_siret_75113.csv geo_siret + .import data/geo_siret_75114.csv geo_siret + .import data/geo_siret_75115.csv geo_siret + .import data/geo_siret_75116.csv geo_siret + .import data/geo_siret_75117.csv geo_siret + .import data/geo_siret_75118.csv geo_siret + .import data/geo_siret_75119.csv geo_siret + .import data/geo_siret_75120.csv geo_siret + .import data/geo_siret_76.csv geo_siret + .import data/geo_siret_77.csv geo_siret + .import data/geo_siret_78.csv geo_siret + .import data/geo_siret_79.csv geo_siret + .import data/geo_siret_80.csv geo_siret + .import data/geo_siret_81.csv geo_siret + .import data/geo_siret_82.csv geo_siret + .import data/geo_siret_83.csv geo_siret + .import data/geo_siret_84.csv geo_siret + .import data/geo_siret_85.csv geo_siret + .import data/geo_siret_86.csv geo_siret + .import data/geo_siret_87.csv geo_siret + .import data/geo_siret_88.csv geo_siret + .import data/geo_siret_89.csv geo_siret + .import data/geo_siret_90.csv geo_siret + .import data/geo_siret_91.csv geo_siret + .import data/geo_siret_92.csv geo_siret + .import data/geo_siret_93.csv geo_siret + .import data/geo_siret_94.csv geo_siret + .import data/geo_siret_95.csv geo_siret + .import data/geo_siret_971.csv geo_siret + .import data/geo_siret_972.csv geo_siret + .import data/geo_siret_973.csv geo_siret + .import data/geo_siret_974.csv geo_siret + .import data/geo_siret_975.csv geo_siret + .import data/geo_siret_976.csv geo_siret + .import data/geo_siret_977.csv geo_siret + .import data/geo_siret_978.csv geo_siret + .import data/geo_siret_98.csv geo_siret + + CREATE INDEX 'geo_siret_idx' ON 'geo_siret' ('siret'); + CREATE INDEX 'geo_siren_idx' ON 'geo_siret' ('siren'); + CREATE INDEX 'weez_siret_idx' ON 'weez' ('SIRET'); + CREATE INDEX 'stock_siren_idx' ON 'stock' ('siren'); + + SELECT \\"weez\\", count(*) from weez; + SELECT \\"stock\\", 
count(*) from stock; + SELECT \\"geo_siret\\", count(*) from geo_siret; + export.sql: |+ + --- Output index data + --- sqlite3 -header -csv db.sqlite \\".read export.sql\\" > output.csv + + PRAGMA synchronous = OFF; + + SELECT + stock.siren, + stock.sigleUniteLegale, + stock.nomUniteLegale, + stock.prenom1UniteLegale, + stock.nomUsageUniteLegale, + stock.denominationUniteLegale, + stock.denominationUsuelle1UniteLegale, + stock.denominationUsuelle2UniteLegale, + stock.denominationUsuelle3UniteLegale, + stock.activitePrincipaleUniteLegale, + stock.trancheEffectifsUniteLegale, + stock.categorieJuridiqueUniteLegale, + stock.nomenclatureActivitePrincipaleUniteLegale, + stock.categorieEntreprise, + stock.etatAdministratifUniteLegale, + stock.caractereEmployeurUniteLegale, + geo_siret.siret, + geo_siret.codePostalEtablissement, + geo_siret.libelleCommuneEtablissement, + geo_siret.etatAdministratifEtablissement, + geo_siret.enseigne1Etablissement, + geo_siret.enseigne2Etablissement, + geo_siret.enseigne3Etablissement, + geo_siret.denominationUsuelleEtablissement, + geo_siret.activitePrincipaleEtablissement, + geo_siret.geo_adresse, + geo_siret.complementAdresseEtablissement, + geo_siret.numeroVoieEtablissement, + geo_siret.indiceRepetitionEtablissement, + geo_siret.typeVoieEtablissement, + geo_siret.libelleVoieEtablissement, + weez.IDCC as idcc, + (select count(*) FROM geo_siret where siren=stock.siren) etablissements + from stock, geo_siret + left join weez on weez.SIRET=geo_siret.siret + where stock.siren=geo_siret.siren; + + metadata: - name: config-map-files-0123456 + name: config-map-files-0123456789abcdefghijklmnopqrstuvwxyz0123 annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - 
application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod --- apiVersion: batch/v1 kind: Job metadata: - name: update-index-0123456 + name: update-index-0123456789abcdefghijklmnopqrstuvwxyz0123 annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod spec: backoffLimit: 3 template: spec: containers: - name: update-index - image: >- - harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-index:1.2.3 + image: ghcr.io/socialgouv/recherche-entreprises/index:1.2.3 volumeMounts: - name: data mountPath: /data env: - name: ASSEMBLY_FILE value: /data/assembly.csv + - name: ELASTICSEARCH_INDEX_NAME + value: search-entreprises envFrom: - secretRef: name: elastic-recherche-entreprises-write resources: limits: - cpu: '2' - memory: 18Gi + cpu: '4' + memory: 5Gi requests: - cpu: '1' - memory: 14Gi + cpu: '2' + memory: 2Gi restartPolicy: Never volumes: - name: data emptyDir: {} - configMap: - name: config-map-files-0123456 + name: config-map-files-0123456789abcdefghijklmnopqrstuvwxyz0123 defaultMode: 511 name: local-files initContainers: - args: - - '-c' - - > - - apt-get update -y && apt-get install -y wget - - - export 
DATA_DIR=\\"/data\\" - - - cd /data - - - echo \\"running get-data.sh...\\" - - - /mnt/scripts/get-data.sh - - - pip3 install -r /mnt/scripts/requirements.txt - - - echo \\"running assemble_data.py...\\" - - - python3 /mnt/scripts/assemble_data.py - $DATA_DIR/StockUniteLegale_utf8.zip $DATA_DIR/geo/ - $DATA_DIR/WEEZ.csv $DATA_DIR/assembly.csv + - /mnt/scripts/sqlite.sh command: - sh - image: python:3.9.4 + image: ubuntu:18.04 imagePullPolicy: Always - name: download-data + name: download-build-data + env: + - name: DATA_DIR + value: /mnt/scripts/data + resources: + limits: + cpu: '4' + memory: 2Gi + requests: + cpu: '2' + memory: 1Gi volumeMounts: - name: data - mountPath: /data + mountPath: /mnt/scripts/data - mountPath: /mnt/scripts name: local-files metadata: annotations: - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard diff --git a/.k8s/__tests__/__snapshots__/preprod.ts.snap b/.k8s/__tests__/__snapshots__/preprod.ts.snap index 1439ae60..2797cce6 100644 --- a/.k8s/__tests__/__snapshots__/preprod.ts.snap +++ b/.k8s/__tests__/__snapshots__/preprod.ts.snap @@ -9,20 +9,21 @@ metadata: socialgouv/creator: autodevops field.cattle.io/creatorId: gitlab field.cattle.io/projectId: c-bd7z2:p-7ms8p - git/branch: v1.2.3 - git/remote: >- - https://gitlab-ci-token:[MASKED]@gitlab.factory.social.gouv.fr/SocialGouv/recherche-entreprises.git - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + 
git/branch: refs/tags/v1.2.3 + git/remote: socialgouv/recherche-entreprises + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: azure-pg-admin-user: recherche-entreprises - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - name: recherche-entreprises-85-preprod-dev42 + name: recherche-entreprises-preprod --- apiVersion: apps/v1 kind: Deployment @@ -30,18 +31,20 @@ metadata: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard name: recherche-entreprises-api - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod spec: replicas: 1 selector: @@ -52,19 +55,21 @@ spec: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + 
app.github.com/sha: '0123456' labels: app: recherche-entreprises-api - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard spec: containers: - - image: harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-api:1.2.3 + - image: harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-api:1.5.8 livenessProbe: failureThreshold: 6 httpGet: @@ -111,35 +116,39 @@ metadata: sealedsecrets.bitnami.com/cluster-wide: 'true' kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod spec: encryptedData: ELASTICSEARCH_URL: >- - 
AgDGWiSE5kUv0Mv4Dot8KfbSGm5XVlyi4V9yLiMI97deZctFWRd2T31MtTUz/XgT4VlFH4NUhK+dQonLEILE2P9rs4MMDaY09cc3NZiDLB9GJoBcWnWsa02QV4Xykk8UthTHQPFC4g+0ElFU4UewbIc3Zc2ZqIk5Y803TnYkRuAF1bM1ZdpWGnm+yh7+lXnbRMmFo62vhPVB8lnl9Z/RVxhs1jx6xnbIFyTjvelM4/sySiyHZ0h74fL61DAvOcNBgCTd49RRBBzcAhp6a8KL45FL1eAYyOqS8vW5uNeFu59Cu9ByRruhyTELjpPtLesjsBUHiqmfBh09esKw+mdtMmfYxIsy+JGPS50N8GiNj+n0A5XXtLQkm9oT884r4LEQip35YHyenh20NmueHX8RGMOXRT05PWnbL70NtZHqUwzFdc21vdFhXAb/MVaMgqcKG4RPkRS+qljn+pQjJ8DkY0EqgWOCfSyjB+jKJ65peSqqORq+B8zTDgpv9ZaN+xSDsfi+PckPPJqKOjaTkQW/VdfwpT6ipOUpNgznUOGy7XN9vxAQR9P79n6KlIyMsme4hN7OHX5ko0xWQZcCyfHbSbT7JHY5UpmTv+c0NtyrV755FQN75HDFRhQYHtgLnhR9PuBCZ8Gvtg2T/Nr63GsTMC5e52bj2zIRlOUTrsvxEIYKQoBPILoueEIcMz64/UiMAZ9RwH308wRHOrZtch5gclzUQWZEgI5Vx2cJQ/zr9iqBaedg2v8GqvESeKw1e7pgXoLE6pFKJtQa7cMwQgmrBkkxrEfePcRDP4yAMVQaiM8DoqIjVg== + AgCxK1MqqVkCOHvMjjcieUsj1uE/ATUzAfYTYVEMqcWFxTnugOaYlXB2mTuZtzdjXNvINREzDynnVRBghPIJvrJm59vPgexC5FyQptdPf2XwQAG6n0yMLWAEBM+O8ieO1D6vBu6BYuq4QYoItq+SMTE6APNX+p6wi4/LP04cR8p8bVKNxckPMfyVfQGhUSTD+2gC5ow2x1Mx67odqqP3E8eK9tm5tU6ltzOpKqJKITUQIqTQbSKFr8p77DE4IrdbPn8MpvdP+VrT+ztF4thPkpVpuYWy9rABdQKeEuNdN2AuvmPeJs/2B80iApIgtWWQjV/iG+WlLGMohvWDZVsBGuBuYGzhjJlr5w8OljwKDihb2cG1M/Ot/LM1xRuCC85wvqH0oMhnnP/3DZts5aYAN+qjAezOkPT9kjT+gu0WoQB+E5r9ITcYYHYbJw/YSxS2z86Ul0amRRW4ovAWT5c/nFDZU6i74I8YyVh8oOtrieZgNF4ajJcIr5YpjT2LICKoZR0KExrQ4V30zZJ+wZcvd8W3e/KCmKsFxloTkLq/sjEiFQoe6ffYTQhSUXZynqrdUKZsdoSleprfctlJtf3rzxt5hZIpcLulXrYk7pPPiHkTcV3Zc4zkN3BFE1A9vsgTvh6xTXlvUekJHHMXU3t8ZyZD8vvemxv0FE8G33XWQF2DEkmyc3gXMlfPDLho4fHv0h0iId0c9klFkOpmp8REFWJyuRQjq+hOHvfoCSu1ymsCwA0CMAguJoX1XEhn0aA9QIgf0VyBe/cVJwu/GfW74UBi6x5feqzFovYIUXbDP6kW ELASTICSEARCH_API_KEY: >- - 
AgCXfkWdcNqxUUWf9iPMYZXx2a1hU/NTseXJ3EHDYU2+9i4EB5dKGSALKprr1OmAnt04YJ0K5ALDWfHz+U8lN5wmlyZzfn1lbhccFByyCBM+k0WvKCieuMDCgRh8bkV+bSN7kYVq7ARzL8EzzUyvr7kzfoPwB3kwUSR+9yywSsr/OnC1cpS2+vAH+iJxa/NuLzPboxuOuMiKC2EvnO1GzoB82Sv36eXwrbfDDXilAXPm/zPUee12xXlf63b1ieDaOSZd75w7i+DrkALccdmMsCR11Rms93/cU1B8xsGC2L6R3n2AEaHI8vK9FsMzfHUdIbAKceoRKJW3CTNShc8imjeInj6B7t/KYTOc41oSZ2OVls6R6wCeaGtrxvQKfUR/Pbof+mTw7rBL1RMSdLtux2ZkdtHbY8k3kBqJq5jZ/8u6WLdtSuOcbiwB16dA+xpBp3LjTEtVR+8+KM+kW1xUuKIe+GV6RbfyDf3X0W5eSSIEUUgVblD0YXjbTFBT/oJBPlKvFmrPFCxwqsYDSv64+8/TeJrZWzX2XHi61Elye0iLrCVnH3PjiHrzm2URaRuu+VoaFi3TpYWGsgow29+IX4m2+z1NqOk/BMYUDCL9Gb6RzJwkNx21JFtN8eTgZF6w/FS/rBoK4/Ny75HE6L3D8L49upB7VEv328pk+PjHI68+AmzYTwTgUAA7HjvD/Y/mA6rrO2LiLljQTvqwZAIC8W1g0Eu2Ir/9sIatVp2GtxbQRvIekSD8WagNCY7ntzKHcQbnZ/ivEG74uoWiiGc= + AgCtqcSVac6ptXOzSGqrBps1Uz6elTOnheaAf8cqAb1G960NCqmmkx1L1FDhfSKCuhBUAp0mT/3rokX3cg/pLIqImIVlPEhq/n+ZaKPHqGt0UT9QpD3r1C4nYan/WJ1PnBlCTXYlA/dtmCakNjtf2ikvWg/6Keu1OGlHt/ymVc2guxqGl++caxPiEIQAiVHDZksc09RM2Z6jU/WnI5gzTPQjgucb2yiHBLWZsEcoCIsA2nXrp2/vQ/ATMtITKb1223jyHnJiargNnQrRVRA7Geuz5FyaqSXjqvjG1HrqVxAlVNzFPb/N3fN8C6KHCjtOuu/pOM+cqC4cFvzyzqGymcDOKGC5Ax47j5v0KsVlJZ93dfO2yD/Fi2txeuNdmQ42bvhpGbPJu/iYLXmaETvu6/qpL3UrU4i2KVyxXW0CbUzBVh1fTmCClAMDgETpWNvnRXgAuu0jbgTnWjN0zSmjia0/BJmY1Z5uct7T4Lli4Dz/vJucBthIBVgYaVX/jLqfaNWpD1TuWkTHsf4gR5HErzQp60NDej/I7TespFMqRMhrroc6VaH/Wl6wV4msRM3sYaxcHPd4kJxrOeA7cQnfqQsWs4/RG++p+TCmD18qgevVBV1z11vFu2R6KE+c1YRANE5e6EBMO8yUly1yarrx6jOo6E5kAdKqPGkNUyq3BgOvGWmkbV32slQ4hXm7zUuyVnXBmpmoryowHhqV7iW8Si6CSMKACCENVqJPrk55VaL8sw8dZfQF0aekJoEqf/UqAv/DZUTYOAg4QI+4iD4= template: metadata: annotations: sealedsecrets.bitnami.com/cluster-wide: 'true' kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: 
socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' name: elastic-recherche-entreprises-read labels: - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard @@ -150,8 +159,8 @@ kind: Service metadata: labels: app: recherche-entreprises-api - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises team: recherche-entreprises cert: wildcard @@ -159,10 +168,12 @@ metadata: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 - namespace: recherche-entreprises-85-preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises-preprod spec: ports: - name: http @@ -179,21 +190,23 @@ metadata: kubernetes.io/ingress.class: nginx kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: preprod-dev42 - app.gitlab.com/env.name: preprod-dev42 + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api - application: v1-2-3-recherche-entreprises - component: v1-2-3-recherche-entreprises + application: recherche-entreprises + component: recherche-entreprises owner: recherche-entreprises 
team: recherche-entreprises cert: wildcard name: recherche-entreprises-api - namespace: recherche-entreprises-85-preprod-dev42 + namespace: recherche-entreprises-preprod spec: rules: - - host: api-preprod-recherche-entreprises.dev42.fabrique.social.gouv.fr + - host: api-recherche-entreprises-preprod.dev2.fabrique.social.gouv.fr http: paths: - backend: @@ -205,7 +218,308 @@ spec: pathType: Prefix tls: - hosts: - - api-preprod-recherche-entreprises.dev42.fabrique.social.gouv.fr + - api-recherche-entreprises-preprod.dev2.fabrique.social.gouv.fr + secretName: wildcard-crt +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-search + namespace: recherche-entreprises-preprod +spec: + replicas: 1 + selector: + matchLabels: + app: recherche-entreprises-search + template: + metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + spec: + containers: + - image: ghcr.io/socialgouv/recherche-entreprises/search:1.2.3 + livenessProbe: + failureThreshold: 6 + httpGet: + path: /healthz + port: 
http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 5 + name: recherche-entreprises-search + ports: + - containerPort: 3000 + name: http + readinessProbe: + failureThreshold: 15 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 0 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 16Mi + startupProbe: + failureThreshold: 12 + httpGet: + path: /healthz + port: http + periodSeconds: 5 + env: + - name: ELASTICSEARCH_INDEX_NAME + value: search-entreprises +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-search + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises-preprod +spec: + ports: + - name: http + port: 80 + targetPort: 3000 + selector: + app: recherche-entreprises-search + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: nginx + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-search + namespace: 
recherche-entreprises-preprod +spec: + rules: + - host: search-recherche-entreprises-preprod.dev2.fabrique.social.gouv.fr + http: + paths: + - backend: + service: + name: recherche-entreprises-search + port: + name: http + path: / + pathType: Prefix + tls: + - hosts: + - search-recherche-entreprises-preprod.dev2.fabrique.social.gouv.fr + secretName: wildcard-crt +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-front + namespace: recherche-entreprises-preprod +spec: + replicas: 1 + selector: + matchLabels: + app: recherche-entreprises-front + template: + metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + spec: + containers: + - image: ghcr.io/socialgouv/recherche-entreprises/front:1.2.3 + livenessProbe: + failureThreshold: 6 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 5 + name: recherche-entreprises-front + ports: + - containerPort: 80 + name: http + readinessProbe: + failureThreshold: 15 + httpGet: + path: /healthz 
+ port: http + initialDelaySeconds: 0 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 16Mi + startupProbe: + failureThreshold: 12 + httpGet: + path: /healthz + port: http + periodSeconds: 5 + env: + - name: REACT_APP_API_URL + value: >- + https://search-recherche-entreprises-preprod.dev2.fabrique.social.gouv.fr/api/v1 +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-front + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises-preprod +spec: + ports: + - name: http + port: 80 + targetPort: 80 + selector: + app: recherche-entreprises-front + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: nginx + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + cert: wildcard + name: recherche-entreprises-front + namespace: recherche-entreprises-preprod +spec: + rules: + - host: recherche-entreprises-preprod.dev2.fabrique.social.gouv.fr + http: + paths: + - backend: + service: + name: 
recherche-entreprises-front + port: + name: http + path: / + pathType: Prefix + tls: + - hosts: + - recherche-entreprises-preprod.dev2.fabrique.social.gouv.fr secretName: wildcard-crt " `; diff --git a/.k8s/__tests__/__snapshots__/prod.ts.snap b/.k8s/__tests__/__snapshots__/prod.ts.snap index b8271c37..55ba28c0 100644 --- a/.k8s/__tests__/__snapshots__/prod.ts.snap +++ b/.k8s/__tests__/__snapshots__/prod.ts.snap @@ -8,9 +8,11 @@ metadata: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: prod - app.gitlab.com/env.name: prod + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api application: recherche-entreprises @@ -29,9 +31,11 @@ spec: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: prod - app.gitlab.com/env.name: prod + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api application: recherche-entreprises @@ -40,7 +44,7 @@ spec: team: recherche-entreprises spec: containers: - - image: harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-api:1.2.3 + - image: harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-api:1.5.8 livenessProbe: failureThreshold: 6 httpGet: @@ -86,9 +90,11 @@ metadata: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: prod - app.gitlab.com/env.name: prod + 
app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: application: recherche-entreprises component: recherche-entreprises @@ -100,16 +106,18 @@ spec: ELASTICSEARCH_URL: >- AgAjiLY8OLRszj3zw7ngLaY3auR33D6sJSDaLQJWekd3Y4aHg5duw8wbdkMSu3WUARDGbFqyfoRQsG57xAliVJVLz07vFhxe7TXOnVMkpWH9pYdVqbl0blAnqyxTI7Uly9GvYDF7OcM23IvGA0svXHyge+nIUqO/zgvEyiYL/jiIfjXwKjXg0mJnH6/WszPBf9oaZYpwl0HOYHUK5LNIWyE5/nSSzCEMPamlACCtHdyRf0EywK3hur8V7FvC8gKDnByxn7IhVboOTdCsIQhrSV1ZePbvV5wex0H58JQdJWPMJxq/rRQKsNj/9Xmy8c31j3D2nCNQs6aSDi6wN9d2057rsiwIwqCnA8gPPh6bT26T2nnFFnbNFE/BMajucxxrx3+6ctIwmbC/ZpLrabzxcvoanc8o8zTS8s7FZL3/q00mWjAVy/OgIDdLKI8B2LQp1S+FGFdTmtzTNyscxtkjiJoEQeo1Mf8tWL2+h1nfeUipRq2xiVOa8KxM8ZkDPLFIAInA9w64/Ddk3FWnm5ky1MzRioZcjKqWfT6XxxNG/+4wvDCEMyDTLZyVY56sglGSSmizxXdBUeSR/n08F2k8Vw9s+qjTch5O1g/Agv87CVl+fROV/a4ayrP/gyTmLBT0OIsh6ZZtgIx1UJexF7fF8obFUcCAo8rvcj0ZlyP/ZNfmFu6CPUqd374DZyvZpwtnbX9fzpC2I1ZYNvV9vCVs5+BFULhbldsQPo5VMiEK1T+hunQr08yl3c/gtGYBpt5c6nn5xsopBWkwjjttVnsVVRZEDEwiDhlKbaJ/hedY3mZJ2g== ELASTICSEARCH_API_KEY: >- - AgBeveBsjm4jtYUPCGG29nyKrmf6Qo/N+XE2fr6ROfLaEeLHY4Ak7LxtL11Gk7+nPMB+/MyJbNGWFNd7trVwTb/NeYyLD77bvsq8Dr421+KiM8MO0JPt+RyieaMCV/ZhRzQJPzPznO8mP1yQTcKgBIoSVMTNZ27EfwLzSu99i6WW4M3EgbY1WcV0Ofyepyp/MsW/yuGWQJmvPuygcw0D4hfHSXBrOs4e1nXMsNIuqs00aV7mHFAzYZ+ZBq+htxRWvgPYTXH4m1L6dxb89Q85JdJS13akPDawXcTYkac72F4QxYTBZttZkDAg5mhbexZyr+yDpwKNTBBaLzaHNjw1CTIdGeAROVNH5m01g0zUNjhabOPibvWq3+JidmxabbW3TiGJxmOMiyPkTHFFeOjZHsgLGiXoZzoRKgMrigctBiWiTsO5hZ6RAISqtM8+u+w/48zZUQA/aHQDrjpuzle1y9aJKikIkKepHTlepvbeh7w48Vv7RfZzJ701tnVuuMNi9VswEprHG32P6P0cT03g+oL1n4vpybpx++Sb8TkctQQnAUBCeT1dSuYqAWvzSszVbIkA3YJZXqTDGwTIa6YnEqzXIkrZ1muX0FiQn/Nl28uBYcTv9ysmdt7GIixtJotrDDUawOsVwnEBtqiBy9tfziyQUBKoSbZF3lQ1YN8yYqNnMsGqD833N/nBnZwFgmZqbe3Eb44j08e+i/w0tLsDHxIdEdTGmsZR2QTGyMlhxcEyWGpRU0Ft6JlfhIlYv02vNCv7ncI5AqbnghOi5p0= + 
AgCqtv233/TmTQR4P6bx+wGb0uKdVlKLI7gd+gRXuMBbyyCNmm565ulZFvZJOw+f0hS0KQ6lP84R3LLQTbm40KO71Al+UW0g/4L2eSqAvx3jmQEuLMPehlG7rwitoUDsAiuRPbclb0bfro9uhtWFXgLOTNU48mTT64Xq5mFsYKdNkwXXVgP7+wNg0UMsa2TlyDLMb7JW91MN5vOwsKtr7OfoPIKa5LwHvQIkFDcI4aWBuxsS2FGTBJ81xpgCAfBOnDVYz+ymon8FyQt6Y3SgSF0wjVZC7t8qkC0nPTRj6eCBc9loOt/NLUpSBqFhEYqO0FAl++sS0CZBgvsLnS8gAiiv3UwdLOz94QL0zqFdZBh5TZgdZUQLuW6aWkueAW+0ccKop3VyBJn8fZ1xAKfIszgldpfUBxHuDJ8keEr9aJM8nYiC/lXNMb7dKDJkRoyn/X4mLv6VTwnaqvj/NwVfkK9mnZAoxQg2kEmiDWYvqNL7yRForYIZKDjU7dGUPBWPqeqvpKJ8QLsDnve1Amvhe6wr8K7FYpCLcyrAC0huPaU4/d3IFd7P/neGqocGEh1pvQDp1BWGytkmZAqu2nRp4y6zeVyPzO/G5BbVwOhvqG2BUIdFM68ekL2xFe8yeHIktqJeadgifOtYNF7xnDmsi/v57E6MAFoFiqkVG+4vtUCxQTYZNbDS5YDAo3DBaiIsdOgO0cIM/TCCxjcDPh3pQGs6fsFdENcXzgj8zZQnE3SjbojejTXfVEI6dSoHaIY4WBg0MXEwsivDJ8Ctjrc= template: metadata: name: elastic-recherche-entreprises-read annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: prod - app.gitlab.com/env.name: prod + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: application: recherche-entreprises component: recherche-entreprises @@ -130,9 +138,11 @@ metadata: annotations: kapp.k14s.io/disable-default-ownership-label-rules: '' kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: prod - app.gitlab.com/env.name: prod + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' namespace: recherche-entreprises spec: ports: @@ -152,9 +162,11 @@ metadata: kubernetes.io/tls-acme: 'true' kapp.k14s.io/disable-default-ownership-label-rules: '' 
kapp.k14s.io/disable-default-label-scoping-rules: '' - app.gitlab.com/app: socialgouv-recherche-entreprises - app.gitlab.com/env: prod - app.gitlab.com/env.name: prod + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' labels: app: recherche-entreprises-api application: recherche-entreprises @@ -180,6 +192,303 @@ spec: - api-recherche-entreprises.fabrique.social.gouv.fr secretName: recherche-entreprises-api-crt --- +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + name: recherche-entreprises-search + namespace: recherche-entreprises +spec: + replicas: 1 + selector: + matchLabels: + app: recherche-entreprises-search + template: + metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + spec: + containers: + - image: ghcr.io/socialgouv/recherche-entreprises/search:1.2.3 + livenessProbe: + failureThreshold: 6 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 5 + 
name: recherche-entreprises-search + ports: + - containerPort: 3000 + name: http + readinessProbe: + failureThreshold: 15 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 0 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 16Mi + startupProbe: + failureThreshold: 12 + httpGet: + path: /healthz + port: http + periodSeconds: 5 + env: + - name: ELASTICSEARCH_INDEX_NAME + value: search-entreprises +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + name: recherche-entreprises-search + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises +spec: + ports: + - name: http + port: 80 + targetPort: 3000 + selector: + app: recherche-entreprises-search + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: nginx + cert-manager.io/cluster-issuer: letsencrypt-prod + kubernetes.io/tls-acme: 'true' + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-search + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + name: recherche-entreprises-search + namespace: recherche-entreprises +spec: 
+ rules: + - host: search-recherche-entreprises.fabrique.social.gouv.fr + http: + paths: + - backend: + service: + name: recherche-entreprises-search + port: + name: http + path: / + pathType: Prefix + tls: + - hosts: + - search-recherche-entreprises.fabrique.social.gouv.fr + secretName: recherche-entreprises-search-crt +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + name: recherche-entreprises-front + namespace: recherche-entreprises +spec: + replicas: 1 + selector: + matchLabels: + app: recherche-entreprises-front + template: + metadata: + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + spec: + containers: + - image: ghcr.io/socialgouv/recherche-entreprises/front:1.2.3 + livenessProbe: + failureThreshold: 6 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 5 + name: recherche-entreprises-front + ports: + - containerPort: 80 + name: http + readinessProbe: + failureThreshold: 15 + httpGet: + path: /healthz + port: http + initialDelaySeconds: 0 + periodSeconds: 5 + successThreshold: 1 + 
timeoutSeconds: 1 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 16Mi + startupProbe: + failureThreshold: 12 + httpGet: + path: /healthz + port: http + periodSeconds: 5 + env: + - name: REACT_APP_API_URL + value: >- + https://search-recherche-entreprises.fabrique.social.gouv.fr/api/v1 +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + name: recherche-entreprises-front + annotations: + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + namespace: recherche-entreprises +spec: + ports: + - name: http + port: 80 + targetPort: 80 + selector: + app: recherche-entreprises-front + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: nginx + cert-manager.io/cluster-issuer: letsencrypt-prod + kubernetes.io/tls-acme: 'true' + kapp.k14s.io/disable-default-ownership-label-rules: '' + kapp.k14s.io/disable-default-label-scoping-rules: '' + app.github.com/job: '5678' + app.github.com/ref: refs/tags/v1.2.3 + app.github.com/repo: socialgouv/recherche-entreprises + app.github.com/run: '1234' + app.github.com/sha: '0123456' + labels: + app: recherche-entreprises-front + application: recherche-entreprises + component: recherche-entreprises + owner: recherche-entreprises + team: recherche-entreprises + name: recherche-entreprises-front + namespace: recherche-entreprises +spec: + rules: + - host: recherche-entreprises.fabrique.social.gouv.fr + http: + paths: + - backend: + service: + name: recherche-entreprises-front + port: + name: http + path: / + pathType: Prefix 
+ tls: + - hosts: + - recherche-entreprises.fabrique.social.gouv.fr + secretName: recherche-entreprises-front-crt +--- apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: diff --git a/.k8s/__tests__/dev.ts b/.k8s/__tests__/dev.ts index a2457c24..ed94032b 100644 --- a/.k8s/__tests__/dev.ts +++ b/.k8s/__tests__/dev.ts @@ -1,7 +1,7 @@ // import { getEnvManifests } from "@socialgouv/kosko-charts/testing"; -import { project } from "@socialgouv/kosko-charts/testing/fake/gitlab-ci.env"; +import { project } from "@socialgouv/kosko-charts/testing/fake/github-actions.env"; jest.setTimeout(1000 * 60); test("kosko generate --dev", async () => { diff --git a/.k8s/__tests__/indexing-dev.ts b/.k8s/__tests__/indexing-dev.ts index f53628d0..3c0466bc 100644 --- a/.k8s/__tests__/indexing-dev.ts +++ b/.k8s/__tests__/indexing-dev.ts @@ -1,7 +1,7 @@ // import { getEnvManifests } from "@socialgouv/kosko-charts/testing"; -import { project } from "@socialgouv/kosko-charts/testing/fake/gitlab-ci.env"; +import { project } from "@socialgouv/kosko-charts/testing/fake/github-actions.env"; jest.setTimeout(1000 * 60); test("kosko generate --dev", async () => { diff --git a/.k8s/__tests__/indexing-preprod.ts b/.k8s/__tests__/indexing-preprod.ts index d2497231..e49008a6 100644 --- a/.k8s/__tests__/indexing-preprod.ts +++ b/.k8s/__tests__/indexing-preprod.ts @@ -1,7 +1,7 @@ // import { getEnvManifests } from "@socialgouv/kosko-charts/testing"; -import { project } from "@socialgouv/kosko-charts/testing/fake/gitlab-ci.env"; +import { project } from "@socialgouv/kosko-charts/testing/fake/github-actions.env"; jest.setTimeout(1000 * 60); test("kosko generate --dev", async () => { diff --git a/.k8s/__tests__/indexing-prod.ts b/.k8s/__tests__/indexing-prod.ts index 66e19f6e..6b11ee25 100644 --- a/.k8s/__tests__/indexing-prod.ts +++ b/.k8s/__tests__/indexing-prod.ts @@ -1,7 +1,7 @@ // import { getEnvManifests } from "@socialgouv/kosko-charts/testing"; -import { project } from 
"@socialgouv/kosko-charts/testing/fake/gitlab-ci.env"; +import { project } from "@socialgouv/kosko-charts/testing/fake/github-actions.env"; jest.setTimeout(1000 * 60); test("kosko generate --dev", async () => { diff --git a/.k8s/__tests__/preprod.ts b/.k8s/__tests__/preprod.ts index ea9aed49..b97eaa9b 100644 --- a/.k8s/__tests__/preprod.ts +++ b/.k8s/__tests__/preprod.ts @@ -1,7 +1,7 @@ // import { getEnvManifests } from "@socialgouv/kosko-charts/testing"; -import { project } from "@socialgouv/kosko-charts/testing/fake/gitlab-ci.env"; +import { project } from "@socialgouv/kosko-charts/testing/fake/github-actions.env"; jest.setTimeout(1000 * 60); test("kosko generate --preprod", async () => { diff --git a/.k8s/__tests__/prod.ts b/.k8s/__tests__/prod.ts index 52dfcd21..2e83a09f 100644 --- a/.k8s/__tests__/prod.ts +++ b/.k8s/__tests__/prod.ts @@ -1,14 +1,14 @@ // import { getEnvManifests } from "@socialgouv/kosko-charts/testing"; -import { project } from "@socialgouv/kosko-charts/testing/fake/gitlab-ci.env"; +import { project } from "@socialgouv/kosko-charts/testing/fake/github-actions.env"; jest.setTimeout(1000 * 60); test("kosko generate --prod", async () => { expect( await getEnvManifests("prod", "", { ...project("recherche-entreprises").prod, - RANCHER_PROJECT_ID: "c-lfcxv:p-d7g9q", + RANCHER_PROJECT_ID: "c-lfcxv:p-d7g9q", }) ).toMatchSnapshot(); }); diff --git a/.k8s/components/api-legacy.ts b/.k8s/components/api-legacy.ts new file mode 100644 index 00000000..854bce44 --- /dev/null +++ b/.k8s/components/api-legacy.ts @@ -0,0 +1,23 @@ +import env from "@kosko/env"; +import { create } from "@socialgouv/kosko-charts/components/app"; +import { addEnv, getDeployment } from "@socialgouv/kosko-charts/utils"; +import { getGithubRegistryImagePath } from "@socialgouv/kosko-charts/utils/getGithubRegistryImagePath"; +import { EnvVar } from "kubernetes-models/v1"; + +const getManifests = async () => { + const manifests = await create("recherche-entreprises-api", { + config: { 
+ containerPort: 3000, + subDomainPrefix: "api-", + }, + deployment: { + image: + "harbor.fabrique.social.gouv.fr/cdtn/recherche-entreprises-api:1.5.8", + }, + env, + }); + + return manifests; +}; + +export default getManifests; diff --git a/.k8s/components/api.ts b/.k8s/components/api.ts index 72fc8cbf..ed6bee4a 100644 --- a/.k8s/components/api.ts +++ b/.k8s/components/api.ts @@ -1,16 +1,34 @@ import env from "@kosko/env"; import { create } from "@socialgouv/kosko-charts/components/app"; -import { getHarborImagePath } from "@socialgouv/kosko-charts/utils/getHarborImagePath"; +import { addEnv, getDeployment } from "@socialgouv/kosko-charts/utils"; +import { getGithubRegistryImagePath } from "@socialgouv/kosko-charts/utils/getGithubRegistryImagePath"; +import { EnvVar } from "kubernetes-models/v1"; -const manifests = create("recherche-entreprises-api", { - config: { - containerPort: 3000, - subDomainPrefix: "api-" - }, - deployment: { - image: getHarborImagePath({ name: "recherche-entreprises-api" }) - }, - env, -}); +const getManifests = async () => { + const manifests = await create("recherche-entreprises-search", { + config: { + containerPort: 3000, + subDomainPrefix: "search-", + }, + deployment: { + image: getGithubRegistryImagePath({ + name: "search", + project: "recherche-entreprises", + }), + }, + env, + }); + const deployment = getDeployment(manifests); -export default manifests; + addEnv({ + deployment, + data: new EnvVar({ + name: "ELASTICSEARCH_INDEX_NAME", + value: "search-entreprises", + }), + }); + + return manifests; +}; + +export default getManifests; diff --git a/.k8s/components/front.ts b/.k8s/components/front.ts new file mode 100644 index 00000000..a16f2387 --- /dev/null +++ b/.k8s/components/front.ts @@ -0,0 +1,38 @@ +import env from "@kosko/env"; +import { create } from "@socialgouv/kosko-charts/components/app"; +import { getGithubRegistryImagePath } from "@socialgouv/kosko-charts/utils/getGithubRegistryImagePath"; +import { + getDeployment, + 
getIngressHost, + addEnvs, +} from "@socialgouv/kosko-charts/utils"; + +import api from "./api"; + +const getManifests = async () => { + const manifests = await create("recherche-entreprises-front", { + config: { + containerPort: 80, + }, + deployment: { + image: getGithubRegistryImagePath({ + name: "front", + project: "recherche-entreprises", + }), + }, + env, + }); + const deployment = getDeployment(manifests); + const apiManifests = await api(); + const apiUrl = getIngressHost(apiManifests); + addEnvs({ + deployment, + data: { + REACT_APP_API_URL: `https://${apiUrl}/api/v1`, + }, + }); + + return manifests; +}; + +export default getManifests; diff --git a/.k8s/components/jobs/indexing.ts b/.k8s/components/jobs/indexing.ts index 05d2c71c..cfcf9353 100644 --- a/.k8s/components/jobs/indexing.ts +++ b/.k8s/components/jobs/indexing.ts @@ -1,8 +1,8 @@ -import env, { Environment } from "@kosko/env"; +import env from "@kosko/env"; import { SealedSecret } from "@kubernetes-models/sealed-secrets/bitnami.com/v1alpha1/SealedSecret"; -import gitlab from "@socialgouv/kosko-charts/environments/gitlab"; +import github from "@socialgouv/kosko-charts/environments/github"; import { addInitContainer } from "@socialgouv/kosko-charts/utils/addInitContainer"; -import { getHarborImagePath } from "@socialgouv/kosko-charts/utils/getHarborImagePath"; +import { getGithubRegistryImagePath } from "@socialgouv/kosko-charts/utils/getGithubRegistryImagePath"; import { updateMetadata } from "@socialgouv/kosko-charts/utils/updateMetadata"; import fs from "fs"; import { IIoK8sApiCoreV1PodSpec } from "kubernetes-models/api/core/v1/PodSpec"; @@ -29,21 +29,21 @@ const manifests = []; const secrets = { dev: { ELASTICSEARCH_URL: - 
"AgDGWiSE5kUv0Mv4Dot8KfbSGm5XVlyi4V9yLiMI97deZctFWRd2T31MtTUz/XgT4VlFH4NUhK+dQonLEILE2P9rs4MMDaY09cc3NZiDLB9GJoBcWnWsa02QV4Xykk8UthTHQPFC4g+0ElFU4UewbIc3Zc2ZqIk5Y803TnYkRuAF1bM1ZdpWGnm+yh7+lXnbRMmFo62vhPVB8lnl9Z/RVxhs1jx6xnbIFyTjvelM4/sySiyHZ0h74fL61DAvOcNBgCTd49RRBBzcAhp6a8KL45FL1eAYyOqS8vW5uNeFu59Cu9ByRruhyTELjpPtLesjsBUHiqmfBh09esKw+mdtMmfYxIsy+JGPS50N8GiNj+n0A5XXtLQkm9oT884r4LEQip35YHyenh20NmueHX8RGMOXRT05PWnbL70NtZHqUwzFdc21vdFhXAb/MVaMgqcKG4RPkRS+qljn+pQjJ8DkY0EqgWOCfSyjB+jKJ65peSqqORq+B8zTDgpv9ZaN+xSDsfi+PckPPJqKOjaTkQW/VdfwpT6ipOUpNgznUOGy7XN9vxAQR9P79n6KlIyMsme4hN7OHX5ko0xWQZcCyfHbSbT7JHY5UpmTv+c0NtyrV755FQN75HDFRhQYHtgLnhR9PuBCZ8Gvtg2T/Nr63GsTMC5e52bj2zIRlOUTrsvxEIYKQoBPILoueEIcMz64/UiMAZ9RwH308wRHOrZtch5gclzUQWZEgI5Vx2cJQ/zr9iqBaedg2v8GqvESeKw1e7pgXoLE6pFKJtQa7cMwQgmrBkkxrEfePcRDP4yAMVQaiM8DoqIjVg==", + "AgCxK1MqqVkCOHvMjjcieUsj1uE/ATUzAfYTYVEMqcWFxTnugOaYlXB2mTuZtzdjXNvINREzDynnVRBghPIJvrJm59vPgexC5FyQptdPf2XwQAG6n0yMLWAEBM+O8ieO1D6vBu6BYuq4QYoItq+SMTE6APNX+p6wi4/LP04cR8p8bVKNxckPMfyVfQGhUSTD+2gC5ow2x1Mx67odqqP3E8eK9tm5tU6ltzOpKqJKITUQIqTQbSKFr8p77DE4IrdbPn8MpvdP+VrT+ztF4thPkpVpuYWy9rABdQKeEuNdN2AuvmPeJs/2B80iApIgtWWQjV/iG+WlLGMohvWDZVsBGuBuYGzhjJlr5w8OljwKDihb2cG1M/Ot/LM1xRuCC85wvqH0oMhnnP/3DZts5aYAN+qjAezOkPT9kjT+gu0WoQB+E5r9ITcYYHYbJw/YSxS2z86Ul0amRRW4ovAWT5c/nFDZU6i74I8YyVh8oOtrieZgNF4ajJcIr5YpjT2LICKoZR0KExrQ4V30zZJ+wZcvd8W3e/KCmKsFxloTkLq/sjEiFQoe6ffYTQhSUXZynqrdUKZsdoSleprfctlJtf3rzxt5hZIpcLulXrYk7pPPiHkTcV3Zc4zkN3BFE1A9vsgTvh6xTXlvUekJHHMXU3t8ZyZD8vvemxv0FE8G33XWQF2DEkmyc3gXMlfPDLho4fHv0h0iId0c9klFkOpmp8REFWJyuRQjq+hOHvfoCSu1ymsCwA0CMAguJoX1XEhn0aA9QIgf0VyBe/cVJwu/GfW74UBi6x5feqzFovYIUXbDP6kW", ELASTICSEARCH_API_KEY: - 
"AgDE1F8SFMKtRpPd9BKozxXC5YcwVae1SRc+ECX/9EUhLSaqxGXenqVbtE2tj6kovFcru8sSMgPQZIOGDel4QIpwGC9HozNQWGXkEf9AABjulJ82hwlyo/22XOYodqAs3FuZc0tiodEK6+GUw9KKVf6I2P3Y55AzwfZNuar319LG1H0OmkvJbaXjkQwuELjBeU0OG1hPSf3LSIX4F6hc9JlunyDgWFT+vqe7FN+0Q/BmRgbY9SJ4ZqD+TZxzLfKCmWAtGFWW2eFf5jJDmuXp5A4QK+jSd1vonJGjFn16fEgA32tOnBxaA+7TpcBqVq/oOqYvBckuXlDUrBIXkfKRK/FqjvFCxtkhpzrYRe6FFntA6MFmnYvzhsUEoIOSrn3O8Unlaae//am/sukctTkqTum4IF0316k5UwUjcEsd/1qTFM84l3mcPBeU656t1B0sAaTK5JEPSOkJvz1nk9Ei1RUXPJCVTp3i64hia5OeWgEaTWUsP1udhMhC1eGSt0tf5QtNnnDlpASebmI7oubuSiOLveXg/+RcHNXTztrbc3xSNAUIML8GTXNbA4TJXorvZKdecvVbmtsbH7u+4awna7J+SuAysKNdWdTeMsT0uMdFA0o4fez+PQVXc9+IF0DqU0T8EDeLfz62663aW1R7rQ2KEjK/2Vvy8W6EUjXIdY1DEPv1BmHpI9xRJJ8P8sTucaXBmohjPJb1lXzh+/9AmeIRhy/mDJTnCox/UOfyRrpx+FcU/Vi6TvJvV8ab7JZbufOVmo1iEPnP3Wx3rcQ=", + "AgANlBsYYlxFPxOSitnqdZ4LhP822km9ijYFc46pkpkSKoYGRHNm7gbpaPTRNWJ4u647IYI8Hl8E63GrtACnhntEIvJlHTE8ZwOw7kIA7NuPj/xma4nN3RTlazH8dmaGskd/wCD8mDZS2R3MhXxqSy6DVGbNJyRAmrgLnzCOgp5/aJkQAC3sZ9GOce0x+X7ngd+qKrOzf+CZS/hnf4/aQvEj6Caq141nl74jpuwpfjb3PWKl9aATJocZkyNw92ITRmfnGu5oE0VIf+BJe8tyBvLN7dcPQ5txBEfDh/XBX3eCwpAJMeyN6XHioJKX9BbN1rCG5lt8jAurB6WAxeAVtt5maytoX9aYAWZMiK7FQ3HubkrDHvDPBJyaeyzLWgcXeox077xiCti7BaKssVO2itM6BO9gMyvC2hwnw5LjtItj1mY25wGRmMz5B8/9vMYMZ6Xmjf3DaczAUwL3RcW3NYqjbUc03pz45uAavC2ImNXtIBIoaaXh0T0rX6ZcQaJZxpax29KydecKDoDnkkx3owpVZoZryebXgG/sW34ddaoHsLwgKwBdxVoNO+L04g46JdXoJclPA/7GVPDz0voMCHIZymrBfRPf/xdqtq5w0EdiIAf7d6GogOjaUU9ujKMJJKw//nu3RmYziVWcaAHzkgKJZahRe+k2Kb8o+YsGFPFYoR936GAnJRnLn8sEMppuP2tHrij6SN/jSRbJll0HgsA/BKzPDhwSu0tiDt81HDZdxXusre5MRNzyiESOoKGSq7hc7ABHCIHSWyhbVNU=", }, preprod: { ELASTICSEARCH_URL: - 
"AgB82d7LnDA/6RxRlks/7cRtmWuz6Ql1p4gCwdghu3X85Ek4FqSL6ui1tIBiuM1pZcaiwM6ZizsKA0ofq5iBafUwRQOzTFc3XAMy7XltrymG/QwBRmYKS4w4Ub1DPuYpVxEUC6Jngyex7OvhCKUK7pugjG6Q8FXO6i9iyVVEpKnAcSVLaUe+olmlOrO2RMjIK3mgKX/xOFT+2FYiN5/LJob+w/+p0hPlZaMsLrLOl/i5N4LuI5ckg+FawifD2MnN057fsLbwt0m63g7ZHvXtGT66tbTcQgpWfy5kLe2m7oIbzdk+oPoh4FS8PnyU6nMC8sOkC3v/GUMK91qCas01RBoyPRTTs11yX3gbYHti5Nc3zDt36YHPhrqfRQHQ8xONYkx5SkAylnDr1JoXyfrKDwZUBvLQ6Xh3gGI7qu909LxZ2ryWd9WRslpB1+8bOLN0tV20slAesGYFC/W6e5GT0AhwWqwJ9usGLf0dM7GE+IXJegIlconcM+2x/FW3RQ5XnK5kI/coiha5pxBkK0p8pbwmLnOwH5c2QoBD5xgLCZ3wleMcbTWdSynzm/LSYkWZzL15M3dy4m8c+qXc88LCbAHTZ2UaAH/XF7pcuMvOF33ZesjgaWLumFoUvhtaG1gZN97eU2/K1xLwo+x/vt06P6vUbU2Emj9cEziTaQqx1ZPUI4Hsp5ZNpbAGRsLMhCf7G7YvxZkpxh+7GAKvW1JmhF1DYgBapCen2bxQN/XK/pNniL89T0hiKSwh/Fo8yDOnqAGova3T1Nq6fLFcRYNxtKFVlsn5zsUgyVPCtyn+cUKIFw==", + "AgCxK1MqqVkCOHvMjjcieUsj1uE/ATUzAfYTYVEMqcWFxTnugOaYlXB2mTuZtzdjXNvINREzDynnVRBghPIJvrJm59vPgexC5FyQptdPf2XwQAG6n0yMLWAEBM+O8ieO1D6vBu6BYuq4QYoItq+SMTE6APNX+p6wi4/LP04cR8p8bVKNxckPMfyVfQGhUSTD+2gC5ow2x1Mx67odqqP3E8eK9tm5tU6ltzOpKqJKITUQIqTQbSKFr8p77DE4IrdbPn8MpvdP+VrT+ztF4thPkpVpuYWy9rABdQKeEuNdN2AuvmPeJs/2B80iApIgtWWQjV/iG+WlLGMohvWDZVsBGuBuYGzhjJlr5w8OljwKDihb2cG1M/Ot/LM1xRuCC85wvqH0oMhnnP/3DZts5aYAN+qjAezOkPT9kjT+gu0WoQB+E5r9ITcYYHYbJw/YSxS2z86Ul0amRRW4ovAWT5c/nFDZU6i74I8YyVh8oOtrieZgNF4ajJcIr5YpjT2LICKoZR0KExrQ4V30zZJ+wZcvd8W3e/KCmKsFxloTkLq/sjEiFQoe6ffYTQhSUXZynqrdUKZsdoSleprfctlJtf3rzxt5hZIpcLulXrYk7pPPiHkTcV3Zc4zkN3BFE1A9vsgTvh6xTXlvUekJHHMXU3t8ZyZD8vvemxv0FE8G33XWQF2DEkmyc3gXMlfPDLho4fHv0h0iId0c9klFkOpmp8REFWJyuRQjq+hOHvfoCSu1ymsCwA0CMAguJoX1XEhn0aA9QIgf0VyBe/cVJwu/GfW74UBi6x5feqzFovYIUXbDP6kW", ELASTICSEARCH_API_KEY: - 
"AgCWc+7OMggEJuNUXxL8CufezAMvJaT3svLkFi9KiKost/avFQNBoXDGixl7vERoCDVvDxnPlcn7m7MIjveP5oIfewhJzYQua9WmQrSMTab1soqUIjMHWj+2A8y0qSYg4s21w/X6bc+H0/O4+ax0QNVtm/MWc30vnCOF+uVibM2WcDkSY/FGE2bE7hcmQWeDbsmnRxDaZTY10ME2ycZpv9eAMoXshCQ98k4LFT9DM0D51az4BibLJjUtsW/vMn/FNKv9/teOGiCAFE/pfoyeZ4QN7ZsoFzCSHwj++Rd0hWScX8VCvbS5pgmfioz8SHmQFrxrN994CAPlr2Rw7mxfkzSETrDgzQVmzKD+j49PZL6cz77cCh9DGE70Pco91szWbsaUQ/lcWUTlFm39Z0Xmf6uV1eCSbyPcM3ogMx95Rjb3MnGa2zrt4OPLMMA2YfyqMtrrVTcq2aJ4fGvWAmVZHH3k2QMcPiYLGHQ8gnVsjI6LYGJEM27RVj99/ppZpoPWeims00fBHMclr3/4czfQCBKcr8GFouxfcKVYGJ4gRNdH0Qn5gZIPXOOiRKUVS0ik0zuT1Xu9u9kU6Df2RVOQawiIOy14+5RrMqt0li1PuCFZjRTqbsLGogOzJQfaXKOafNjxswUqXSn4gR3pVBmm0tPJx+9iFdSrkavvGBrQeSDrNzY5UjeZwIgz9hveUUe1dxARBK8iXu0Q8P6W+VnFActyQs0gnoc4Zzb3jJNfxgDWv1LF/MK22b5G2YcZ3bk5LSsGxP1KICJ0J1MMSL8=", + "AgANlBsYYlxFPxOSitnqdZ4LhP822km9ijYFc46pkpkSKoYGRHNm7gbpaPTRNWJ4u647IYI8Hl8E63GrtACnhntEIvJlHTE8ZwOw7kIA7NuPj/xma4nN3RTlazH8dmaGskd/wCD8mDZS2R3MhXxqSy6DVGbNJyRAmrgLnzCOgp5/aJkQAC3sZ9GOce0x+X7ngd+qKrOzf+CZS/hnf4/aQvEj6Caq141nl74jpuwpfjb3PWKl9aATJocZkyNw92ITRmfnGu5oE0VIf+BJe8tyBvLN7dcPQ5txBEfDh/XBX3eCwpAJMeyN6XHioJKX9BbN1rCG5lt8jAurB6WAxeAVtt5maytoX9aYAWZMiK7FQ3HubkrDHvDPBJyaeyzLWgcXeox077xiCti7BaKssVO2itM6BO9gMyvC2hwnw5LjtItj1mY25wGRmMz5B8/9vMYMZ6Xmjf3DaczAUwL3RcW3NYqjbUc03pz45uAavC2ImNXtIBIoaaXh0T0rX6ZcQaJZxpax29KydecKDoDnkkx3owpVZoZryebXgG/sW34ddaoHsLwgKwBdxVoNO+L04g46JdXoJclPA/7GVPDz0voMCHIZymrBfRPf/xdqtq5w0EdiIAf7d6GogOjaUU9ujKMJJKw//nu3RmYziVWcaAHzkgKJZahRe+k2Kb8o+YsGFPFYoR936GAnJRnLn8sEMppuP2tHrij6SN/jSRbJll0HgsA/BKzPDhwSu0tiDt81HDZdxXusre5MRNzyiESOoKGSq7hc7ABHCIHSWyhbVNU=", }, prod: { ELASTICSEARCH_URL: 
"AgA0evdMnmx3uImqSzGIHorhP2zdv0hFREYZt0FLKi9Eg389OmU1f8CKgpOJp5LPedSAghc8HEd//YeqYRvhQZhfclkw15FZJX7xxz3H75wYJMuNxnLPz8cEyruuA2NrqVoCNuf8p06aI1hfhjNlIKnPjewR14hK5tdKVmsRc1bfPOygUhslbp+aDCbyCFAVFamhaHYHYKdBKZ4B3V+pgLfQvJl2Xfnm3ChXO9Y7ptnI7IJjyzpResdQwasAppc6onvOrGAdms4wW0TZQOsrRV/3JwFHwbqwiX8DzdwkvLlK37HwV7XddiHOQsa3Z+ONI5y9Uhbw2+3ynMi0h8dVOxHmjeq4iPVBqSTWgRfqomz7MR+MSQfNwaE/D20qlt1fzrFvNezc07XxwYxogUS9QFVe8vgXbeoOFmgUA25pKUsOVgjs+ulA5uH7fMv9JhF5H93250FE+/VcHS1wtUAZw67w9GndnpEgs/kmqR9juD4qVEslrEb36OmsZi4hkgNKh8kK2GybTTaTw4I5xSuWu0mpQqZR3i5M8biT/B0YStW1EkajX02obUyQKSJk8VjLwXIFQ726kse5sMh8Ste+z1lKEIfq6jOKZZ7sUiSTwuLuGTmEqy5mdcmkyeCgp4paYOyzKHhYDNXaoZkKguZP9DuOY+44g4uch9Uj3hGvZb+4dV41FGIJ3PB0YZbbxgHF3UA6iHGkZcITwxhn4lJADU7yJyQcrziDXccKa1yiGe9BOOjGn2+WOLF0lrLsA6Uxk70sEiwZygYJQCJGN6KsHPz7Resz6aF/YTY3K+m65VeI4g==", ELASTICSEARCH_API_KEY: - "AgAw0rLyRSJJHgwBKDwtTfrBJtMTZ2jaQ07AZArXOsCazbaGOCfLtdtoB3tmlZw+fPYkLpCcWEDypjjt2qvoaMTQJp/LZcyXG69ccPLTj5wDzTSiYsIIewBNN6d0A71lUdbtAFgdyFPrxjYIDuj9SVtQqRjv6Fd/gmEnAsGh2szYRfeRIse8zqI/ICWuBpfidZ7lLDzJbTnCK4AYkan2zWqk4Xp5X8owyAaKc3gkG3CKDiR0Re+cAdfpQ2VJ7KX22vnn8jtu98i98vwGtG2iJngX7oLWXgYxR+ptkKdSt54lwzRCCfRgEI32Rl0JNHcY8QpYq+SVlxpMpNNLk5rp7NscR6AeU+YVQH0R+ZidEPC3Ys/ELIZ2wLd/++DKVRFQfgSoC1c0DkBy+u49Ye/5eV0m8p0FW+ch12ylVynnWvbIfhYsRQx2c9xsI7KN3ysUcktWjDech2qI4zpkTUyV+KhWqVpO2LjbJoD6TFOVgd/CoFGeNB7Yjds0ccerXk1z8lirdm2irSJ+B1szZBzGCVXGwqnPu/KNebaPdwk+Dy4MH3fFe9ktwHcSJuiwBsnx38FuFHI0qWDTk/StOzprYD16WoAihB+LCVrOvJEJTnAWf5lCs2QStRiD5gFAHMtdW/Z8Q29QgXI8s+19YZ3Wz6JmJYgUKayYR/7qZqV5eD5aVQR/BphmZiH2SJh4zJ5lFk3r6a02/OwlwqIVfCYFmm9thFHsbJr5A3698128+KRrB/WLTKV1AvmPWWcDoj7HbFi+tYobtwNQpCQWw28=", + 
"AgABUhcCHYMldKg9JPfijTDeiF5Fdo0h7NIHCv5/B2Jwnyb1LciNXJ4WmI+7jVweUhx2k0LZzIpyG7VRVMRzliDqcDH4CuNdjFmsZ/KKD1re88AVArYNblasISTx4JxeomlG903SVJgIHjdZDLHsiUYnB+BeM11UBUGlVhaCH0+mV+1alXK+xkxLbaFIh3853onWvbZTvPkc74qFCKfqJX5J9cxLrwX+qq48EwZZIspaR8LtlcZfSem1NSavHyLUxBXP8IEF2o4stt3haKbq+kO+2dPr8/o141KyNzabbbP82wXikuDk3yRqX4j0yAZhfT+X4Sk8kJKH947CV2bHmkr0ySy9+qTiQqwwHthEKWxmCVsQocX13tGC+v9hYzjor9Eu1OFeDa+pzJ21tC5gMRIVUgsTJ4+mDqiJqPgHAB/rPMSB+JaK2DZchVR2MK4mDYcjiNOrzUNG1sRiKrgikIiBIHKBgyoWlJVkqNWzUHBDl0hCp8uEkOWcmBymNvI37HqFPxfENvIeu6vXZ2K+R51bkbEYYfcvrrFmeCh+bdpsKALhmGbtVG0NeKIHR8uicixu/ewi95LVE8hwkYeAZerlBNSF4HRs2ZvNwtqnzQtiv5Dh1aVfQe8AaXojuS22JqgE+eiZhzw225ovZZ/OYv81odymXdBhovF+uurhF1jxczPc0DWdAKagw0mZYFfC7ipBILEitnp7vsrIuZZoKoZHjRhgLikhu74USV7RrutZpPQKqgm5+RWGu1v8HZzK4eDrH3JHi9TjKbcRG50=", }, }; @@ -80,12 +80,17 @@ const sealedSecret = getSealedSecret( {} ); +const envParams = github(process.env); + // base definition of the job const jobSpec: IIoK8sApiCoreV1PodSpec = { containers: [ { name: "update-index", - image: getHarborImagePath({ name: "recherche-entreprises-index" }), + image: getGithubRegistryImagePath({ + name: "index", + project: "recherche-entreprises", + }), volumeMounts: [ { name: "data", @@ -97,6 +102,10 @@ const jobSpec: IIoK8sApiCoreV1PodSpec = { name: "ASSEMBLY_FILE", value: "/data/assembly.csv", }, + { + name: "ELASTICSEARCH_INDEX_NAME", + value: "search-entreprises", + }, ], envFrom: [ { @@ -107,12 +116,12 @@ const jobSpec: IIoK8sApiCoreV1PodSpec = { ], resources: { limits: { - cpu: "2", - memory: "18Gi", + cpu: "4", + memory: "5Gi", }, requests: { - cpu: "1", - memory: "14Gi", + cpu: "2", + memory: "2Gi", }, }, }, @@ -126,36 +135,33 @@ const jobSpec: IIoK8sApiCoreV1PodSpec = { ], }; -// script for the initContainer of the index image -const initContainerScript = ` -apt-get update -y && apt-get install -y wget - -export DATA_DIR="/data" - -cd /data - -echo "running get-data.sh..." 
- -/mnt/scripts/get-data.sh - -pip3 install -r /mnt/scripts/requirements.txt - -echo "running assemble_data.py..." - -python3 /mnt/scripts/assemble_data.py $DATA_DIR/StockUniteLegale_utf8.zip $DATA_DIR/geo/ $DATA_DIR/WEEZ.csv $DATA_DIR/assembly.csv -`; - // initContainer definition, run above script and store data in a temp mount const initContainer = new Container({ - args: ["-c", initContainerScript], + args: ["/mnt/scripts/sqlite.sh"], command: ["sh"], - image: `python:3.9.4`, + image: `ubuntu:18.04`, imagePullPolicy: "Always", - name: `download-data`, + name: `download-build-data`, + env: [ + { + name: "DATA_DIR", + value: "/mnt/scripts/data", + }, + ], + resources: { + limits: { + cpu: "4", + memory: "2Gi", + }, + requests: { + cpu: "2", + memory: "1Gi", + }, + }, volumeMounts: [ { name: "data", - mountPath: "/data", + mountPath: "/mnt/scripts/data", }, { mountPath: "/mnt/scripts", @@ -166,9 +172,9 @@ const initContainer = new Container({ // add local files as volume to the initContainer const localFiles = [ - path.join(__dirname, "../../../assembly/scripts/get-data.sh"), - path.join(__dirname, "../../../assembly/src/assemble_data.py"), - path.join(__dirname, "../../../assembly/requirements.txt"), + path.join(__dirname, "../../../sqlite.sh"), + path.join(__dirname, "../../../import.sql"), + path.join(__dirname, "../../../export.sql"), ]; if (!jobSpec.volumes) { @@ -176,7 +182,7 @@ if (!jobSpec.volumes) { } jobSpec.volumes.push({ configMap: { - name: `config-map-files-${process.env.CI_COMMIT_SHORT_SHA}`, + name: `config-map-files-${process.env.GITHUB_SHA}`, defaultMode: 0o777, }, name: "local-files", @@ -192,14 +198,14 @@ const configMap = new ConfigMap({ {} ), metadata: { - name: `config-map-files-${process.env.CI_COMMIT_SHORT_SHA}`, + name: `config-map-files-${process.env.GITHUB_SHA}`, }, }); // create the final job const job = new Job({ metadata: { - name: `update-index-${process.env.CI_COMMIT_SHORT_SHA}`, + name: `update-index-${process.env.GITHUB_SHA}`, }, 
spec: { backoffLimit: 3, @@ -209,8 +215,6 @@ const job = new Job({ }, }); -const envParams = gitlab(process.env); - updateMetadata(configMap, { annotations: envParams.metadata.annotations ?? {}, labels: envParams.metadata.labels ?? {}, diff --git a/.k8s/environments/dev/recherche-entreprises-api.sealed-secret.yaml b/.k8s/environments/dev/recherche-entreprises-api.sealed-secret.yaml index 80687036..4ae3ddf4 100644 --- a/.k8s/environments/dev/recherche-entreprises-api.sealed-secret.yaml +++ b/.k8s/environments/dev/recherche-entreprises-api.sealed-secret.yaml @@ -3,16 +3,16 @@ kind: SealedSecret metadata: name: elastic-recherche-entreprises-read annotations: - sealedsecrets.bitnami.com/cluster-wide: 'true' + sealedsecrets.bitnami.com/cluster-wide: "true" spec: encryptedData: ELASTICSEARCH_URL: >- - AgDGWiSE5kUv0Mv4Dot8KfbSGm5XVlyi4V9yLiMI97deZctFWRd2T31MtTUz/XgT4VlFH4NUhK+dQonLEILE2P9rs4MMDaY09cc3NZiDLB9GJoBcWnWsa02QV4Xykk8UthTHQPFC4g+0ElFU4UewbIc3Zc2ZqIk5Y803TnYkRuAF1bM1ZdpWGnm+yh7+lXnbRMmFo62vhPVB8lnl9Z/RVxhs1jx6xnbIFyTjvelM4/sySiyHZ0h74fL61DAvOcNBgCTd49RRBBzcAhp6a8KL45FL1eAYyOqS8vW5uNeFu59Cu9ByRruhyTELjpPtLesjsBUHiqmfBh09esKw+mdtMmfYxIsy+JGPS50N8GiNj+n0A5XXtLQkm9oT884r4LEQip35YHyenh20NmueHX8RGMOXRT05PWnbL70NtZHqUwzFdc21vdFhXAb/MVaMgqcKG4RPkRS+qljn+pQjJ8DkY0EqgWOCfSyjB+jKJ65peSqqORq+B8zTDgpv9ZaN+xSDsfi+PckPPJqKOjaTkQW/VdfwpT6ipOUpNgznUOGy7XN9vxAQR9P79n6KlIyMsme4hN7OHX5ko0xWQZcCyfHbSbT7JHY5UpmTv+c0NtyrV755FQN75HDFRhQYHtgLnhR9PuBCZ8Gvtg2T/Nr63GsTMC5e52bj2zIRlOUTrsvxEIYKQoBPILoueEIcMz64/UiMAZ9RwH308wRHOrZtch5gclzUQWZEgI5Vx2cJQ/zr9iqBaedg2v8GqvESeKw1e7pgXoLE6pFKJtQa7cMwQgmrBkkxrEfePcRDP4yAMVQaiM8DoqIjVg== + 
AgCxK1MqqVkCOHvMjjcieUsj1uE/ATUzAfYTYVEMqcWFxTnugOaYlXB2mTuZtzdjXNvINREzDynnVRBghPIJvrJm59vPgexC5FyQptdPf2XwQAG6n0yMLWAEBM+O8ieO1D6vBu6BYuq4QYoItq+SMTE6APNX+p6wi4/LP04cR8p8bVKNxckPMfyVfQGhUSTD+2gC5ow2x1Mx67odqqP3E8eK9tm5tU6ltzOpKqJKITUQIqTQbSKFr8p77DE4IrdbPn8MpvdP+VrT+ztF4thPkpVpuYWy9rABdQKeEuNdN2AuvmPeJs/2B80iApIgtWWQjV/iG+WlLGMohvWDZVsBGuBuYGzhjJlr5w8OljwKDihb2cG1M/Ot/LM1xRuCC85wvqH0oMhnnP/3DZts5aYAN+qjAezOkPT9kjT+gu0WoQB+E5r9ITcYYHYbJw/YSxS2z86Ul0amRRW4ovAWT5c/nFDZU6i74I8YyVh8oOtrieZgNF4ajJcIr5YpjT2LICKoZR0KExrQ4V30zZJ+wZcvd8W3e/KCmKsFxloTkLq/sjEiFQoe6ffYTQhSUXZynqrdUKZsdoSleprfctlJtf3rzxt5hZIpcLulXrYk7pPPiHkTcV3Zc4zkN3BFE1A9vsgTvh6xTXlvUekJHHMXU3t8ZyZD8vvemxv0FE8G33XWQF2DEkmyc3gXMlfPDLho4fHv0h0iId0c9klFkOpmp8REFWJyuRQjq+hOHvfoCSu1ymsCwA0CMAguJoX1XEhn0aA9QIgf0VyBe/cVJwu/GfW74UBi6x5feqzFovYIUXbDP6kW ELASTICSEARCH_API_KEY: >- - AgCXfkWdcNqxUUWf9iPMYZXx2a1hU/NTseXJ3EHDYU2+9i4EB5dKGSALKprr1OmAnt04YJ0K5ALDWfHz+U8lN5wmlyZzfn1lbhccFByyCBM+k0WvKCieuMDCgRh8bkV+bSN7kYVq7ARzL8EzzUyvr7kzfoPwB3kwUSR+9yywSsr/OnC1cpS2+vAH+iJxa/NuLzPboxuOuMiKC2EvnO1GzoB82Sv36eXwrbfDDXilAXPm/zPUee12xXlf63b1ieDaOSZd75w7i+DrkALccdmMsCR11Rms93/cU1B8xsGC2L6R3n2AEaHI8vK9FsMzfHUdIbAKceoRKJW3CTNShc8imjeInj6B7t/KYTOc41oSZ2OVls6R6wCeaGtrxvQKfUR/Pbof+mTw7rBL1RMSdLtux2ZkdtHbY8k3kBqJq5jZ/8u6WLdtSuOcbiwB16dA+xpBp3LjTEtVR+8+KM+kW1xUuKIe+GV6RbfyDf3X0W5eSSIEUUgVblD0YXjbTFBT/oJBPlKvFmrPFCxwqsYDSv64+8/TeJrZWzX2XHi61Elye0iLrCVnH3PjiHrzm2URaRuu+VoaFi3TpYWGsgow29+IX4m2+z1NqOk/BMYUDCL9Gb6RzJwkNx21JFtN8eTgZF6w/FS/rBoK4/Ny75HE6L3D8L49upB7VEv328pk+PjHI68+AmzYTwTgUAA7HjvD/Y/mA6rrO2LiLljQTvqwZAIC8W1g0Eu2Ir/9sIatVp2GtxbQRvIekSD8WagNCY7ntzKHcQbnZ/ivEG74uoWiiGc= + 
AgCtqcSVac6ptXOzSGqrBps1Uz6elTOnheaAf8cqAb1G960NCqmmkx1L1FDhfSKCuhBUAp0mT/3rokX3cg/pLIqImIVlPEhq/n+ZaKPHqGt0UT9QpD3r1C4nYan/WJ1PnBlCTXYlA/dtmCakNjtf2ikvWg/6Keu1OGlHt/ymVc2guxqGl++caxPiEIQAiVHDZksc09RM2Z6jU/WnI5gzTPQjgucb2yiHBLWZsEcoCIsA2nXrp2/vQ/ATMtITKb1223jyHnJiargNnQrRVRA7Geuz5FyaqSXjqvjG1HrqVxAlVNzFPb/N3fN8C6KHCjtOuu/pOM+cqC4cFvzyzqGymcDOKGC5Ax47j5v0KsVlJZ93dfO2yD/Fi2txeuNdmQ42bvhpGbPJu/iYLXmaETvu6/qpL3UrU4i2KVyxXW0CbUzBVh1fTmCClAMDgETpWNvnRXgAuu0jbgTnWjN0zSmjia0/BJmY1Z5uct7T4Lli4Dz/vJucBthIBVgYaVX/jLqfaNWpD1TuWkTHsf4gR5HErzQp60NDej/I7TespFMqRMhrroc6VaH/Wl6wV4msRM3sYaxcHPd4kJxrOeA7cQnfqQsWs4/RG++p+TCmD18qgevVBV1z11vFu2R6KE+c1YRANE5e6EBMO8yUly1yarrx6jOo6E5kAdKqPGkNUyq3BgOvGWmkbV32slQ4hXm7zUuyVnXBmpmoryowHhqV7iW8Si6CSMKACCENVqJPrk55VaL8sw8dZfQF0aekJoEqf/UqAv/DZUTYOAg4QI+4iD4= template: metadata: annotations: - sealedsecrets.bitnami.com/cluster-wide: 'true' + sealedsecrets.bitnami.com/cluster-wide: "true" name: elastic-recherche-entreprises-read - type: Opaque \ No newline at end of file + type: Opaque diff --git a/.k8s/environments/prod/recherche-entreprises-api.sealed-secret.yaml b/.k8s/environments/prod/recherche-entreprises-api.sealed-secret.yaml index 3ca5cd7c..87e09af1 100644 --- a/.k8s/environments/prod/recherche-entreprises-api.sealed-secret.yaml +++ b/.k8s/environments/prod/recherche-entreprises-api.sealed-secret.yaml @@ -5,8 +5,8 @@ metadata: spec: encryptedData: ELASTICSEARCH_URL: 
AgAjiLY8OLRszj3zw7ngLaY3auR33D6sJSDaLQJWekd3Y4aHg5duw8wbdkMSu3WUARDGbFqyfoRQsG57xAliVJVLz07vFhxe7TXOnVMkpWH9pYdVqbl0blAnqyxTI7Uly9GvYDF7OcM23IvGA0svXHyge+nIUqO/zgvEyiYL/jiIfjXwKjXg0mJnH6/WszPBf9oaZYpwl0HOYHUK5LNIWyE5/nSSzCEMPamlACCtHdyRf0EywK3hur8V7FvC8gKDnByxn7IhVboOTdCsIQhrSV1ZePbvV5wex0H58JQdJWPMJxq/rRQKsNj/9Xmy8c31j3D2nCNQs6aSDi6wN9d2057rsiwIwqCnA8gPPh6bT26T2nnFFnbNFE/BMajucxxrx3+6ctIwmbC/ZpLrabzxcvoanc8o8zTS8s7FZL3/q00mWjAVy/OgIDdLKI8B2LQp1S+FGFdTmtzTNyscxtkjiJoEQeo1Mf8tWL2+h1nfeUipRq2xiVOa8KxM8ZkDPLFIAInA9w64/Ddk3FWnm5ky1MzRioZcjKqWfT6XxxNG/+4wvDCEMyDTLZyVY56sglGSSmizxXdBUeSR/n08F2k8Vw9s+qjTch5O1g/Agv87CVl+fROV/a4ayrP/gyTmLBT0OIsh6ZZtgIx1UJexF7fF8obFUcCAo8rvcj0ZlyP/ZNfmFu6CPUqd374DZyvZpwtnbX9fzpC2I1ZYNvV9vCVs5+BFULhbldsQPo5VMiEK1T+hunQr08yl3c/gtGYBpt5c6nn5xsopBWkwjjttVnsVVRZEDEwiDhlKbaJ/hedY3mZJ2g== - ELASTICSEARCH_API_KEY: AgBeveBsjm4jtYUPCGG29nyKrmf6Qo/N+XE2fr6ROfLaEeLHY4Ak7LxtL11Gk7+nPMB+/MyJbNGWFNd7trVwTb/NeYyLD77bvsq8Dr421+KiM8MO0JPt+RyieaMCV/ZhRzQJPzPznO8mP1yQTcKgBIoSVMTNZ27EfwLzSu99i6WW4M3EgbY1WcV0Ofyepyp/MsW/yuGWQJmvPuygcw0D4hfHSXBrOs4e1nXMsNIuqs00aV7mHFAzYZ+ZBq+htxRWvgPYTXH4m1L6dxb89Q85JdJS13akPDawXcTYkac72F4QxYTBZttZkDAg5mhbexZyr+yDpwKNTBBaLzaHNjw1CTIdGeAROVNH5m01g0zUNjhabOPibvWq3+JidmxabbW3TiGJxmOMiyPkTHFFeOjZHsgLGiXoZzoRKgMrigctBiWiTsO5hZ6RAISqtM8+u+w/48zZUQA/aHQDrjpuzle1y9aJKikIkKepHTlepvbeh7w48Vv7RfZzJ701tnVuuMNi9VswEprHG32P6P0cT03g+oL1n4vpybpx++Sb8TkctQQnAUBCeT1dSuYqAWvzSszVbIkA3YJZXqTDGwTIa6YnEqzXIkrZ1muX0FiQn/Nl28uBYcTv9ysmdt7GIixtJotrDDUawOsVwnEBtqiBy9tfziyQUBKoSbZF3lQ1YN8yYqNnMsGqD833N/nBnZwFgmZqbe3Eb44j08e+i/w0tLsDHxIdEdTGmsZR2QTGyMlhxcEyWGpRU0Ft6JlfhIlYv02vNCv7ncI5AqbnghOi5p0= + ELASTICSEARCH_API_KEY: 
AgCqtv233/TmTQR4P6bx+wGb0uKdVlKLI7gd+gRXuMBbyyCNmm565ulZFvZJOw+f0hS0KQ6lP84R3LLQTbm40KO71Al+UW0g/4L2eSqAvx3jmQEuLMPehlG7rwitoUDsAiuRPbclb0bfro9uhtWFXgLOTNU48mTT64Xq5mFsYKdNkwXXVgP7+wNg0UMsa2TlyDLMb7JW91MN5vOwsKtr7OfoPIKa5LwHvQIkFDcI4aWBuxsS2FGTBJ81xpgCAfBOnDVYz+ymon8FyQt6Y3SgSF0wjVZC7t8qkC0nPTRj6eCBc9loOt/NLUpSBqFhEYqO0FAl++sS0CZBgvsLnS8gAiiv3UwdLOz94QL0zqFdZBh5TZgdZUQLuW6aWkueAW+0ccKop3VyBJn8fZ1xAKfIszgldpfUBxHuDJ8keEr9aJM8nYiC/lXNMb7dKDJkRoyn/X4mLv6VTwnaqvj/NwVfkK9mnZAoxQg2kEmiDWYvqNL7yRForYIZKDjU7dGUPBWPqeqvpKJ8QLsDnve1Amvhe6wr8K7FYpCLcyrAC0huPaU4/d3IFd7P/neGqocGEh1pvQDp1BWGytkmZAqu2nRp4y6zeVyPzO/G5BbVwOhvqG2BUIdFM68ekL2xFe8yeHIktqJeadgifOtYNF7xnDmsi/v57E6MAFoFiqkVG+4vtUCxQTYZNbDS5YDAo3DBaiIsdOgO0cIM/TCCxjcDPh3pQGs6fsFdENcXzgj8zZQnE3SjbojejTXfVEI6dSoHaIY4WBg0MXEwsivDJ8Ctjrc= template: metadata: name: elastic-recherche-entreprises-read - type: Opaque \ No newline at end of file + type: Opaque diff --git a/README.md b/README.md index a46c4c50..34894f59 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Exemple : [/api/v1/search?q=plume&a=paris](https://api-recherche-entreprises.fab ## Étapes : -![](https://mermaid.ink/svg/eyJjb2RlIjoiZ3JhcGggTFJcblxuU3RvY2tVbml0ZUxlZ2FsZS5jc3YtLT5QeUFzc2VtYmx5wqBcbmdlb19zaXJldC5jc3YtLT5QeUFzc2VtYmx5wqBcbnNpcmV0MmlkY2MuY3N2LS0-UHlBc3NlbWJsecKgXG5QeUFzc2VtYmx5LS0-YXNzZW1ibHkuY3N2LS0-aW5kZXgtLT5FbGFzdGljU2VhcmNoLS0-QVBJW0FQSSBIVFRQMV1cbkVsYXN0aWNTZWFyY2gtLT5BUEkyW0FQSSBIVFRQMl1cbkVsYXN0aWNTZWFyY2gtLT5DbGllbnRbQ2xpZW50IEVTXSIsIm1lcm1haWQiOnt9LCJ1cGRhdGVFZGl0b3IiOmZhbHNlfQ) 
+[![](https://mermaid.ink/img/eyJjb2RlIjoiZ3JhcGggTFJcblxuU3RvY2tVbml0ZUxlZ2FsZS5jc3YtLT5TUUxpdGVcbmdlb19zaXJldC5jc3YtLT5TUUxpdGVcbnNpcmV0MmlkY2MuY3N2LS0-U1FMaXRlXG5TUUxpdGUtLT5hc3NlbWJseS5jc3ZcbmFzc2VtYmx5LmNzdi0tPmluZGV4LS0-RWxhc3RpY1NlYXJjaC0tPkFQSVtBUEkgSFRUUDFdXG5FbGFzdGljU2VhcmNoLS0-QVBJMltBUEkgSFRUUDJdXG5FbGFzdGljU2VhcmNoLS0-Q2xpZW50W0NsaWVudCBFU10iLCJtZXJtYWlkIjp7fSwidXBkYXRlRWRpdG9yIjpmYWxzZSwiYXV0b1N5bmMiOnRydWUsInVwZGF0ZURpYWdyYW0iOmZhbHNlfQ)](https://mermaid-js.github.io/mermaid-live-editor/edit#eyJjb2RlIjoiZ3JhcGggTFJcblxuU3RvY2tVbml0ZUxlZ2FsZS5jc3YtLT5TUUxpdGVcbmdlb19zaXJldC5jc3YtLT5TUUxpdGVcbnNpcmV0MmlkY2MuY3N2LS0-U1FMaXRlXG5TUUxpdGUtLT5hc3NlbWJseS5jc3ZcbmFzc2VtYmx5LmNzdi0tPmluZGV4LS0-RWxhc3RpY1NlYXJjaC0tPkFQSVtBUEkgSFRUUDFdXG5FbGFzdGljU2VhcmNoLS0-QVBJMltBUEkgSFRUUDJdXG5FbGFzdGljU2VhcmNoLS0-Q2xpZW50W0NsaWVudCBFU10iLCJtZXJtYWlkIjoie30iLCJ1cGRhdGVFZGl0b3IiOmZhbHNlLCJhdXRvU3luYyI6dHJ1ZSwidXBkYXRlRGlhZ3JhbSI6ZmFsc2V9) ## Données : @@ -28,19 +28,11 @@ Exemple : [/api/v1/search?q=plume&a=paris](https://api-recherche-entreprises.fab ## Assemblage -Le CSV est généré en deux étapes dans le dossier `assembly/` : +Le script `sqlite.sh` permet de permet de télécharger les CSV, les importer dans SQLite pour les aggréger et les re-exporter en CSV. -- Téléchargement des datasets (8GB) +Le fichier `./data/assembly.csv` fait +6Go avec plus de 30 millions de lignes. - `DATA_DIR=./data/ scripts/get-data.sh` - -- Assemblage des fichiers avec Python (numpy & pandas) - - `pip install -r requirements.txt` - - `DATA_DIR=./data/ OUTPUT_DIR=./output scripts/assemble.sh` - -Au final, le fichier `./output/assembly.csv` fait environ 600Mo +Cette opération dure environ 30 minutes. ## Indexation Elastic Search @@ -48,14 +40,16 @@ Le dossier `index/` contient les scripts qui injectent le fichier `assembly.csv` La mise à jour exploite la fonctionnalité [alias](https://www.elastic.co/guide/en/elasticsearch/reference/6.8/indices-aliases.html) d'ElasticSearch pour éviter les downtimes. 
-Le script `scripts/create-es-keys.sh` permet de créer des token pour lire/écrire sur ces index. - Pour lancer une indexation : ```sh -yarn install - -ELASTICSEARCH_URL=https://elastic_url:9200 ELASTICSEARCH_API_KEY=key_with_writing_rights ASSEMBLY_FILE=./output/assembly.csv yarn start +yarn +ELASTICSEARCH_URL=https://elastic_url:9200 ELASTICSEARCH_API_KEY=key_with_writing_rights ASSEMBLY_FILE=./data/assembly.csv yarn start ``` -The default `ELASTICSEARCH_INDEX_NAME` is `recherche-entreprises` +Le script `scripts/create-es-keys.sh` permet de créer des token pour lire/écrire sur ces index. + +## Projets relatifs + +- Annuaire-entreprises : https://annuaire-entreprises.data.gouv.fr +- API Entreprise : https://entreprise.api.gouv.fr/catalogue/ diff --git a/api/Dockerfile b/api/Dockerfile index 35bd42d7..b7b31c0d 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -1,21 +1,24 @@ -FROM node:15-alpine as builder +FROM node:16-alpine as builder WORKDIR /app RUN chown node:node /app COPY package.json . COPY yarn.lock . -RUN yarn +RUN yarn --frozen-lockfile COPY . . RUN yarn build - +RUN yarn --frozen-lockfile --production # --- -FROM node:15-alpine +FROM node:16-alpine WORKDIR /app ENV NODE_ENV=production COPY --from=builder /app/dist ./dist -COPY package.json . 
+COPY --from=builder /app/node_modules ./node_modules + +WORKDIR /app/dist + USER node -ENTRYPOINT ["yarn", "start"] \ No newline at end of file +ENTRYPOINT ["node", "./src/index.js"] \ No newline at end of file diff --git a/api/README.md b/api/README.md index 67f568db..30fa7d4f 100644 --- a/api/README.md +++ b/api/README.md @@ -2,42 +2,14 @@ Exemple d'API qui exploite l'index ElasticSearch [recherche-entreprises](https://github.com/socialgouv/recherche-entreprises) -```sh -yarn install -yarn build -ELASTICSEARCH_URL=http://localhost:9200 ELASTICSEARCH_API_KEY=key yarn start -``` - -## **Generic search** - ---- - -Returns json data about companies matching search parameters - -- **URL** - - /api/v1/search - -- **Method:** - - `GET` - -- **Query Params** - - **Required:** +## API Documentation - `q=[string]` query used to perform the search on company names +Documentation [swagger / OpenAPI](./openapi.yaml) : https://api.recherche-entreprises.fabrique.social.gouv.fr - **Optional:** - - `a=[string]` city or postal code - - `l=[integer]` result limit - -- **Sample Call:** +- **Exemple:** ```sh - curl --request GET --url 'http://localhost:3000/api/v1/search?q=michelin&a=clermont&l=3' + curl --request GET --url 'http://localhost:3000/api/v1/search?query=michelin&address=clermont&limit=3&open=false' ``` - **Success Response:** @@ -214,3 +186,11 @@ Returns etablissement details for a given **siret** "siret": "85520050700710" } ``` + +## Dev + +```sh +yarn install +yarn build +ELASTICSEARCH_URL=http://localhost:9200 ELASTICSEARCH_API_KEY=key yarn start +``` diff --git a/api/openapi.yaml b/api/openapi.yaml new file mode 100644 index 00000000..03a72236 --- /dev/null +++ b/api/openapi.yaml @@ -0,0 +1,87 @@ +swagger: "2.0" +info: + description: "Document de l'API recherche-entreprises" + version: "1.0.0" + title: "API recherche-entreprises" + contact: + email: "codedutravailnumerique@travail.gouv.fr" + license: + name: "Apache-2.0" + url: 
"http://www.apache.org/licenses/LICENSE-2.0.html" +host: "api.recherche-entreprises.fabrique.social.gouv.fr" +basePath: "/api/v1" +schemes: + - "https" + - "http" +paths: + /search: + get: + summary: "Recherche entreprise" + description: "Recherche d'entreprise par nom d'établissement, raison sociale" + operationId: "search" + tags: + - "Recherche" + produces: + - "application/json" + parameters: + - in: "query" + name: "query" + description: "Texte de la recherche" + required: true + type: string + example: "Michelin" + value: "Michelin" + - in: "query" + name: "address" + description: "Localisation de l'entreprise" + required: false + example: "Lyon" + value: "Lyon" + type: string + - in: "query" + name: "limit" + description: "Nombre de résultats max" + required: false + example: 100 + value: 100 + default: 100 + type: integer + - in: "query" + name: "open" + description: "Retourne uniquement les établissements ouverts" + required: false + example: false + value: false + default: true + type: boolean + - in: "query" + name: "employer" + description: "Retourne uniquement les établissements avec des employés déclarés" + required: false + example: false + value: false + default: false + type: boolean + - in: "query" + name: "convention" + description: "Retourne uniquement les établissements avec une convention collective déclarée" + required: false + example: false + value: false + default: false + type: boolean + - in: "query" + name: "ranked" + description: "Si 'true', ordonne les résultats par taille d'établissement, basée sur la tranche effectif de l'unité légale. Si 'false', ordonné par SIRET décroissant." 
+ required: false + example: true + value: true + default: true + type: boolean + responses: + "500": + description: "Unexpected error occurred" + +externalDocs: + description: "Find out more about Swagger" + url: "http://swagger.io" diff --git a/api/package.json b/api/package.json index 9058a6ea..6ebe3104 100644 --- a/api/package.json +++ b/api/package.json @@ -3,11 +3,10 @@ "version": "1.0.0", "description": "Sample API for recherche-entreprises", "private": true, - "main": "index.js", "scripts": { "dev": "nodemon --watch 'src/**' --ext 'ts' --ignore '__tests__' --exec 'ts-node src/index.ts'", - "build": "ncc build src/index.ts -o dist", - "start": "node dist/index.js", + "build": "tsc --outDir dist && cp openapi.yaml ./dist && cp -a ./swagger-ui ./dist", + "start": "node dist/src/index.js", "test": "jest", "lint": "eslint src/**/*.ts" }, @@ -18,24 +17,27 @@ "@koa/cors": "^3.1.0", "@socialgouv/kali-data": "^2.132.0", "koa": "^2.13.3", - "koa-router": "^10.1.1" + "koa-router": "^10.1.1", + "koa-static": "^5.0.0", + "yamljs": "^0.3.0" }, "devDependencies": { "@socialgouv/eslint-config-recommended": "^1.89.0", - "@types/jest": "^26.0.24", + "@types/jest": "^27.0.3", "@types/koa": "^2.13.4", "@types/koa-router": "^7.4.4", + "@types/koa-static": "^4.0.2", "@types/koa__cors": "^3.0.3", "@types/lodash.pick": "^4.4.6", - "@types/node": "^10.17.60", + "@types/node": "^16.11.10", "@types/supertest": "^2.0.11", - "@typescript-eslint/eslint-plugin": "^4.31.2", - "@typescript-eslint/parser": "^4.31.2", - "@vercel/ncc": "^0.28.6", - "eslint": "^7.32.0", + "@types/yamljs": "^0.2.31", + "@typescript-eslint/eslint-plugin": "^5.4.0", + "@typescript-eslint/parser": "^5.4.0", + "eslint": "^8.3.0", "eslint-import-resolver-typescript": "^2.5.0", "eslint-plugin-import": "^2.24.2", - "eslint-plugin-prettier": "^3.4.1", + "eslint-plugin-prettier": "^4.0.0", "jest": "^27", "nodemon": "^2.0.13", "prettier": "^2.4.1", diff --git a/api/src/__tests__/__snapshots__/api.test.ts.snap 
b/api/src/__tests__/__snapshots__/api.test.ts.snap index 59a4cc22..ca5cdc66 100644 --- a/api/src/__tests__/__snapshots__/api.test.ts.snap +++ b/api/src/__tests__/__snapshots__/api.test.ts.snap @@ -2,7 +2,7 @@ exports[`Test entreprise search correct siren 1`] = ` Object { - "activitePrincipale": "Édition de livres", + "caractereEmployeurUniteLegale": "O", "conventions": Array [ Object { "etat": "VIGUEUR_ETEN", @@ -24,15 +24,12 @@ Object { "title": "Convention collective nationale des bureaux d'études techniques, des cabinets d'ingénieurs-conseils et des sociétés de conseils du 15 décembre 1987. ", "url": "https://www.legifrance.gouv.fr/affichIDCC.do?idConvention=KALICONT000005635173", }, - Object { - "idcc": 9999, - "shortTitle": "", - }, ], - "etablissements": 35, + "etablissements": 133, + "etatAdministratifUniteLegale": "A", "highlightLabel": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", "label": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", - "matching": 35, + "matching": 133, "simpleLabel": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", "siren": "855200507", } @@ -41,19 +38,28 @@ Object { exports[`Test etablissement search correct siret 1`] = ` Object { "activitePrincipale": "Traitement de données, hébergement et activités connexes", - "address": "107 Rue Servient 69003 Lyon", - "convention": Object { - "etat": "VIGUEUR_ETEN", - "id": "KALICONT000005635597", - "idcc": 45, - "mtime": 1562873918, - "shortTitle": "Caoutchouc", - "texte_de_base": "KALITEXT000005673838", - "title": "Convention collective nationale du caoutchouc du 6 mars 1953. 
Étendue par arrêté du 29 mai 1969 JORF 18 juin 1969 ", - "url": "https://www.legifrance.gouv.fr/affichIDCC.do?idConvention=KALICONT000005635597", - }, - "etablissements": 35, + "address": "107 RUE SERVIENT 69003 LYON", + "caractereEmployeurUniteLegale": "O", + "categorieEntreprise": "GE", + "conventions": Array [ + Object { + "etat": "VIGUEUR_ETEN", + "id": "KALICONT000005635597", + "idcc": 45, + "mtime": 1562873918, + "shortTitle": "Caoutchouc", + "texte_de_base": "KALITEXT000005673838", + "title": "Convention collective nationale du caoutchouc du 6 mars 1953. Étendue par arrêté du 29 mai 1969 JORF 18 juin 1969 ", + "url": "https://www.legifrance.gouv.fr/affichIDCC.do?idConvention=KALICONT000005635597", + }, + ], + "etablissements": 133, + "etatAdministratifEtablissement": "A", + "etatAdministratifUniteLegale": "A", "highlightLabel": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", + "idccs": Array [ + "0045", + ], "label": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", "matching": 1, "simpleLabel": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", @@ -64,7 +70,146 @@ Object { exports[`Test search generic search 1`] = ` Object { - "activitePrincipale": "Édition de livres", + "activitePrincipale": "Fabrication et rechapage de pneumatiques", + "allMatchingEtablissements": Array [ + Object { + "address": "7 AVENUE DU PRÉSIDENT RENÉ COTY 10600 LA CHAPELLE-SAINT-LUC", + "idccs": Array [ + "0045", + ], + "siret": "85520050701171", + }, + Object { + "address": "PLACE ALEXANDRE DIEU 18230 SAINT-DOULCHARD", + "idccs": Array [ + "0045", + "9999", + ], + "siret": "85520050701106", + }, + Object { + "address": "ZAC DE L’ARNAHURT 33650 LA BRÈDE", + "idccs": Array [ + "0045", + ], + "siret": "85520050702823", + }, + Object { + "address": "RUE DE CHARLIEU 42300 ROANNE", + "idccs": Array [ + "0045", + ], + "siret": "85520050701445", + }, + Object { + "address": "ZI BLAVOZY-ST GERM LAPRADE 43700 BLAVOZY", + "idccs": Array [ + "0045", + "9999", + ], + "siret": 
"85520050701866", + }, + Object { + "address": "AVENUE EDOUARD MICHELIN 56000 VANNES", + "idccs": Array [ + "0045", + ], + "siret": "85520050700637", + }, + Object { + "address": "ZA PLATEAU D’HERIN 59220 ROUVIGNIES", + "idccs": Array [ + "0045", + ], + "siret": "85520050703086", + }, + Object { + "address": "107 RUE SERVIENT 69003 LYON", + "idccs": Array [ + "0045", + ], + "siret": "85520050703169", + }, + Object { + "address": "21 RUE MARCELLIN BERTHELOT 86000 POITIERS", + "idccs": Array [ + "0045", + ], + "siret": "85520050700132", + }, + Object { + "address": "ROUTE D’ARLES 13300 SALON-DE-PROVENCE", + "idccs": Array [ + "0045", + ], + "siret": "85520050703094", + }, + Object { + "address": "1 RUE GUTENBERG 37300 JOUÉ-LÈS-TOURS", + "idccs": Array [ + "0045", + "9999", + ], + "siret": "85520050700850", + }, + Object { + "address": "16 RUE DE TOUTLEMONDE 49300 CHOLET", + "idccs": Array [ + "0045", + "9999", + ], + "siret": "85520050700710", + }, + Object { + "address": "PLACE DES CARMES DECHAUX 63000 CLERMONT-FERRAND", + "idccs": Array [ + "0045", + "1486", + "9999", + ], + "siret": "85520050700017", + }, + Object { + "address": "ZI DE FELET 63300 THIERS", + "idccs": Array [ + "0045", + ], + "siret": "85520050702278", + }, + Object { + "address": "USINE MICHELIN DE BLANZY LA FIOLLE (ZONE INDUSTRIELLE) BLANZY", + "idccs": Array [ + "0045", + "9999", + ], + "siret": "85520050700306", + }, + Object { + "address": "ROUTE DE NANTES 85000 LA ROCHE-SUR-YON", + "idccs": Array [ + "0045", + "9999", + ], + "siret": "85520050701379", + }, + Object { + "address": "RUE DU XAY 88190 GOLBEY", + "idccs": Array [ + "0045", + ], + "siret": "85520050700108", + }, + Object { + "address": "27 COURS DE L’ILE SEGUIN 92100 BOULOGNE-BILLANCOURT", + "idccs": Array [ + "0045", + "1486", + "9999", + ], + "siret": "85520050703144", + }, + ], + "caractereEmployeurUniteLegale": "O", "conventions": Array [ Object { "etat": "VIGUEUR_ETEN", @@ -87,10 +232,11 @@ Object { "url": 
"https://www.legifrance.gouv.fr/affichIDCC.do?idConvention=KALICONT000005635173", }, ], - "etablissements": 35, + "etablissements": 133, + "etatAdministratifUniteLegale": "A", "highlightLabel": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", "label": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", - "matching": 20, + "matching": 18, "simpleLabel": "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN", "siren": "855200507", } diff --git a/api/src/__tests__/api.test.ts b/api/src/__tests__/api.test.ts index 4c41b56e..632bbbbb 100644 --- a/api/src/__tests__/api.test.ts +++ b/api/src/__tests__/api.test.ts @@ -12,17 +12,38 @@ app.use(router.routes()); const apptest = supertest(http.createServer(app.callback())); -const searchCall = ( - query: string, - address: string | undefined, - limit: number | undefined -) => { - const addressQP = address ? `&a=${address}` : ""; - const limitQP = limit ? `&l=${limit}` : ""; - - return apptest.get( - `${API_PREFIX}/search?onlyWithConvention=true&q=${query}${addressQP}${limitQP}` - ); +const searchCall = ({ + query, + address, + limit, + open, + employer, + convention, + ranked, +}: { + query: string; + address?: string; + limit?: number; + open?: string; + employer?: boolean; + convention?: boolean; + ranked?: string; +}) => { + const addressQP = address ? `&address=${address}` : ""; + const limitQP = limit ? `&limit=${limit}` : ""; + + const openQP = open ? `&open=${open}` : ""; + const employerQP = employer ? `&employer=${employer}` : ""; + + const rankedQP = ranked ? 
`&ranked=${ranked}` : ""; + + const url = `${API_PREFIX}/search?convention=${ + convention?.toString() || true + }&query=${query}${addressQP}${limitQP}${openQP}${employerQP}${rankedQP}`; + + // console.log(url); + + return apptest.get(url); }; const michelinSiren = "855200507"; @@ -30,62 +51,69 @@ const michelinSiret = `${michelinSiren}03169`; describe("Test search", () => { test("generic search", async () => { - const { status, body } = await searchCall("michelin", undefined, undefined); + const { status, body } = await searchCall({ query: "michelin" }); + expect(status).toBe(200); expect(body.entreprises).toBeDefined(); expect(body.entreprises.length).toEqual(20); expect(body.entreprises[0].siren).toEqual(michelinSiren); // eslint-disable-next-line no-unused-vars - const { matchingEtablissement, ...partialBody } = body.entreprises[0]; + const { firstMatchingEtablissement, ...partialBody } = body.entreprises[0]; expect(partialBody).toMatchSnapshot(); }); test("with limit", async () => { const limit = 50; - const { body } = await searchCall("michelin", undefined, limit); + const { body } = await searchCall({ limit, query: "michelin" }); expect(body.entreprises.length).toEqual(limit); }); test("search with postal code and city", async () => { - const { body: b1 } = await searchCall("michelin", undefined, undefined); - expect(b1.entreprises[0].matchingEtablissement.address).not.toBe( - `"23 Place des Carmes Dechaux 63000 Clermont-Ferrand"` + const { body: b1 } = await searchCall({ query: "michelin" }); + expect(b1.entreprises[0].firstMatchingEtablissement.address).not.toBe( + `"23 PLACE DES CARMES DECHAUX 63000 CLERMONT-FERRAND"` ); - const { body: b2 } = await searchCall("michelin", "63 000", undefined); + const { body: b2 } = await searchCall({ + address: "63 000", + query: "michelin", + }); expect( - b2.entreprises[0].matchingEtablissement.address + b2.entreprises[0].firstMatchingEtablissement.address ).toMatchInlineSnapshot( - `"23 Place des Carmes Dechaux 63000 
Clermont-Ferrand"` + `"PLACE DES CARMES DECHAUX 63000 CLERMONT-FERRAND"` ); - const { body: b3 } = await searchCall("michelin", "clermont", undefined); + const { body: b3 } = await searchCall({ + address: "clermont", + query: "michelin", + }); expect( - b3.entreprises[0].matchingEtablissement.address + b3.entreprises[0].firstMatchingEtablissement.address ).toMatchInlineSnapshot( - `"23 Place des Carmes Dechaux 63000 Clermont-Ferrand"` + `"PLACE DES CARMES DECHAUX 63000 CLERMONT-FERRAND"` ); - const { body: b4 } = await searchCall("michelin", "63", undefined); + const { body: b4 } = await searchCall({ address: "63", query: "michelin" }); expect( - b4.entreprises[0].matchingEtablissement.address + b4.entreprises[0].firstMatchingEtablissement.address ).toMatchInlineSnapshot( - `"23 Place des Carmes Dechaux 63000 Clermont-Ferrand"` + `"PLACE DES CARMES DECHAUX 63000 CLERMONT-FERRAND"` ); - }); + }, 15000); test("search with diatrics", async () => { - const { body: b1 } = await searchCall("michelin", undefined, undefined); - const { body: b2 } = await searchCall("michélin", undefined, undefined); - const { body: b3 } = await searchCall("Mîchèlin", undefined, undefined); + const { body: b1 } = await searchCall({ query: "michelin" }); + const { body: b2 } = await searchCall({ query: "michélin" }); + const { body: b3 } = await searchCall({ query: "Mîchèlin" }); expect(b1).toStrictEqual(b2); expect(b1).toStrictEqual(b3); }); test("search with siret", async () => { - const { body } = await searchCall(michelinSiret, undefined, undefined); - expect(body.entreprises[0].matchingEtablissement.siret).toEqual( + const { body } = await searchCall({ query: michelinSiret }); + expect(body.entreprises[0].firstMatchingEtablissement.siret).toEqual( michelinSiret ); }); @@ -116,6 +144,14 @@ describe("Test etablissement search", () => { ); expect(status).toEqual(400); }); + + test("multiple conventions for one etablissement", async () => { + const manyCCsSiret = "00572078400106"; + const { 
body } = await apptest.get( + `${API_PREFIX}/etablissement/${manyCCsSiret}` + ); + expect(body.conventions.length).toEqual(2); + }); }); describe("Test entreprise search", () => { @@ -123,8 +159,9 @@ describe("Test entreprise search", () => { const { body, status } = await apptest.get( `${API_PREFIX}/entreprise/${michelinSiren}` ); - // We delete matchingEtablissement since it comes from collapse wich is non deterministic - delete body.matchingEtablissement; + // We delete matching etablissement since it comes from collapse which is non deterministic + delete body.firstMatchingEtablissement; + delete body.allMatchingEtablissements; expect(status).toEqual(200); expect(body.siren).toEqual(michelinSiren); expect(body).toMatchSnapshot(); @@ -142,3 +179,68 @@ describe("Test entreprise search", () => { expect(status).toEqual(400); }); }); + +describe("Test api params", () => { + test("not only open", async () => { + const { body: b1 } = await searchCall({ + open: "false", + query: "michelin", + limit: 1, + convention: false, + }); + expect( + b1.entreprises[0].firstMatchingEtablissement + .etatAdministratifEtablissement + ).toEqual("F"); + }); + + test("not only employer", async () => { + const getNotEmployer = (resp: any) => + resp.entreprises.filter( + (e: any) => e.caractereEmployeurUniteLegale == "N" + ); + + const { body: notOnlyEmployer } = await searchCall({ + employer: false, + query: "michelin", + }); + expect(getNotEmployer(notOnlyEmployer).length).toBeGreaterThan(0); + + const { body: onlyEmployer } = await searchCall({ + employer: true, + query: "michelin", + }); + expect(getNotEmployer(onlyEmployer).length).toBe(0); + }); + + test("not with convention", async () => { + const { body: withConvention } = await searchCall({ + query: "truc", + limit: 1, + }); + const { body: noConvention } = await searchCall({ + convention: false, + query: "truc", + limit: 1, + }); + expect(noConvention.entreprises[0].conventions).toEqual([]); + 
expect(withConvention.entreprises[0].conventions.length).toBeGreaterThan(0); + }); + + test("unranked search", async () => { + const { + body: { entreprises: ranked }, + } = await searchCall({ query: "michelin" }); + + const { + body: { entreprises: unranked }, + } = await searchCall({ query: "michelin", ranked: "false" }); + + expect(unranked).not.toStrictEqual(ranked); + expect(ranked[0].label).toEqual( + "MANUFACTURE FRANCAISE DES PNEUMATIQUES MICHELIN" + ); + expect(unranked[0].label).not.toEqual(ranked[0].label); + expect(unranked[2].label).toEqual("BOULANGERIE MICHELIN"); + }); +}); diff --git a/api/src/elastic/index.ts b/api/src/elastic/index.ts index 6bc9e9f2..45d8768a 100644 --- a/api/src/elastic/index.ts +++ b/api/src/elastic/index.ts @@ -6,8 +6,8 @@ const API_KEY = process.env.ELASTICSEARCH_API_KEY; const auth = API_KEY ? { apiKey: API_KEY } : undefined; -export const ELASTICSEARCH_INDEX = - process.env.ELASTICSEARCH_INDEX || "recherche-entreprises"; +export const ELASTICSEARCH_INDEX_NAME = + process.env.ELASTICSEARCH_INDEX_NAME || "recherche-entreprises"; const esClientConfig = { auth, diff --git a/api/src/elastic/queries.ts b/api/src/elastic/queries.ts index 5c96eb06..48369369 100644 --- a/api/src/elastic/queries.ts +++ b/api/src/elastic/queries.ts @@ -7,12 +7,12 @@ const defaultLimit = 20; const conventionsSet = Object.fromEntries( kaliConventions.map((c) => { - const { num, etat, id, mtime, texte_de_base, url, title } = c; - return [num, { etat, id, mtime, texte_de_base, title, url }]; + const { num, etat, id, mtime, texte_de_base, url, title, shortTitle } = c; + return [num, { etat, id, mtime, texte_de_base, title, url, shortTitle }]; }) ); -// we remove deduplicate tokens to compose company's label +// we remove duplicated tokens to compose company's label const formatLabel = (naming: string[]) => { const labelTokens = naming .join(" ") @@ -46,11 +46,16 @@ export const mapHit = ({ denominationUsuelle1UniteLegale, denominationUsuelle2UniteLegale, 
denominationUsuelle3UniteLegale, + etatAdministratifEtablissement, + categorieEntreprise, + etatAdministratifUniteLegale, + caractereEmployeurUniteLegale, activitePrincipale, etablissements, siret, - address, + geo_adresse, naming, + idccs, }, inner_hits, highlight, @@ -68,23 +73,40 @@ export const mapHit = ({ ( acc: any, { - fields: { convention, idcc }, - }: { fields: { convention: string[]; idcc: string } } + fields: { convention, idccs }, + }: { fields: { convention: string[]; idccs: string[] } } ) => { - const kaliData = idcc ? conventionsSet[idcc] : undefined; - const o = { - idcc: parseInt(idcc), - shortTitle: convention ? convention[0] : "", - ...kaliData, - }; - if (!acc.has(o.idcc)) { - acc.set(o.idcc, o); - } + idccs?.forEach((idcc) => { + const idccNum = parseInt(idcc); + // ignore idcc 0 and 9999 : unkown ccs + if (idccNum && idccNum > 0 && idccNum < 9999) { + const kaliData = conventionsSet[idccNum]; + const o = { + idcc: idccNum, + // shortTitle: convention ? convention[0] : "", + ...kaliData, + }; + if (!acc.has(o.idcc)) { + acc.set(o.idcc, o); + } + } + }); + return acc; }, new Map() ); + const allMatchingEtablissements = inner_hits.matchingEtablissements.hits.hits + .filter((h: any) => h.fields) + .map( + ({ fields: { "geo_adresse.keyword": address, siret, idccs } }: any) => ({ + address: address[0], + siret: siret[0], + idccs, + }) + ); + // take first by priority const simpleLabel = [ denominationUniteLegale, @@ -97,49 +119,57 @@ export const mapHit = ({ return { activitePrincipale, + caractereEmployeurUniteLegale, conventions: Array.from(conventions.values()), etablissements: parseInt(etablissements), + etatAdministratifUniteLegale, highlightLabel, label, matching, - matchingEtablissement: { - address, + firstMatchingEtablissement: { + address: geo_adresse, + idccs, + categorieEntreprise, siret, + etatAdministratifEtablissement, }, + allMatchingEtablissements, simpleLabel, siren, }; }; +// rank by "effectif" const rank_feature = { boost: 10, 
field: "trancheEffectifsUniteLegale" }; const collapse = (withAllConventions: boolean) => ({ field: "siren", inner_hits: { _source: false, - docvalue_fields: ["idcc", "convention"], + docvalue_fields: ["siret", "geo_adresse.keyword", "idccs"], name: "matchingEtablissements", size: withAllConventions ? 10000 : 1, }, }); -const addressFilter = (address: string | undefined) => - address - ? [ - { - prefix: { - cp: address ? address.replace(/\D/g, "") : "", - }, +const addressFilter = (address: string) => { + // check if address filter is code postal or commune + const cp = parseInt(address); + + return cp + ? { + prefix: { + codePostalEtablissement: cp.toString(), }, - { - match: { - ville: { - query: address, - }, + } + : { + match: { + libelleCommuneEtablissement: { + query: address, }, }, - ] - : [{ match_all: {} }]; + }; +}; export type SearchArgs = { query: string; @@ -147,16 +177,65 @@ export type SearchArgs = { // return convention of every etablissements associated to the main company addAllConventions?: boolean; // only search for etablissements with convention attached - onlyWithConvention?: boolean; + convention: boolean; limit?: number | undefined; + // etablissement still open + open: boolean; + // etablissement employeur + employer: boolean; + // rank by effectif ? 
+ ranked: boolean; +}; + +const onlyConventionFilter = { term: { withIdcc: true } }; + +const openFilters = [ + { term: { etatAdministratifUniteLegale: "A" } }, + { term: { etatAdministratifEtablissement: "A" } }, +]; + +const employerFilter = { + term: { + caractereEmployeurUniteLegale: "O", + }, +}; + +const makeFilters = ( + convention: boolean, + open: boolean, + employer: boolean, + address: string | undefined +) => { + const filters = []; + + if (convention) { + filters.push(onlyConventionFilter); + } + + if (open) { + filters.push(...openFilters); + } + + if (employer) { + filters.push(employerFilter); + } + + if (address) { + filters.push(addressFilter(address)); + } + + return filters; }; export const entrepriseSearchBody = ({ query, address, addAllConventions = true, - onlyWithConvention = true, + convention, limit = defaultLimit, + open, + employer, + ranked, }: SearchArgs) => ({ collapse: collapse(addAllConventions), highlight: { @@ -166,18 +245,8 @@ export const entrepriseSearchBody = ({ }, query: { bool: { - filter: onlyWithConvention - ? [ - { term: { withIdcc: onlyWithConvention } }, - { range: { "idcc.number": { lt: 5001 } } }, - ] - : undefined, + filter: makeFilters(convention, open, employer, address), must: [ - { - bool: { - should: addressFilter(address), - }, - }, { bool: { should: [ @@ -190,7 +259,7 @@ export const entrepriseSearchBody = ({ }, ], should: [ - { rank_feature }, + ranked ? 
{ rank_feature } : undefined, // rank by siret with minimum boosting in order to ensure results appear in the same order // useful to always have the same first etablissement when no address passed { rank_feature: { boost: 0.1, field: "siretRank" } }, diff --git a/api/src/index.ts b/api/src/index.ts index f071ae4a..b3b177a1 100644 --- a/api/src/index.ts +++ b/api/src/index.ts @@ -1,15 +1,17 @@ import Koa from "koa"; import cors from "@koa/cors"; - +import serve from "koa-static"; import { router } from "./routes"; export const app = new Koa(); app.use(cors()); -const port = process.env.API_PORT || 3000; +const port = process.env.PORT || 3000; +app.use(serve("./swagger-ui")); app.use(router.routes()); + app.listen(port); console.log(`API started on port ${port}`); diff --git a/api/src/lib/index.ts b/api/src/lib/index.ts index c22d4272..8da78a92 100644 --- a/api/src/lib/index.ts +++ b/api/src/lib/index.ts @@ -1,4 +1,4 @@ -import { ELASTICSEARCH_INDEX, elasticsearchClient } from "../elastic"; +import { ELASTICSEARCH_INDEX_NAME, elasticsearchClient } from "../elastic"; import type { SearchArgs } from "../elastic/queries"; import { entrepriseSearchBody, mapHit } from "../elastic/queries"; @@ -6,27 +6,34 @@ export const search = async ({ query, address, addAllConventions, - onlyWithConvention, + convention, limit, + open, + employer, + ranked, }: SearchArgs) => { const body = entrepriseSearchBody({ addAllConventions, address, + convention, + employer, limit, - onlyWithConvention, + open, query, + ranked, }); const response = await elasticsearchClient.search({ body, - index: ELASTICSEARCH_INDEX, + // explain: true, + index: ELASTICSEARCH_INDEX_NAME, }); - // console.log(JSON.stringify(body, null, 2)); + // console.log(JSON.stringify(body, null, 2)); const entreprises = response.body.hits.hits.map(mapHit); - // console.log(JSON.stringify(entreprises, null, 2)); + // console.log(JSON.stringify(response, null, 2)); return entreprises; }; @@ -34,14 +41,17 @@ export const 
search = async ({ export const searchEntreprise = async (siren: string) => { const body = entrepriseSearchBody({ addAllConventions: true, + convention: false, + employer: false, limit: 1, - onlyWithConvention: false, + open: false, query: siren, + ranked: true, }); const response = await elasticsearchClient.search({ body, - index: ELASTICSEARCH_INDEX, + index: ELASTICSEARCH_INDEX_NAME, }); const matches = response.body.hits.hits.map(mapHit); @@ -56,30 +66,29 @@ export const searchEntreprise = async (siren: string) => { export const searchEtablissement = async (siret: string) => { const body = entrepriseSearchBody({ addAllConventions: false, + convention: false, + employer: false, limit: 1, - onlyWithConvention: false, + open: false, query: siret, + ranked: true, }); const response = await elasticsearchClient.search({ body, - index: ELASTICSEARCH_INDEX, + index: ELASTICSEARCH_INDEX_NAME, }); const matches = response.body.hits.hits.map(mapHit); if (matches && matches.length >= 1) { - const etablissement = matches[0]; + const raw = matches[0]; // update some fields related to etablissement - const convention = etablissement.conventions[0] - ? 
etablissement.conventions[0] - : undefined; - delete etablissement.conventions; - etablissement["convention"] = convention; - etablissement.siret = etablissement.matchingEtablissement.siret; - etablissement.address = etablissement.matchingEtablissement.address; - delete etablissement.matchingEtablissement; + const firstMatching = raw.firstMatchingEtablissement; + const etablissement = { ...raw, ...firstMatching }; + delete etablissement.firstMatchingEtablissement; + delete etablissement.allMatchingEtablissements; return etablissement; } else { diff --git a/api/src/routes/index.ts b/api/src/routes/index.ts index 9c7b1b38..38926d9e 100644 --- a/api/src/routes/index.ts +++ b/api/src/routes/index.ts @@ -1,4 +1,5 @@ import Router from "koa-router"; +import yamljs from "yamljs"; import pkg from "../../package.json"; import { search, searchEntreprise, searchEtablissement } from "../lib"; @@ -7,24 +8,31 @@ export const router = new Router(); export const API_PREFIX = "/api/v1"; +const parseBoolean = (param: string, defaultz = true) => + param === undefined ? 
defaultz : param.toLowerCase() !== "false"; + router.get(`${API_PREFIX}/search`, async (ctx) => { - const { q: query, a: address, l: limit, onlyWithConvention } = ctx.query; + const { query, address, limit, convention, open, employer, ranked } = + ctx.query; if (!query) { - ctx.throw(400, `query parameter q is required`); + ctx.throw(400, `query parameter query is required`); } try { const entreprises = await search({ addAllConventions: true, address: address as string, + convention: parseBoolean(convention as string, false), + employer: parseBoolean(employer as string, false), limit: parseInt(limit as string), - onlyWithConvention: !!onlyWithConvention, + open: parseBoolean(open as string, true), query: query as string, + ranked: parseBoolean(ranked as string, true), }); ctx.body = { entreprises }; } catch (err) { - console.log(JSON.stringify(err)); + console.log(err); ctx.throw(500); } }); @@ -52,7 +60,7 @@ router.get(`${API_PREFIX}/etablissement/:siret`, async (ctx) => { if ((err as any).status) { throw err; } else { - console.log(JSON.stringify(err)); + console.log(err); ctx.throw(500); } } @@ -81,7 +89,7 @@ router.get(`${API_PREFIX}/entreprise/:siren`, async (ctx) => { if ((err as any).status) { throw err; } else { - console.log(JSON.stringify(err)); + console.log(err); ctx.throw(500); } } @@ -92,10 +100,16 @@ router.get(`/healthz`, (ctx) => { ctx.body = { hello: "world" }; }); -router.get(`/`, (ctx) => { +router.get(`/version`, (ctx) => { ctx.body = { about: "https://github.com/SocialGouv/recherche-entreprises", success: true, version: pkg.version, }; }); + +const spec = yamljs.load("./openapi.yaml"); + +router.get("/swagger.json", (ctx) => { + ctx.body = spec; +}); diff --git a/api/swagger-ui/favicon-16x16.png b/api/swagger-ui/favicon-16x16.png new file mode 100644 index 00000000..8b194e61 Binary files /dev/null and b/api/swagger-ui/favicon-16x16.png differ diff --git a/api/swagger-ui/favicon-32x32.png b/api/swagger-ui/favicon-32x32.png new file mode 100644 
index 00000000..249737fe Binary files /dev/null and b/api/swagger-ui/favicon-32x32.png differ diff --git a/api/swagger-ui/index.html b/api/swagger-ui/index.html new file mode 100644 index 00000000..d9aa5c7f --- /dev/null +++ b/api/swagger-ui/index.html @@ -0,0 +1,61 @@ + + + +
+ +