Skip to content

Commit

Permalink
Supplement and organize the dependency requirements.txt files and REA…
Browse files Browse the repository at this point in the history
…DME.md files for each kernel; Unified the startup method of all kernels. (#1589)
  • Loading branch information
birdflyi authored Jul 11, 2024
1 parent 54478a9 commit 867253f
Show file tree
Hide file tree
Showing 14 changed files with 149 additions and 60 deletions.
32 changes: 16 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
FROM continuumio/miniconda3

LABEL maintainer="Yike Cheng<cyk_cd@163.com>"

RUN mkdir python_kernel \
&& mkdir python_kernel/notebook \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ easydict==1.9 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ py2neo==2021.2.3 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ plotly==5.9.0 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ clickhouse-driver==0.2.3 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ numpy==1.23.2 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ jupyterlab==3.4.5 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ matplotlib==3.5.3 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ pandas==1.4.3 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ pyyaml==6.0
# Base image is selected per kernel, e.g.:
#   registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0  (pycjs kernel)
#   continuumio/miniconda3                                                    (python kernels)
ARG BASE_IMAGE

FROM ${BASE_IMAGE}

USER root

# Jupyter working directory. WORKDIR would create it implicitly, but the
# explicit mkdir keeps the intent obvious.
RUN mkdir -p /python_kernel/notebook

WORKDIR /python_kernel/notebook

# Kernel Relative Path, e.g. './pycjs'.
# NOTE: Dockerfile has no inline comments — a trailing `# ...` on the ARG line
# itself would be parsed as part of the instruction, so the comment lives on
# its own line.
ARG KER_REL_PATH

# Copy only the dependency manifest first so this layer (and the pip layer
# below) stays cached while notebook/source files change.
COPY ${KER_REL_PATH}/requirements.txt ${KER_REL_PATH}/requirements.txt

RUN pip install --no-cache-dir -r ${KER_REL_PATH}/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/

# Documentation only; publish with `docker run -p 8888:8888 ...`.
EXPOSE 8888

# Exec form so jupyter runs as PID 1 and receives SIGTERM from `docker stop`.
# The notebook dir is spelled out explicitly: `${WORKDIR}` is NOT a Docker
# variable (WORKDIR is an instruction) and would expand to an empty string.
CMD ["jupyter", "lab", "--notebook-dir=/python_kernel/notebook", "--ip=0.0.0.0", "--port=8888", "--allow-root", "--no-browser"]
8 changes: 4 additions & 4 deletions notebook/node_vm2_pycaller.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@
"\n",
"def processTechFieldRepoOpenrank(options, title):\n",
" data = openDigger.index.openrank.getRepoOpenrank(\n",
" dict(**baseOptions, \n",
" dict(baseOptions, \n",
" **dict(options)\n",
" )\n",
" )\n",
Expand Down Expand Up @@ -880,7 +880,7 @@
" }\n",
"def processTechFieldRepoActivity(options, title):\n",
" data = openDigger.index.activity.getRepoActivity(\n",
" dict(**baseOptions, \n",
" dict(baseOptions, \n",
" **dict(options)\n",
" )\n",
" )\n",
Expand Down Expand Up @@ -1121,10 +1121,10 @@
" \"type\": 'scatter'\n",
" }\n",
"def processRegionsCompanyRepoActivity(options, title):\n",
" data = openDigger.index.activity.getRepoActivity({\n",
" data = openDigger.index.activity.getRepoActivity(dict({\n",
" \"startYear\": startYear, \"endYear\": endYear, \"startMonth\": startMonth, \"endMonth\": endMonth,\n",
" \"groupBy\": 'Company', \"groupTimeRange\": 'year', \"order\": 'DESC',\n",
" })\n",
" }, **options))\n",
" \n",
" data_records = pd.DataFrame(data).to_dict('records')\n",
" data_periodranks = openDigger.getRank(data_records, lambda x: x[\"name\"], lambda x: x[\"activity\"])\n",
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"build": "tsc",
"notebook": "npm run build && docker pull registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0 && docker run -it --rm -p 8888:8888 -v $(pwd):/home/node/notebook registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0",
"notebook:win": "npm run build && docker pull registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0 && docker run -it --rm -p 8888:8888 -v %cd%:/home/node/notebook registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0",
"notebook-pycjs": "npm run build && docker build --build-arg KER_REL_PATH=./pycjs --build-arg BASE_IMAGE=registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0 -t opendigger-jupyter-python:1.0 . && docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v .:/python_kernel/notebook opendigger-jupyter-python:1.0",
"pull-label-file-test": "tsc && node lib/ci/pull_label_file_test.js",
"cron": "npm run build && node --max-old-space-size=25600 lib/cron/index.js",
"test": "CLICKHOUSE_HOST=http://ci.open-digger.cn:8123 CLICKHOUSE_USERNAME=default CLICKHOUSE_PASSWORD= mocha"
Expand Down
59 changes: 59 additions & 0 deletions pycjs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Getting Started

## If you want to do some data analysis work:
Start your ClickHouse container, which should be set up in [Clickhouse-sample-data](../sample_data/README.md)

1. Clone OpenDigger `git clone https://github.com/X-lab2017/open-digger.git`

2. Enter the repo path `cd open-digger`

Install the necessary packages with `npm install`.

3. Go to the `src` folder (the pycjs kernel does not implement any low-level details itself) in the open-digger root directory, and create a file named `local_config.py` (this file has already been added to the `.gitignore` file) for the Python kernel with the following contents:

```python
local_config = {
'db': {
'clickhouse': {
'host':'172.17.0.1',
'user':'default'
},
'neo4j':{
'port': '7687',
}
}
}
```
The `host` above is the host of the ClickHouse server. We can find it using `docker inspect container_name` (where `container_name` is the name given via `docker run --name ...`), and copy the `Gateway` like this:

```shell
$ docker inspect container_name | grep Gateway
"Gateway": "172.17.0.1",
"IPv6Gateway": "",
"Gateway": "172.17.0.1",
"IPv6Gateway": "",
```
If you use your own data, you can also change `host` field to your own host IP

Return to the repo path `cd open-digger`.

Build the TypeScript with `npm run build`. Since `npm run build` is required to apply every settings change, the pycjs kernel provides `npm run notebook-pycjs`, which executes the *npm run build, docker build and docker run* commands automatically, instead of requiring you to run them manually step by step as below.

4. Use `docker build --build-arg KER_REL_PATH='./pycjs' --build-arg BASE_IMAGE='registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0' -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.

> If you are using **Windows CMD**, all the `$(pwd)` here should be replaced by `%cd%`. And if you are using **Windows Powershell**, all the `$(pwd)` here should be replaced by `${pwd}`.
>
> **Notice:** Directory paths (e.g. the output of `pwd`) may use `\` as the path separator in some versions of Windows. We recommend using absolute paths.
5. Then we can use `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to create and run the container.

6. Open the link in console log like `http://127.0.0.1:8888/lab?token=xxxxx`.

7. If the source code under the `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to reload the source code.

8. You can find the notebook folder, where we provide demos in the handbook. You can create a new file, and happy data exploring!
Attention: you need to do this work in the `notebook` folder or another parallel folder. If you run it in the root directory, it will not work because of Python import rules.

## If you are a developer:

You can also make `workspace.py` in `python` folder. and run it.
6 changes: 4 additions & 2 deletions pycjs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
clickhouse-driver>=0.2.8
ipynbname==2023.2.0.0
ipython==8.0.1
ipython-genutils==0.2.0
jupyterlab>=3.2.8
matplotlib>=3.5.3
node-vm2==0.4.7
numpy>=1.21.5
pandas>=1.4.4
numpy>=1.23.2
pandas>=1.4.3
tabulate==0.9.0
8 changes: 4 additions & 4 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
}
}
```
the `host` above is the host of the ClickHouse server. We can find it using `docker inspect containert_name`, and copy the `Gateway` like this:
The `host` above is the host of the ClickHouse server. We can find it using `docker inspect container_name` (where `container_name` is the name given via `docker run --name ...`), and copy the `Gateway` like this:

```shell
$ docker inspect container_name | grep Gateway
Expand All @@ -32,17 +32,17 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
"IPv6Gateway": "",
```
If you use your own data, you can also change `host` field to your own host IP
4. Use `docker build -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.
4. Use `docker build --build-arg KER_REL_PATH='./python' --build-arg BASE_IMAGE='continuumio/miniconda3' -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.

> If you are using **Windows CMD**, all the `$(pwd)` here should be replaced by `%cd%`. And if you are using **Windows Powershell**, all the `$(pwd)` here should be replaced by `${pwd}`.
>
> **Notice:** Pathnames of directories like "pwd" may use `\` to join the directory in some versions of Windows. We recommend using absolute paths.
5. Then we can use `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to create and run the container.
5. Then we can use `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to create and run the container.

6. Open the link in console log like `http://127.0.0.1:8888/lab?token=xxxxx`.

7. If the source code under `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to reload the sorce code.
7. If the source code under the `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to reload the source code.

8. You can find the notebook folder, where we provide demos in the handbook. You can create a new file, and happy data exploring!
Attention: you need to do this work in `notebook` or other parallel folder. If you run in root directory, it can't work because of python import rules.
Expand Down
2 changes: 1 addition & 1 deletion python/db/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ def query(q):
return client.execute(q)
def queryDataframe(q):
client = getClient()
return client.query_dataframe(q)
return client.query_dataframe(q, replace_nonwords=False)
9 changes: 9 additions & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
easydict==1.9
py2neo>=2021.2.3
plotly==5.9.0
clickhouse-driver>=0.2.8
numpy>=1.23.2
jupyterlab==3.4.5
matplotlib>=3.5.3
pandas>=1.4.3
pyyaml>=6.0
10 changes: 5 additions & 5 deletions python_v2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da

2. Enter the repo path `cd open-digger`

3. Go to the `python` folder in the open-digger root directory, create a file named 'local_config.py'(this file has already added into `.gitignore` file.) for Python Kernel with the following contents:
3. Go to the `python_v2` folder in the open-digger root directory, and create a file named `local_config.py` (this file has already been added to the `.gitignore` file) for the Python kernel with the following contents:

```python
local_config = {
Expand All @@ -22,7 +22,7 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
}
}
```
the `host` above is the host of the ClickHouse server. We can find it using `docker inspect containert_name`, and copy the `Gateway` like this:
The `host` above is the host of the ClickHouse server. We can find it using `docker inspect container_name` (where `container_name` is the name given via `docker run --name ...`), and copy the `Gateway` like this:

```shell
$ docker inspect container_name | grep Gateway
Expand All @@ -32,17 +32,17 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
"IPv6Gateway": "",
```
If you use your own data, you can also change `host` field to your own host IP
4. Use `docker build -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.
4. Use `docker build --build-arg KER_REL_PATH='./python_v2' --build-arg BASE_IMAGE='continuumio/miniconda3' -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.

> If you are using **Windows CMD**, all the `$(pwd)` here should be replaced by `%cd%`. And if you are using **Windows Powershell**, all the `$(pwd)` here should be replaced by `${pwd}`.
>
> **Notice:** Pathnames of directories like "pwd" may use `\` to join the directory in some versions of Windows. We recommend using absolute paths.
5. Then we can use `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to create and run the container.
5. Then we can use `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to create and run the container.

6. Open the link in console log like `http://127.0.0.1:8888/lab?token=xxxxx`.

7. If the source code under `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to reload the sorce code.
7. If the source code under the `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to reload the source code.

8. You can find the notebook folder, where we provide demos in the handbook. You can create a new file, and happy data exploring!
Attention: you need to do this work in `notebook` or other parallel folder. If you run in root directory, it can't work because of python import rules.
Expand Down
14 changes: 7 additions & 7 deletions python_v2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@
}
}
def mergeConfig(base_config, local_config):
for key in base_config.keys():
if isinstance(base_config[key], dict) and isinstance(local_config[key], dict):
mergeConfig(base_config[key], local_config[key])
else:
base_config[key] = local_config[key]
for key, val in local_config.items():
if isinstance(val, dict):
mergeConfig(base_config[key], val)
else:
base_config[key] = val
return base_config
def getConfig(local_config=None):
local_config = local_config or {}
def getConfig():
global config
if not inited:
try:
from local_config import local_config
config = mergeConfig(config, local_config)
return config
except:
Expand Down
2 changes: 1 addition & 1 deletion python_v2/db/clickhouse_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ def query(self, q):
return self.client.execute(q)

def queryDataframe(self,q):
return self.client.query_dataframe(q)
return self.client.query_dataframe(q, replace_nonwords=False)
8 changes: 4 additions & 4 deletions python_v2/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
DateTime==5.4
clickhouse-driver==0.2.6
clickhouse-driver>=0.2.8
easydict==1.11
ipynbname==2023.2.0.0
jupyterlab
jupyterlab>=3.2.8
matplotlib>=3.5.3
numpy>=1.21.5
pandas==1.4.4
numpy>=1.23.2
pandas>=1.4.3
plotly==5.9.0
py2neo==2021.2.4
typing==3.7.4.3
Expand Down
9 changes: 0 additions & 9 deletions requirements_python.txt

This file was deleted.

Loading

0 comments on commit 867253f

Please sign in to comment.