Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pylint integration updates #643

Merged
merged 16 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

## [Unreleased]
### Added
- Added pylint `unspecified-encoding` and `missing-function-docstring` and ignored opensearchpy for lints (([#643](https://github.com/opensearch-project/opensearch-py/pull/643)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "ignored opensearchpy for lints" mean? Remove it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's important to talk about https://github.com/macohen/opensearch-py/blob/9963f01cbfaa3841b801e7d2b0d5126ea444fd9b/noxfile.py#L122 or ignore it or you tell me I need to figure this out before it can be merged. pylint for generated code seemed challenging and I at least wanted to get the other things right first.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will #645 solve this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure. I'll look into the generated code on the next round. My guess is that the code generator code will have very specific needs, maybe requiring line by line instructions. will look..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in the last PR I added back the original four lints for opensearchpy and opened a new issue to add the rest: #647

- Added pylint `line-too-long` and `invalid-name` ([#590](https://github.com/opensearch-project/opensearch-py/pull/590))
- Added pylint `pointless-statement` ([#611](https://github.com/opensearch-project/opensearch-py/pull/611))
- Added a log collection guide ([#579](https://github.com/opensearch-project/opensearch-py/pull/579))
Expand Down
5 changes: 3 additions & 2 deletions benchmarks/bench_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ async def index_records(client: Any, index_name: str, item_count: int) -> None:

async def test_async(client_count: int = 1, item_count: int = 1) -> None:
"""
asynchronously index with item_count records and run client_count clients. This function can be used to
test balancing the number of items indexed with the number of documents.
asynchronously index with item_count records and run client_count
clients. This function can be used to test balancing the number of
items indexed with the number of documents.
"""
host = "localhost"
port = 9200
Expand Down
13 changes: 6 additions & 7 deletions benchmarks/bench_info_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,12 @@

def get_info(client: Any, request_count: int) -> float:
"""get info from client"""
tt: float = 0
for n in range(request_count):
total_time: float = 0
for request in range(request_count):
start = time.time() * 1000
client.info()
total_time = time.time() * 1000 - start
tt += total_time
return tt
total_time += time.time() * 1000 - start
return total_time


def test(thread_count: int = 1, request_count: int = 1, client_count: int = 1) -> None:
Expand Down Expand Up @@ -71,8 +70,8 @@ def test(thread_count: int = 1, request_count: int = 1, client_count: int = 1) -
thread.start()

latency = 0
for t in threads:
latency += t.join()
for thread in threads:
latency += thread.join()

print(f"latency={latency}")

Expand Down
30 changes: 15 additions & 15 deletions benchmarks/bench_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,29 @@

def index_records(client: Any, index_name: str, item_count: int) -> Any:
"""bulk index item_count records into index_name"""
tt = 0
for n in range(10):
total_time = 0
for iteration in range(10):
data: Any = []
for i in range(item_count):
for item in range(item_count):
data.append(
json.dumps({"index": {"_index": index_name, "_id": str(uuid.uuid4())}})
)
data.append(json.dumps({"value": i}))
data.append(json.dumps({"value": item}))
data = "\n".join(data)

start = time.time() * 1000
rc = client.bulk(data)
if rc["errors"]:
raise Exception(rc["errors"])
response = client.bulk(data)
if response["errors"]:
raise Exception(response["errors"])

server_time = rc["took"]
total_time = time.time() * 1000 - start
server_time = response["took"]
this_time = time.time() * 1000 - start

if total_time < server_time:
raise Exception(f"total={total_time} < server={server_time}")
if this_time < server_time:
raise Exception(f"total={this_time} < server={server_time}")

tt += total_time - server_time
return tt
total_time += this_time - server_time
return total_time


def test(thread_count: int = 1, item_count: int = 1, client_count: int = 1) -> None:
Expand Down Expand Up @@ -105,8 +105,8 @@ def test(thread_count: int = 1, item_count: int = 1, client_count: int = 1) -> N
thread.start()

latency = 0
for t in threads:
latency += t.join()
for thread in threads:
latency += thread.join()

clients[0].indices.refresh(index=index_name)
count = clients[0].count(index=index_name)
Expand Down
82 changes: 80 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# under the License.


from typing import Any
from typing import Any, List

import nox

Expand All @@ -43,6 +43,10 @@

@nox.session(python=["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"]) # type: ignore
def test(session: Any) -> None:
"""
runs all tests with a fresh python environment using "python setup.py test"
:param session: current nox session
"""
session.install(".")
# ensure client can be imported without aiohttp
session.run("python", "-c", "import opensearchpy\nprint(opensearchpy.OpenSearch())")
Expand All @@ -59,6 +63,10 @@ def test(session: Any) -> None:

@nox.session(python=["3.7"]) # type: ignore
def format(session: Any) -> None:
"""
runs black and isort to format the files accordingly
:param session: current nox session
"""
session.install(".")
session.install("black", "isort")

Expand All @@ -71,6 +79,10 @@ def format(session: Any) -> None:

@nox.session(python=["3.7"]) # type: ignore
def lint(session: Any) -> None:
"""
runs isort, black, flake8, pylint, and mypy to check the files according to each utility's function
:param session: current nox session
"""
session.install(
"flake8",
"black",
Expand All @@ -89,7 +101,9 @@ def lint(session: Any) -> None:
session.run("isort", "--check", *SOURCE_FILES)
session.run("black", "--check", *SOURCE_FILES)
session.run("flake8", *SOURCE_FILES)
session.run("pylint", *SOURCE_FILES)

lint_per_folder(session)

session.run("python", "utils/license_headers.py", "check", *SOURCE_FILES)

# Workaround to make '-r' to still work despite uninstalling aiohttp below.
Expand All @@ -108,8 +122,68 @@ def lint(session: Any) -> None:
session.run("mypy", "--strict", "test_opensearchpy/test_types/sync_types.py")


def lint_per_folder(session: Any) -> None:
"""
allows configuration of pylint rules per folder and runs a pylint command for each folder
:param session: the current nox session
"""

# any paths that should not be run through pylint
exclude_path_from_linting: List[str] = []

# all paths not referenced in override_enable will run these lints
default_enable = [
"line-too-long",
"invalid-name",
"pointless-statement",
"unspecified-encoding",
"missing-function-docstring",
"missing-param-doc",
"differing-param-doc",
]
override_enable = {
"test_opensearchpy/": [
"line-too-long",
# "invalid-name", lots of short functions with one or two character names
"pointless-statement",
"unspecified-encoding",
"missing-param-doc",
"differing-param-doc",
# "missing-function-docstring", test names usually are, self describing
],
"opensearchpy/": [
"line-too-long",
"invalid-name",
"pointless-statement",
"unspecified-encoding",
],
}

for source_file in SOURCE_FILES:
if source_file in exclude_path_from_linting:
continue

args = [
"--disable=all",
"--max-line-length=240",
"--good-names-rgxs=^[_a-z][_a-z0-9]?$",
"--load-plugins",
"pylint.extensions.docparams",
]
if source_file in override_enable:
args.append(f"--enable={','.join(override_enable[source_file])}")
else:
args.append(f"--enable={','.join(default_enable)}")
args.append(source_file)
session.run("pylint", *args)
macohen marked this conversation as resolved.
Show resolved Hide resolved


@nox.session() # type: ignore
def docs(session: Any) -> None:
"""
builds the html documentation for the client
:param session: current nox session
"""
session.install(".")
session.install(".[docs]")
with session.chdir("docs"):
Expand All @@ -118,6 +192,10 @@ def docs(session: Any) -> None:

@nox.session() # type: ignore
def generate(session: Any) -> None:
"""
generates the base API code
:param session: current nox session
"""
session.install("-rdev-requirements.txt")
session.run("python", "utils/generate_api.py")
format(session)
6 changes: 6 additions & 0 deletions opensearchpy/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@


def to_str(x: Union[str, bytes], encoding: str = "ascii") -> str:
"""
returns x as a string encoded in "encoding" if it is not already a string
:param x: the value to convert to a str
:param encoding: the encoding to convert to - see https://docs.python.org/3/library/codecs.html#standard-encodings
:return: an encoded str
"""
if not isinstance(x, str):
return x.decode(encoding)
return x
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

def main() -> None:
"""
demonstrates various functions to operate on the index (e.g. clear different levels of cache, refreshing the
index)
demonstrates various functions to operate on the index
(e.g. clear different levels of cache, refreshing the index)
"""
# Set up
client = OpenSearch(
Expand Down
9 changes: 6 additions & 3 deletions samples/aws/search_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@

def main() -> None:
"""
connects to a cluster specified in environment variables, creates an index, inserts documents,
connects to a cluster specified in environment variables,
creates an index, inserts documents,
searches the index, deletes the document, deletes the index.
the environment variables are "ENDPOINT" for the cluster endpoint, AWS_REGION for the region in which the cluster
is hosted, and SERVICE to indicate if this is an ES 7.10.2 compatible cluster
the environment variables are "ENDPOINT" for the cluster
endpoint, AWS_REGION for the region in which the cluster
is hosted, and SERVICE to indicate if this is an ES 7.10.2
compatible cluster
"""
# verbose logging
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
Expand Down
7 changes: 4 additions & 3 deletions samples/aws/search_urllib3.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@

def main() -> None:
"""
1. connects to an OpenSearch cluster on AWS defined by environment variables (i.e. ENDPOINT - cluster endpoint like
my-test-domain.us-east-1.es.amazonaws.com; AWS_REGION like us-east-1, us-west-2; and SERVICE like es which
differentiates beteween serverless and the managed service.
1. connects to an OpenSearch cluster on AWS defined by environment variables
(i.e. ENDPOINT - cluster endpoint like my-test-domain.us-east-1.es.
amazonaws.com; AWS_REGION like us-east-1, us-west-2; and SERVICE like es which
differentiates between serverless and the managed service.
2. creates an index called "movies" and adds a single document
3. queries for that document
4. deletes the document
Expand Down
2 changes: 1 addition & 1 deletion samples/bulk/bulk_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def main() -> None:
data.append({"index": {"_index": index_name, "_id": i}})
data.append({"value": i})

rc = client.bulk(data)
rc = client.bulk(data) # pylint: disable=invalid-name
if rc["errors"]:
print("There were errors:")
for item in rc["items"]:
Expand Down
6 changes: 3 additions & 3 deletions samples/bulk/bulk_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

def main() -> None:
"""
demonstrates how to bulk load data using opensearchpy.helpers including examples of serial, parallel, and streaming
bulk load
demonstrates how to bulk load data using opensearchpy.helpers
including examples of serial, parallel, and streaming bulk load
"""
# connect to an instance of OpenSearch

Expand Down Expand Up @@ -56,7 +56,7 @@ def main() -> None:
data.append({"_index": index_name, "_id": i, "value": i})

# serialized bulk raising an exception on error
rc = helpers.bulk(client, data)
rc = helpers.bulk(client, data) # pylint: disable=invalid-name
print(f"Bulk-inserted {rc[0]} items (bulk).")

# parallel bulk with explicit error checking
Expand Down
2 changes: 1 addition & 1 deletion samples/bulk/bulk_ld.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def main() -> None:
data += json.dumps({"index": {"_index": index_name, "_id": i}}) + "\n"
data += json.dumps({"value": i}) + "\n"

rc = client.bulk(data)
rc = client.bulk(data) # pylint: disable=invalid-name
if rc["errors"]:
print("There were errors:")
for item in rc["items"]:
Expand Down
3 changes: 2 additions & 1 deletion samples/document_lifecycle/document_lifecycle_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

def main() -> None:
"""
provides samples for different ways to handle documents including indexing, searching, updating, and deleting
provides samples for different ways to handle documents
including indexing, searching, updating, and deleting
"""
# Connect to OpenSearch
client = OpenSearch(
Expand Down
17 changes: 10 additions & 7 deletions samples/hello/hello.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@

def main() -> None:
"""
an example showing how to create an synchronous connection to OpenSearch, create an index, index a document
and search to return the document
an example showing how to create an synchronous connection to
OpenSearch, create an index, index a document and search to
return the document
"""
host = "localhost"
port = 9200
Expand Down Expand Up @@ -49,19 +50,21 @@ def main() -> None:

document = {"title": "Moneyball", "director": "Bennett Miller", "year": "2011"}

id = "1"
doc_id = "1"

response = client.index(index=index_name, body=document, id=id, refresh=True)
response = client.index(index=index_name, body=document, id=doc_id, refresh=True)

print(response)

# search for a document

q = "miller"
user_query = "miller"

query = {
"size": 5,
"query": {"multi_match": {"query": q, "fields": ["title^2", "director"]}},
"query": {
"multi_match": {"query": user_query, "fields": ["title^2", "director"]}
},
}

response = client.search(body=query, index=index_name)
Expand All @@ -70,7 +73,7 @@ def main() -> None:

# delete the document

response = client.delete(index=index_name, id=id)
response = client.delete(index=index_name, id=doc_id)

print(response)

Expand Down
Loading
Loading