Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
KOLANICH committed Oct 24, 2023
0 parents commit b8c4455
Show file tree
Hide file tree
Showing 24 changed files with 2,207 additions and 0 deletions.
Empty file added .ci/aptPackagesToInstall.txt
Empty file.
1 change: 1 addition & 0 deletions .ci/pythonPackagesToInstallFromGit.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/eddieantonio/perfection.git
12 changes: 12 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
root = true

[*]
charset = utf-8
indent_style = tab
indent_size = 4
insert_final_newline = true
end_of_line = lf

[*.{yml,yaml}]
indent_style = space
indent_size = 2
1 change: 1 addition & 0 deletions .github/.templateMarker
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
KOLANICH/python_project_boilerplate.py
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
allow:
- dependency-type: "all"
15 changes: 15 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: CI
on:
push:
branches: [master]
pull_request:
branches: [master]

jobs:
build:
runs-on: ubuntu-22.04
steps:
- name: typical python workflow
uses: KOLANICH-GHActions/typical-python-workflow@master
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
__pycache__
*.py[co]
/*.egg-info
*.srctrlbm
*.srctrldb
build
dist
.eggs
monkeytype.sqlite3
/.ipynb_checkpoints
51 changes: 51 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest

variables:
DOCKER_DRIVER: overlay2
SAST_ANALYZER_IMAGE_TAG: latest
SAST_DISABLE_DIND: "true"
SAST_CONFIDENCE_LEVEL: 5
CODECLIMATE_VERSION: latest

include:
- template: SAST.gitlab-ci.yml
- template: Code-Quality.gitlab-ci.yml
- template: License-Management.gitlab-ci.yml

build:
tags:
- shared
- linux
stage: build
variables:
GIT_DEPTH: "1"
PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages

before_script:
- export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables`
- apt-get update
# todo:
#- apt-get -y install
#- pip3 install --upgrade
#- python3 ./fix_python_modules_paths.py

script:
- python3 -m build -nw bdist_wheel
- mv ./dist/*.whl ./dist/PerfectPrecomputedHashtable-0.CI-py3-none-any.whl
- pip3 install --upgrade ./dist/*.whl
- coverage run --source=PerfectPrecomputedHashtable -m --branch pytest --junitxml=./rspec.xml ./tests/test.py
- coverage report -m
- coverage xml

coverage: "/^TOTAL(?:\\s+\\d+){4}\\s+(\\d+%).+/"

cache:
paths:
- $PYTHONUSERBASE

artifacts:
paths:
- dist
reports:
junit: ./rspec.xml
cobertura: ./coverage.xml
1 change: 1 addition & 0 deletions Code_Of_Conduct.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
No codes of conduct!
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include UNLICENSE
include *.md
include tests
include .editorconfig
51 changes: 51 additions & 0 deletions PerfectPrecomputedHashtable/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import typing
from hashlib import blake2s
from pathlib import Path
from struct import unpack

import perfection

from .hash import *


def prepareForPerfectHashtable(powCfg, src: typing.Mapping[typing.Any, typing.Any], obj2Bytes):
	"""Hash the entries of `src` and shift the hashes so that the smallest one becomes zero.

	The rows are produced by `genTable(powCfg, src, obj2Bytes)`. Only the first
	element of each row (the value used for ordering and offsetting) is
	interpreted here; the remaining elements are carried along unchanged.

	Returns a tuple `(offset, fullMappingList, reducedHashesList)`:
	`offset` is the smallest first element among the rows,
	`fullMappingList` is the rows sorted by first element with `offset` subtracted from it,
	`reducedHashesList` is the sorted list of those reduced first elements.

	Raises `AssertionError` if the number of rows, of distinct rows, or of
	distinct reduced hashes differs from `len(src)`.
	Raises `IndexError` if there are no rows at all.
	"""

	def checkCount(items, what: str) -> None:
		# Raised explicitly instead of via `assert`: the checks depend on the data
		# and must not vanish when Python runs with `-O`.
		if len(items) != len(src):
			raise AssertionError(f"{what}: got {len(items)} entries for {len(src)} source items")

	fullMappingList = genTable(powCfg, src, obj2Bytes)
	checkCount(fullMappingList, "unexpected table size")

	# Deduplicate whole rows, then order them by their first element.
	fullMappingList = sorted(set(fullMappingList), key=lambda el: el[0])
	checkCount(fullMappingList, "duplicate rows")

	# After sorting, the first row carries the minimum.
	offset = fullMappingList[0][0]
	fullMappingList = [(el[0] - offset, *el[1:]) for el in fullMappingList]

	reducedHashesList = sorted({el[0] for el in fullMappingList})
	checkCount(reducedHashesList, "colliding hashes")

	return offset, fullMappingList, reducedHashesList


def genPerfectHashtable(powCfg: POWConfig, src: typing.Mapping[typing.Any, typing.Any], obj2Bytes):
	"""Build a `GeneratedHashtable` for `src`.

	The reduced hashes from `prepareForPerfectHashtable` are fed to
	`perfection.hash_parameters`; every row is then placed into the slot chosen
	by `perfectHashRemap`. Slots that receive no row stay `(None, None)`.
	"""
	baseOffset, rows, reducedKeys = prepareForPerfectHashtable(powCfg, src, obj2Bytes)
	params = perfection.hash_parameters(reducedKeys, minimize=False)

	slots = [(None, None)] * len(params.slots)
	# The third element of a row is not stored in the table.
	for reducedKey, check, _unused, payload in rows:
		slotIdx = perfectHashRemap(params, reducedKey)
		slots[slotIdx] = (check, payload)

	hashCfg = HashConfig(
		t=params.t,
		offset=baseOffset - params.offset,
		r=params.r
	)
	fullCfg = Config(p=powCfg, h=hashCfg)

	return GeneratedHashtable(t=slots, c=fullCfg)
173 changes: 173 additions & 0 deletions PerfectPrecomputedHashtable/codegen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import ast
import typing
from pathlib import Path

from . import hash as hashModule
from . import reducers
from .hash import Config, HashConfig, POWConfig, GeneratedHashtable, PoorManDataClass

__all__ = ("genCode", "genAST", "GenCfg")


def primitive2AST(v: typing.Any):
	"""Return the expression AST obtained by parsing `repr(v)`."""
	parsed = ast.parse(repr(v), mode="eval")
	return parsed.body

# AST of `lambda x: x`; used as the default `o2b` of `GenCfg`.
identityLambdaArgName = "x"
identityLambdaAST = ast.Lambda(
	args=ast.arguments(
		posonlyargs=[],
		args=[ast.arg(arg=identityLambdaArgName)],
		kwonlyargs=[],
		kw_defaults=[],
		defaults=[],
	),
	body=ast.Name(id=identityLambdaArgName, ctx=ast.Load()),
)

class GenCfg(PoorManDataClass):
	"""Options controlling the shape of the generated module."""

	__slots__ = ("tableName", "configName", "preamble", "valueASTGen", "o2b", "nnConstName")

	def __init__(
		self,
		tableName: str = "hashedHashTable",
		configName: str = "gHT",
		nnConstName: str = "nN",
		preamble=None,
		valueASTGen: typing.Callable = primitive2AST,
		o2b=identityLambdaAST,
	) -> None:
		# Name of the module-level variable the table tuple is assigned to.
		self.tableName = tableName
		# Name of the generated class holding the parameters and the table.
		self.configName = configName
		# Optional nodes spliced into the module body after the imports.
		self.preamble = preamble
		# Callable turning a stored value into an AST node.
		self.valueASTGen = valueASTGen
		# AST node stored as the `o2b` attribute of the generated `h` class.
		self.o2b = o2b
		# Name of the constant that stands for an empty `(None, None)` row.
		self.nnConstName = nnConstName


def makeTableAst(gCfg: GenCfg, tbl):
	"""Return an `ast.Tuple` whose elements are the row nodes built by `tableRowAst` for each entry of `tbl`."""
	rowNodes = [tableRowAst(gCfg, row) for row in tbl]
	return ast.Tuple(elts=rowNodes, ctx=ast.Load())


def tableRowAst(gCfg: GenCfg, r):
	"""Return the AST node for one table row `r`, a `(checkValue, value)` pair.

	A row whose check value is `None` becomes a reference to the constant named
	`gCfg.nnConstName`; any other row becomes a 2-tuple of the check value and
	the node produced by `gCfg.valueASTGen(value)`.
	"""
	checkValue, value = r
	if checkValue is None:
		return ast.Name(id=gCfg.nnConstName, ctx=ast.Load())

	rowTuple = ast.Tuple(
		elts=[ast.Constant(value=checkValue), gCfg.valueASTGen(value)],
		ctx=ast.Load(),
	)
	# NOTE(review): `ast.Expr` is a statement node, yet it ends up inside a tuple.
	# Presumably this makes the unparser start every row on a new line — confirm
	# before compiling the resulting tree directly.
	return ast.Expr(value=rowTuple)


class HexUnparser(ast._Unparser):
	"""Unparser that writes an integer constant in hexadecimal whenever that spelling is shorter than the decimal one."""

	def _write_constant(self, value):
		# `bool` is a subclass of `int`; without the exclusion `True` / `False`
		# would be written as `0x1` / `0x0`, silently changing the constant's type.
		if isinstance(value, int) and not isinstance(value, bool):
			decimal = repr(value)
			hexadecimal = hex(value)
			# On equal length `min` keeps the first candidate, i.e. the decimal form.
			self.write(min((decimal, hexadecimal), key=len))
		else:
			super()._write_constant(value)


def makeStorageClass(name, dic: typing.Mapping[str, typing.Any]) -> ast.ClassDef:
	"""Build a class definition node named `name` whose body holds the items of `dic`.

	An AST value that has a `name` attribute is renamed to its key (in place)
	and inserted as is. Every other value becomes an assignment `key = value`,
	non-AST values being converted with `primitive2AST` first.
	"""
	members = []
	for attrName, attrValue in dic.items():
		isNode = isinstance(attrValue, ast.AST)
		if isNode and hasattr(attrValue, "name"):
			# A definition node carries its own name, so no assignment is needed.
			attrValue.name = attrName
			members.append(attrValue)
			continue

		if not isNode:
			attrValue = primitive2AST(attrValue)

		target = ast.Name(id=attrName, ctx=ast.Store())
		members.append(ast.Assign(targets=[target], value=attrValue))

	return ast.ClassDef(
		name=name,
		bases=[],
		keywords=[],
		body=members,
		decorator_list=[],
	)


def takeFromModuleAST(moduleAST, toTake):
	"""Collect the top-level statements of `moduleAST` that define a name listed in `toTake`.

	Considered are function definitions, class definitions and assignments to
	exactly one plain name. Returns a dict mapping the defined name to its node.
	"""
	picked = {}
	for stmt in moduleAST.body:
		if isinstance(stmt, (ast.FunctionDef, ast.ClassDef)):
			definedName = stmt.name
		elif (
			isinstance(stmt, ast.Assign)
			and len(stmt.targets) == 1
			and isinstance(stmt.targets[0], ast.Name)
		):
			definedName = stmt.targets[0].id
		else:
			continue

		if definedName in toTake:
			picked[definedName] = stmt
	return picked


def takeFromFile(path: Path, toTake):
	"""Parse the Python source file at `path` and return the definitions named in `toTake` (see `takeFromModuleAST`)."""
	# The raw bytes are handed to the parser so that the source encoding is
	# detected by the compiler (BOM / coding declaration, UTF-8 otherwise)
	# rather than taken from the locale. The filename makes syntax errors traceable.
	moduleAST = ast.parse(path.read_bytes(), filename=str(path))
	return takeFromModuleAST(moduleAST, toTake)


def takeFromModule(module, toTake):
	"""Return the definitions named in `toTake` found in the source file of the imported `module`."""
	sourcePath = Path(module.__file__)
	return takeFromFile(sourcePath, toTake)

def patchHashtableLookup(htl, gCfg, argToReplace: str = 'gHT'):
	"""Remove the positional parameter `argToReplace` from the function definition node `htl`, in place.

	If the parameter name differs from `gCfg.configName`, an assignment
	`<argToReplace> = <gCfg.configName>` is prepended to the body, so the rest
	of the body can keep using the old name.

	Raises `ValueError` if `htl` has no positional parameter called `argToReplace`.
	"""
	positional = htl.args.args

	argToReplaceIdx = None
	for i, arg in enumerate(positional):
		if arg.arg == argToReplace:
			argToReplaceIdx = i
			break

	if argToReplaceIdx is None:
		raise ValueError(f"{getattr(htl, 'name', '<function>')!r} has no positional parameter {argToReplace!r}")

	# `defaults` belongs to the LAST positional parameters; drop the entry of
	# the removed parameter so the remaining defaults stay attached to the right names.
	defaults = htl.args.defaults
	defaultIdx = argToReplaceIdx - (len(positional) - len(defaults))
	if defaultIdx >= 0:
		del defaults[defaultIdx]

	del positional[argToReplaceIdx]

	if argToReplace != gCfg.configName:
		alias = ast.Assign(
			targets=[ast.Name(id=argToReplace, ctx=ast.Store())],
			value=ast.Name(id=gCfg.configName, ctx=ast.Load()),
		)
		htl.body.insert(0, alias)


def genAST(g: GeneratedHashtable, gCfg: GenCfg) -> ast.Module:
	"""Assemble the AST of a module embedding the hashtable `g`.

	The module body consists of: three imports, the optional `gCfg.preamble`,
	`uint8_t` taken from the `reducers` module, the helpers taken from the
	`hash` module, the empty-row constant, the table assignment and the class
	named `gCfg.configName` (nested parameter class `c` plus the `t` attribute
	referring to the table).
	"""

	def assignName(targetName, valueNode):
		# `targetName = <valueNode>`
		return ast.Assign(targets=[ast.Name(id=targetName, ctx=ast.Store())], value=valueNode)

	powClass = makeStorageClass("p", {"nonce": g.c.p.nonce})

	# The configured reducer is copied from the source of `reducers` under a fixed name.
	reducersTaken = takeFromModule(reducers, {"uint8_t", g.c.p.reducer.__name__})
	reducerDef = reducersTaken[g.c.p.reducer.__name__]
	reducerDef.name = "reducer"
	powClass.body.append(reducerDef)

	hashClass = makeStorageClass(
		"h",
		{
			"t": g.c.h.t,
			"r": g.c.h.r,
			"offset": g.c.h.offset,
			"o2b": gCfg.o2b
		},
	)

	paramsClass = ast.ClassDef(
		name="c",
		bases=[],
		keywords=[],
		body=[powClass, hashClass],
		decorator_list=[],
	)

	tableNode = makeTableAst(gCfg, g.t)

	emptyRowConst = assignName(gCfg.nnConstName, ast.Tuple(elts=[ast.Constant(value=None)]*2, ctx=ast.Load()))
	tableConst = assignName(gCfg.tableName, tableNode)

	hashtableClass = makeStorageClass(gCfg.configName, {"t": ast.Name(id=gCfg.tableName, ctx=ast.Load())})
	hashtableClass.body.insert(0, paramsClass)

	hashTaken = takeFromModule(hashModule, {"uint16_t", "twoShorts", "halfIntHash", "hashtableLookup", "singleByteHashStrings", "perfectHashRemap"})
	patchHashtableLookup(hashTaken["hashtableLookup"], gCfg)

	importNodes = [
		ast.Import(names=[ast.alias(name='typing')]),
		ast.ImportFrom(module='hashlib', names=[ast.alias(name='blake2s')], level=0),
		ast.ImportFrom(module='struct', names=[ast.alias(name='Struct')], level=0),
	]
	preambleNodes = gCfg.preamble if gCfg.preamble else ()

	moduleNode = ast.Module(
		body=[
			*importNodes,
			*preambleNodes,
			reducersTaken["uint8_t"],
			*hashTaken.values(),
			emptyRowConst,
			tableConst,
			hashtableClass,
		],
		type_ignores=[],
	)
	ast.fix_missing_locations(moduleNode)
	return moduleNode


def genCode(g: GeneratedHashtable, gCfg: GenCfg) -> str:
	"""Return the source text of the module built by `genAST`, rendered with `HexUnparser`."""
	moduleNode = genAST(g=g, gCfg=gCfg)
	return HexUnparser().visit(moduleNode)
Loading

0 comments on commit b8c4455

Please sign in to comment.