Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial python binding #33

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions bindings/python/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Python binding to use pHash on images

## Local use

From the `bindings/python` directory, run CMake on the repository root to check that all dependencies are available and to generate the header `pHash.h`:

```
mkdir build-phash
cd build-phash
cmake ../../../
cd ..
```

Build the Python extension in place (in the current directory):

```
python setup.py build_ext --inplace
```


8 changes: 8 additions & 0 deletions bindings/python/phash.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# distutils: language=c++
from libc.stdint cimport uint8_t, uint64_t

cdef extern from "../../src/pHash.h":
    # C/C++ entry points of pHash used by phash.pyx.

    # Returns a hash buffer (NULL is treated as failure by the binding); the
    # int reference receives the buffer length.  The binding releases the
    # buffer with free() after copying it.
    uint8_t* ph_mh_imagehash(const char *, int&, float, float)

    # Distance between two byte hashes, each given as (buffer, length).
    double ph_hammingdistance2(uint8_t*, int, uint8_t*, int)

    # Writes the hash into the uint64_t reference; the binding treats a
    # return value of 0 as success.
    int ph_dct_imagehash(const char *path, uint64_t&)

    # Distance between two 64-bit hashes.  The second parameter was
    # previously also named ``hash1``; the duplicate name is fixed here.
    int ph_hamming_distance(uint64_t hash1, uint64_t hash2)
57 changes: 57 additions & 0 deletions bindings/python/phash.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# distutils: language=c++
import os
from libc.stdlib cimport free
from libc.stdint cimport uint8_t, uint64_t

from phash cimport (ph_mh_imagehash, ph_hammingdistance2,
ph_dct_imagehash, ph_hamming_distance)


cdef class MHImageHash:
    """Image hash computed by ``ph_mh_imagehash``, stored as Python bytes.

    Instances are normally created with :meth:`from_path`.  The pickle
    state is the raw hash bytes.
    """
    # Copy of the hash buffer returned by ph_mh_imagehash.
    cdef bytes c_hash

    def __getstate__(self):
        """Return the hash bytes as the pickle state."""
        return self.c_hash

    def __setstate__(self, state):
        """Restore the hash bytes from a pickle state."""
        self.c_hash = state

    def hamming_distance(self, MHImageHash other not None):
        """Return ``ph_hammingdistance2`` of both hashes, times 100, rounded to int.

        ``other`` is declared ``not None``: without it Cython accepts ``None``
        for a typed argument and ``other.c_hash`` would read the C attribute
        of an object that is not an MHImageHash.  Passing ``None`` now raises
        ``TypeError`` instead.
        """
        return int(round(100 * ph_hammingdistance2(
            self.c_hash, len(self.c_hash),
            other.c_hash, len(other.c_hash))))

    @staticmethod
    def from_path(path, alpha=2.0, level=1.0):
        """Hash the image at ``path`` and return a new MHImageHash.

        ``path`` is converted with ``os.fsencode``; ``alpha`` and ``level``
        are forwarded unchanged to ``ph_mh_imagehash``.

        Raises:
            RuntimeError: if ``ph_mh_imagehash`` returns NULL.
        """
        cdef uint8_t* c_hash
        cdef int c_hashlen = 0
        cdef MHImageHash obj

        c_hash = ph_mh_imagehash(os.fsencode(path), c_hashlen, alpha, level)
        if c_hash is NULL:
            raise RuntimeError("ph_mh_imagehash failed")
        try:
            obj = MHImageHash()
            # Copy the C buffer into a bytes object so it can be freed below.
            obj.c_hash = c_hash[:c_hashlen]
        finally:
            free(c_hash)
        return obj


cdef class DCTImageHash:
    """Image hash computed by ``ph_dct_imagehash``, stored as a 64-bit integer.

    Instances are normally created with :meth:`from_path`.  The pickle
    state is the hash value as a Python int.
    """
    # Hash value written by ph_dct_imagehash.
    cdef uint64_t c_hash

    def __getstate__(self):
        """Return the hash value as the pickle state."""
        return self.c_hash

    def __setstate__(self, state):
        """Restore the hash value from a pickle state."""
        self.c_hash = state

    def hamming_distance(self, DCTImageHash other not None):
        """Return ``ph_hamming_distance`` between this hash and ``other``.

        ``other`` is declared ``not None``: without it Cython accepts ``None``
        for a typed argument and ``other.c_hash`` would read the C attribute
        of an object that is not a DCTImageHash.  Passing ``None`` now raises
        ``TypeError`` instead.
        """
        return ph_hamming_distance(self.c_hash, other.c_hash)

    @staticmethod
    def from_path(path):
        """Hash the image at ``path`` and return a new DCTImageHash.

        ``path`` is converted with ``os.fsencode``.

        Raises:
            RuntimeError: if ``ph_dct_imagehash`` returns a non-zero value.
        """
        # Typed explicitly so that obj.c_hash is the C field, which is passed
        # by reference and filled in by ph_dct_imagehash.
        cdef DCTImageHash obj = DCTImageHash()
        if ph_dct_imagehash(os.fsencode(path), obj.c_hash) != 0:
            raise RuntimeError("ph_dct_imagehash failed")
        return obj

53 changes: 53 additions & 0 deletions bindings/python/phash_add_directory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Hash all images in the specified directory
"""
import os
import pickle
import argparse
from pathlib import Path
from multiprocessing import Pool, cpu_count
from phash import DCTImageHash, MHImageHash


def dct_hash_image(path):
    """Print *path* (flushed immediately) and return its DCTImageHash."""
    print(path, flush=True)
    image_path = Path(path)
    image_hash = DCTImageHash.from_path(image_path)
    return image_hash


def mh_hash_image(path):
    """Print *path* (flushed immediately) and return its MHImageHash."""
    print(path, flush=True)
    image_path = Path(path)
    image_hash = MHImageHash.from_path(image_path)
    return image_hash


def parse_args(argv=None):
    """Parse the command line of the directory-hashing script.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    Returns:
        An ``argparse.Namespace`` with the attributes ``directory``,
        ``cache_file``, ``extension`` and ``hash_method``.
    """
    parser = argparse.ArgumentParser(
        description='Hash all images in the specified directory')
    parser.add_argument('directory', help='directory containing the images')
    parser.add_argument(
        'cache_file', default='phash.cache', nargs='?',
        help='pickle file the hashes are stored in (default: phash.cache)')
    parser.add_argument(
        '-e', '--extension', default='.jpg',
        help='file name suffix of the images to hash (default: .jpg)')
    parser.add_argument(
        '-m', '--hash-method', default='dct', choices=('dct', 'mh'),
        help='hash algorithm to use (default: dct)')
    return parser.parse_args(argv)


def main():
    """Hash the matching images of a directory and merge them into the cache.

    The cache file is a pickled ``(image_files, hashes)`` pair of parallel
    lists.  An existing cache is loaded first and files it already lists are
    skipped.  Previously every run re-hashed and re-appended them, so the
    cache accumulated duplicate entries for the same path.  Newly hashed
    files are stored in front of the cached ones, as before.

    Raises:
        RuntimeError: if the directory does not exist or the hash method
            is unknown.
    """
    opts = parse_args()
    if not os.path.isdir(opts.directory):
        raise RuntimeError('directory %s does not exist' % opts.directory)

    hash_functions = {'dct': dct_hash_image, 'mh': mh_hash_image}
    if opts.hash_method not in hash_functions:
        raise RuntimeError('unknown hash method %s' % opts.hash_method)
    hash_image = hash_functions[opts.hash_method]

    cached_files, cached_hashes = [], []
    if os.path.exists(opts.cache_file):
        # SECURITY: pickle.load can execute arbitrary code; only use cache
        # files that were written by this script.
        with open(opts.cache_file, 'rb') as fin:
            cached_files, cached_hashes = pickle.load(fin)
        cached_files, cached_hashes = list(cached_files), list(cached_hashes)
    # NOTE(review): the cached hashes are not checked against --hash-method;
    # mixing methods in one cache file is presumably unsupported - confirm.

    # Compare normalised paths so 'dir/a.jpg' and './dir/a.jpg' count as equal.
    already_hashed = {os.path.normpath(path) for path in cached_files}
    image_files = []
    for name in os.listdir(opts.directory):
        path = os.path.join(opts.directory, name)
        if name.endswith(opts.extension) and os.path.normpath(path) not in already_hashed:
            image_files.append(path)

    if image_files:
        with Pool(processes=cpu_count()) as pool:
            hashes = pool.map(hash_image, image_files)
    else:
        # Nothing new to hash: do not start worker processes.
        hashes = []

    with open(opts.cache_file, 'wb') as fout:
        pickle.dump((image_files + cached_files, hashes + cached_hashes),
                    fout, pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    main()
57 changes: 57 additions & 0 deletions bindings/python/phash_show_similar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Print an HTML table of similar pictures and optionally delete
near-duplicates (for example in a second pass, using --delete-threshold)
"""
import os
import sys
import argparse
import pickle


def parse_args(argv=None):
    """Parse the command line of the similarity-report script.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    Returns:
        An ``argparse.Namespace`` with the attributes ``cache_file``,
        ``threshold`` (int, default 10) and ``delete_threshold`` (float,
        or ``None`` when the option is not given).
    """
    parser = argparse.ArgumentParser(
        description='Print a list of similar pictures')
    parser.add_argument('cache_file', help='pickle file containing the hashes')
    parser.add_argument(
        '-t', '--threshold', type=int, default=10,
        help='report pairs whose distance is below this value (default: 10)')
    parser.add_argument(
        '-d', '--delete-threshold', type=float,
        help='delete the second file of pairs whose distance is below this value')
    return parser.parse_args(argv)


def main():
    """Write an HTML table of similar images to stdout, optionally deleting.

    The cache file holds a pickled ``(image_files, hashes)`` pair of parallel
    lists.  Every image is compared with all later ones.  Pairs closer than
    ``--threshold`` are listed in the table column of their distance.  When
    ``--delete-threshold`` is given, the later file of a pair closer than
    that value is deleted instead of listed.

    Changes compared to the previous version:

    * an empty cache no longer fails with ``IndexError``;
    * a deleted file is no longer used as a reference or unlinked a second
      time, which raised ``FileNotFoundError``;
    * a second cache entry for the same path is skipped, so a duplicate entry
      can no longer delete the only copy of a file;
    * file names are HTML-escaped before being written to the report.
    """
    # Function-scope import: html is only needed by this function.
    import html

    opts = parse_args()
    # SECURITY: pickle.load can execute arbitrary code; only open cache
    # files that you created yourself.
    with open(opts.cache_file, 'rb') as fin:
        image_files, hashes = pickle.load(fin)
    hash_kind = type(hashes[0]).__name__ if hashes else 'no hash class'
    print('hashed', len(hashes), 'files with', hash_kind, file=sys.stderr)

    print('<title>', html.escape(opts.cache_file), '</title>')
    print('<table border=1>')
    print('<tr>')
    print('<td></td>')
    for column in range(opts.threshold):
        print('<th>%d</th>' % column)
    print('</tr>')

    # Indexes that must not take part in further comparisons: files deleted
    # during this run and duplicate entries of an already seen path.
    skipped = set()
    for i1, h1 in enumerate(hashes):
        if i1 in skipped:
            continue
        path1 = os.path.normpath(image_files[i1])
        similar = {}
        for i2 in range(i1 + 1, len(hashes)):
            if i2 in skipped:
                continue
            if os.path.normpath(image_files[i2]) == path1:
                # Same file listed twice: never report or delete it.
                skipped.add(i2)
                continue
            distance = h1.hamming_distance(hashes[i2])
            if opts.delete_threshold is not None and distance < opts.delete_threshold:
                print('deleting file', image_files[i2], file=sys.stderr)
                os.unlink(image_files[i2])
                skipped.add(i2)
            elif distance < opts.threshold:
                similar.setdefault(distance, []).append(i2)
        if not similar:
            continue
        print('<tr>')
        print('<th><a href="%s">[%03d]</a></th>'
              % (html.escape(image_files[i1], quote=True), i1))
        for column in range(opts.threshold):
            if column in similar:
                print('<td>')
                for j in similar[column]:
                    print('<a href="%s">[%03d]</a>'
                          % (html.escape(image_files[j], quote=True), j))
                print('</td>')
            else:
                print('<td></td>')
        print('</tr>')
    print('</table>')


if __name__ == '__main__':
    main()
12 changes: 12 additions & 0 deletions bindings/python/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import os

# setuptools is imported before Cython, as in the original ordering.
# Extension now comes from setuptools: distutils is deprecated (PEP 632) and
# was removed from the standard library in Python 3.12.
from setuptools import Extension, setup
from Cython.Build import cythonize

# The extension compiles the Cython wrapper together with the pHash C++
# source.  The relative path is resolved against the current working
# directory, so the build has to be started from the directory of this file.
phash_ext = Extension(
    'phash',
    sources=['phash.pyx', os.path.abspath('../../src/pHash.cpp')],
    libraries=['png', 'tiff'],
    language='c++',
)

setup(ext_modules=cythonize([phash_ext], language_level='3'))