Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add objwrapper pkg #82

Merged
merged 13 commits into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
29 changes: 29 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: Seafobj CI

on: [push, pull_request]

jobs:
build:
runs-on: ubuntu-latest
env:
OSS_ACCESS_KEY: ${{ secrets.OSS_ACCESS_KEY }}
OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }}
S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
OSS_BUCKET: ${{ secrets.OSS_BUCKET }}
OSS_REGION: ${{ secrets.OSS_REGION }}
S3_BUCKET: ${{ secrets.S3_BUCKET }}
S3_REGION: ${{ secrets.S3_REGION }}

steps:
- uses: actions/checkout@v1
with:
fetch-depth: 1
- uses: actions/setup-python@v1
with:
python-version: "3.8"
- name: install dependencies and test
run: |
cd $GITHUB_WORKSPACE
./ci/install-deps.sh
python ./ci/run.py
11 changes: 11 additions & 0 deletions ci/install-deps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

set -e -x

SCRIPT=${BASH_SOURCE[0]}
TESTS_DIR=$(dirname "${SCRIPT}")/..
SETUP_DIR=${TESTS_DIR}/ci

cd $SETUP_DIR

pip install -r requirements.txt
6 changes: 6 additions & 0 deletions ci/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
termcolor>=2.3.0
requests>=2.31.0
pytest>=7.4.0
pytest-instafail
boto3==1.28.12
oss2==2.18.4
18 changes: 18 additions & 0 deletions ci/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python3

import os

from utils import setup_logging, shell
from os.path import abspath, join

TOPDIR = abspath(join(os.getcwd()))


def main():
shell("py.test", env=dict(os.environ))



if __name__ == "__main__":
setup_logging()
main()
50 changes: 50 additions & 0 deletions ci/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import logging
from subprocess import PIPE, CalledProcessError, Popen
import sys

import termcolor


logger = logging.getLogger(__name__)


def setup_logging():
kw = {
"format": "[%(asctime)s][%(module)s]: %(message)s",
"datefmt": "%m/%d/%Y %H:%M:%S",
"level": logging.DEBUG,
"stream": sys.stdout,
}

logging.basicConfig(**kw)
logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
logging.WARNING
)


def shell(cmd, inputdata=None, wait=True, **kw):
info('calling "%s" in %s', cmd, kw.get("cwd", os.getcwd()))
kw["shell"] = not isinstance(cmd, list)
kw["stdin"] = PIPE if inputdata else None
p = Popen(cmd, **kw)
if inputdata:
p.communicate(inputdata)
if wait:
p.wait()
if p.returncode:
raise CalledProcessError(p.returncode, cmd)
else:
return p


def info(fmt, *a):
logger.info(green(fmt), *a)


def green(s):
return _color(s, "green")


def _color(s, color):
return s if not os.isatty(sys.stdout.fileno()) else termcolor.colored(str(s), color)
65 changes: 65 additions & 0 deletions objwrapper/alioss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import http.client
import oss2

# set log level to WARNING
# the api set_file_logger exists after oss2 2.6.0, which has a lot of 'INFO' log
try:
log_file_path = "log.log"
oss2.set_file_logger(log_file_path, 'oss2', logging.WARNING)
except:
pass

class OSSConf(object):
def __init__(self, key_id, key, bucket_name, host, use_https):
self.key_id = key_id
self.key = key
self.bucket_name = bucket_name
self.host = host
self.use_https = use_https

class SeafOSSClient(object):
'''Wraps a oss connection and a bucket'''
def __init__(self, conf):
self.conf = conf
if conf.use_https:
host = 'https://%s' % conf.host
else:
host = 'http://%s' % conf.host
# Due to a bug in httplib we can't use https
self.auth = oss2.Auth(conf.key_id, conf.key)
self.service = oss2.Service(self.auth, conf.host)
self.bucket = oss2.Bucket(self.auth, conf.host, conf.bucket_name)

def read_object_content(self, obj_id):
res = self.bucket.get_object(obj_id)
return res.read()

def read_obj_raw(self, real_obj_id):
data = self.read_object_content(real_obj_id)
return data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

作为一个底层通用的库,只需要提供一个 read_obj(self, key) 的方法就行了。


def get_name(self):
return 'OSS storage backend'

def list_objs(self, repo_id=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

作为一个通用的库,不应该有 seafile 特定的信息。这里 repo_id 应该改为 prefix。

for key in oss2.ObjectIterator(self.bucket, prefix=repo_id):
token = key.key.split('/')
if len(token) == 2:
repo_id = token[0]
obj_id = token[1]
size = key.size
obj = [repo_id, obj_id, size]
yield obj

def obj_exists(self, key):
return self.bucket.object_exists(key)

def write_obj(self, data, key):
self.bucket.put_object(key, data)

def remove_obj(self, key):
self.bucket.delete_object(key)

def stat_raw(self, key):
size = self.bucket.get_object_meta(key).headers['Content-Length']
return int(size)
118 changes: 118 additions & 0 deletions objwrapper/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import boto3
from botocore.exceptions import ClientError

class S3Conf(object):
def __init__(self, key_id, key, bucket_name, host, port, use_v4_sig, aws_region, use_https, path_style_request, sse_c_key):
self.key_id = key_id
self.key = key
self.bucket_name = bucket_name
self.host = host
self.port = port
self.use_v4_sig = use_v4_sig
self.aws_region = aws_region
self.use_https = use_https
self.path_style_request = path_style_request
self.sse_c_key = sse_c_key


class SeafS3Client(object):
"""Wraps a s3 connection and a bucket"""
def __init__(self, conf):
self.conf = conf
self.client = None
self.bucket = None
self.do_connect()

def do_connect(self):
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
addressing_style = 'virtual'
if self.conf.path_style_request:
addressing_style = 'path'
if self.conf.use_v4_sig:
config = boto3.session.Config(signature_version='s3v4', s3={'addressing_style':addressing_style})
else:
config = boto3.session.Config(signature_version='s3',s3={'addressing_style':addressing_style})

if self.conf.host is None:
self.client = boto3.client('s3',
region_name=self.conf.aws_region,
aws_access_key_id=self.conf.key_id,
aws_secret_access_key=self.conf.key,
use_ssl=self.conf.use_https,
config=config)
else:
# https://github.com/boto/boto3/blob/master/boto3/session.py#L265
endpoint_url = 'https://%s' % self.conf.host if self.conf.use_https else 'http://%s' % self.conf.host
if self.conf.port:
endpoint_url = '%s:%s' % (endpoint_url, self.conf.port)
self.client = boto3.client('s3',
aws_access_key_id=self.conf.key_id,
aws_secret_access_key=self.conf.key,
endpoint_url=endpoint_url,
config=config)

self.bucket = self.conf.bucket_name

def read_object_content(self, obj_id):
if self.conf.sse_c_key:
obj = self.client.get_object(Bucket=self.bucket, Key=obj_id, SSECustomerKey=self.conf.sse_c_key, SSECustomerAlgorithm='AES256')
else:
obj = self.client.get_object(Bucket=self.bucket, Key=obj_id)
return obj.get('Body').read()


def read_obj_raw(self, real_obj_id):
data = self.read_object_content(real_obj_id)
return data

def get_name(self):
return 'S3 storage backend'

def list_objs(self, repo_id=None):
paginator = self.client.get_paginator('list_objects_v2')
if repo_id:
iterator = paginator.paginate(Bucket=self.bucket, Prefix=repo_id)
else:
iterator = paginator.paginate(Bucket=self.bucket)
for page in iterator:
for content in page.get('Contents', []):
tokens = content.get('Key', '').split('/')
if len(tokens) == 2:
repo_id = tokens[0]
obj_id = tokens[1]
obj = [repo_id, obj_id, content.get('Size', 0)]
yield obj


def obj_exists(self, s3_path):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里的参数都改为 key 吧。

bucket = self.bucket
try:
if self.conf.sse_c_key:
self.client.head_object(Bucket=bucket, Key=s3_path, SSECustomerKey=self.conf.sse_c_key, SSECustomerAlgorithm='AES256')
else:
self.client.head_object(Bucket=bucket, Key=s3_path)
exists = True
except ClientError:
exists = False

return exists

def write_obj(self, data, s3_path):
bucket = self.bucket
if self.conf.sse_c_key:
self.client.put_object(Bucket=bucket, Key=s3_path, Body=data, SSECustomerKey=self.conf.sse_c_key, SSECustomerAlgorithm='AES256')
else:
self.client.put_object(Bucket=bucket, Key=s3_path, Body=data)

def remove_obj(self, s3_path):
bucket = self.bucket
self.client.delete_object(Bucket=bucket, Key=s3_path)

def stat_raw(self, s3_path):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stat_raw 改为 stat_obj。

bucket = self.bucket
if self.conf.sse_c_key:
obj = self.client.get_object(Bucket=bucket, Key=s3_path, SSECustomerKey=self.conf.sse_c_key, SSECustomerAlgorithm='AES256')
else:
obj = self.client.get_object(Bucket=bucket, Key=s3_path)
size = int(obj.get('ContentLength'))
return size
68 changes: 6 additions & 62 deletions seafobj/backends/alioss.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,7 @@
from .base import AbstractObjStore

from seafobj.exceptions import GetObjectError
import http.client
import oss2

# set log level to WARNING
# the api set_file_logger exists after oss2 2.6.0, which has a lot of 'INFO' log
try:
log_file_path = "log.log"
oss2.set_file_logger(log_file_path, 'oss2', logging.WARNING)
except:
pass

class OSSConf(object):
def __init__(self, key_id, key, bucket_name, host, use_https):
self.key_id = key_id
self.key = key
self.bucket_name = bucket_name
self.host = host
self.use_https = use_https

class SeafOSSClient(object):
'''Wraps a oss connection and a bucket'''
def __init__(self, conf):
self.conf = conf
if conf.use_https:
host = 'https://%s' % conf.host
else:
host = 'http://%s' % conf.host
# Due to a bug in httplib we can't use https
self.auth = oss2.Auth(conf.key_id, conf.key)
self.service = oss2.Service(self.auth, conf.host)
self.bucket = oss2.Bucket(self.auth, conf.host, conf.bucket_name)

def read_object_content(self, obj_id):
res = self.bucket.get_object(obj_id)
return res.read()
from objwrapper.alioss import SeafOSSClient

class SeafObjStoreOSS(AbstractObjStore):
'''OSS backend for seafile objects'''
Expand All @@ -52,46 +18,24 @@ def get_name(self):
return 'OSS storage backend'

def list_objs(self, repo_id=None):
object_list = []
next_marker = ''
while True:
if repo_id:
Simp_obj_info = self.oss_client.bucket.list_objects(repo_id, '',next_marker)
else:
Simp_obj_info = self.oss_client.bucket.list_objects('', '', next_marker)

object_list = Simp_obj_info.object_list

for key in object_list:
token = key.key.split('/')
if len(token) == 2:
repo_id = token[0]
obj_id = token[1]
size = key.size
obj = [repo_id, obj_id, size]
yield obj

if Simp_obj_info.is_truncated == False:
break
else:
next_marker = Simp_obj_info.next_marker
return self.oss_client.list_objs(repo_id)

def obj_exists(self, repo_id, obj_id):
key = '%s/%s' % (repo_id, obj_id)

return self.oss_client.bucket.object_exists(key)
return self.oss_client.obj_exists(key)

def write_obj(self, data, repo_id, obj_id):
key = '%s/%s' % (repo_id, obj_id)

self.oss_client.bucket.put_object(key, data)
self.oss_client.write_obj(data, key)

def remove_obj(self, repo_id, obj_id):
key = '%s/%s' % (repo_id, obj_id)

self.oss_client.bucket.delete_object(key)
self.oss_client.remove_obj(key)

def stat_raw(self, repo_id, obj_id):
key = '%s/%s' % (repo_id, obj_id)

return self.oss_client.bucket.get_object_meta(key).headers['Size']
return self.oss_client.stat_raw(key)
Loading
Loading