Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ set(CYTHON_EXTENSIONS
error
scalar
schema
table
)

foreach(module ${CYTHON_EXTENSIONS})
Expand Down
4 changes: 3 additions & 1 deletion python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@
list_, struct, field,
DataType, Field, Schema, schema)

from pyarrow.array import RowBatch, Table, from_pandas_dataframe
from pyarrow.array import RowBatch, from_pandas_dataframe

from pyarrow.table import Column, Table
2 changes: 2 additions & 0 deletions python/pyarrow/array.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ cdef class Array:
cdef init(self, const shared_ptr[CArray]& sp_array)
cdef getitem(self, int i)

cdef object box_arrow_array(const shared_ptr[CArray]& sp_array)


# Subclass of Array; it adds no attribute or method declarations of its own
# in this declaration file.
cdef class BooleanArray(Array):
    pass
Expand Down
75 changes: 2 additions & 73 deletions python/pyarrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ from pyarrow.scalar import NA
from pyarrow.schema cimport Schema
import pyarrow.schema as schema

from pyarrow.table cimport Table

def total_allocated_bytes():
    """
    Return the value of ``bytes_allocated()`` for the memory pool handed
    back by ``pyarrow.GetMemoryPool()``.
    """
    cdef MemoryPool* memory_pool = pyarrow.GetMemoryPool()
    return memory_pool.bytes_allocated()
Expand Down Expand Up @@ -287,76 +289,3 @@ cdef class RowBatch:
return self.arrays[i]


cdef class Table:
    '''
    Python-level wrapper around an arrow::Table held through a shared_ptr.

    Do not call this class's constructor directly; build instances with
    Table.from_arrays instead.
    '''
    cdef:
        # Handle to the wrapped C++ table, stored by init()
        shared_ptr[CTable] sp_table
        # Raw pointer obtained from sp_table.get(); set together with it
        CTable* table

    def __cinit__(self):
        pass

    cdef init(self, const shared_ptr[CTable]& table):
        # Store the shared_ptr on the instance and cache the raw pointer it
        # manages so methods can call into the C++ object directly.
        self.sp_table = table
        self.table = table.get()

    @staticmethod
    def from_pandas(df, name=None):
        # Placeholder: no conversion is implemented here, so this currently
        # returns None.
        pass

    @staticmethod
    def from_arrays(names, arrays, name=None):
        """
        Construct a Table from parallel sequences of column names and arrays

        Parameters
        ----------
        names : sequence of str
            Column names, one per entry of ``arrays``
        arrays : sequence of Array
            Column data; ``arrays[i]`` becomes the column called ``names[i]``
        name : str, optional
            Name of the table; the empty string is used when None

        Returns
        -------
        Table

        Raises
        ------
        ValueError
            If ``names`` and ``arrays`` do not have the same length
        """
        cdef:
            Array arr
            Table result
            c_string c_name
            vector[shared_ptr[CField]] fields
            vector[shared_ptr[CColumn]] columns
            shared_ptr[CSchema] schema
            shared_ptr[CTable] table

        cdef int K = len(arrays)

        # Validate up front: without this check surplus names were silently
        # ignored and missing names only surfaced as an IndexError part-way
        # through building the columns.
        if len(names) != K:
            raise ValueError(
                'Length of names ({0}) does not match length of arrays ({1})'
                .format(len(names), K))

        fields.resize(K)
        columns.resize(K)
        for i in range(K):
            arr = arrays[i]
            c_name = tobytes(names[i])

            # One field and one column per input array. The trailing True is
            # presumably the field's nullable flag -- TODO confirm against
            # the CField declaration.
            fields[i].reset(new CField(c_name, arr.type.sp_type, True))
            columns[i].reset(new CColumn(fields[i], arr.sp_array))

        # c_name is reused here for the table's own name
        if name is None:
            c_name = ''
        else:
            c_name = tobytes(name)

        schema.reset(new CSchema(fields))
        table.reset(new CTable(c_name, schema, columns))

        result = Table()
        result.init(table)

        return result

    def to_pandas(self):
        """
        Convert the arrow::Table to a pandas DataFrame

        Each column is converted with pyarrow.ArrowToPandas and the results
        are assembled into a DataFrame whose column order follows the table.

        Returns
        -------
        pandas.DataFrame
        """
        cdef:
            PyObject* arr
            shared_ptr[CColumn] col

        # Imported lazily so pandas is only needed when this method is called
        import pandas as pd

        names = []
        data = []
        for i in range(self.table.num_columns()):
            col = self.table.column(i)
            # check_status presumably raises when the returned Status is not
            # OK -- defined outside this block
            check_status(pyarrow.ArrowToPandas(col, &arr))
            names.append(frombytes(col.get().name()))
            data.append(<object> arr)

        # Passing columns=names keeps the table's column order
        return pd.DataFrame(dict(zip(names, data)), columns=names)
5 changes: 4 additions & 1 deletion python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_string GetString(int i)

cdef cppclass CChunkedArray" arrow::ChunkedArray":
pass
int64_t length()
int64_t null_count()
int num_chunks()
const shared_ptr[CArray]& chunk(int i)

cdef cppclass CColumn" arrow::Column":
CColumn(const shared_ptr[CField]& field,
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/schema.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,7 @@ cdef class Schema:
CSchema* schema

cdef init(self, const vector[shared_ptr[CField]]& fields)
cdef init_schema(self, const shared_ptr[CSchema]& schema)

cdef DataType box_data_type(const shared_ptr[CDataType]& type)
cdef Schema box_schema(const shared_ptr[CSchema]& schema)
9 changes: 9 additions & 0 deletions python/pyarrow/schema.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ cdef class Schema:
self.schema = new CSchema(fields)
self.sp_schema.reset(self.schema)

cdef init_schema(self, const shared_ptr[CSchema]& schema):
    # Adopt an existing schema handle: store the shared_ptr on the instance
    # and cache the raw pointer it manages.
    self.sp_schema = schema
    self.schema = schema.get()

@classmethod
def from_fields(cls, fields):
cdef:
Expand Down Expand Up @@ -223,3 +227,8 @@ cdef DataType box_data_type(const shared_ptr[CDataType]& type):
cdef DataType out = DataType()
out.init(type)
return out

cdef Schema box_schema(const shared_ptr[CSchema]& schema):
    # Wrap an existing CSchema shared_ptr in a new Schema object.
    # The parameter is named `schema` to match the declaration in the .pxd
    # file and to avoid shadowing the builtin `type`.
    cdef Schema out = Schema()
    out.init_schema(schema)
    return out
46 changes: 46 additions & 0 deletions python/pyarrow/table.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from pyarrow.includes.common cimport shared_ptr
from pyarrow.includes.libarrow cimport CChunkedArray, CColumn, CTable


cdef class ChunkedArray:
    # Handle to the wrapped C++ chunked array (CChunkedArray)
    cdef shared_ptr[CChunkedArray] sp_chunked_array
    # Raw pointer counterpart of sp_chunked_array; presumably assigned by
    # init() -- the implementation lives outside this declaration file
    cdef CChunkedArray* chunked_array

    cdef init(self, const shared_ptr[CChunkedArray]& chunked_array)
    cdef _check_nullptr(self)


cdef class Column:
    # Handle to the wrapped C++ column (CColumn)
    cdef shared_ptr[CColumn] sp_column
    # Raw pointer counterpart of sp_column; presumably assigned by init() --
    # the implementation lives outside this declaration file
    cdef CColumn* column

    cdef init(self, const shared_ptr[CColumn]& column)
    cdef _check_nullptr(self)


cdef class Table:
    # Handle to the wrapped C++ table (CTable)
    cdef shared_ptr[CTable] sp_table
    # Raw pointer counterpart of sp_table; presumably assigned by init() --
    # the implementation lives outside this declaration file
    cdef CTable* table

    cdef init(self, const shared_ptr[CTable]& table)
    cdef _check_nullptr(self)
Loading