PyArrow
Install:
pip install pyarrow
Load data from a local path
import pyarrow.parquet as pq
# Read the Parquet data stored at the given path into `table`.
table_path = '/path/to/table'
table = pq.read_table(table_path)
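Load data from OSS (Alibaba Cloud Object Storage Service); this additionally requires the ossfs package (pip install ossfs)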
import ossfs
import pyarrow.parquet as pq
# Connection settings. The '***' values and 'bucket-name' are placeholders:
# substitute your own bucket and credentials before running.
# NOTE(review): the '-internal' hostname looks like an intra-cloud endpoint;
# confirm it is reachable from the machine that runs this snippet.
OSS_ENDPOINT = 'http://oss-cn-wulanchabu-internal.aliyuncs.com'
OSS_BUCKET = 'bucket-name'
OSS_ACCESS_KEY = '***'
OSS_ACCESS_SECRET = '***'

# Filesystem object bound to the endpoint and authenticated with the
# key/secret pair above.
fs = ossfs.OSSFileSystem(
    endpoint=OSS_ENDPOINT,
    key=OSS_ACCESS_KEY,
    secret=OSS_ACCESS_SECRET,
)

# The bucket name is the leading component of the path handed to read_table;
# passing `filesystem=fs` makes the read go through the OSS filesystem.
table_path = f'{OSS_BUCKET}/path/to/table'
table = pq.read_table(table_path, filesystem=fs)