This repository has been archived by the owner on Dec 10, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.php
127 lines (100 loc) · 2.99 KB
/
benchmark.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
<?php
use jocoon\parquet\ParquetOptions;
use jocoon\parquet\ParquetReader;
use jocoon\parquet\ParquetWriter;
use jocoon\parquet\CompressionMethod;
require_once('vendor/autoload.php');
// The benchmark keeps every column of the input file in memory at once,
// so raise the memory limit before doing anything else.
ini_set('memory_limit', '4G');

// Number of benchmark passes to run.
$iterations = 10;

// Timings collected per pass, in the unit reported by ReadLargeFile().
// The write lists only receive truthy values, so a phase that did not
// report a time contributes nothing to its mean.
$readTimes = [];
$uwts = [];
$gwts = [];
$swts = [];

// Arithmetic mean of a list of samples, or null when the list is empty.
$mean = static function (array $samples) {
    $n = count($samples);
    return $n > 0 ? array_sum($samples) / $n : null;
};

for ($i = 0; $i < $iterations; $i++) {
    // Reset the output slots so a phase skipped by ReadLargeFile() shows up as null.
    $readTime = null;
    $uwt = null;
    $gwt = null;
    $swt = null;

    ReadLargeFile($readTime, $uwt, $gwt, $swt);

    $readTimes[] = $readTime;
    if ($uwt) {
        $uwts[] = $uwt;
    }
    if ($gwt) {
        $gwts[] = $gwt;
    }
    if ($swt) {
        $swts[] = $swt;
    }

    echo "iteration #{$i}: {$readTime}, uwt: {$uwt}, gwt: {$gwt}, swt: {$swt}" . "\n";
}

// $readTimes always holds one entry per iteration, so its mean is never null.
$meanRead = $mean($readTimes);
$meanUw = $mean($uwts);
$meanGw = $mean($gwts);
$meanSw = $mean($swts);

echo "mean(read): {$meanRead}, mean(uw): {$meanUw}, mean(gw): {$meanGw}, mean(sw): {$meanSw}";
/**
 * Runs one benchmark pass.
 *
 * Reads every data field of row group 0 from tests/data/customer.impala.parquet
 * (relative to this script), then writes the columns back out three times:
 * uncompressed, gzip-compressed and, when the snappy extension is loaded,
 * snappy-compressed. Output files are created relative to the current
 * working directory.
 *
 * All timings are measured with hrtime() and reported in seconds through the
 * by-reference parameters.
 *
 * @param float|null $readTime              Receives the time spent opening the input and reading all columns.
 * @param float|null $uncompressedWriteTime Receives the time spent writing perf.uncompressed.parquet.
 * @param float|null $gzipWriteTime         Receives the time spent writing perf.gzip.parquet.
 * @param float|null $snappyWriteTime       Receives the time spent writing perf.snappy.parquet;
 *                                          left untouched when the snappy extension is not loaded.
 *
 * @throws \RuntimeException If the input file or one of the output files cannot be opened.
 */
function ReadLargeFile(&$readTime, &$uncompressedWriteTime, &$gzipWriteTime, &$snappyWriteTime)
{
    $source = __DIR__ . '/tests/data/customer.impala.parquet';

    // The read timer starts before fopen(), so it includes opening the file.
    $start = hrtime(true);
    $handle = fopen($source, 'r');
    if ($handle === false) {
        throw new \RuntimeException("Unable to open '{$source}' for reading");
    }

    try {
        $opts = new ParquetOptions();
        $opts->TreatByteArrayAsString = true;
        $reader = new ParquetReader($handle, $opts);
        $schema = $reader->schema;

        $rgr = $reader->OpenRowGroupReader(0);
        $columns = [];
        foreach ($schema->getDataFields() as $field) {
            $columns[] = $rgr->ReadColumn($field);
        }
        $readTime = (hrtime(true) - $start) / 1e9;

        // let GC collect
        $reader = null;

        //
        // Writing uncompressed data
        //
        $uncompressedWriteTime = WriteParquetBenchmark(
            'perf.uncompressed.parquet',
            $schema,
            $columns,
            CompressionMethod::None
        );

        //
        // Writing GZIP compressed data
        //
        $gzipWriteTime = WriteParquetBenchmark(
            'perf.gzip.parquet',
            $schema,
            $columns,
            CompressionMethod::Gzip
        );

        //
        // Execute snappy-compression benchmark
        // only if ext is available
        //
        if (extension_loaded('snappy')) {
            $snappyWriteTime = WriteParquetBenchmark(
                'perf.snappy.parquet',
                $schema,
                $columns,
                CompressionMethod::Snappy
            );
        }
    } finally {
        // Close the input explicitly; the is_resource() guard covers the case
        // where the handle has already been closed elsewhere.
        if (is_resource($handle)) {
            fclose($handle);
        }
    }
}

/**
 * Writes the given columns as a single row group to $path and times the write.
 *
 * The timer starts after the destination has been opened and stops once the
 * writer has been finished; closing the stream is not part of the measurement.
 *
 * @param string $path              Destination file; opened with mode 'w'.
 * @param mixed  $schema            Schema handed to the ParquetWriter constructor.
 * @param array  $columns           Data columns, each passed to WriteColumn() in order.
 * @param mixed  $compressionMethod Value assigned to the writer's compressionMethod property.
 *
 * @return float Elapsed write time in seconds.
 *
 * @throws \RuntimeException If $path cannot be opened for writing.
 */
function WriteParquetBenchmark($path, $schema, array $columns, $compressionMethod)
{
    $dest = fopen($path, 'w');
    if ($dest === false) {
        throw new \RuntimeException("Unable to open '{$path}' for writing");
    }

    $writer = null;
    try {
        $start = hrtime(true);
        $writer = new ParquetWriter($schema, $dest);
        $writer->compressionMethod = $compressionMethod;
        $rg = $writer->CreateRowGroup();
        foreach ($columns as $dc) {
            $rg->WriteColumn($dc);
        }
        $rg->finish();
        $writer->finish();

        // The return expression is evaluated before the finally block runs,
        // so the cleanup below is excluded from the reported time.
        return (hrtime(true) - $start) / 1e9;
    } finally {
        // let GC collect, then release the output stream
        $rg = null;
        $writer = null;
        if (is_resource($dest)) {
            fclose($dest);
        }
    }
}