1
1
from typing import List
2
2
3
- from happybase import Connection
4
-
5
- from feast .infra .key_encoding_utils import serialize_entity_key
6
- from feast .protos .feast .types .EntityKey_pb2 import EntityKey
3
+ from happybase import ConnectionPool
7
4
8
5
9
6
class HbaseConstants :
@@ -28,7 +25,7 @@ def get_col_from_feature(feature):
28
25
return HbaseConstants .DEFAULT_COLUMN_FAMILY + ":" + feature
29
26
30
27
31
- class HbaseUtils :
28
+ class HBaseConnector :
32
29
"""
33
30
Utils class to manage different Hbase operations.
34
31
@@ -40,14 +37,22 @@ class HbaseUtils:
40
37
"""
41
38
42
39
def __init__(
    self,
    pool: ConnectionPool = None,
    host: str = None,
    port: int = None,
    connection_pool_size: int = 4,
):
    """
    Sets up the connector around a happybase connection pool.

    Arguments:
        pool: Existing ConnectionPool to reuse. When None, a new pool is built
            from host, port and connection_pool_size.
        host: Value passed as ``host`` to the newly created ConnectionPool.
        port: Value passed as ``port`` to the newly created ConnectionPool.
        connection_pool_size: Value passed as ``size`` to the newly created
            ConnectionPool. Not used when a pool is supplied.
    """
    # Record host/port on every instance. They used to be assigned only when
    # the pool was created here, so reading them on a connector built from an
    # injected pool raised AttributeError.
    self.host = host
    self.port = port

    if pool is None:
        pool = ConnectionPool(
            host=host,
            port=port,
            size=connection_pool_size,
        )
    self.pool = pool
51
56
52
57
def create_table (self , table_name : str , colm_family : List [str ]):
53
58
"""
@@ -60,7 +65,9 @@ def create_table(self, table_name: str, colm_family: List[str]):
60
65
cf_dict : dict = {}
61
66
for cf in colm_family :
62
67
cf_dict [cf ] = dict ()
63
- return self .conn .create_table (table_name , cf_dict )
68
+
69
+ with self .pool .connection () as conn :
70
+ return conn .create_table (table_name , cf_dict )
64
71
65
72
def create_table_with_default_cf (self , table_name : str ):
66
73
"""
@@ -69,7 +76,8 @@ def create_table_with_default_cf(self, table_name: str):
69
76
Arguments:
70
77
table_name: Name of the Hbase table.
71
78
"""
72
- return self .conn .create_table (table_name , {"default" : dict ()})
79
+ with self .pool .connection () as conn :
80
+ return conn .create_table (table_name , {"default" : dict ()})
73
81
74
82
def check_if_table_exist (self , table_name : str ):
75
83
"""
@@ -78,16 +86,18 @@ def check_if_table_exist(self, table_name: str):
78
86
Arguments:
79
87
table_name: Name of the Hbase table.
80
88
"""
81
- return bytes (table_name , "utf-8" ) in self .conn .tables ()
89
+ with self .pool .connection () as conn :
90
+ return bytes (table_name , "utf-8" ) in conn .tables ()
82
91
83
92
def batch(self, table_name: str):
    """
    Hands back a "Batch" instance for mass data manipulation in the hbase table.

    Arguments:
        table_name: Name of the Hbase table.
    """
    # NOTE(review): the Batch is created while a pooled connection is held but
    # is returned after the `with` block has exited, i.e. once the connection
    # has been handed back to the pool -- confirm that using the Batch
    # afterwards is safe.
    with self.pool.connection() as pooled_conn:
        hbase_table = pooled_conn.table(table_name)
        return hbase_table.batch()
91
101
92
102
def put (self , table_name : str , row_key : str , data : dict ):
93
103
"""
@@ -98,8 +108,9 @@ def put(self, table_name: str, row_key: str, data: dict):
98
108
row_key: Row key of the row to be inserted to hbase table.
99
109
data: Mapping of column family name:column name to column values
100
110
"""
101
- table = self .conn .table (table_name )
102
- table .put (row_key , data )
111
+ with self .pool .connection () as conn :
112
+ table = conn .table (table_name )
113
+ table .put (row_key , data )
103
114
104
115
def row (
105
116
self ,
@@ -119,8 +130,9 @@ def row(
119
130
timestamp: timestamp specifies the maximum version the cells can have.
120
131
include_timestamp: specifies if (column, timestamp) to be return instead of only column.
121
132
"""
122
- table = self .conn .table (table_name )
123
- return table .row (row_key , columns , timestamp , include_timestamp )
133
+ with self .pool .connection () as conn :
134
+ table = conn .table (table_name )
135
+ return table .row (row_key , columns , timestamp , include_timestamp )
124
136
125
137
def rows (
126
138
self ,
@@ -140,52 +152,69 @@ def rows(
140
152
timestamp: timestamp specifies the maximum version the cells can have.
141
153
include_timestamp: specifies if (column, timestamp) to be return instead of only column.
142
154
"""
143
- table = self .conn .table (table_name )
144
- return table .rows (row_keys , columns , timestamp , include_timestamp )
155
+ with self .pool .connection () as conn :
156
+ table = conn .table (table_name )
157
+ return table .rows (row_keys , columns , timestamp , include_timestamp )
145
158
146
159
def print_table(self, table_name):
    """Scans the hbase table and prints every row key alongside its columns."""
    with self.pool.connection() as pooled_conn:
        # The scan is consumed while the pooled connection is still held.
        for raw_key, columns in pooled_conn.table(table_name).scan():
            print(raw_key.decode("utf-8"), columns)
152
166
153
167
def delete_table(self, table: str):
    """Deletes the named hbase table; does nothing when the table is absent."""
    if not self.check_if_table_exist(table):
        return

    with self.pool.connection() as pooled_conn:
        # disable=True is forwarded to the connection's delete_table call.
        pooled_conn.delete_table(table, disable=True)
157
172
158
173
def close_conn(self):
    """Borrows a happybase connection from the pool and closes it."""
    # NOTE(review): only the single connection handed out by this call is
    # closed here; whether the pool's other connections stay open should be
    # confirmed against the happybase ConnectionPool behaviour.
    with self.pool.connection() as pooled_conn:
        pooled_conn.close()
161
177
162
178
163
179
def main():
    """Demo: read three driver rows from a local hbase table and print their columns."""
    from feast.infra.key_encoding_utils import serialize_entity_key
    from feast.protos.feast.types.EntityKey_pb2 import EntityKey
    from feast.protos.feast.types.Value_pb2 import Value

    def hex_row_key(driver_id):
        # Serialize a single-join-key entity key and hex-encode it for use as a row key.
        entity_key = EntityKey(
            join_keys=["driver_id"], entity_values=[Value(int64_val=driver_id)]
        )
        return serialize_entity_key(
            entity_key,
            entity_key_serialization_version=2,
        ).hex()

    pool = ConnectionPool(host="localhost", port=9090, size=2)
    with pool.connection() as connection:
        table = connection.table("test_hbase_driver_hourly_stats")
        row_keys = [hex_row_key(driver_id) for driver_id in (1004, 1005, 1024)]

        for _, row in table.rows(row_keys):
            for key, value in row.items():
                # Column names come back as b"family:qualifier"; print the part after the first colon.
                col_name = bytes.decode(key, "utf-8").split(":")[1]
                print(col_name, value)
            print()
189
218
190
219
191
220
if __name__ == "__main__" :
0 commit comments