|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*-- |
| 3 | + |
| 4 | +# Copyright (c) 2022 Oracle and/or its affiliates. |
| 5 | +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ |
| 6 | + |
| 7 | +from abc import ABC, abstractmethod |
| 8 | +from time import time |
| 9 | +from typing import Dict, Iterator, List, Optional, Union |
| 10 | + |
| 11 | +import impala |
| 12 | +import impala.dbapi as impyla # noqa |
| 13 | +import pandas as pd |
| 14 | +from impala.error import Error as ImpylaError # noqa |
| 15 | +from impala.error import HiveServer2Error as HS2Error # noqa |
| 16 | + |
| 17 | + |
| 18 | +class HiveConnection(ABC): |
| 19 | + """Base class Interface.""" |
| 20 | + |
| 21 | + def __init__(self, **params): |
| 22 | + """set up the impala connection.""" |
| 23 | + self.params = params |
| 24 | + self.con = None # setup the connection |
| 25 | + |
| 26 | + @abstractmethod |
| 27 | + def get_cursor(self): |
| 28 | + """return the cursor from the connection. |
| 29 | +
|
| 30 | + Returns |
| 31 | + ------- |
| 32 | + HiveServer2Cursor: |
| 33 | + cursor using a specific client. |
| 34 | + """ |
| 35 | + return None |
| 36 | + |
| 37 | + |
| 38 | +class ImpylaHiveConnection(HiveConnection): |
| 39 | + """ImpalaHiveConnection class which uses impyla client.""" |
| 40 | + |
| 41 | + def __init__(self, **params): |
| 42 | + """set up the impala connection.""" |
| 43 | + self.params = params |
| 44 | + self.con = None # setup the connection |
| 45 | + |
| 46 | + def get_cursor(self) -> "impala.hiveserver2.HiveServer2Cursor": |
| 47 | + """return the cursor from the connection. |
| 48 | +
|
| 49 | + Returns |
| 50 | + ------- |
| 51 | + impala.hiveserver2.HiveServer2Cursor: |
| 52 | + cursor using impyla client. |
| 53 | + """ |
| 54 | + return None |
| 55 | + |
| 56 | + |
| 57 | +class OracleHiveConnection(ImpylaHiveConnection): |
| 58 | + def __init__( |
| 59 | + self, |
| 60 | + host: str, |
| 61 | + port: str, |
| 62 | + **kwargs, |
| 63 | + ): |
| 64 | + """Initiate the connection. |
| 65 | +
|
| 66 | + Parameters |
| 67 | + ---------- |
| 68 | + host: str |
| 69 | + Hive host name. |
| 70 | + port: str |
| 71 | + Hive port. |
| 72 | + kwargs: |
| 73 | + Other connection parameters accepted by the client. |
| 74 | + """ |
| 75 | + pass |
| 76 | + |
| 77 | + def insert( |
| 78 | + self, |
| 79 | + table_name: str, |
| 80 | + df: pd.DataFrame, |
| 81 | + if_exists: str, |
| 82 | + partition: List[str] = None, |
| 83 | + ): |
| 84 | + """insert a table from a pandas dataframe. |
| 85 | +
|
| 86 | + Parameters |
| 87 | + ---------- |
| 88 | + table_name (str): |
| 89 | + Table Name. |
| 90 | + df (pd.DataFrame): |
| 91 | + Data to be injected to the database. |
| 92 | + if_exists (str): |
| 93 | + Whether to replace, append or fail if the table already exists. |
| 94 | + partition (List[str], optional): Defaults to None. |
| 95 | + For partitioned tables, indicate the partition that's being |
| 96 | + inserted into, either with an ordered list of partition keys or a |
| 97 | + dict of partition field name to value. For example for the |
| 98 | + partition (year=2007, month=7), this can be either (2007, 7) or |
| 99 | + {'year': 2007, 'month': 7}. |
| 100 | + """ |
| 101 | + if if_exists not in ["fail", "replace", "append"]: |
| 102 | + raise ValueError( |
| 103 | + "Unknown option `if_exists`={if_exists}. Valid options are 'fail', 'replace', 'append'" |
| 104 | + ) |
| 105 | + pass |
| 106 | + |
| 107 | + def _fetch_by_batch( |
| 108 | + self, cursor: "impala.hiveserver2.HiveServer2Cursor", chunksize: int |
| 109 | + ): |
| 110 | + """fetch the data by batch of chunksize.""" |
| 111 | + while True: |
| 112 | + rows = cursor.fetchmany(chunksize) |
| 113 | + if rows: |
| 114 | + yield rows |
| 115 | + else: |
| 116 | + break |
| 117 | + |
| 118 | + def query( |
| 119 | + self, |
| 120 | + sql: str, |
| 121 | + bind_variables: Optional[Dict] = None, |
| 122 | + chunksize: Optional[int] = None, |
| 123 | + ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: |
| 124 | + """Query data which support select statement. |
| 125 | +
|
| 126 | + Parameters |
| 127 | + ---------- |
| 128 | + sql (str): |
| 129 | + sql query. |
| 130 | + bind_variables (Optional[Dict]): |
| 131 | + Parameters to be bound to variables in the SQL query, if any. |
| 132 | + Impyla supports all DB API `paramstyle`s, including `qmark`, |
| 133 | + `numeric`, `named`, `format`, `pyformat`. |
| 134 | + chunksize (Optional[int]): . Defaults to None. |
| 135 | + chunksize of each of the dataframe in the iterator. |
| 136 | +
|
| 137 | + Returns |
| 138 | + ------- |
| 139 | + Union[pd.DataFrame, Iterator[pd.DataFrame]]: |
| 140 | + A pandas DataFrame or a pandas DataFrame iterator. |
| 141 | + """ |
| 142 | + return None |
0 commit comments