libcudf  23.12.00
orc_types.hpp
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cstdint>
20 
21 namespace cudf::io::orc {
22 
/**
 * @brief Compression kinds known to the `cudf::io::orc` code.
 *
 * The enum is unscoped and stored in a single unsigned byte (`uint8_t`).
 * Every enumerator is given an explicit numeric value.
 *
 * NOTE(review): the numbering looks like it mirrors the `CompressionKind`
 * enum of the ORC file-format protobuf definition -- confirm against the ORC
 * specification before changing or reordering any value.
 */
enum CompressionKind : uint8_t {
  NONE   = 0,
  ZLIB   = 1,
  SNAPPY = 2,
  LZO    = 3,
  LZ4    = 4,
  ZSTD   = 5,
};
31 
/**
 * @brief Type kinds known to the `cudf::io::orc` code.
 *
 * The enum is unscoped and stored in a single signed byte (`int8_t`), which
 * leaves room for the negative `INVALID_TYPE_KIND` sentinel next to the
 * non-negative kinds numbered 0 through 17.
 *
 * NOTE(review): the non-negative values look like they mirror the `Type.Kind`
 * enum of the ORC file-format protobuf definition -- confirm against the ORC
 * specification before changing or reordering any value.
 */
enum TypeKind : int8_t {
  INVALID_TYPE_KIND = -1,  ///< sentinel; the only negative enumerator

  BOOLEAN   = 0,
  BYTE      = 1,
  SHORT     = 2,
  INT       = 3,
  LONG      = 4,
  FLOAT     = 5,
  DOUBLE    = 6,
  STRING    = 7,
  BINARY    = 8,
  TIMESTAMP = 9,
  LIST      = 10,
  MAP       = 11,
  STRUCT    = 12,
  UNION     = 13,
  DECIMAL   = 14,
  DATE      = 15,
  VARCHAR   = 16,
  CHAR      = 17,
};
53 
/**
 * @brief Stream kinds known to the `cudf::io::orc` code.
 *
 * The enum is unscoped and stored in a single signed byte (`int8_t`), which
 * leaves room for the negative `INVALID_STREAM_KIND` sentinel next to the
 * non-negative kinds numbered 0 through 8.
 */
enum StreamKind : int8_t {
  INVALID_STREAM_KIND = -1,  ///< sentinel; the only negative enumerator

  PRESENT           = 0,  ///< boolean stream of whether the next value is non-null
  DATA              = 1,  ///< the primary data stream
  LENGTH            = 2,  ///< the length of each value for variable length data
  DICTIONARY_DATA   = 3,  ///< the dictionary blob
  DICTIONARY_COUNT  = 4,  ///< deprecated prior to Hive 0.11
  SECONDARY         = 5,  ///< a secondary data stream
  ROW_INDEX         = 6,  ///< the index for seeking to particular row groups
  BLOOM_FILTER      = 7,  ///< original bloom filters used before ORC-101
  BLOOM_FILTER_UTF8 = 8,  ///< bloom filters that consistently use utf8
};
66 
/**
 * @brief Column encoding kinds known to the `cudf::io::orc` code.
 *
 * The enum is unscoped and stored in a single signed byte (`int8_t`), which
 * leaves room for the negative `INVALID_ENCODING_KIND` sentinel. The four
 * valid kinds pair a layout (direct or dictionary) with an RLE version
 * (v1 or v2).
 */
enum ColumnEncodingKind : int8_t {
  INVALID_ENCODING_KIND = -1,  ///< sentinel; the only negative enumerator

  DIRECT        = 0,  ///< the encoding is mapped directly to the stream using RLE v1
  DICTIONARY    = 1,  ///< the encoding uses a dictionary of unique values using RLE v1
  DIRECT_V2     = 2,  ///< the encoding is direct using RLE v2
  DICTIONARY_V2 = 3,  ///< the encoding is dictionary-based using RLE v2
};
74 
/**
 * @brief Field type tags used by the `cudf::io::orc` code.
 *
 * The enum is unscoped and stored in a single unsigned byte (`uint8_t`).
 * All eight values from 0 through 7 are named, including two placeholders
 * (`INVALID_6`, `INVALID_7`) for values with no valid meaning.
 *
 * NOTE(review): the names and numbering look like the Protocol Buffers wire
 * types (the low three bits of a field tag) -- confirm against the protobuf
 * encoding documentation before changing any value.
 */
enum ProtofType : uint8_t {
  VARINT      = 0,
  FIXED64     = 1,
  FIXEDLEN    = 2,
  START_GROUP = 3,  ///< deprecated
  END_GROUP   = 4,  ///< deprecated
  FIXED32     = 5,
  INVALID_6   = 6,
  INVALID_7   = 7,
};
85 
86 } // namespace cudf::io::orc
@ DECIMAL
all decimal characters