libcudf  23.12.00
types.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
// CUDF_HOST_DEVICE expands to the CUDA `__host__ __device__` qualifiers when
// the translation unit is compiled by a CUDA compiler (`__CUDACC__` defined)
// and to nothing otherwise, so annotated declarations also build with a plain
// C++ compiler.
#ifdef __CUDACC__
#define CUDF_HOST_DEVICE __host__ __device__
#else
#define CUDF_HOST_DEVICE
#endif
24 
25 #include <cassert>
26 #include <cstddef>
27 #include <cstdint>
28 #include <iterator>
29 
// Forward declarations
namespace rmm {
// Declared here only so this header can name the type without including its
// definition.
class device_buffer;

}  // namespace rmm
42 
43 namespace cudf {
// Forward declarations: these names are only declared here, so code including
// this header can form pointers and references to them without their
// definitions.

// Columns and element views
class column;
class column_view;
class mutable_column_view;
class string_view;
class list_view;
class struct_view;

// Scalars
class scalar;

// clang-format off
class list_scalar;
class struct_scalar;
class string_scalar;
template <typename T> class numeric_scalar;
template <typename T> class fixed_point_scalar;
template <typename T> class timestamp_scalar;
template <typename T> class duration_scalar;

// Scalar device views
class string_scalar_device_view;
template <typename T> class numeric_scalar_device_view;
template <typename T> class fixed_point_scalar_device_view;
template <typename T> class timestamp_scalar_device_view;
template <typename T> class duration_scalar_device_view;
// clang-format on

// Tables
class table;
class table_view;
class mutable_table_view;
73 
using size_type         = int32_t;   ///< Row index type for columns and tables
using bitmask_type      = uint32_t;  ///< Bitmask type stored as 32-bit unsigned integer
using valid_type        = uint8_t;   ///< Valid type in host memory
using thread_index_type = int64_t;   ///< Thread index type in kernels

/**
 * @brief Similar to `std::distance` but returns `size_type` and performs `static_cast`
 *
 * The result of `std::distance` is narrowed to `size_type` (a 32-bit signed
 * integer) without any range check.
 *
 * @tparam T Iterator type
 * @param f "first" iterator
 * @param l "last" iterator
 * @return The result of `std::distance(f, l)` cast to `size_type`
 */
template <typename T>
size_type distance(T f, T l)
{
  return static_cast<size_type>(std::distance(f, l));
}
98 
/**
 * @brief Indicates the order in which elements should be sorted.
 */
enum class order : bool {
  ASCENDING,  ///< Elements ordered from small to large
  DESCENDING  ///< Elements ordered from large to small
};
106 
/**
 * @brief Enum to specify whether to include nulls or exclude nulls.
 */
enum class null_policy : bool {
  EXCLUDE,  ///< exclude null elements
  INCLUDE   ///< include null elements
};
114 
/**
 * @brief Enum to treat NaN floating point value as null or non-null element.
 */
enum class nan_policy : bool {
  NAN_IS_NULL,  ///< treat nans as null elements
  NAN_IS_VALID  ///< treat nans as valid elements (non-null)
};
122 
/**
 * @brief Enum to consider different elements (of floating point types) holding NaN value as
 * equal or unequal.
 */
enum class nan_equality /*unspecified*/ {
  ALL_EQUAL,  ///< All NaNs compare equal, regardless of sign
  UNEQUAL     ///< All NaNs compare unequal (IEEE754 behavior)
};
131 
/**
 * @brief Enum to consider two nulls as equal or unequal.
 */
enum class null_equality : bool {
  EQUAL,   ///< nulls compare equal
  UNEQUAL  ///< nulls compare unequal
};
139 
/**
 * @brief Indicates how null values compare against all other values.
 */
enum class null_order : bool {
  AFTER,  ///< NULL values ordered after all other values
  BEFORE  ///< NULL values ordered before all other values
};
147 
/**
 * @brief Indicates whether a collection of values is known to be sorted.
 */
enum class sorted : bool { NO, YES };
152 
156 struct order_info {
160 };
161 
/**
 * @brief Controls the allocation/initialization of a null mask.
 */
enum class mask_state : int32_t {
  UNALLOCATED,    ///< Null mask not allocated (all elements are valid)
  UNINITIALIZED,  ///< Null mask allocated, but not initialized
  ALL_VALID,      ///< Null mask allocated, initialized to all elements valid
  ALL_NULL        ///< Null mask allocated, initialized to all elements NULL
};
171 
/**
 * @brief Interpolation method to use when the desired quantile lies between
 * two data points i and j.
 */
enum class interpolation : int32_t {
  LINEAR,    ///< Linear interpolation between i and j
  LOWER,     ///< Lower data point (i)
  HIGHER,    ///< Higher data point (j)
  MIDPOINT,  // NOTE(review): presumably the midpoint of i and j -- confirm against the upstream docs
  NEAREST    ///< i or j, whichever is nearest
};
183 
/**
 * @brief Identifies a column's logical element type
 */
enum class type_id : int32_t {
  EMPTY,                   ///< Always null with no underlying data
  INT8,                    ///< 1 byte signed integer
  INT16,                   ///< 2 byte signed integer
  INT32,                   ///< 4 byte signed integer
  INT64,                   ///< 8 byte signed integer
  UINT8,                   ///< 1 byte unsigned integer
  UINT16,                  ///< 2 byte unsigned integer
  UINT32,                  ///< 4 byte unsigned integer
  UINT64,                  ///< 8 byte unsigned integer
  FLOAT32,                 ///< 4 byte floating point
  FLOAT64,                 ///< 8 byte floating point
  BOOL8,                   ///< Boolean using one byte per value, 0 == false, else true
  TIMESTAMP_DAYS,          ///< point in time in days since Unix Epoch in int32
  TIMESTAMP_SECONDS,       ///< point in time in seconds since Unix Epoch in int64
  TIMESTAMP_MILLISECONDS,  ///< point in time in milliseconds since Unix Epoch in int64
  TIMESTAMP_MICROSECONDS,  ///< point in time in microseconds since Unix Epoch in int64
  TIMESTAMP_NANOSECONDS,   ///< point in time in nanoseconds since Unix Epoch in int64
  DURATION_DAYS,           ///< time interval of days in int32
  DURATION_SECONDS,        ///< time interval of seconds in int64
  DURATION_MILLISECONDS,   ///< time interval of milliseconds in int64
  DURATION_MICROSECONDS,   ///< time interval of microseconds in int64
  DURATION_NANOSECONDS,    ///< time interval of nanoseconds in int64
  DICTIONARY32,            ///< Dictionary type using int32 indices
  STRING,                  ///< String elements
  LIST,                    ///< List elements
  DECIMAL32,               ///< Fixed-point type with int32_t
  DECIMAL64,               ///< Fixed-point type with int64_t
  DECIMAL128,              ///< Fixed-point type with __int128_t
  STRUCT,                  ///< Struct elements
  // `NUM_TYPE_IDS` must be last!
  NUM_TYPE_IDS  ///< Total number of type ids
};
220 
227 class data_type {
228  public:
229  data_type() = default;
230  ~data_type() = default;
231  data_type(data_type const&) = default;
232  data_type(data_type&&) = default;
233 
239  data_type& operator=(data_type const&) = default;
240 
247 
253  explicit constexpr data_type(type_id id) : _id{id} {}
254 
261  explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
262  {
263  assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128);
264  }
265 
271  [[nodiscard]] constexpr type_id id() const noexcept { return _id; }
272 
278  [[nodiscard]] constexpr int32_t scale() const noexcept { return _fixed_point_scale; }
279 
280  private:
281  type_id _id{type_id::EMPTY};
282 
283  // Below is additional type specific metadata. Currently, only _fixed_point_scale is stored.
284 
285  int32_t _fixed_point_scale{}; // numeric::scale_type not available here, use int32_t
286 };
287 
300 constexpr bool operator==(data_type const& lhs, data_type const& rhs)
301 {
302  // use std::tie in the future, breaks JITIFY currently
303  return lhs.id() == rhs.id() && lhs.scale() == rhs.scale();
304 }
305 
318 inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lhs == rhs); }
319 
330 std::size_t size_of(data_type t);
331 
333 } // namespace cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:227
data_type & operator=(data_type &&)=default
Move assignment operator for data_type.
data_type(data_type &&)=default
Move constructor.
constexpr int32_t scale() const noexcept
Returns the scale (for fixed_point types)
Definition: types.hpp:278
data_type(type_id id, int32_t scale)
Construct a new data_type object for numeric::fixed_point
Definition: types.hpp:261
data_type & operator=(data_type const &)=default
Copy assignment operator for data_type.
constexpr type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:271
data_type(data_type const &)=default
Copy constructor.
constexpr data_type(type_id id)
Construct a new data_type object.
Definition: types.hpp:253
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:143
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:135
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:80
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:110
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:81
size_type distance(T f, T l)
Similar to std::distance but returns cudf::size_type and performs static_cast
Definition: types.hpp:94
constexpr bool operator==(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for equality.
Definition: types.hpp:300
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:165
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
int64_t thread_index_type
Thread index type in kernels.
Definition: types.hpp:83
nan_policy
Enum to treat NaN floating point value as null or non-null element.
Definition: types.hpp:118
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:102
bool operator!=(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for inequality.
Definition: types.hpp:318
uint8_t valid_type
Valid type in host memory.
Definition: types.hpp:82
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
Definition: types.hpp:176
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:151
type_id
Identifies a column's logical element type.
Definition: types.hpp:187
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:127
@ BEFORE
NULL values ordered before all other values.
@ AFTER
NULL values ordered after all other values.
@ EQUAL
nulls compare equal
@ UNEQUAL
nulls compare unequal
@ INCLUDE
include null elements
@ EXCLUDE
exclude null elements
@ ALL_VALID
Null mask allocated, initialized to all elements valid.
@ UNALLOCATED
Null mask not allocated (all elements are valid).
@ ALL_NULL
Null mask allocated, initialized to all elements NULL.
@ UNINITIALIZED
Null mask allocated, but not initialized.
@ NAN_IS_VALID
treat nans as valid elements (non-null)
@ NAN_IS_NULL
treat nans as null elements
@ ASCENDING
Elements ordered from small to large.
@ DESCENDING
Elements ordered from large to small.
@ HIGHER
Higher data point (j)
@ LOWER
Lower data point (i)
@ LINEAR
Linear interpolation between i and j.
@ NEAREST
i or j, whichever is nearest
@ BOOL8
Boolean using one byte per value, 0 == false, else true.
@ FLOAT64
8 byte floating point
@ UINT32
4 byte unsigned integer
@ LIST
List elements.
@ DURATION_MILLISECONDS
time interval of milliseconds in int64
@ NUM_TYPE_IDS
Total number of type ids.
@ UINT16
2 byte unsigned integer
@ INT64
8 byte signed integer
@ DECIMAL128
Fixed-point type with __int128_t.
@ INT16
2 byte signed integer
@ TIMESTAMP_MILLISECONDS
point in time in milliseconds since Unix Epoch in int64
@ DURATION_NANOSECONDS
time interval of nanoseconds in int64
@ STRING
String elements.
@ INT32
4 byte signed integer
@ DURATION_DAYS
time interval of days in int32
@ UINT64
8 byte unsigned integer
@ TIMESTAMP_MICROSECONDS
point in time in microseconds since Unix Epoch in int64
@ DURATION_SECONDS
time interval of seconds in int64
@ DURATION_MICROSECONDS
time interval of microseconds in int64
@ FLOAT32
4 byte floating point
@ STRUCT
Struct elements.
@ EMPTY
Always null with no underlying data.
@ TIMESTAMP_SECONDS
point in time in seconds since Unix Epoch in int64
@ TIMESTAMP_NANOSECONDS
point in time in nanoseconds since Unix Epoch in int64
@ TIMESTAMP_DAYS
point in time in days since Unix Epoch in int32
@ DECIMAL64
Fixed-point type with int64_t.
@ DECIMAL32
Fixed-point type with int32_t.
@ UINT8
1 byte unsigned integer
@ INT8
1 byte signed integer
@ DICTIONARY32
Dictionary type using int32 indices.
@ UNEQUAL
All NaNs compare unequal (IEEE754 behavior)
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.
cuDF interfaces
Definition: aggregation.hpp:34
Indicates how a collection of values has been ordered.
Definition: types.hpp:156
order ordering
Indicates the order in which the values are sorted.
Definition: types.hpp:158
null_order null_ordering
Indicates how null values compare against all other values.
Definition: types.hpp:159
sorted is_sorted
Indicates whether the collection is sorted.
Definition: types.hpp:157