We saw in the previous post how we can deal with data stored in the new VECTOR datatype that was released with MySQL 9.0.
We implemented the 4 basic mathematical operations between two vectors. To do so we created JavaScript functions. MySQL JavaScript functions are available in MySQL HeatWave and MySQL Enterprise Edition (you can use MySQL EE for free while learning, developing, and prototyping as mentioned here).
For the MySQL Community Users, extending the operations dealing with Vectors can be done by implementing User Defined Functions (UDFs) in C++ as a component.
In this article, we will see how we can create a component to add our 4 mathematical operations to MySQL.
Of course, you need to have the source code of MySQL and be able to compile it. For more information please refer to these blog posts:
- Build MySQL 8 from the source rpm in OL9
- Extending MySQL using the Component Infrastructure – part 1
- Extending MySQL using the Component Infrastructure – part 13: FAQ
The Code
The code of the component consists of 3 files that should be placed in a dedicated folder in the components directory of the source code:
mysql-server
├──components
└──vector_operations
├── CMakeLists.txt
├── vector_operations.cc
└── vector_operations.h
Disclaimer:
This code is not intended for production use and is provided solely for illustrative purposes.
DISABLE_MISSING_PROFILE_WARNING()
INCLUDE_DIRECTORIES(SYSTEM)
MYSQL_ADD_COMPONENT(vector_operations
vector_operations.cc
MODULE_ONLY
TEST_ONLY
)
/* Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is also distributed with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have included with MySQL.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#define NO_SIGNATURE_CHANGE 0
#define SIGNATURE_CHANGE 1
#include "vector_operations.h"
REQUIRES_SERVICE_PLACEHOLDER(log_builtins);
REQUIRES_SERVICE_PLACEHOLDER(log_builtins_string);
REQUIRES_SERVICE_PLACEHOLDER(udf_registration);
REQUIRES_SERVICE_PLACEHOLDER(mysql_udf_metadata);
REQUIRES_SERVICE_PLACEHOLDER(mysql_runtime_error);
SERVICE_TYPE(log_builtins) * log_bi;
SERVICE_TYPE(log_builtins_string) * log_bs;
class udf_list {
typedef std::list<std::string> udf_list_t;
public:
~udf_list() { unregister(); }
bool add_scalar(const char *func_name, enum Item_result return_type,
Udf_func_any func, Udf_func_init init_func = NULL,
Udf_func_deinit deinit_func = NULL) {
if (!mysql_service_udf_registration->udf_register(
func_name, return_type, func, init_func, deinit_func)) {
set.push_back(func_name);
return false;
}
return true;
}
bool unregister() {
udf_list_t delete_set;
/* try to unregister all of the udfs */
for (auto udf : set) {
int was_present = 0;
if (!mysql_service_udf_registration->udf_unregister(udf.c_str(),
&was_present) ||
!was_present)
delete_set.push_back(udf);
}
/* remove the unregistered ones from the list */
for (auto udf : delete_set) set.remove(udf);
/* success: empty set */
if (set.empty()) return false;
/* failure: entries still in the set */
return true;
}
private:
udf_list_t set;
} *list;
namespace udf_impl {
void error_msg_size() {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector operation",
"both vectors must have the same size");
}
const char *udf_init = "udf_init", *my_udf = "my_udf",
*my_udf_clear = "my_clear", *my_udf_add = "my_udf_add";
// UDF to implement a vector addition function between two vectors
static bool vector_addition_udf_init(UDF_INIT *initid, UDF_ARGS *args, char *) {
if (args->arg_count < 2) {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_addition",
"this function requires 2 parameters");
return true;
}
initid->maybe_null = true;
return false;
}
static void vector_addition_udf_deinit(__attribute__((unused))
UDF_INIT *initid) {
assert(initid->ptr == udf_init || initid->ptr == my_udf);
}
const char *vector_addition_udf(UDF_INIT *, UDF_ARGS *args, char *vector_sum,
unsigned long *length, char *is_null,
char *error) {
*error = 0;
*is_null = 0;
uint32_t dim_vec1 = get_dimensions(args->lengths[0], sizeof(float));
uint32_t dim_vec2 = get_dimensions(args->lengths[1], sizeof(float));
if (dim_vec1 != dim_vec2 || dim_vec1 == UINT32_MAX ||
dim_vec2 == UINT32_MAX) {
error_msg_size();
*error = 1;
*is_null = 1;
return 0;
}
float *vec1 = ((float *)args->args[0]);
float *vec2 = ((float *)args->args[1]);
unsigned long vector_length = 0;
vector_sum = vector_addition(dim_vec1, vec1, vec2, &vector_length);
*length = vector_length;
return const_cast<char *>(vector_sum);
}
// UDF to implement a vector subtraction function between two vectors
static bool vector_subtraction_udf_init(UDF_INIT *initid, UDF_ARGS *args,
char *) {
if (args->arg_count < 2) {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_subtraction",
"this function requires 2 parameters");
return true;
}
initid->maybe_null = true;
return false;
}
static void vector_subtraction_udf_deinit(__attribute__((unused))
UDF_INIT *initid) {
assert(initid->ptr == udf_init || initid->ptr == my_udf);
}
const char *vector_subtraction_udf(UDF_INIT *, UDF_ARGS *args, char *vector_sum,
unsigned long *length, char *is_null,
char *error) {
*error = 0;
*is_null = 0;
uint32_t dim_vec1 = get_dimensions(args->lengths[0], sizeof(float));
uint32_t dim_vec2 = get_dimensions(args->lengths[1], sizeof(float));
if (dim_vec1 != dim_vec2 || dim_vec1 == UINT32_MAX ||
dim_vec2 == UINT32_MAX) {
error_msg_size();
*error = 1;
*is_null = 1;
return 0;
}
float *vec1 = ((float *)args->args[0]);
float *vec2 = ((float *)args->args[1]);
unsigned long vector_length = 0;
vector_sum = vector_subtraction(dim_vec1, vec1, vec2, &vector_length);
*length = vector_length;
return const_cast<char *>(vector_sum);
}
// UDF to implement a vector product function of two vectors
static bool vector_multiplication_udf_init(UDF_INIT *initid, UDF_ARGS *args,
char *) {
if (args->arg_count < 2) {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_multiplication",
"this function requires 2 parameters");
return true;
}
initid->maybe_null = true;
return false;
}
static void vector_multiplication_udf_deinit(__attribute__((unused))
UDF_INIT *initid) {
assert(initid->ptr == udf_init || initid->ptr == my_udf);
}
const char *vector_multiplication_udf(UDF_INIT *, UDF_ARGS *args,
char *vector_sum, unsigned long *length,
char *is_null, char *error) {
*error = 0;
*is_null = 0;
uint32_t dim_vec1 = get_dimensions(args->lengths[0], sizeof(float));
uint32_t dim_vec2 = get_dimensions(args->lengths[1], sizeof(float));
if (dim_vec1 != dim_vec2 || dim_vec1 == UINT32_MAX ||
dim_vec2 == UINT32_MAX) {
error_msg_size();
*error = 1;
*is_null = 1;
return 0;
}
float *vec1 = ((float *)args->args[0]);
float *vec2 = ((float *)args->args[1]);
unsigned long vector_length = 0;
vector_sum = vector_multiplication(dim_vec1, vec1, vec2, &vector_length);
*length = vector_length;
return const_cast<char *>(vector_sum);
}
// UDF to implement a vector division function of two vectors
static bool vector_division_udf_init(UDF_INIT *initid, UDF_ARGS *args, char *) {
if (args->arg_count < 2) {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_division",
"this function requires 2 parameters");
return true;
}
initid->maybe_null = true;
return false;
}
static void vector_division_udf_deinit(__attribute__((unused))
UDF_INIT *initid) {
assert(initid->ptr == udf_init || initid->ptr == my_udf);
}
const char *vector_division_udf(UDF_INIT *, UDF_ARGS *args, char *vector_sum,
unsigned long *length, char *is_null,
char *error) {
*error = 0;
*is_null = 0;
uint32_t dim_vec1 = get_dimensions(args->lengths[0], sizeof(float));
uint32_t dim_vec2 = get_dimensions(args->lengths[1], sizeof(float));
if (dim_vec1 != dim_vec2 || dim_vec1 == UINT32_MAX ||
dim_vec2 == UINT32_MAX) {
error_msg_size();
*error = 1;
*is_null = 1;
return 0;
}
float *vec1 = ((float *)args->args[0]);
float *vec2 = ((float *)args->args[1]);
unsigned long vector_length = 0;
vector_sum = vector_division(dim_vec1, vec1, vec2, &vector_length);
*length = vector_length;
return const_cast<char *>(vector_sum);
}
} /* namespace udf_impl */
static mysql_service_status_t vector_operations_service_init() {
mysql_service_status_t result = 0;
log_bi = mysql_service_log_builtins;
log_bs = mysql_service_log_builtins_string;
LogComponentErr(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "initializing…");
list = new udf_list();
if (list->add_scalar("VECTOR_ADDITION", Item_result::STRING_RESULT,
(Udf_func_any)udf_impl::vector_addition_udf,
udf_impl::vector_addition_udf_init,
udf_impl::vector_addition_udf_deinit)) {
delete list;
return 1; /* failure: one of the UDF registrations failed */
}
if (list->add_scalar("VECTOR_SUBTRACTION", Item_result::STRING_RESULT,
(Udf_func_any)udf_impl::vector_subtraction_udf,
udf_impl::vector_subtraction_udf_init,
udf_impl::vector_subtraction_udf_deinit)) {
delete list;
return 1; /* failure: one of the UDF registrations failed */
}
if (list->add_scalar("VECTOR_MULTIPLICATION", Item_result::STRING_RESULT,
(Udf_func_any)udf_impl::vector_multiplication_udf,
udf_impl::vector_multiplication_udf_init,
udf_impl::vector_multiplication_udf_deinit)) {
delete list;
return 1; /* failure: one of the UDF registrations failed */
}
if (list->add_scalar("VECTOR_DIVISION", Item_result::STRING_RESULT,
(Udf_func_any)udf_impl::vector_division_udf,
udf_impl::vector_division_udf_init,
udf_impl::vector_division_udf_deinit)) {
delete list;
return 1; /* failure: one of the UDF registrations failed */
}
return result;
}
static mysql_service_status_t vector_operations_service_deinit() {
mysql_service_status_t result = 0;
if (list->unregister()) return 1; /* failure: some UDFs still in use */
delete list;
LogComponentErr(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "uninstalled.");
return result;
}
BEGIN_COMPONENT_PROVIDES(vector_operations_service)
END_COMPONENT_PROVIDES();
BEGIN_COMPONENT_REQUIRES(vector_operations_service)
REQUIRES_SERVICE(log_builtins), REQUIRES_SERVICE(log_builtins_string),
REQUIRES_SERVICE(mysql_udf_metadata), REQUIRES_SERVICE(udf_registration),
REQUIRES_SERVICE(mysql_runtime_error), END_COMPONENT_REQUIRES();
/* A list of metadata to describe the Component. */
BEGIN_COMPONENT_METADATA(vector_operations_service)
METADATA("mysql.author", "Oracle Corporation / lefred"),
METADATA("mysql.license", "GPL"), METADATA("mysql.dev", "lefred"),
END_COMPONENT_METADATA();
/* Declaration of the Component. */
DECLARE_COMPONENT(vector_operations_service, "mysql:vector_operations_service")
vector_operations_service_init,
vector_operations_service_deinit END_DECLARE_COMPONENT();
/* Defines list of Components contained in this library. Note that for now
we assume that library will have exactly one Component. */
DECLARE_LIBRARY_COMPONENTS &COMPONENT_REF(vector_operations_service)
END_DECLARE_LIBRARY_COMPONENTS
/* Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is also distributed with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have included with MySQL.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#define LOG_COMPONENT_TAG "community_vector"
#include <mysql/components/component_implementation.h>
#include <mysql/components/services/log_builtins.h> /* LogComponentErr */
#include <mysql/components/services/mysql_runtime_error_service.h>
#include <mysql/components/services/udf_metadata.h>
#include <mysql/components/services/udf_registration.h>
#include <mysqld_error.h> /* Errors */
#include <list>
#include <sstream>
#include <string>
#include <vector>
#include "mysql/strings/m_ctype.h"
#include "sql/field.h"
#include "sql/sql_udf.h"
#include "sql/vector_conversion.h"
void populate_vector(uint32_t vec_dim, float *vec1,
std::vector<float> &vector1) {
vector1.clear();
vector1.reserve(vec_dim);
for (uint32_t i = 0; i < vec_dim; i++) {
float value1;
memcpy(&value1, vec1 + i, sizeof(float));
vector1.push_back(value1);
}
}
std::string std_vector_to_string(const std::vector<float> &vec) {
std::ostringstream oss;
oss << "[";
for (size_t i = 0; i < vec.size(); ++i) {
// Set precision and scientific notation
oss << std::scientific << vec[i];
// Add a comma if it's not the last element
if (i != vec.size() - 1) {
oss << ",";
}
}
oss << "]";
return oss.str();
}
static char *vector_addition(uint32_t vec_dim, float *vec1, float *vec2,
unsigned long *length) {
std::vector<float> vector1;
std::vector<float> vector2;
populate_vector(vec_dim, vec1, vector1);
populate_vector(vec_dim, vec2, vector2);
std::vector<float> result(vector1.size());
for (size_t i = 0; i < vector1.size(); ++i) {
result[i] = vector1[i] + vector2[i];
}
std::string result_str = std_vector_to_string(result);
char *result_cstr = result_str.data();
String vector_string;
uint32 output_dims = Field_vector::max_dimensions;
auto dimension_bytes = Field_vector::dimension_bytes(output_dims);
if (vector_string.mem_realloc(dimension_bytes)) return 0;
bool err = from_string_to_vector(result_cstr, strlen(result_cstr),
vector_string.ptr(), &output_dims);
if (err) {
if (output_dims == Field_vector::max_dimensions) {
vector_string.replace(32, 5, "... \0", 5);
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_addition",
"Data out of range");
} else {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_addition",
"Invalid vector conversion");
}
return 0;
}
size_t output_vector_length = Field_vector::dimension_bytes(output_dims);
vector_string.length(output_vector_length);
*length = output_vector_length;
return vector_string.c_ptr_safe();
}
static char *vector_subtraction(uint32_t vec_dim, float *vec1, float *vec2,
unsigned long *length) {
std::vector<float> vector1;
std::vector<float> vector2;
populate_vector(vec_dim, vec1, vector1);
populate_vector(vec_dim, vec2, vector2);
std::vector<float> result(vector1.size());
for (size_t i = 0; i < vector1.size(); ++i) {
result[i] = vector1[i] - vector2[i];
}
std::string result_str = std_vector_to_string(result);
char *result_cstr = result_str.data();
String vector_string;
uint32 output_dims = Field_vector::max_dimensions;
auto dimension_bytes = Field_vector::dimension_bytes(output_dims);
if (vector_string.mem_realloc(dimension_bytes)) return 0;
bool err = from_string_to_vector(result_cstr, strlen(result_cstr),
vector_string.ptr(), &output_dims);
if (err) {
if (output_dims == Field_vector::max_dimensions) {
vector_string.replace(32, 5, "... \0", 5);
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_subtraction",
"Data out of range");
} else {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_subtraction",
"Invalid vector conversion");
}
return 0;
}
size_t output_vector_length = Field_vector::dimension_bytes(output_dims);
vector_string.length(output_vector_length);
*length = output_vector_length;
return vector_string.c_ptr_safe();
}
static char *vector_multiplication(uint32_t vec_dim, float *vec1, float *vec2,
unsigned long *length) {
std::vector<float> vector1;
std::vector<float> vector2;
populate_vector(vec_dim, vec1, vector1);
populate_vector(vec_dim, vec2, vector2);
std::vector<float> result(vector1.size());
for (size_t i = 0; i < vector1.size(); ++i) {
result[i] = vector1[i] * vector2[i];
}
std::string result_str = std_vector_to_string(result);
char *result_cstr = result_str.data();
String vector_string;
uint32 output_dims = Field_vector::max_dimensions;
auto dimension_bytes = Field_vector::dimension_bytes(output_dims);
if (vector_string.mem_realloc(dimension_bytes)) return 0;
bool err = from_string_to_vector(result_cstr, strlen(result_cstr),
vector_string.ptr(), &output_dims);
if (err) {
if (output_dims == Field_vector::max_dimensions) {
vector_string.replace(32, 5, "... \0", 5);
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_multiplication",
"Data out of range");
} else {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_multiplication",
"Invalid vector conversion");
}
return 0;
}
size_t output_vector_length = Field_vector::dimension_bytes(output_dims);
vector_string.length(output_vector_length);
*length = output_vector_length;
return vector_string.c_ptr_safe();
}
static char *vector_division(uint32_t vec_dim, float *vec1, float *vec2,
unsigned long *length) {
std::vector<float> vector1;
std::vector<float> vector2;
populate_vector(vec_dim, vec1, vector1);
populate_vector(vec_dim, vec2, vector2);
std::vector<float> result(vector1.size());
for (size_t i = 0; i < vector1.size(); ++i) {
if (vector2[i] == 0) {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_division",
"Division by zero is undefined");
return 0;
}
result[i] = vector1[i] / vector2[i];
}
std::string result_str = std_vector_to_string(result);
char *result_cstr = result_str.data();
String vector_string;
uint32 output_dims = Field_vector::max_dimensions;
auto dimension_bytes = Field_vector::dimension_bytes(output_dims);
if (vector_string.mem_realloc(dimension_bytes)) return 0;
bool err = from_string_to_vector(result_cstr, strlen(result_cstr),
vector_string.ptr(), &output_dims);
if (err) {
if (output_dims == Field_vector::max_dimensions) {
vector_string.replace(32, 5, "... \0", 5);
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_division",
"Data out of range");
} else {
mysql_error_service_emit_printf(mysql_service_mysql_runtime_error,
ER_UDF_ERROR, 0, "vector_division",
"Invalid vector conversion");
}
return 0;
}
size_t output_vector_length = Field_vector::dimension_bytes(output_dims);
vector_string.length(output_vector_length);
*length = output_vector_length;
return vector_string.c_ptr_safe();
}
extern REQUIRES_SERVICE_PLACEHOLDER(log_builtins);
extern REQUIRES_SERVICE_PLACEHOLDER(log_builtins_string);
extern REQUIRES_SERVICE_PLACEHOLDER(udf_registration);
extern REQUIRES_SERVICE_PLACEHOLDER(mysql_udf_metadata);
extern REQUIRES_SERVICE_PLACEHOLDER(mysql_runtime_error);
extern SERVICE_TYPE(log_builtins) * log_bi;
extern SERVICE_TYPE(log_builtins_string) * log_bs;
The code is also available on GitHub.
Once compiled, you get the component file to load:
Testing
We can load the component and test it:
In the image above, we can see how we loaded the component and then we can see that the UDFs for all four operations are available.
SQL> select vector_to_string(
vector_addition(
string_to_vector('[1,2,3]'),
string_to_vector('[4,5,6]')
)
) sum;
+---------------------------------------+
| sum |
+---------------------------------------+
| [5.00000e+00,7.00000e+00,9.00000e+00] |
+---------------------------------------+
1 row in set (0.0002 sec)
In comparison to the function developed in JavaScript in the previous article, this time we utilize the VECTOR datatype as both the input and output, we don’t use the string representation.
We can test with the same table as in the previous post:
SQL> select id, vector_addition(vec1, vec2) vec_sum from t1;
+----+----------------------------+
| id | vec_sum |
+----+----------------------------+
| 1 | 0x0000C0400000404100000040 |
| 2 | 0x0000004066662A4200001C43 |
+----+----------------------------+
2 rows in set (0.0008 sec)
SQL> select id, vector_to_string(vector_addition(vec1, vec2)) vec_sum
from t1;
+----+---------------------------------------+
| id | vec_sum |
+----+---------------------------------------+
| 1 | [6.00000e+00,1.20000e+01,2.00000e+00] |
| 2 | [2.00000e+00,4.26000e+01,1.56000e+02] |
+----+---------------------------------------+
2 rows in set (0.0003 sec)
Conclusion
The VECTOR datatype is a valuable feature of MySQL 9.0 and can be extended with ease, much like other datatypes in MySQL, using your UDFs.
Extending MySQL using the Component Architecture is relatively straightforward but more complex to build and deploy in the cloud, where creating JavaScript functions is easier with MySQL HeatWave.
Enjoy MySQL, Vectors, and coding components in C++!