// File-viewer metadata: HolyC source, 237 lines, 5 KiB, no trailing newline.
// Tag recorded in Tensor.op_type naming the operation that produced a tensor.
// OP_NONE is the value TensorCreate assigns to freshly created tensors.
// Enumerators are implicitly numbered from 0 in declaration order.
enum OpType {
  OP_NONE,
  OP_ADD,
  OP_SUB,
  OP_MUL,
  OP_MATMUL,
  OP_TRANSPOSE,
  OP_RELU,
  OP_SIGMOID,
  OP_TANH,
  OP_SOFTMAX,
  OP_MSE,
  OP_CROSS_ENTROPY,
  OP_BINARY_CROSS_ENTROPY
};
// N-dimensional F32 array that doubles as a node of the autograd graph.
class Tensor {
  F32 *data;    // Element storage; TensorCreate allocates `size` F32 values.
  F32 *grad;    // Gradient buffer of `size` F32 values; NULL until allocated.
  I64 *shape;   // `ndim` dimension extents (private copy made by TensorCreate).
  I64 ndim;     // Number of dimensions.
  I64 size;     // Total element count: product of all shape entries.

  Bool requires_grad;  // TensorBackward and the graph walk skip tensors with this unset.
  Bool owns_data;      // When TRUE, TensorFree releases `data` and `grad`.
  OpType op_type;      // Operation that produced this tensor (OP_NONE for new tensors).
  Tensor **parents;    // Input tensors of the producing op; TensorFree frees this
                       // array only, never the tensors it points to.
  I64 num_parents;     // Number of entries in `parents`.
  U0 (*backward_fn)(Tensor *self);  // Called by TensorBackward with this tensor; may be NULL.
  U8 *extra_data;      // Opaque per-op payload; released by TensorFree.
};
// Autograd helper functions
// Depth-first, post-order walk of the autograd graph reachable from T.
//
// T             - node to visit; NULL is ignored.
// visited       - scratch array (capacity max_size) of nodes already seen.
// visited_count - in/out: number of valid entries in `visited`.
// stack         - output array (capacity max_size); a node is appended only
//                 after all of its gradient-requiring parents, so iterating
//                 it from the end visits T first.
// stack_count   - in/out: number of valid entries in `stack`.
// max_size      - capacity of both arrays; nodes beyond it are dropped.
U0 TopologicalSortUtil(Tensor *T, Tensor **visited, I64 *visited_count, Tensor **stack, I64 *stack_count, I64 max_size) {
  I64 i;

  if (!T) return;

  // A node reachable through several paths must be emitted only once.
  for (i = 0; i < *visited_count; i++) {
    if (visited[i] == T) return;
  }

  // `visited` holds max_size entries; refuse to write past its end.
  if (*visited_count >= max_size) return;
  visited[(*visited_count)++] = T;

  // Recurse into the inputs of the op that produced T (Tensor.parents).
  if (T->parents) {
    for (i = 0; i < T->num_parents; i++) {
      if (T->parents[i] && T->parents[i]->requires_grad) {
        TopologicalSortUtil(T->parents[i], visited, visited_count, stack, stack_count, max_size);
      }
    }
  }

  // Post-order: T is recorded after everything it depends on.
  if (*stack_count < max_size) {
    stack[(*stack_count)++] = T;
  }
}
// Tensor creation/destruction
// Allocates a tensor with the given shape. Element data is left uninitialised.
//
// shape - array of `ndim` dimension extents; copied, so the caller keeps
//         ownership of its own array.
// ndim  - number of dimensions.
//
// Returns the new tensor (owns_data TRUE, no gradient, no graph links), or
// NULL on invalid arguments or allocation failure. Release with TensorFree.
Tensor* TensorCreate(I64 *shape, I64 ndim) {
  I64 i;

  // Reject arguments that would make the copy loop read through NULL or
  // hand a negative byte count to the allocator.
  if (ndim < 0 || (ndim > 0 && !shape)) return NULL;

  Tensor *T = MAlloc(sizeof(Tensor));
  if (!T) return NULL;

  T->ndim = ndim;
  T->shape = MAlloc(ndim * sizeof(I64));
  if (!T->shape) {
    Free(T);
    return NULL;
  }

  // size is the product of all extents (1 when ndim is 0).
  T->size = 1;
  for (i = 0; i < ndim; i++) {
    if (shape[i] < 0) {
      Free(T->shape);
      Free(T);
      return NULL;
    }
    T->shape[i] = shape[i];
    T->size *= shape[i];
  }

  T->data = MAlloc(T->size * sizeof(F32));
  if (!T->data) {
    Free(T->shape);
    Free(T);
    return NULL;
  }

  T->grad = NULL;
  T->requires_grad = FALSE;
  T->owns_data = TRUE;
  T->op_type = OP_NONE;
  T->parents = NULL;
  T->num_parents = 0;
  T->backward_fn = NULL;
  T->extra_data = NULL;
  return T;
}
// Creates a tensor of the given shape with every element set to 0.
// Previously this was a second definition of TensorCreate that called
// itself; it is the zero-filled counterpart of TensorOnes.
//
// shape - array of `ndim` dimension extents.
// ndim  - number of dimensions.
//
// Returns the new tensor, or NULL if TensorCreate fails.
Tensor* TensorZeros(I64 *shape, I64 ndim) {
  Tensor *T = TensorCreate(shape, ndim);
  if (!T) return NULL;

  // All-zero bytes are the F32 value 0.0.
  MemSet(T->data, 0, T->size * sizeof(F32));
  return T;
}
// Creates a tensor of the given shape with every element set to 1.
//
// shape - array of `ndim` dimension extents.
// ndim  - number of dimensions.
//
// Returns the new tensor, or NULL if TensorCreate fails.
Tensor* TensorOnes(I64 *shape, I64 ndim) {
  Tensor *T = TensorCreate(shape, ndim);
  if (!T) return NULL;

  I64 i;
  for (i = 0; i < T->size; i++) {
    T->data[i] = 1.0f;
  }
  return T;
}
// Creates a tensor of the given shape filled with pseudo-random values
// produced by the Box-Muller formula Sqrt(-2*Log(u1)) * Cos(2*pi*u2).
//
// shape - array of `ndim` dimension extents.
// ndim  - number of dimensions.
// seed  - passed to Seed() before any value is drawn.
//
// Returns the new tensor, or NULL if TensorCreate fails.
Tensor* TensorRandn(I64 *shape, I64 ndim, I64 seed) {
  I64 i;

  Tensor *T = TensorCreate(shape, ndim);
  if (!T) return NULL;

  Seed(seed);
  for (i = 0; i < T->size; i++) {
    F32 u1 = RANDF32();
    F32 u2 = RANDF32();
    // Log(0) is -infinity. RANDF32's range is not visible here, so clamp u1
    // away from zero to keep every element finite.
    if (u1 < 1e-7f) u1 = 1e-7f;
    F32 z0 = Sqrt(-2.0f * Log(u1)) * Cos(2.0f * M_PI * u2);
    T->data[i] = z0;
  }
  return T;
}
// Releases a tensor and the buffers attached to it. NULL is ignored.
//
// `data` and `grad` are released only when owns_data is set. `shape`, the
// `parents` array and `extra_data` are always released. The tensors that
// `parents` points to are not freed.
U0 TensorFree(Tensor *T) {
  if (!T) return;

  if (T->owns_data) {
    if (T->data) Free(T->data);
    if (T->grad) Free(T->grad);
  }

  if (T->shape) Free(T->shape);
  if (T->parents) Free(T->parents);
  if (T->extra_data) Free(T->extra_data);

  Free(T);
}
// Autograd functions
// Sets whether T takes part in gradient computation. NULL is ignored.
//
// T             - tensor to update.
// requires_grad - new value for T->requires_grad.
//
// When enabling gradients on a tensor that has no gradient buffer yet, one
// is allocated. Any gradient buffer present afterwards is reset to zero.
U0 TensorSetRequiresGrad(Tensor *T, Bool requires_grad) {
  if (!T) return;

  // Previously the argument was ignored and the flag never changed.
  T->requires_grad = requires_grad;

  if (requires_grad && !T->grad) {
    // On allocation failure grad stays NULL and the reset below is skipped.
    T->grad = MAlloc(T->size * sizeof(F32));
  }

  if (T->grad) {
    MemSet(T->grad, 0, T->size * sizeof(F32));
  }
}
// Runs the backward pass starting at T.
//
// Does nothing if T is NULL or does not require gradients. Otherwise the
// graph reachable from T is ordered with TopologicalSortUtil and each node's
// backward_fn (when set) is called, starting with T and moving towards its
// inputs. At most 1024 distinct nodes are handled; extra nodes are dropped
// by the sort.
U0 TensorBackward(Tensor *T) {
  I64 i;
  if (!T || !T->requires_grad) return;

  if (!T->grad) {
    T->grad = MAlloc(T->size * sizeof(F32));
    if (!T->grad) return;  // Out of memory: nothing can be accumulated.
    // NOTE(review): the root gradient starts at 0 here, as in the original.
    // Reverse-mode autograd usually seeds it with 1; confirm what the
    // backward_fn implementations expect.
    MemSet(T->grad, 0, T->size * sizeof(F32));
  }

  I64 max_size = 1024;
  Tensor **visited = MAlloc(max_size * sizeof(Tensor *));
  Tensor **stack = MAlloc(max_size * sizeof(Tensor *));
  I64 visited_count = 0;
  I64 stack_count = 0;

  // Only walk the graph when both scratch arrays were obtained.
  if (visited && stack) {
    TopologicalSortUtil(T, visited, &visited_count, stack, &stack_count, max_size);

    // `stack` is in post-order (inputs first), so walk it from the end to
    // process each node before the nodes it was computed from.
    for (i = stack_count; i > 0; i--) {
      Tensor *current = stack[i - 1];
      if (current->backward_fn) {
        current->backward_fn(current);
      }
    }
  }

  if (visited) Free(visited);
  if (stack) Free(stack);
}
// Resets T's gradient buffer to all zeros.
// Does nothing if T is NULL or has no gradient buffer.
U0 TensorZeroGrad(Tensor *T) {
  if (!T || !T->grad) return;
  MemSet(T->grad, 0, T->size * sizeof(F32));
}
// Sets every element of T to `value`.
// Does nothing if T is NULL or has no data buffer.
U0 TensorFill(Tensor *T, F32 value) {
  if (!T || !T->data) return;

  I64 i;
  for (i = 0; i < T->size; i++) {
    T->data[i] = value;
  }
}
// Prints T's shape, dimension count and element values. NULL is ignored.
//
// Two-dimensional tensors are printed row by row as nested brackets with
// "%f"; every other rank is printed as one flat list with "%.4f".
//
// NOTE(review): the opening text "Tensor(shape=[" and the shape loop header
// were missing from the original. They are reconstructed here from the
// closing "], ndim=..." and "])" fragments; confirm the exact wording.
U0 TensorPrint(Tensor *T) {
  I64 i;
  I64 j;

  if (!T) return;

  Print("Tensor(shape=[");
  for (i = 0; i < T->ndim; i++) {
    // I64 values are printed with %d by HolyC's Print.
    Print("%d", T->shape[i]);
    if (i < T->ndim - 1) {
      Print(" x ");
    }
  }

  Print("], ndim=%d, data=\n[", T->ndim);
  if (T->ndim == 2) {
    for (i = 0; i < T->shape[0]; i++) {
      Print("[");
      for (j = 0; j < T->shape[1]; j++) {
        // Row-major indexing: row i starts at offset i * columns.
        Print("%f", T->data[i * T->shape[1] + j]);
        if (j < T->shape[1] - 1) {
          Print(", ");
        }
      }
      Print("]");
      if (i < T->shape[0] - 1) {
        Print(",\n");
      }
    }
  } else {
    for (i = 0; i < T->size; i++) {
      Print("%.4f", T->data[i]);
      if (i < T->size - 1) {
        Print(", ");
      }
    }
  }
  Print("])\n");
}
// Returns a newly allocated tensor with the same shape and element values
// as T, or NULL if T is NULL or allocation fails.
//
// NOTE(review): the original body was cut off right after the allocation
// (only a stray "M" remained) and had no return statement. This version
// copies the element data only; confirm whether requires_grad, grad or any
// graph fields were meant to be copied as well.
Tensor* TensorCopy(Tensor *T) {
  if (!T) return NULL;

  Tensor *C = TensorCreate(T->shape, T->ndim);
  if (!C) return NULL;

  MemCpy(C->data, T->data, T->size * sizeof(F32));
  return C;
}