Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/plugin/updater_gpu/src/exact/argmax_by_key.cuh view on Meta::CPAN
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../../../../src/tree/param.h"
#include "../common.cuh"
#include "node.cuh"
#include "../types.cuh"
namespace xgboost {
namespace tree {
namespace exact {
/**
 * @enum ArgMaxByKeyAlgo argmax_by_key.cuh
 * @brief Selects which algorithm is used for the multi-argmax
 * (argmax-by-key) operation
 */
enum ArgMaxByKeyAlgo {
  /** simplest variant: every update goes through global-memory atomics */
  ABK_GMEM = 0,
  /** updates go through shared-memory atomics (intended for the case where
   * the number of keys is small) */
  ABK_SMEM = 1
};
/**
 * Maximum tree depth (level) up to which shared-memory based atomics are
 * used for the argmax operation.
 * NOTE(review): presumably levels deeper than this fall back to the
 * global-memory variant (ABK_GMEM) -- confirm at the call site.
 */
static const int MAX_ABK_LEVELS = 3;
/**
 * @brief Picks the better of two split candidates
 * @param a first candidate
 * @param b second candidate
 * @return a Split holding the score and index of the winning candidate:
 * - scores compare equal: a's score together with the smaller of the two
 *   indices
 * - a's score is strictly less than b's: b's score and index
 * - every remaining case: a's score and index
 */
HOST_DEV_INLINE Split maxSplit(Split a, Split b) {
  Split best;
  if (a.score == b.score) {
    // tie on score: break it deterministically by keeping the lower index
    best.score = a.score;
    best.index = (a.index < b.index) ? a.index : b.index;
    return best;
  }
  // b only wins on a strict "less than"; anything else keeps a
  const bool pickB = a.score < b.score;
  best.score = pickB ? b.score : a.score;
  best.index = pickB ? b.index : a.index;
  return best;
}
/**
 * @brief Atomically merges a split candidate into the Split stored at
 * `address`, using maxSplit() to decide which one is kept
 * @param address location of the Split to be updated
 * @param val candidate to merge into `*address`
 *
 * The Split is accessed as a single `unsigned long long` word so that the
 * whole struct can be swapped with one atomicCAS.
 * NOTE(review): this assumes sizeof(Split) == sizeof(unsigned long long) and
 * that `address` is suitably aligned -- confirm against the Split definition.
 */
DEV_INLINE void atomicArgMax(Split* address, Split val) {
  unsigned long long* slot = reinterpret_cast<unsigned long long*>(address);
  // initial snapshot; if it is out of date the first CAS fails and we retry
  unsigned long long observed = *slot;
  for (;;) {
    unsigned long long expected = observed;
    // combine the candidate with the value we believe is currently stored
    Split merged = maxSplit(val, *reinterpret_cast<Split*>(&expected));
    // publish only if the slot still holds `expected`; atomicCAS returns the
    // value that was actually found there
    observed = atomicCAS(slot, expected,
                         *reinterpret_cast<unsigned long long*>(&merged));
    if (observed == expected) {
      break;  // our view was current, so the exchange took effect
    }
  }
}
template <typename node_id_t>
DEV_INLINE void argMaxWithAtomics(
int id, Split* nodeSplits, const bst_gpair* gradScans,
const bst_gpair* gradSums, const float* vals, const int* colIds,
const node_id_t* nodeAssigns, const Node<node_id_t>* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const GPUTrainingParam& param) {
int nodeId = nodeAssigns[id];
///@todo: this is really a bad check! but will be fixed when we move
/// to key-based reduction
if ((id == 0) ||
!((nodeId == nodeAssigns[id - 1]) && (colIds[id] == colIds[id - 1]) &&
(vals[id] == vals[id - 1]))) {
if (nodeId != UNUSED_NODE) {
int sumId = abs2uniqKey(id, nodeAssigns, colIds, nodeStart, nUniqKeys);
bst_gpair colSum = gradSums[sumId];
int uid = nodeId - nodeStart;
Node<node_id_t> n = nodes[nodeId];
bst_gpair parentSum = n.gradSum;
float parentGain = n.score;
bool tmp;
Split s;
bst_gpair missing = parentSum - colSum;
s.score = loss_chg_missing(gradScans[id], missing, parentSum, parentGain,
( run in 0.758 second using v1.01-cache-2.11-cpan-39bf76dae61 )