Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/plugin/updater_gpu/src/exact/argmax_by_key.cuh  view on Meta::CPAN

/*
 * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include "../../../../src/tree/param.h"
#include "../common.cuh"
#include "node.cuh"
#include "../types.cuh"

namespace xgboost {
namespace tree {
namespace exact {

/**
 * @enum ArgMaxByKeyAlgo argmax_by_key.cuh
 * @brief Selects which algorithm is used for the multi-argmax (argmax-by-key)
 *        operation
 */
enum ArgMaxByKeyAlgo {
  /** simplest variant: use global-memory atomics for all updates */
  ABK_GMEM = 0,
  /** use shared-memory atomics for updates (when the number of keys is small) */
  ABK_SMEM
};

/**
 * Maximum depth up to which the shared-memory-atomics variant of argmax is
 * used. NOTE(review): whether this bound is inclusive depends on the caller's
 * comparison, which is not visible in this part of the file.
 */
static const int MAX_ABK_LEVELS = 3;

/**
 * @brief Pick the better of two split candidates
 * @param a first candidate
 * @param b second candidate
 * @return a Split carrying the larger score; when both scores compare equal
 *         the score of `a` is kept together with the smaller of the two
 *         indices
 * @note if neither `a.score < b.score` nor `a.score == b.score` holds (which,
 *       assuming score is a floating-point type, includes the NaN case), the
 *       result is `a`.
 */
HOST_DEV_INLINE Split maxSplit(Split a, Split b) {
  Split best;
  // Start from `a`: it wins outright and also supplies the score on a tie.
  best.score = a.score;
  best.index = a.index;
  if (a.score < b.score) {
    // `b` is strictly better, take it entirely.
    best.score = b.score;
    best.index = b.index;
  } else if ((a.score == b.score) && !(a.index < b.index)) {
    // Tie on score: prefer the smaller index, which here is `b`'s.
    best.index = b.index;
  }
  return best;
}

/**
 * @brief Atomically replace `*address` with maxSplit(val, *address)
 * @param address location of the Split to be updated
 * @param val candidate split to merge into `*address`
 * @note the Split is read and written through an `unsigned long long` view so
 *       that a single 64-bit atomicCAS can swap it; this assumes a Split
 *       occupies exactly 8 bytes and is suitably aligned (its definition is
 *       not visible here - confirm).
 */
DEV_INLINE void atomicArgMax(Split* address, Split val) {
  unsigned long long* slot = reinterpret_cast<unsigned long long*>(address);
  unsigned long long observed = *slot;
  for (;;) {
    // Value we believe is currently stored; the CAS only succeeds if it still is.
    unsigned long long expected = observed;
    Split winner = maxSplit(val, *reinterpret_cast<Split*>(&expected));
    observed =
        atomicCAS(slot, expected, *reinterpret_cast<uint64_t*>(&winner));
    // atomicCAS returns the value it found; a match means our write went through.
    if (observed == expected) {
      break;
    }
  }
}

template <typename node_id_t>
DEV_INLINE void argMaxWithAtomics(
    int id, Split* nodeSplits, const bst_gpair* gradScans,
    const bst_gpair* gradSums, const float* vals, const int* colIds,
    const node_id_t* nodeAssigns, const Node<node_id_t>* nodes, int nUniqKeys,
    node_id_t nodeStart, int len, const  GPUTrainingParam& param) {
  int nodeId = nodeAssigns[id];
  ///@todo: this is really a bad check! but will be fixed when we move
  ///   to key-based reduction
  if ((id == 0) ||
      !((nodeId == nodeAssigns[id - 1]) && (colIds[id] == colIds[id - 1]) &&
        (vals[id] == vals[id - 1]))) {
    if (nodeId != UNUSED_NODE) {
      int sumId = abs2uniqKey(id, nodeAssigns, colIds, nodeStart, nUniqKeys);
      bst_gpair colSum = gradSums[sumId];
      int uid = nodeId - nodeStart;
      Node<node_id_t> n = nodes[nodeId];
      bst_gpair parentSum = n.gradSum;
      float parentGain = n.score;
      bool tmp;
      Split s;
      bst_gpair missing = parentSum - colSum;
      s.score = loss_chg_missing(gradScans[id], missing, parentSum, parentGain,



( run in 0.758 second using v1.01-cache-2.11-cpan-39bf76dae61 )