Alien-XGBoost

xgboost/dmlc-core/src/io/s3_filesys.h


namespace dmlc {
namespace io {
/*! \brief AWS S3 filesystem */
class S3FileSystem : public FileSystem {
 public:
  /*! \brief destructor */
  virtual ~S3FileSystem() {}

  /*!
   * \brief Sets AWS access credentials
   * \param aws_access_id The AWS Access Key ID
   * \param aws_secret_key The AWS Secret Key
   */
  void SetCredentials(const std::string& aws_access_id,
                      const std::string& aws_secret_key);

  /*!
   * \brief get information about a path
   * \param path the path to the file

xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/ApplicationMaster.java

    // number of server nodes
    private int numServer = 0;
    // total number of tasks
    private int numTasks;
    // maximum number of attempts to try in each task
    private int maxNumAttempt = 3;
    // command to launch
    private String command = "";

    // username
    private String userName = "";
    // user credentials
    private Credentials credentials = null;
    // application tracker hostname
    private String appHostName = "";
    // application tracker URL
    private String appTrackerUrl = "";
    // tracker port
    private int appTrackerPort = 0;

    // whether we have started to abort the application due to some fatal reason
    private boolean startAbort = false;
    // worker resources

xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/ApplicationMaster.java

    // blacklist of hosts
    private Collection<String> blackList = new java.util.HashSet<String>();

    public static void main(String[] args) throws Exception {
        new ApplicationMaster().run(args);
    }

    private ApplicationMaster() throws IOException {
        dfs = FileSystem.get(conf);
        userName = UserGroupInformation.getCurrentUser().getShortUserName();
        credentials = UserGroupInformation.getCurrentUser().getCredentials();
    }


    /**
     * setup security tokens for the current user
     * @return the ByteBuffer containing the security tokens
     */
    private ByteBuffer setupTokens() {
        try {
            DataOutputBuffer dob = new DataOutputBuffer();
            credentials.writeTokenStorageToStream(dob);
            return ByteBuffer.wrap(dob.getData(), 0, dob.getLength()).duplicate();
        } catch (IOException e) {
            throw new RuntimeException(e);  // TODO: FIXME
        }
    }
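
The `ByteBuffer` produced by `setupTokens()` is what YARN expects to find on each container's launch context. A minimal sketch of the typical wiring, assuming the standard YARN client API (the `TokenWiring` class and `withTokens` method are illustrative names, not part of this file):

```java
// Illustrative sketch: attach serialized credentials to a
// ContainerLaunchContext so launched containers inherit the tokens.
import java.nio.ByteBuffer;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.util.Records;

public class TokenWiring {
    public static ContainerLaunchContext withTokens(ByteBuffer tokens) {
        ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
        // duplicate() gives each consumer an independent read position
        // over the same underlying token bytes
        ctx.setTokens(tokens.duplicate());
        return ctx;
    }
}
```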


    /**
     * get integer argument from environment variable
     *

xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/Client.java

    // cached maps
    private Map<String, String> cacheFiles = new java.util.HashMap<String, String>();
    // environment variable used to set up cache files
    private String cacheFileArg = "";
    // args to pass to application master
    private String appArgs = "";
    // HDFS path to store temporary results
    private String tempdir = "/tmp";
    // user name
    private String userName = "";
    // user credentials
    private Credentials credentials = null;
    // job name
    private String jobName = "";
    // queue
    private String queue = "default";
    // ApplicationMaster classpath
    private String appCp = null;
    // ApplicationMaster env
    private Map<String, String> env = new java.util.HashMap<String, String>();

    /**
     * constructor
     * @throws IOException
     */
    private Client() throws IOException {
        conf.addResource(new Path(System.getenv("HADOOP_CONF_DIR") +"/core-site.xml"));
        conf.addResource(new Path(System.getenv("HADOOP_CONF_DIR") +"/hdfs-site.xml"));
        dfs = FileSystem.get(conf);
        userName = UserGroupInformation.getCurrentUser().getShortUserName();
        credentials = UserGroupInformation.getCurrentUser().getCredentials();
    }

    /**
     * setup security tokens for the current user
     * @return the ByteBuffer containing the security tokens
     * @throws IOException
     */
    private ByteBuffer setupTokens() throws IOException {
        DataOutputBuffer buffer = new DataOutputBuffer();
        String loc = System.getenv().get("HADOOP_TOKEN_FILE_LOCATION");
        if ((loc != null && loc.trim().length() > 0)
        ||  (!UserGroupInformation.isSecurityEnabled())) {
            this.credentials.writeTokenStorageToStream(buffer);
        } else {
            // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
            Credentials credentials = new Credentials();
            String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
            if (tokenRenewer == null || tokenRenewer.length() == 0) {
                throw new IOException(
                "Can't get Master Kerberos principal for the RM to use as renewer");
            }

            // For now, only getting tokens for the default file-system.
            final Token<?>[] tokens = dfs.addDelegationTokens(tokenRenewer, credentials);
            if (tokens != null) {
                for (Token<?> token : tokens) {
                    LOG.info("Got dt for " + dfs.getUri() + "; " + token);
                }
            }
            credentials.writeTokenStorageToStream(buffer);
        }
        return ByteBuffer.wrap(buffer.getData(), 0, buffer.getLength());
    }
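
The serialization used here is symmetric: a launched process can recover the credentials with `Credentials.readTokenStorageStream`. A minimal, self-contained round-trip sketch under that assumption (the `TokenRoundTrip` class is illustrative, not part of this file):

```java
// Illustrative round trip: serialize Credentials the way setupTokens()
// does, then deserialize them the way a launched container would.
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;

public class TokenRoundTrip {
    public static void main(String[] args) throws Exception {
        Credentials creds = new Credentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        creds.writeTokenStorageToStream(dob);
        ByteBuffer buf = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

        // A container would read the bytes back from its launch context.
        byte[] bytes = new byte[buf.remaining()];
        buf.get(bytes);
        Credentials restored = new Credentials();
        restored.readTokenStorageStream(
            new DataInputStream(new ByteArrayInputStream(bytes)));
        System.out.println("tokens restored: " + restored.numberOfTokens());
    }
}
```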

    /**
     * setup all the cached files
     *
     * @param fmaps
     *            the file maps
     * @return the resource map

xgboost/doc/tutorials/aws_yarn.md

Distributed XGBoost YARN on AWS
===============================
This is a step-by-step tutorial on how to set up and run distributed [XGBoost](https://github.com/dmlc/xgboost)
on an AWS EC2 cluster. Distributed XGBoost runs on various platforms, such as MPI, SGE and Hadoop YARN.
In this tutorial, we use YARN as an example, since it is a widely used solution for distributed computing.

Prerequisites
-------------
We need an [AWS key-pair](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html)
to access the AWS services. Let us assume that we are using a key ```mykey``` and the corresponding permission file ```mypem.pem```.

We also need [AWS credentials](http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html),
which include an `ACCESS_KEY_ID` and a `SECRET_ACCESS_KEY`.

Finally, we will need an S3 bucket to host the data and the model, ```s3://mybucket/```.
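
How the credentials are consumed is not shown in this excerpt; a common convention (an assumption here, not something this tutorial states) is to export them as the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables. A minimal sanity check along those lines, with an illustrative `CheckAwsEnv` helper:

```java
// Illustrative helper: verify the conventional AWS environment
// variables are set before launching a job. Not part of the tutorial.
public class CheckAwsEnv {
    public static void main(String[] args) {
        for (String name : new String[] {"AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"}) {
            String value = System.getenv(name);
            if (value == null || value.isEmpty()) {
                System.err.println("missing environment variable: " + name);
                System.exit(1);
            }
        }
        System.out.println("AWS credentials found in the environment");
    }
}
```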

Setup a Hadoop YARN Cluster
---------------------------
This section shows how to start a Hadoop YARN cluster from scratch.
You can skip this step if you already have one.
We will be using [yarn-ec2](https://github.com/tqchen/yarn-ec2) to start the cluster.


