Alien-XGBoost
xgboost/dmlc-core/src/io/s3_filesys.h
namespace dmlc {
namespace io {
/*! \brief AWS S3 filesystem */
class S3FileSystem : public FileSystem {
public:
/*! \brief destructor */
virtual ~S3FileSystem() {}
/*!
* \brief Sets AWS access credentials
* \param aws_access_id The AWS Access Key ID
* \param aws_secret_key The AWS Secret Key
*/
void SetCredentials(const std::string& aws_access_id,
const std::string& aws_secret_key);
/*!
* \brief get information about a path
* \param path the path to the file
xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/ApplicationMaster.java
private int numServer = 0;
// total number of tasks
private int numTasks;
// maximum number of attempts to try in each task
private int maxNumAttempt = 3;
// command to launch
private String command = "";
// username
private String userName = "";
// user credentials
private Credentials credentials = null;
// application tracker hostname
private String appHostName = "";
// application tracker URL
private String appTrackerUrl = "";
// tracker port
private int appTrackerPort = 0;
// whether we start to abort the application, due to whatever fatal reasons
private boolean startAbort = false;
// worker resources
xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/ApplicationMaster.java
// add the blacklist
private Collection<String> blackList = new java.util.HashSet<String>();
public static void main(String[] args) throws Exception {
new ApplicationMaster().run(args);
}
private ApplicationMaster() throws IOException {
dfs = FileSystem.get(conf);
userName = UserGroupInformation.getCurrentUser().getShortUserName();
credentials = UserGroupInformation.getCurrentUser().getCredentials();
}
/**
* setup security token given current user
* @return the ByteBuffer containing the security tokens
*/
private ByteBuffer setupTokens() {
try {
DataOutputBuffer dob = new DataOutputBuffer();
credentials.writeTokenStorageToStream(dob);
return ByteBuffer.wrap(dob.getData(), 0, dob.getLength()).duplicate();
} catch (IOException e) {
throw new RuntimeException(e); // TODO: FIXME
}
}
/**
* get integer argument from environment variable
*
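The ByteBuffer returned by setupTokens() is only half of the story: it still has to be handed to YARN when a worker container is launched. Below is a minimal, hypothetical sketch of that step; it assumes an NMClient and an allocated Container (neither appears in the excerpt above) and is not code from ApplicationMaster.java itself.

```java
import java.nio.ByteBuffer;
import java.util.Collections;

import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.util.Records;

class WorkerLaunchSketch {
    // Illustrative helper, not part of dmlc's ApplicationMaster: launch one
    // worker container and forward the serialized security tokens to it.
    static void launchWorker(NMClient nmClient, Container container,
                             String command, ByteBuffer tokens) throws Exception {
        ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
        ctx.setCommands(Collections.singletonList(command));
        // Each container gets its own read position over the shared token bytes.
        ctx.setTokens(tokens.duplicate());
        nmClient.startContainer(container, ctx);
    }
}
```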
xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/Client.java
// cached maps
private Map<String, String> cacheFiles = new java.util.HashMap<String, String>();
// environment variable to set up cache files
private String cacheFileArg = "";
// args to pass to application master
private String appArgs = "";
// HDFS path to store temporary results
private String tempdir = "/tmp";
// user name
private String userName = "";
// user credentials
private Credentials credentials = null;
// job name
private String jobName = "";
// queue
private String queue = "default";
// ApplicationMaster classpath
private String appCp = null;
// ApplicationMaster env
private Map<String, String> env = new java.util.HashMap<String, String>();
/**
* constructor
* @throws IOException
*/
private Client() throws IOException {
conf.addResource(new Path(System.getenv("HADOOP_CONF_DIR") +"/core-site.xml"));
conf.addResource(new Path(System.getenv("HADOOP_CONF_DIR") +"/hdfs-site.xml"));
dfs = FileSystem.get(conf);
userName = UserGroupInformation.getCurrentUser().getShortUserName();
credentials = UserGroupInformation.getCurrentUser().getCredentials();
}
/**
* setup security token given current user
* @return the ByteBuffer containing the security tokens
* @throws IOException
*/
private ByteBuffer setupTokens() throws IOException {
DataOutputBuffer buffer = new DataOutputBuffer();
String loc = System.getenv().get("HADOOP_TOKEN_FILE_LOCATION");
if ((loc != null && loc.trim().length() > 0)
|| (!UserGroupInformation.isSecurityEnabled())) {
this.credentials.writeTokenStorageToStream(buffer);
} else {
// Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
Credentials credentials = new Credentials();
String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
if (tokenRenewer == null || tokenRenewer.length() == 0) {
throw new IOException(
"Can't get Master Kerberos principal for the RM to use as renewer");
}
// For now, only getting tokens for the default file-system.
final Token<?> tokens[] = dfs.addDelegationTokens(tokenRenewer, credentials);
if (tokens != null) {
for (Token<?> token : tokens) {
LOG.info("Got dt for " + dfs.getUri() + "; " + token);
}
}
credentials.writeTokenStorageToStream(buffer);
}
return ByteBuffer.wrap(buffer.getData(), 0, buffer.getLength());
}
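On the client side, the token buffer built above is typically attached to the ApplicationMaster's launch context before the application is submitted. The following is a hedged sketch of that flow, assuming a YarnClient-based submission; the class and method names (SubmitSketch, submit) are illustrative, and the other required fields of the submission context (commands, resources, queue) are omitted.

```java
import java.nio.ByteBuffer;

import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.util.Records;

class SubmitSketch {
    // Illustrative only: attach the serialized tokens to the AM container spec
    // and submit; a real submission also sets commands, resources, queue, etc.
    static void submit(YarnClient yarnClient, ByteBuffer tokens) throws Exception {
        YarnClientApplication app = yarnClient.createApplication();
        ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        amContainer.setTokens(tokens);
        appContext.setAMContainerSpec(amContainer);
        yarnClient.submitApplication(appContext);
    }
}
```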
/**
* setup all the cached files
*
* @param fmaps
* the file maps
* @return the resource map
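The excerpt cuts off before the cache-file setup itself, but the general YARN pattern it describes is to turn each HDFS file into a LocalResource entry keyed by the alias under which the file should appear inside the container. A hedged sketch of that mapping, with an illustrative helper name, follows.

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.util.ConverterUtils;

class CacheFileSketch {
    // Illustrative helper: register one HDFS file so that YARN localizes it
    // into the container's working directory under the given alias.
    static Map<String, LocalResource> toLocalResource(FileSystem dfs, Path hdfsPath,
                                                      String alias) throws Exception {
        FileStatus status = dfs.getFileStatus(hdfsPath);
        LocalResource resource = LocalResource.newInstance(
            ConverterUtils.getYarnUrlFromPath(hdfsPath),
            LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION,
            status.getLen(),
            status.getModificationTime());
        Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
        resources.put(alias, resource);
        return resources;
    }
}
```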
xgboost/doc/tutorials/aws_yarn.md
Distributed XGBoost YARN on AWS
===============================
This is a step-by-step tutorial on how to set up and run distributed [XGBoost](https://github.com/dmlc/xgboost)
on an AWS EC2 cluster. Distributed XGBoost runs on various platforms such as MPI, SGE and Hadoop YARN.
In this tutorial, we use YARN as an example since it is a widely used solution for distributed computing.
Prerequisites
------------
We need to get an [AWS key-pair](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html)
to access the AWS services. Let us assume that we are using a key ```mykey``` and the corresponding permission file ```mypem.pem```.
We also need [AWS credentials](http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html),
which include an `ACCESS_KEY_ID` and a `SECRET_ACCESS_KEY`.
Finally, we will need an S3 bucket to host the data and the model, e.g. ```s3://mybucket/```.
Setup a Hadoop YARN Cluster
---------------------------
This section shows how to start a Hadoop YARN cluster from scratch.
You can skip this step if you already have one.
We will be using [yarn-ec2](https://github.com/tqchen/yarn-ec2) to start the cluster.