XGBoostEstimator

Instance Constructors

new XGBoostEstimator(uid: String)
new XGBoostEstimator(xgboostParams: Map[String, Any])

Value Members

final def !=(arg0: Any): Boolean

Definition Classes

AnyRef → Any
final def ##(): Int

Definition Classes

AnyRef → Any
final def $[T](param: Param[T]): T

Attributes

protected

Definition Classes

Params
final def ==(arg0: Any): Boolean

Definition Classes

AnyRef → Any
val alpha: DoubleParam

L1 regularization term on weights; increasing this value will make the model more conservative.

L1 regularization term on weights; increasing this value will make the model more conservative. [default=0]

Definition Classes

BoosterParams
final def asInstanceOf[T0]: T0

Definition Classes

Any
val baseScore: DoubleParam

the initial prediction score of all instances, global bias.

the initial prediction score of all instances, global bias. default=0.5

Definition Classes

LearningTaskParams
val boosterType: Param[String]

Booster to use, options: {'gbtree', 'gblinear', 'dart'}

Booster to use, options: {'gbtree', 'gblinear', 'dart'}

Definition Classes

BoosterParams
final def clear(param: Param[_]): XGBoostEstimator.this.type

Definition Classes

Params
def clone(): AnyRef

Attributes

protected[java.lang]

Definition Classes

AnyRef

Annotations

@throws( ... )
val colSampleByLevel: DoubleParam

subsample ratio of columns for each split, in each level.

subsample ratio of columns for each split, in each level. [default=1] range: (0,1]

Definition Classes

BoosterParams
val colSampleByTree: DoubleParam

subsample ratio of columns when constructing each tree.

subsample ratio of columns when constructing each tree. [default=1] range: (0,1]

Definition Classes

BoosterParams
def copy(extra: ParamMap): XGBoostEstimator

Definition Classes

XGBoostEstimator → Predictor → Estimator → PipelineStage → Params
def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes

protected

Definition Classes

Params
val customEval: Param[EvalTrait]

customized evaluation function provided by user.

customized evaluation function provided by user. default: null

Definition Classes

GeneralParams
val customObj: Param[ObjectiveTrait]

customized objective function provided by user.

customized objective function provided by user. default: null

Definition Classes

GeneralParams
final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes

protected

Definition Classes

Params
final def eq(arg0: AnyRef): Boolean

Definition Classes

AnyRef
def equals(arg0: Any): Boolean

Definition Classes

AnyRef → Any
val eta: DoubleParam

step size shrinkage used in update to prevent overfitting.

step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and eta actually shrinks the feature weights to make the boosting process more conservative. [default=0.3] range: [0,1]

Definition Classes

BoosterParams
val evalMetric: Param[String]

evaluation metrics for validation data; a default metric will be assigned according to the objective (rmse for regression, error for classification, and mean average precision for ranking).

evaluation metrics for validation data; a default metric will be assigned according to the objective (rmse for regression, error for classification, and mean average precision for ranking). options: rmse, mae, logloss, error, merror, mlogloss, auc, ndcg, map, gamma-deviance

Definition Classes

LearningTaskParams
def explainParam(param: Param[_]): String

Definition Classes

Params
def explainParams(): String

Explains all params of this instance.

Explains all params of this instance. See explainParam().

Definition Classes

BoosterParams → Params
def extractLabeledPoints(dataset: Dataset[_]): RDD[org.apache.spark.ml.feature.LabeledPoint]

Attributes

protected

Definition Classes

Predictor
final def extractParamMap(): ParamMap

Definition Classes

Params
final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes

Params
final val featuresCol: Param[String]

Definition Classes

HasFeaturesCol
def finalize(): Unit

Attributes

protected[java.lang]

Definition Classes

AnyRef

Annotations

@throws( classOf[java.lang.Throwable] )
def fit(dataset: Dataset[_]): XGBoostModel

Definition Classes

Predictor → Estimator
def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[XGBoostModel]

Definition Classes

Estimator

Annotations

@Since( "2.0.0" )
def fit(dataset: Dataset[_], paramMap: ParamMap): XGBoostModel

Definition Classes

Estimator

Annotations

@Since( "2.0.0" )
def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): XGBoostModel

Definition Classes

Estimator

Annotations

@Since( "2.0.0" ) @varargs()
val gamma: DoubleParam

minimum loss reduction required to make a further partition on a leaf node of the tree.

minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be. [default=0] range: [0, Double.MaxValue]

Definition Classes

BoosterParams
final def get[T](param: Param[T]): Option[T]

Definition Classes

Params
final def getClass(): Class[_]

Definition Classes

AnyRef → Any
final def getDefault[T](param: Param[T]): Option[T]

Definition Classes

Params
final def getFeaturesCol: String

Definition Classes

HasFeaturesCol
final def getLabelCol: String

Definition Classes

HasLabelCol
final def getOrDefault[T](param: Param[T]): T

Definition Classes

Params
def getParam(paramName: String): Param[Any]

Definition Classes

Params
final def getPredictionCol: String

Definition Classes

HasPredictionCol
final def hasDefault[T](param: Param[T]): Boolean

Definition Classes

Params
def hasParam(paramName: String): Boolean

Definition Classes

Params
def hashCode(): Int

Definition Classes

AnyRef → Any
def initializeLogIfNecessary(isInterpreter: Boolean): Unit

Attributes

protected

Definition Classes

Logging
final def isDefined(param: Param[_]): Boolean

Definition Classes

Params
final def isInstanceOf[T0]: Boolean

Definition Classes

Any
final def isSet(param: Param[_]): Boolean

Definition Classes

Params
def isTraceEnabled(): Boolean

Attributes

protected

Definition Classes

Logging
final val labelCol: Param[String]

Definition Classes

HasLabelCol
val lambda: DoubleParam

L2 regularization term on weights; increasing this value will make the model more conservative.

L2 regularization term on weights; increasing this value will make the model more conservative. [default=1]

Definition Classes

BoosterParams
val lambdaBias: DoubleParam

Parameter of linear booster: L2 regularization term on bias. default 0 (no L1 regularization on bias because it is not important)

Parameter of linear booster: L2 regularization term on bias. default 0 (no L1 regularization on bias because it is not important)

Definition Classes

BoosterParams
def log: Logger

Attributes

protected

Definition Classes

Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes

protected

Definition Classes

Logging
def logDebug(msg: ⇒ String): Unit

Attributes

protected

Definition Classes

Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes

protected

Definition Classes

Logging
def logError(msg: ⇒ String): Unit

Attributes

protected

Definition Classes

Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes

protected

Definition Classes

Logging
def logInfo(msg: ⇒ String): Unit

Attributes

protected

Definition Classes

Logging
def logName: String

Attributes

protected

Definition Classes

Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes

protected

Definition Classes

Logging
def logTrace(msg: ⇒ String): Unit

Attributes

protected

Definition Classes

Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes

protected

Definition Classes

Logging
def logWarning(msg: ⇒ String): Unit

Attributes

protected

Definition Classes

Logging
val maxDeltaStep: DoubleParam

Maximum delta step we allow each tree's weight estimation to be.

Maximum delta step we allow each tree's weight estimation to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help make the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when the classes are extremely imbalanced. Setting it to a value of 1-10 might help control the update. [default=0] range: [0, Double.MaxValue]

Definition Classes

BoosterParams
val maxDepth: IntParam

maximum depth of a tree; increasing this value will make the model more complex and more likely to overfit.

maximum depth of a tree; increasing this value will make the model more complex and more likely to overfit. [default=6] range: [1, Int.MaxValue]

Definition Classes

BoosterParams
val minChildWeight: DoubleParam

minimum sum of instance weight(hessian) needed in a child.

minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. [default=1] range: [0, Double.MaxValue]

Definition Classes

BoosterParams
val missing: FloatParam

the value treated as missing.

the value treated as missing. default: Float.NaN

Definition Classes

GeneralParams
val nWorkers: IntParam

number of workers used to train xgboost model.

number of workers used to train xgboost model. default: 1

Definition Classes

GeneralParams
final def ne(arg0: AnyRef): Boolean

Definition Classes

AnyRef
val normalizeType: Param[String]

Parameter of Dart booster.

Parameter of Dart booster. type of normalization algorithm, options: {'tree', 'forest'}. [default="tree"]

Definition Classes

BoosterParams
final def notify(): Unit

Definition Classes

AnyRef
final def notifyAll(): Unit

Definition Classes

AnyRef
val numThreadPerTask: IntParam

number of threads used per worker.

number of threads used per worker. default 1

Definition Classes

GeneralParams
val objective: Param[String]

Specify the learning task and the corresponding learning objective.

Specify the learning task and the corresponding learning objective. options: reg:linear, reg:logistic, binary:logistic, binary:logitraw, count:poisson, multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:linear

Definition Classes

LearningTaskParams
lazy val params: Array[Param[_]]

Definition Classes

Params
final val predictionCol: Param[String]

Definition Classes

HasPredictionCol
val rateDrop: DoubleParam

Parameter of Dart booster.

Parameter of Dart booster. dropout rate. [default=0.0] range: [0.0, 1.0]

Definition Classes

BoosterParams
val round: IntParam

The number of rounds for boosting

The number of rounds for boosting

Definition Classes

GeneralParams
val sampleType: Param[String]

Parameter for Dart booster.

Parameter for Dart booster. Type of sampling algorithm. "uniform": dropped trees are selected uniformly. "weighted": dropped trees are selected in proportion to weight. [default="uniform"]

Definition Classes

BoosterParams
val scalePosWeight: DoubleParam

Control the balance of positive and negative weights, useful for unbalanced classes.

Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases). [default=0]

Definition Classes

BoosterParams
final def set(paramPair: ParamPair[_]): XGBoostEstimator.this.type

Attributes

protected

Definition Classes

Params
final def set(param: String, value: Any): XGBoostEstimator.this.type

Attributes

protected

Definition Classes

Params
final def set[T](param: Param[T], value: T): XGBoostEstimator.this.type

Definition Classes

Params
final def setDefault(paramPairs: ParamPair[_]*): XGBoostEstimator.this.type

Attributes

protected

Definition Classes

Params
final def setDefault[T](param: Param[T], value: T): XGBoostEstimator.this.type

Attributes

protected

Definition Classes

Params
def setFeaturesCol(value: String): XGBoostEstimator

Definition Classes

Predictor
def setLabelCol(value: String): XGBoostEstimator

Definition Classes

Predictor
def setPredictionCol(value: String): XGBoostEstimator

Definition Classes

Predictor
val silent: IntParam

0 means printing running messages, 1 means silent mode.

0 means printing running messages, 1 means silent mode. default: 0

Definition Classes

GeneralParams
val sketchEps: DoubleParam

This is only used for approximate greedy algorithm.

This is only used for approximate greedy algorithm. This roughly translates into O(1 / sketch_eps) number of bins. Compared to directly selecting the number of bins, this comes with a theoretical guarantee on sketch accuracy. [default=0.03] range: (0, 1)

Definition Classes

BoosterParams
val skipDrop: DoubleParam

Parameter of Dart booster.

Parameter of Dart booster. probability of skip dropout. If a dropout is skipped, new trees are added in the same manner as gbtree. [default=0.0] range: [0.0, 1.0]

Definition Classes

BoosterParams
val subSample: DoubleParam

subsample ratio of the training instance.

subsample ratio of the training instance. Setting it to 0.5 means that XGBoost randomly collects half of the data instances to grow trees, and this will prevent overfitting. [default=1] range: (0,1]

Definition Classes

BoosterParams
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes

AnyRef
def toString(): String

Definition Classes

Identifiable → AnyRef → Any
def train(trainingSet: Dataset[_]): XGBoostModel

produce an XGBoostModel by fitting the given dataset

produce an XGBoostModel by fitting the given dataset

Definition Classes

XGBoostEstimator → Predictor
def transformSchema(schema: StructType): StructType

Definition Classes

Predictor → PipelineStage
def transformSchema(schema: StructType, logging: Boolean): StructType

Attributes

protected

Definition Classes

PipelineStage

Annotations

@DeveloperApi()
val treeMethod: Param[String]

The tree construction algorithm used in XGBoost.

The tree construction algorithm used in XGBoost. options: {'auto', 'exact', 'approx'} [default='auto']

Definition Classes

BoosterParams
val uid: String

Definition Classes

XGBoostEstimator → Identifiable
val useExternalMemory: BooleanParam

whether to use external memory as cache.

whether to use external memory as cache. default: false

Definition Classes

GeneralParams
def validateAndTransformSchema(schema: StructType, fitting: Boolean, featuresDataType: DataType): StructType

Attributes

protected

Definition Classes

PredictorParams
final def wait(): Unit

Definition Classes

AnyRef

Annotations

@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes

AnyRef

Annotations

@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes

AnyRef

Annotations

@throws( ... )

Deprecated Value Members

def validateParams(): Unit

Definition Classes

Params

Annotations

@deprecated

Deprecated

(Since version 2.0.0) Will be removed in 2.1.0. Checks should be merged into transformSchema.

Related Doc: package spark

class XGBoostEstimator extends Predictor[Vector, XGBoostEstimator, XGBoostModel] with LearningTaskParams with GeneralParams with BoosterParams

Instance Constructors

new XGBoostEstimator(uid: String)

new XGBoostEstimator(xgboostParams: Map[String, Any])

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def $[T](param: Param[T]): T

final def ==(arg0: Any): Boolean

val alpha: DoubleParam

final def asInstanceOf[T0]: T0

val baseScore: DoubleParam

val boosterType: Param[String]

final def clear(param: Param[_]): XGBoostEstimator.this.type

def clone(): AnyRef

val colSampleByLevel: DoubleParam

val colSampleByTree: DoubleParam

def copy(extra: ParamMap): XGBoostEstimator

def copyValues[T <: Params](to: T, extra: ParamMap): T

val customEval: Param[EvalTrait]

val customObj: Param[ObjectiveTrait]

final def defaultCopy[T <: Params](extra: ParamMap): T

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

val eta: DoubleParam

val evalMetric: Param[String]

def explainParam(param: Param[_]): String

def explainParams(): String

def extractLabeledPoints(dataset: Dataset[_]): RDD[org.apache.spark.ml.feature.LabeledPoint]

final def extractParamMap(): ParamMap

final def extractParamMap(extra: ParamMap): ParamMap

final val featuresCol: Param[String]

def finalize(): Unit

def fit(dataset: Dataset[_]): XGBoostModel

def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[XGBoostModel]

def fit(dataset: Dataset[_], paramMap: ParamMap): XGBoostModel

def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): XGBoostModel

val gamma: DoubleParam

final def get[T](param: Param[T]): Option[T]

final def getClass(): Class[_]

final def getDefault[T](param: Param[T]): Option[T]

final def getFeaturesCol: String

final def getLabelCol: String

final def getOrDefault[T](param: Param[T]): T

def getParam(paramName: String): Param[Any]

final def getPredictionCol: String

final def hasDefault[T](param: Param[T]): Boolean

def hasParam(paramName: String): Boolean

def hashCode(): Int

def initializeLogIfNecessary(isInterpreter: Boolean): Unit

final def isDefined(param: Param[_]): Boolean

final def isInstanceOf[T0]: Boolean

final def isSet(param: Param[_]): Boolean

def isTraceEnabled(): Boolean

final val labelCol: Param[String]

val lambda: DoubleParam

val lambdaBias: DoubleParam

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def logError(msg: ⇒ String): Unit

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

def logInfo(msg: ⇒ String): Unit

def logName: String

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

def logTrace(msg: ⇒ String): Unit

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

def logWarning(msg: ⇒ String): Unit

val maxDeltaStep: DoubleParam

val maxDepth: IntParam

val minChildWeight: DoubleParam

val missing: FloatParam

val nWorkers: IntParam

final def ne(arg0: AnyRef): Boolean

val normalizeType: Param[String]

final def notify(): Unit

final def notifyAll(): Unit