You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1182 lines
74 KiB
1182 lines
74 KiB
<html><body>
|
|
<style>
|
|
|
|
body, h1, h2, h3, div, span, p, pre, a {
|
|
margin: 0;
|
|
padding: 0;
|
|
border: 0;
|
|
font-weight: inherit;
|
|
font-style: inherit;
|
|
font-size: 100%;
|
|
font-family: inherit;
|
|
vertical-align: baseline;
|
|
}
|
|
|
|
body {
|
|
font-size: 13px;
|
|
padding: 1em;
|
|
}
|
|
|
|
h1 {
|
|
font-size: 26px;
|
|
margin-bottom: 1em;
|
|
}
|
|
|
|
h2 {
|
|
font-size: 24px;
|
|
margin-bottom: 1em;
|
|
}
|
|
|
|
h3 {
|
|
font-size: 20px;
|
|
margin-bottom: 1em;
|
|
margin-top: 1em;
|
|
}
|
|
|
|
pre, code {
|
|
line-height: 1.5;
|
|
font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
|
|
}
|
|
|
|
pre {
|
|
margin-top: 0.5em;
|
|
}
|
|
|
|
h1, h2, h3, p {
|
|
font-family: Arial, sans-serif;
|
|
}
|
|
|
|
h1, h2, h3 {
|
|
border-bottom: solid #CCC 1px;
|
|
}
|
|
|
|
.toc_element {
|
|
margin-top: 0.5em;
|
|
}
|
|
|
|
.firstline {
|
|
margin-left: 2em;
|
|
}
|
|
|
|
.method {
|
|
margin-top: 1em;
|
|
border: solid 1px #CCC;
|
|
padding: 1em;
|
|
background: #EEE;
|
|
}
|
|
|
|
.details {
|
|
font-weight: bold;
|
|
font-size: 14px;
|
|
}
|
|
|
|
</style>
|
|
|
|
<h1><a href="bigquery_v2.html">BigQuery API</a> . <a href="bigquery_v2.models.html">models</a></h1>
|
|
<h2>Instance Methods</h2>
|
|
<p class="toc_element">
|
|
<code><a href="#delete">delete(projectId, datasetId, modelId)</a></code></p>
|
|
<p class="firstline">Deletes the model specified by modelId from the dataset.</p>
|
|
<p class="toc_element">
|
|
<code><a href="#get">get(projectId, datasetId, modelId)</a></code></p>
|
|
<p class="firstline">Gets the specified model resource by model ID.</p>
|
|
<p class="toc_element">
|
|
<code><a href="#list">list(projectId, datasetId, pageToken=None, maxResults=None)</a></code></p>
|
|
<p class="firstline">Lists all models in the specified dataset. Requires the READER dataset role.</p>
|
|
<p class="toc_element">
|
|
<code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
|
|
<p class="firstline">Retrieves the next page of results.</p>
|
|
<p class="toc_element">
|
|
<code><a href="#patch">patch(projectId, datasetId, modelId, body)</a></code></p>
|
|
<p class="firstline">Patch specific fields in the specified model.</p>
|
|
<h3>Method Details</h3>
|
|
<div class="method">
|
|
<code class="details" id="delete">delete(projectId, datasetId, modelId)</code>
|
|
<pre>Deletes the model specified by modelId from the dataset.
|
|
|
|
Args:
|
|
projectId: string, Project ID of the model to delete. (required)
|
|
datasetId: string, Dataset ID of the model to delete. (required)
|
|
modelId: string, Model ID of the model to delete. (required)
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="method">
|
|
<code class="details" id="get">get(projectId, datasetId, modelId)</code>
|
|
<pre>Gets the specified model resource by model ID.
|
|
|
|
Args:
|
|
projectId: string, Project ID of the requested model. (required)
|
|
datasetId: string, Dataset ID of the requested model. (required)
|
|
modelId: string, Model ID of the requested model. (required)
|
|
|
|
Returns:
|
|
An object of the form:
|
|
|
|
{
|
|
"labelColumns": [ # Output only. Label columns that were used to train this model.
|
|
# The output of the model will have a "predicted_" prefix to these columns.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"description": "A String", # [Optional] A user-friendly description of this model.
|
|
"trainingRuns": [ # Output only. Information for all training runs in increasing order of
|
|
# start_time.
|
|
{ # Information about a single training query run for the model.
|
|
"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
|
|
# end of training.
|
|
# data or just the eval data based on whether eval data was used during
|
|
# training. These are not present for imported models.
|
|
"clusteringMetrics": { # Evaluation metrics for clustering models. # [Beta] Populated for clustering models.
|
|
"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
|
|
"daviesBouldinIndex": 3.14, # Davies-Bouldin index.
|
|
},
|
|
"regressionMetrics": { # Evaluation metrics for regression models. # Populated for regression models.
|
|
"meanSquaredLogError": 3.14, # Mean squared log error.
|
|
"meanAbsoluteError": 3.14, # Mean absolute error.
|
|
"meanSquaredError": 3.14, # Mean squared error.
|
|
"medianAbsoluteError": 3.14, # Median absolute error.
|
|
"rSquared": 3.14, # R^2 score.
|
|
},
|
|
"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
|
|
"negativeLabel": "A String", # Label representing the negative class.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"positiveLabel": "A String", # Label representing the positive class.
|
|
"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
|
|
{ # Confusion matrix for binary classification models.
|
|
"truePositives": "A String", # Number of true samples predicted as true.
|
|
"recall": 3.14, # Aggregate recall.
|
|
"precision": 3.14, # Aggregate precision.
|
|
"falseNegatives": "A String", # Number of true samples predicted as false.
|
|
"trueNegatives": "A String", # Number of false samples predicted as false.
|
|
"falsePositives": "A String", # Number of false samples predicted as true.
|
|
"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.
|
|
},
|
|
],
|
|
},
|
|
"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"confusionMatrixList": [ # Confusion matrix at different thresholds.
|
|
{ # Confusion matrix for multi-class classification models.
|
|
"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
|
|
# confusion matrix.
|
|
"rows": [ # One row per actual label.
|
|
{ # A single row in the confusion matrix.
|
|
"entries": [ # Info describing predicted label distribution.
|
|
{ # A single entry in the confusion matrix.
|
|
"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
|
|
# also add an entry indicating the number of items under the
|
|
# confidence threshold.
|
|
"itemCount": "A String", # Number of items being predicted as this label.
|
|
},
|
|
],
|
|
"actualLabel": "A String", # The original label of this row.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
},
|
|
},
|
|
"results": [ # Output of each iteration run, results.size() &lt;= max_iterations.
|
|
{ # Information about a single iteration of the training run.
|
|
"index": 42, # Index of the iteration, 0 based.
|
|
"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
|
|
"durationMs": "A String", # Time taken to run the iteration in milliseconds.
|
|
"learnRate": 3.14, # Learn rate used for this iteration.
|
|
"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
|
|
"clusterInfos": [ # [Beta] Information about top clusters for clustering models.
|
|
{ # Information about a single cluster for clustering model.
|
|
"centroidId": "A String", # Centroid id.
|
|
"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
|
|
"clusterRadius": 3.14, # Cluster radius, the average distance from centroid
|
|
# to each point assigned to the cluster.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
"startTime": "A String", # The start time of this training run.
|
|
"trainingOptions": { # Options that were used for this training run, includes
|
|
# user specified and default options that were used.
|
|
"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
|
|
"inputLabelColumns": [ # Name of input label columns in training data.
|
|
"A String",
|
|
],
|
|
"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
|
|
# training algorithms.
|
|
"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
|
|
# any more (compared to min_relative_progress). Used only for iterative
|
|
# training algorithms.
|
|
"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
|
|
# strategy.
|
|
"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
|
|
# feature.
|
|
# 1. When data_split_method is CUSTOM, the corresponding column should
|
|
# be boolean. The rows with true value tag are eval data, and the false
|
|
# are training data.
|
|
# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
|
|
# rows (from smallest to largest) in the corresponding column are used
|
|
# as training data, and the rest are eval data. It respects the order
|
|
# in Orderable data types:
|
|
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
|
|
"numClusters": "A String", # [Beta] Number of clusters for clustering models.
|
|
"l1Regularization": 3.14, # L1 regularization coefficient.
|
|
"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
|
|
"distanceType": "A String", # [Beta] Distance type for clustering models.
|
|
"warmStart": True or False, # Whether to train a model from the last checkpoint.
|
|
"labelClassWeights": { # Weights associated with each label class, for rebalancing the
|
|
# training data. Only applicable for classification models.
|
|
"a_key": 3.14,
|
|
},
|
|
"lossType": "A String", # Type of loss function used during training run.
|
|
"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
|
|
# of data will be used as training data. The format should be double.
|
|
# Accurate to two decimal places.
|
|
# Default value is 0.2.
|
|
"l2Regularization": 3.14, # L2 regularization coefficient.
|
|
"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
|
|
# applicable for imported models.
|
|
"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
|
|
"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
|
|
# less than 'min_relative_progress'. Used only for iterative training
|
|
# algorithms.
|
|
"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
|
|
},
|
|
},
|
|
],
|
|
"featureColumns": [ # Output only. Input feature columns that were used to train this model.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"labels": { # [Optional] The labels associated with this model. You can use these to
|
|
# organize and group your models. Label keys and values can be no longer
|
|
# than 63 characters, can only contain lowercase letters, numeric
|
|
# characters, underscores and dashes. International characters are allowed.
|
|
# Label values are optional. Label keys must start with a letter and each
|
|
# label in the list must have a different key.
|
|
"a_key": "A String",
|
|
},
|
|
"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the
|
|
# epoch.
|
|
"modelType": "A String", # Output only. Type of the model resource.
|
|
"modelReference": { # Id path of a model. # Required. Unique identifier for this model.
|
|
"projectId": "A String", # [Required] The ID of the project containing this model.
|
|
"datasetId": "A String", # [Required] The ID of the dataset containing this model.
|
|
"modelId": "A String", # [Required] The ID of the model. The ID must contain only
|
|
# letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
|
|
# length is 1,024 characters.
|
|
},
|
|
"etag": "A String", # Output only. A hash of this resource.
|
|
"location": "A String", # Output only. The geographic location where the model resides. This value
|
|
# is inherited from the dataset.
|
|
"friendlyName": "A String", # [Optional] A descriptive name for this model.
|
|
"expirationTime": "A String", # [Optional] The time when this model expires, in milliseconds since the
|
|
# epoch. If not present, the model will persist indefinitely. Expired models
|
|
# will be deleted and their storage reclaimed. The defaultTableExpirationMs
|
|
# property of the encapsulating dataset can be used to set a default
|
|
# expirationTime on newly created models.
|
|
"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs
|
|
# since the epoch.
|
|
}</pre>
|
|
</div>
|
|
|
|
<div class="method">
|
|
<code class="details" id="list">list(projectId, datasetId, pageToken=None, maxResults=None)</code>
|
|
<pre>Lists all models in the specified dataset. Requires the READER dataset
|
|
role.
|
|
|
|
Args:
|
|
projectId: string, Project ID of the models to list. (required)
|
|
datasetId: string, Dataset ID of the models to list. (required)
|
|
pageToken: string, Page token, returned by a previous call to request the next page of
|
|
results
|
|
maxResults: integer, The maximum number of results per page.
|
|
|
|
Returns:
|
|
An object of the form:
|
|
|
|
{
|
|
"models": [ # Models in the requested dataset. Only the following fields are populated:
|
|
# model_reference, model_type, creation_time, last_modified_time and
|
|
# labels.
|
|
{
|
|
"labelColumns": [ # Output only. Label columns that were used to train this model.
|
|
# The output of the model will have a "predicted_" prefix to these columns.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"description": "A String", # [Optional] A user-friendly description of this model.
|
|
"trainingRuns": [ # Output only. Information for all training runs in increasing order of
|
|
# start_time.
|
|
{ # Information about a single training query run for the model.
|
|
"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
|
|
# end of training.
|
|
# data or just the eval data based on whether eval data was used during
|
|
# training. These are not present for imported models.
|
|
"clusteringMetrics": { # Evaluation metrics for clustering models. # [Beta] Populated for clustering models.
|
|
"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
|
|
"daviesBouldinIndex": 3.14, # Davies-Bouldin index.
|
|
},
|
|
"regressionMetrics": { # Evaluation metrics for regression models. # Populated for regression models.
|
|
"meanSquaredLogError": 3.14, # Mean squared log error.
|
|
"meanAbsoluteError": 3.14, # Mean absolute error.
|
|
"meanSquaredError": 3.14, # Mean squared error.
|
|
"medianAbsoluteError": 3.14, # Median absolute error.
|
|
"rSquared": 3.14, # R^2 score.
|
|
},
|
|
"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
|
|
"negativeLabel": "A String", # Label representing the negative class.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"positiveLabel": "A String", # Label representing the positive class.
|
|
"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
|
|
{ # Confusion matrix for binary classification models.
|
|
"truePositives": "A String", # Number of true samples predicted as true.
|
|
"recall": 3.14, # Aggregate recall.
|
|
"precision": 3.14, # Aggregate precision.
|
|
"falseNegatives": "A String", # Number of true samples predicted as false.
|
|
"trueNegatives": "A String", # Number of false samples predicted as false.
|
|
"falsePositives": "A String", # Number of false samples predicted as true.
|
|
"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.
|
|
},
|
|
],
|
|
},
|
|
"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"confusionMatrixList": [ # Confusion matrix at different thresholds.
|
|
{ # Confusion matrix for multi-class classification models.
|
|
"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
|
|
# confusion matrix.
|
|
"rows": [ # One row per actual label.
|
|
{ # A single row in the confusion matrix.
|
|
"entries": [ # Info describing predicted label distribution.
|
|
{ # A single entry in the confusion matrix.
|
|
"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
|
|
# also add an entry indicating the number of items under the
|
|
# confidence threshold.
|
|
"itemCount": "A String", # Number of items being predicted as this label.
|
|
},
|
|
],
|
|
"actualLabel": "A String", # The original label of this row.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
},
|
|
},
|
|
"results": [ # Output of each iteration run, results.size() &lt;= max_iterations.
|
|
{ # Information about a single iteration of the training run.
|
|
"index": 42, # Index of the iteration, 0 based.
|
|
"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
|
|
"durationMs": "A String", # Time taken to run the iteration in milliseconds.
|
|
"learnRate": 3.14, # Learn rate used for this iteration.
|
|
"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
|
|
"clusterInfos": [ # [Beta] Information about top clusters for clustering models.
|
|
{ # Information about a single cluster for clustering model.
|
|
"centroidId": "A String", # Centroid id.
|
|
"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
|
|
"clusterRadius": 3.14, # Cluster radius, the average distance from centroid
|
|
# to each point assigned to the cluster.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
"startTime": "A String", # The start time of this training run.
|
|
"trainingOptions": { # Options that were used for this training run, includes
|
|
# user specified and default options that were used.
|
|
"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
|
|
"inputLabelColumns": [ # Name of input label columns in training data.
|
|
"A String",
|
|
],
|
|
"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
|
|
# training algorithms.
|
|
"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
|
|
# any more (compared to min_relative_progress). Used only for iterative
|
|
# training algorithms.
|
|
"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
|
|
# strategy.
|
|
"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
|
|
# feature.
|
|
# 1. When data_split_method is CUSTOM, the corresponding column should
|
|
# be boolean. The rows with true value tag are eval data, and the false
|
|
# are training data.
|
|
# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
|
|
# rows (from smallest to largest) in the corresponding column are used
|
|
# as training data, and the rest are eval data. It respects the order
|
|
# in Orderable data types:
|
|
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
|
|
"numClusters": "A String", # [Beta] Number of clusters for clustering models.
|
|
"l1Regularization": 3.14, # L1 regularization coefficient.
|
|
"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
|
|
"distanceType": "A String", # [Beta] Distance type for clustering models.
|
|
"warmStart": True or False, # Whether to train a model from the last checkpoint.
|
|
"labelClassWeights": { # Weights associated with each label class, for rebalancing the
|
|
# training data. Only applicable for classification models.
|
|
"a_key": 3.14,
|
|
},
|
|
"lossType": "A String", # Type of loss function used during training run.
|
|
"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
|
|
# of data will be used as training data. The format should be double.
|
|
# Accurate to two decimal places.
|
|
# Default value is 0.2.
|
|
"l2Regularization": 3.14, # L2 regularization coefficient.
|
|
"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
|
|
# applicable for imported models.
|
|
"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
|
|
"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
|
|
# less than 'min_relative_progress'. Used only for iterative training
|
|
# algorithms.
|
|
"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
|
|
},
|
|
},
|
|
],
|
|
"featureColumns": [ # Output only. Input feature columns that were used to train this model.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"labels": { # [Optional] The labels associated with this model. You can use these to
|
|
# organize and group your models. Label keys and values can be no longer
|
|
# than 63 characters, can only contain lowercase letters, numeric
|
|
# characters, underscores and dashes. International characters are allowed.
|
|
# Label values are optional. Label keys must start with a letter and each
|
|
# label in the list must have a different key.
|
|
"a_key": "A String",
|
|
},
|
|
"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the
|
|
# epoch.
|
|
"modelType": "A String", # Output only. Type of the model resource.
|
|
"modelReference": { # Id path of a model. # Required. Unique identifier for this model.
|
|
"projectId": "A String", # [Required] The ID of the project containing this model.
|
|
"datasetId": "A String", # [Required] The ID of the dataset containing this model.
|
|
"modelId": "A String", # [Required] The ID of the model. The ID must contain only
|
|
# letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
|
|
# length is 1,024 characters.
|
|
},
|
|
"etag": "A String", # Output only. A hash of this resource.
|
|
"location": "A String", # Output only. The geographic location where the model resides. This value
|
|
# is inherited from the dataset.
|
|
"friendlyName": "A String", # [Optional] A descriptive name for this model.
|
|
"expirationTime": "A String", # [Optional] The time when this model expires, in milliseconds since the
|
|
# epoch. If not present, the model will persist indefinitely. Expired models
|
|
# will be deleted and their storage reclaimed. The defaultTableExpirationMs
|
|
# property of the encapsulating dataset can be used to set a default
|
|
# expirationTime on newly created models.
|
|
"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs
|
|
# since the epoch.
|
|
},
|
|
],
|
|
"nextPageToken": "A String", # A token to request the next page of results.
|
|
}</pre>
|
|
</div>
|
|
|
|
<div class="method">
|
|
<code class="details" id="list_next">list_next(previous_request, previous_response)</code>
|
|
<pre>Retrieves the next page of results.
|
|
|
|
Args:
|
|
previous_request: The request for the previous page. (required)
|
|
previous_response: The response from the request for the previous page. (required)
|
|
|
|
Returns:
|
|
A request object that you can call 'execute()' on to request the next
|
|
page. Returns None if there are no more items in the collection.
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="method">
|
|
<code class="details" id="patch">patch(projectId, datasetId, modelId, body)</code>
|
|
<pre>Patch specific fields in the specified model.
|
|
|
|
Args:
|
|
projectId: string, Project ID of the model to patch. (required)
|
|
datasetId: string, Dataset ID of the model to patch. (required)
|
|
modelId: string, Model ID of the model to patch. (required)
|
|
body: object, The request body. (required)
|
|
The object takes the form of:
|
|
|
|
{
|
|
"labelColumns": [ # Output only. Label columns that were used to train this model.
|
|
# The output of the model will have a "predicted_" prefix to these columns.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"description": "A String", # [Optional] A user-friendly description of this model.
|
|
"trainingRuns": [ # Output only. Information for all training runs in increasing order of
|
|
# start_time.
|
|
{ # Information about a single training query run for the model.
|
|
"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
|
|
# end of training.
|
|
# data or just the eval data based on whether eval data was used during
|
|
# training. These are not present for imported models.
|
|
"clusteringMetrics": { # Evaluation metrics for clustering models. # [Beta] Populated for clustering models.
|
|
"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
|
|
"daviesBouldinIndex": 3.14, # Davies-Bouldin index.
|
|
},
|
|
"regressionMetrics": { # Evaluation metrics for regression models. # Populated for regression models.
|
|
"meanSquaredLogError": 3.14, # Mean squared log error.
|
|
"meanAbsoluteError": 3.14, # Mean absolute error.
|
|
"meanSquaredError": 3.14, # Mean squared error.
|
|
"medianAbsoluteError": 3.14, # Median absolute error.
|
|
"rSquared": 3.14, # R^2 score.
|
|
},
|
|
"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
|
|
"negativeLabel": "A String", # Label representing the negative class.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"positiveLabel": "A String", # Label representing the positive class.
|
|
"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
|
|
{ # Confusion matrix for binary classification models.
|
|
"truePositives": "A String", # Number of true samples predicted as true.
|
|
"recall": 3.14, # Aggregate recall.
|
|
"precision": 3.14, # Aggregate precision.
|
|
"falseNegatives": "A String", # Number of false samples predicted as false.
|
|
"trueNegatives": "A String", # Number of true samples predicted as false.
|
|
"falsePositives": "A String", # Number of false samples predicted as true.
|
|
"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.
|
|
},
|
|
],
|
|
},
|
|
"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"confusionMatrixList": [ # Confusion matrix at different thresholds.
|
|
{ # Confusion matrix for multi-class classification models.
|
|
"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
|
|
# confusion matrix.
|
|
"rows": [ # One row per actual label.
|
|
{ # A single row in the confusion matrix.
|
|
"entries": [ # Info describing predicted label distribution.
|
|
{ # A single entry in the confusion matrix.
|
|
"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
|
|
# also add an entry indicating the number of items under the
|
|
# confidence threshold.
|
|
"itemCount": "A String", # Number of items being predicted as this label.
|
|
},
|
|
],
|
|
"actualLabel": "A String", # The original label of this row.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
},
|
|
},
|
|
"results": [ # Output of each iteration run, results.size() <= max_iterations.
|
|
{ # Information about a single iteration of the training run.
|
|
"index": 42, # Index of the iteration, 0 based.
|
|
"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
|
|
"durationMs": "A String", # Time taken to run the iteration in milliseconds.
|
|
"learnRate": 3.14, # Learn rate used for this iteration.
|
|
"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
|
|
"clusterInfos": [ # [Beta] Information about top clusters for clustering models.
|
|
{ # Information about a single cluster for clustering model.
|
|
"centroidId": "A String", # Centroid id.
|
|
"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
|
|
"clusterRadius": 3.14, # Cluster radius, the average distance from centroid
|
|
# to each point assigned to the cluster.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
"startTime": "A String", # The start time of this training run.
|
|
"trainingOptions": { # Options that were used for this training run, includes
|
|
# user specified and default options that were used.
|
|
"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
|
|
"inputLabelColumns": [ # Name of input label columns in training data.
|
|
"A String",
|
|
],
|
|
"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
|
|
# training algorithms.
|
|
"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
|
|
# any more (compared to min_relative_progress). Used only for iterative
|
|
# training algorithms.
|
|
"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
|
|
# strategy.
|
|
"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
|
|
# feature.
|
|
# 1. When data_split_method is CUSTOM, the corresponding column should
|
|
# be boolean. The rows with true value tag are eval data, and the false
|
|
# are training data.
|
|
# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
|
|
# rows (from smallest to largest) in the corresponding column are used
|
|
# as training data, and the rest are eval data. It respects the order
|
|
# in Orderable data types:
|
|
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
|
|
"numClusters": "A String", # [Beta] Number of clusters for clustering models.
|
|
"l1Regularization": 3.14, # L1 regularization coefficient.
|
|
"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
|
|
"distanceType": "A String", # [Beta] Distance type for clustering models.
|
|
"warmStart": True or False, # Whether to train a model from the last checkpoint.
|
|
"labelClassWeights": { # Weights associated with each label class, for rebalancing the
|
|
# training data. Only applicable for classification models.
|
|
"a_key": 3.14,
|
|
},
|
|
"lossType": "A String", # Type of loss function used during training run.
|
|
"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
|
|
# of data will be used as training data. The format should be double.
|
|
# Accurate to two decimal places.
|
|
# Default value is 0.2.
|
|
"l2Regularization": 3.14, # L2 regularization coefficient.
|
|
"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
|
|
# applicable for imported models.
|
|
"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
|
|
"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
|
|
# less than 'min_relative_progress'. Used only for iterative training
|
|
# algorithms.
|
|
"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
|
|
},
|
|
},
|
|
],
|
|
"featureColumns": [ # Output only. Input feature columns that were used to train this model.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"labels": { # [Optional] The labels associated with this model. You can use these to
|
|
# organize and group your models. Label keys and values can be no longer
|
|
# than 63 characters, can only contain lowercase letters, numeric
|
|
# characters, underscores and dashes. International characters are allowed.
|
|
# Label values are optional. Label keys must start with a letter and each
|
|
# label in the list must have a different key.
|
|
"a_key": "A String",
|
|
},
|
|
"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the
|
|
# epoch.
|
|
"modelType": "A String", # Output only. Type of the model resource.
|
|
"modelReference": { # Id path of a model. # Required. Unique identifier for this model.
|
|
"projectId": "A String", # [Required] The ID of the project containing this model.
|
|
"datasetId": "A String", # [Required] The ID of the dataset containing this model.
|
|
"modelId": "A String", # [Required] The ID of the model. The ID must contain only
|
|
# letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
|
|
# length is 1,024 characters.
|
|
},
|
|
"etag": "A String", # Output only. A hash of this resource.
|
|
"location": "A String", # Output only. The geographic location where the model resides. This value
|
|
# is inherited from the dataset.
|
|
"friendlyName": "A String", # [Optional] A descriptive name for this model.
|
|
"expirationTime": "A String", # [Optional] The time when this model expires, in milliseconds since the
|
|
# epoch. If not present, the model will persist indefinitely. Expired models
|
|
# will be deleted and their storage reclaimed. The defaultTableExpirationMs
|
|
# property of the encapsulating dataset can be used to set a default
|
|
# expirationTime on newly created models.
|
|
"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs
|
|
# since the epoch.
|
|
}
|
|
|
|
|
|
Returns:
|
|
An object of the form:
|
|
|
|
{
|
|
"labelColumns": [ # Output only. Label columns that were used to train this model.
|
|
# The output of the model will have a "predicted_" prefix to these columns.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"description": "A String", # [Optional] A user-friendly description of this model.
|
|
"trainingRuns": [ # Output only. Information for all training runs in increasing order of
|
|
# start_time.
|
|
{ # Information about a single training query run for the model.
|
|
"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
|
|
# end of training.
|
|
# data or just the eval data based on whether eval data was used during
|
|
# training. These are not present for imported models.
|
|
"clusteringMetrics": { # Evaluation metrics for clustering models. # [Beta] Populated for clustering models.
|
|
"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
|
|
"daviesBouldinIndex": 3.14, # Davies-Bouldin index.
|
|
},
|
|
"regressionMetrics": { # Evaluation metrics for regression models. # Populated for regression models.
|
|
"meanSquaredLogError": 3.14, # Mean squared log error.
|
|
"meanAbsoluteError": 3.14, # Mean absolute error.
|
|
"meanSquaredError": 3.14, # Mean squared error.
|
|
"medianAbsoluteError": 3.14, # Median absolute error.
|
|
"rSquared": 3.14, # R^2 score.
|
|
},
|
|
"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
|
|
"negativeLabel": "A String", # Label representing the negative class.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"positiveLabel": "A String", # Label representing the positive class.
|
|
"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
|
|
{ # Confusion matrix for binary classification models.
|
|
"truePositives": "A String", # Number of true samples predicted as true.
|
|
"recall": 3.14, # Aggregate recall.
|
|
"precision": 3.14, # Aggregate precision.
|
|
"falseNegatives": "A String", # Number of false samples predicted as false.
|
|
"trueNegatives": "A String", # Number of true samples predicted as false.
|
|
"falsePositives": "A String", # Number of false samples predicted as true.
|
|
"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.
|
|
},
|
|
],
|
|
},
|
|
"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
|
|
"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
|
|
# models, the metrics are either macro-averaged or micro-averaged. When
|
|
# macro-averaged, the metrics are calculated for each label and then an
|
|
# unweighted average is taken of those values. When micro-averaged, the
|
|
# metric is calculated globally by counting the total number of correctly
|
|
# predicted rows.
|
|
"recall": 3.14, # Recall is the fraction of actual positive labels that were given a
|
|
# positive prediction. For multiclass this is a macro-averaged metric.
|
|
"precision": 3.14, # Precision is the fraction of actual positive predictions that had
|
|
# positive actual labels. For multiclass this is a macro-averaged
|
|
# metric treating each class as a binary classifier.
|
|
"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
|
|
"threshold": 3.14, # Threshold at which the metrics are computed. For binary
|
|
# classification models this is the positive class threshold.
|
|
# For multi-class classification models this is the confidence
|
|
# threshold.
|
|
"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
|
|
# multiclass this is a micro-averaged metric.
|
|
"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
|
|
# this is a macro-averaged metric.
|
|
"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
|
|
# metric.
|
|
},
|
|
"confusionMatrixList": [ # Confusion matrix at different thresholds.
|
|
{ # Confusion matrix for multi-class classification models.
|
|
"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
|
|
# confusion matrix.
|
|
"rows": [ # One row per actual label.
|
|
{ # A single row in the confusion matrix.
|
|
"entries": [ # Info describing predicted label distribution.
|
|
{ # A single entry in the confusion matrix.
|
|
"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
|
|
# also add an entry indicating the number of items under the
|
|
# confidence threshold.
|
|
"itemCount": "A String", # Number of items being predicted as this label.
|
|
},
|
|
],
|
|
"actualLabel": "A String", # The original label of this row.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
},
|
|
},
|
|
"results": [ # Output of each iteration run, results.size() <= max_iterations.
|
|
{ # Information about a single iteration of the training run.
|
|
"index": 42, # Index of the iteration, 0 based.
|
|
"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
|
|
"durationMs": "A String", # Time taken to run the iteration in milliseconds.
|
|
"learnRate": 3.14, # Learn rate used for this iteration.
|
|
"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
|
|
"clusterInfos": [ # [Beta] Information about top clusters for clustering models.
|
|
{ # Information about a single cluster for clustering model.
|
|
"centroidId": "A String", # Centroid id.
|
|
"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
|
|
"clusterRadius": 3.14, # Cluster radius, the average distance from centroid
|
|
# to each point assigned to the cluster.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
"startTime": "A String", # The start time of this training run.
|
|
"trainingOptions": { # Options that were used for this training run, includes
|
|
# user specified and default options that were used.
|
|
"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
|
|
"inputLabelColumns": [ # Name of input label columns in training data.
|
|
"A String",
|
|
],
|
|
"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
|
|
# training algorithms.
|
|
"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
|
|
# any more (compared to min_relative_progress). Used only for iterative
|
|
# training algorithms.
|
|
"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
|
|
# strategy.
|
|
"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
|
|
# feature.
|
|
# 1. When data_split_method is CUSTOM, the corresponding column should
|
|
# be boolean. The rows with true value tag are eval data, and the false
|
|
# are training data.
|
|
# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
|
|
# rows (from smallest to largest) in the corresponding column are used
|
|
# as training data, and the rest are eval data. It respects the order
|
|
# in Orderable data types:
|
|
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
|
|
"numClusters": "A String", # [Beta] Number of clusters for clustering models.
|
|
"l1Regularization": 3.14, # L1 regularization coefficient.
|
|
"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
|
|
"distanceType": "A String", # [Beta] Distance type for clustering models.
|
|
"warmStart": True or False, # Whether to train a model from the last checkpoint.
|
|
"labelClassWeights": { # Weights associated with each label class, for rebalancing the
|
|
# training data. Only applicable for classification models.
|
|
"a_key": 3.14,
|
|
},
|
|
"lossType": "A String", # Type of loss function used during training run.
|
|
"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
|
|
# of data will be used as training data. The format should be double.
|
|
# Accurate to two decimal places.
|
|
# Default value is 0.2.
|
|
"l2Regularization": 3.14, # L2 regularization coefficient.
|
|
"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
|
|
# applicable for imported models.
|
|
"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
|
|
"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
|
|
# less than 'min_relative_progress'. Used only for iterative training
|
|
# algorithms.
|
|
"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
|
|
},
|
|
},
|
|
],
|
|
"featureColumns": [ # Output only. Input feature columns that were used to train this model.
|
|
{ # A field or a column.
|
|
"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
|
|
# specified (e.g., CREATE FUNCTION statement can omit the return type;
|
|
# in this case the output parameter does not have this "type" field).
|
|
# Examples:
|
|
# INT64: {type_kind="INT64"}
|
|
# ARRAY&lt;STRING&gt;: {type_kind="ARRAY", array_element_type="STRING"}
|
|
# STRUCT&lt;x STRING, y ARRAY&lt;DATE&gt;&gt;:
|
|
# {type_kind="STRUCT",
|
|
# struct_type={fields=[
|
|
# {name="x", type={type_kind="STRING"}},
|
|
# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
|
|
# ]}}
|
|
"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
|
|
"fields": [
|
|
# Object with schema name: StandardSqlField
|
|
],
|
|
},
|
|
"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
|
|
"typeKind": "A String", # Required. The top level type of this field.
|
|
# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
|
|
},
|
|
"name": "A String", # Optional. The name of this field. Can be absent for struct fields.
|
|
},
|
|
],
|
|
"labels": { # [Optional] The labels associated with this model. You can use these to
|
|
# organize and group your models. Label keys and values can be no longer
|
|
# than 63 characters, can only contain lowercase letters, numeric
|
|
# characters, underscores and dashes. International characters are allowed.
|
|
# Label values are optional. Label keys must start with a letter and each
|
|
# label in the list must have a different key.
|
|
"a_key": "A String",
|
|
},
|
|
"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the
|
|
# epoch.
|
|
"modelType": "A String", # Output only. Type of the model resource.
|
|
"modelReference": { # Id path of a model. # Required. Unique identifier for this model.
|
|
"projectId": "A String", # [Required] The ID of the project containing this model.
|
|
"datasetId": "A String", # [Required] The ID of the dataset containing this model.
|
|
"modelId": "A String", # [Required] The ID of the model. The ID must contain only
|
|
# letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
|
|
# length is 1,024 characters.
|
|
},
|
|
"etag": "A String", # Output only. A hash of this resource.
|
|
"location": "A String", # Output only. The geographic location where the model resides. This value
|
|
# is inherited from the dataset.
|
|
"friendlyName": "A String", # [Optional] A descriptive name for this model.
|
|
"expirationTime": "A String", # [Optional] The time when this model expires, in milliseconds since the
|
|
# epoch. If not present, the model will persist indefinitely. Expired models
|
|
# will be deleted and their storage reclaimed. The defaultTableExpirationMs
|
|
# property of the encapsulating dataset can be used to set a default
|
|
# expirationTime on newly created models.
|
|
"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs
|
|
# since the epoch.
|
|
}</pre>
|
|
</div>
|
|
|
|
</body></html> |