<html><body>
<style>
  body, h1, h2, h3, div, span, p, pre, a {
    margin: 0;
    padding: 0;
    border: 0;
    font-weight: inherit;
    font-style: inherit;
    font-size: 100%;
    font-family: inherit;
    vertical-align: baseline;
  }

  body {
    font-size: 13px;
    padding: 1em;
  }

  h1 {
    font-size: 26px;
    margin-bottom: 1em;
  }

  h2 {
    font-size: 24px;
    margin-bottom: 1em;
  }

  h3 {
    font-size: 20px;
    margin-bottom: 1em;
    margin-top: 1em;
  }

  pre, code {
    line-height: 1.5;
    font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
  }

  pre {
    margin-top: 0.5em;
  }

  h1, h2, h3, p {
    font-family: Arial, sans-serif;
  }

  h1, h2, h3 {
    border-bottom: solid #CCC 1px;
  }

  .toc_element {
    margin-top: 0.5em;
  }

  .firstline {
    margin-left: 2em;
  }

  .method {
    margin-top: 1em;
    border: solid 1px #CCC;
    padding: 1em;
    background: #EEE;
  }

  .details {
    font-weight: bold;
    font-size: 14px;
  }

</style>
<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a> . <a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="#lease">lease(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Leases a dataflow WorkItem to run.</p>
<p class="toc_element">
  <code><a href="#reportStatus">reportStatus(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Reports the status of dataflow WorkItems leased by a worker.</p>
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="lease">lease(projectId, location, jobId, body, x__xgafv=None)</code>
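<p>A minimal usage sketch with the google-api-python-client, ahead of the generated
reference below. The project, location, job, worker ID and work item type strings are
placeholder values for illustration and are not defined by this reference:</p>
<pre>
from googleapiclient.discovery import build

# Build the Dataflow client; assumes application-default credentials are configured.
dataflow = build('dataflow', 'v1b3')

# Request body following the schema documented below; the concrete values are hypothetical.
lease_request = {
    'workerId': 'example-harness-0',          # ID of the worker VM requesting work
    'workItemTypes': ['map_task'],            # assumed type filter; see "workItemTypes" below
    'workerCapabilities': ['remote_source'],  # assumed capability string
    'requestedLeaseDuration': '300s',         # initial lease period
    'currentWorkerTime': '2021-01-01T00:00:00Z',
}

response = dataflow.projects().locations().jobs().workItems().lease(
    projectId='example-project',
    location='us-central1',
    jobId='example-job-id',
    body=lease_request,
).execute()

for work_item in response.get('workItems', []):
    print(work_item.get('jobId'), work_item.get('leaseExpireTime'))
</pre>
<p>Each leased WorkItem in the response carries a <code>leaseExpireTime</code>, a recommended
<code>reportStatusInterval</code> and an <code>initialReportIndex</code>; a worker would then call
<code>reportStatus</code> periodically within that interval for as long as it holds the lease.</p>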
<pre>Leases a dataflow WorkItem to run.

Args:
  projectId: string, Identifies the project this worker belongs to. (required)
  location: string, The [regional endpoint]
    (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
    contains the WorkItem's job. (required)
  jobId: string, Identifies the workflow job this worker belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Request to lease WorkItems.
    "workItemTypes": [ # Filter for WorkItem type.
      "A String",
    ],
    "workerCapabilities": [ # Worker capabilities. WorkItems might be limited to workers with specific
        # capabilities.
      "A String",
    ],
    "currentWorkerTime": "A String", # The current timestamp at the worker.
    "requestedLeaseDuration": "A String", # The initial lease period.
    "workerId": "A String", # Identifies the worker leasing work -- typically the ID of the
        # virtual machine running the worker.
    "unifiedWorkerRequest": { # Untranslated bag-of-bytes WorkRequest from UnifiedWorker.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "location": "A String", # The [regional endpoint]
        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
        # contains the WorkItem's job.
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

{ # Response to a request to lease WorkItems.
|
|
"workItems": [ # A list of the leased WorkItems.
|
|
{ # WorkItem represents basic information about a WorkItem to be executed
|
|
# in the cloud.
|
|
"packages": [ # Any required packages that need to be fetched in order to execute
|
|
# this WorkItem.
|
|
{ # The packages that must be installed in order for a worker to run the
|
|
# steps of the Cloud Dataflow job that will be assigned to its worker
|
|
# pool.
|
|
#
|
|
# This is the mechanism by which the Cloud Dataflow SDK causes code to
|
|
# be loaded onto the workers. For example, the Cloud Dataflow Java SDK
|
|
# might use this to install jars containing the user's code and all of the
|
|
# various dependencies (libraries, data files, etc.) required in order
|
|
# for that code to run.
|
|
"location": "A String", # The resource to read the package from. The supported resource type is:
|
|
#
|
|
# Google Cloud Storage:
|
|
#
|
|
# storage.googleapis.com/{bucket}
|
|
# bucket.storage.googleapis.com/
|
|
"name": "A String", # The name of the package.
|
|
},
|
|
],
|
|
"leaseExpireTime": "A String", # Time when the lease on this Work will expire.
|
|
"seqMapTask": { # Describes a particular function to invoke. # Additional information for SeqMapTask WorkItems.
|
|
"inputs": [ # Information about each of the inputs.
|
|
{ # Information about a side input of a DoFn or an input of a SeqDoFn.
|
|
"sources": [ # The source(s) to read element(s) from to get the value of this side input.
|
|
# If more than one source, then the elements are taken from the
|
|
# sources, in the specified order if order matters.
|
|
# At least one source is required.
|
|
{ # A source that records can be read and decoded from.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
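# For illustration only (not part of the response schema): the merge rule above
# behaves as if, assuming the specs are plain dicts:
#
#   effective_spec = {}
#   for base in source["baseSpecs"]:       # later items win
#       effective_spec.update(base)
#   effective_spec.update(source["spec"])  # spec overrides anything in base_specs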
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
],
|
|
"kind": { # How to interpret the source element(s) as a side input value.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"tag": "A String", # The id of the tag the user code will access this side input by;
|
|
# this should correspond to the tag of some MultiOutputInfo.
|
|
},
|
|
],
|
|
"name": "A String", # The user-provided name of the SeqDo operation.
|
|
"stageName": "A String", # System-defined name of the stage containing the SeqDo operation.
|
|
# Unique across the workflow.
|
|
"systemName": "A String", # System-defined name of the SeqDo operation.
|
|
# Unique across the workflow.
|
|
"userFn": { # The user function to invoke.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"outputInfos": [ # Information about each of the outputs.
|
|
{ # Information about an output of a SeqMapTask.
|
|
"tag": "A String", # The id of the TupleTag the user code will tag the output value by.
|
|
"sink": { # A sink that records can be encoded and written to. # The sink to write the output value to.
|
|
"codec": { # The codec to use to encode data written to the sink.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"spec": { # The sink to write to, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
},
|
|
},
|
|
],
|
|
},
|
|
"projectId": "A String", # Identifies the cloud project this WorkItem belongs to.
|
|
"streamingComputationTask": { # A task which describes what action should be performed for the specified # Additional information for StreamingComputationTask WorkItems.
|
|
# streaming computation ranges.
|
|
"taskType": "A String", # A type of streaming computation task.
|
|
"computationRanges": [ # Contains ranges of a streaming computation this task should apply to.
|
|
{ # Describes full or partial data disk assignment information of the computation
|
|
# ranges.
|
|
"rangeAssignments": [ # Data disk assignments for ranges from this computation.
|
|
{ # Data disk assignment information for a specific key-range of a sharded
|
|
# computation.
|
|
# Currently we only support UTF-8 character splits to simplify encoding into
|
|
# JSON.
|
|
"start": "A String", # The start (inclusive) of the key range.
|
|
"end": "A String", # The end (exclusive) of the key range.
|
|
"dataDisk": "A String", # The name of the data disk where data for this range is stored.
|
|
# This name is local to the Google Cloud Platform project and uniquely
|
|
# identifies the disk within that project, for example
|
|
# "myproject-1014-104817-4c2-harness-0-disk-1".
|
|
},
|
|
],
|
|
"computationId": "A String", # The ID of the computation.
|
|
},
|
|
],
|
|
"dataDisks": [ # Describes the set of data disks this task should apply to.
|
|
{ # Describes mounted data disk.
|
|
"dataDisk": "A String", # The name of the data disk.
|
|
# This name is local to the Google Cloud Platform project and uniquely
|
|
# identifies the disk within that project, for example
|
|
# "myproject-1014-104817-4c2-harness-0-disk-1".
|
|
},
|
|
],
|
|
},
|
|
"initialReportIndex": "A String", # The initial index to use when reporting the status of the WorkItem.
|
|
"mapTask": { # MapTask consists of an ordered set of instructions, each of which # Additional information for MapTask WorkItems.
|
|
# describes one particular low-level operation for the worker to
|
|
# perform in order to accomplish the MapTask's WorkItem.
|
|
#
|
|
# Each instruction must appear in the list before any instructions which
|
|
# depends on its output.
|
|
"systemName": "A String", # System-defined name of this MapTask.
|
|
# Unique across the workflow.
|
|
"counterPrefix": "A String", # Counter prefix that can be used to prefix counters. Not currently used in
|
|
# Dataflow.
|
|
"stageName": "A String", # System-defined name of the stage containing this MapTask.
|
|
# Unique across the workflow.
|
|
"instructions": [ # The instructions in the MapTask.
|
|
{ # Describes a particular operation comprising a MapTask.
|
|
"name": "A String", # User-provided name of this operation.
|
|
"read": { # An instruction that reads records. # Additional information for Read instructions.
|
|
# Takes no inputs, produces one output.
|
|
"source": { # A source that records can be read and decoded from. # The source to read from.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
"outputs": [ # Describes the outputs of the instruction.
|
|
{ # An output of an instruction.
|
|
"name": "A String", # The user-provided name of this output.
|
|
"onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions
|
|
# should only report the key size.
|
|
"codec": { # The codec to use to encode data being written via this output.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"systemName": "A String", # System-defined name of this output.
|
|
# Unique across the workflow.
|
|
"originalName": "A String", # System-defined name for this output in the original workflow graph.
|
|
# Outputs that do not contribute to an original instruction do not set this.
|
|
"onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions
|
|
# should only report the value size.
|
|
},
|
|
],
|
|
"partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions.
|
|
# One input and one output.
|
|
"sideInputs": [ # Zero or more side inputs.
|
|
{ # Information about a side input of a DoFn or an input of a SeqDoFn.
|
|
"sources": [ # The source(s) to read element(s) from to get the value of this side input.
|
|
# If more than one source, then the elements are taken from the
|
|
# sources, in the specified order if order matters.
|
|
# At least one source is required.
|
|
{ # A source that records can be read and decoded from.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
],
|
|
"kind": { # How to interpret the source element(s) as a side input value.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"tag": "A String", # The id of the tag the user code will access this side input by;
|
|
# this should correspond to the tag of some MultiOutputInfo.
|
|
},
|
|
],
|
|
"originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the
|
|
# intermediate store between the GBK and the CombineValues.
|
|
"originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the
|
|
# CombineValues instruction lifted into this instruction.
|
|
"valueCombiningFn": { # The value combining function to invoke.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction.
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
"inputElementCodec": { # The codec to use for interpreting an element in the input PTable.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
},
|
|
"write": { # An instruction that writes records. # Additional information for Write instructions.
|
|
# Takes one input, produces no outputs.
|
|
"input": { # An input of an instruction, as a reference to an output of a # The input.
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
"sink": { # A sink that records can be encoded and written to. # The sink to write to.
|
|
"codec": { # The codec to use to encode data written to the sink.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"spec": { # The sink to write to, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
},
|
|
},
|
|
"systemName": "A String", # System-defined name of this operation.
|
|
# Unique across the workflow.
|
|
"flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions.
|
|
"inputs": [ # Describes the inputs to the flatten instruction.
|
|
{ # An input of an instruction, as a reference to an output of a
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
],
|
|
},
|
|
"originalName": "A String", # System-defined name for the operation in the original workflow graph.
|
|
"parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions.
|
|
# Takes one main input and zero or more side inputs, and produces
|
|
# zero or more outputs.
|
|
# Runs user code.
|
|
"sideInputs": [ # Zero or more side inputs.
|
|
{ # Information about a side input of a DoFn or an input of a SeqDoFn.
|
|
"sources": [ # The source(s) to read element(s) from to get the value of this side input.
|
|
# If more than one source, then the elements are taken from the
|
|
# sources, in the specified order if order matters.
|
|
# At least one source is required.
|
|
{ # A source that records can be read and decoded from.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
],
|
|
"kind": { # How to interpret the source element(s) as a side input value.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"tag": "A String", # The id of the tag the user code will access this side input by;
|
|
# this should correspond to the tag of some MultiOutputInfo.
|
|
},
|
|
],
|
|
"input": { # An input of an instruction, as a reference to an output of a # The input.
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
"multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn.
|
|
{ # Information about an output of a multi-output DoFn.
|
|
"tag": "A String", # The id of the tag the user code will emit to this output by; this
|
|
# should correspond to the tag of some SideInputInfo.
|
|
},
|
|
],
|
|
"numOutputs": 42, # The number of outputs.
|
|
"userFn": { # The user function to invoke.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
},
|
|
},
|
|
],
|
|
},
|
|
"jobId": "A String", # Identifies the workflow job this WorkItem belongs to.
|
|
"reportStatusInterval": "A String", # Recommended reporting interval.
|
|
"sourceOperationTask": { # A work item that represents the different operations that can be # Additional information for source operation WorkItems.
|
|
# performed on a user-defined Source specification.
|
|
"name": "A String", # User-provided name of the Read instruction for this source.
|
|
"stageName": "A String", # System-defined name of the stage containing the source operation.
|
|
# Unique across the workflow.
|
|
"getMetadata": { # A request to compute the SourceMetadata of a Source. # Information about a request to get metadata about a source.
|
|
"source": { # A source that records can be read and decoded from. # Specification of the source whose metadata should be computed.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
"systemName": "A String", # System-defined name of the Read instruction for this source.
|
|
# Unique across the workflow.
|
|
"split": { # Represents the operation to split a high-level Source specification # Information about a request to split a source.
|
|
# into bundles (parts for parallel processing).
|
|
#
|
|
# At a high level, splitting of a source into bundles happens as follows:
|
|
# SourceSplitRequest is applied to the source. If it returns
|
|
# SOURCE_SPLIT_OUTCOME_USE_CURRENT, no further splitting happens and the source
|
|
# is used "as is". Otherwise, splitting is applied recursively to each
|
|
# produced DerivedSource.
|
|
#
|
|
# As an optimization, for any Source, if its does_not_need_splitting is
|
|
# true, the framework assumes that splitting this source would return
|
|
# SOURCE_SPLIT_OUTCOME_USE_CURRENT, and doesn't initiate a SourceSplitRequest.
|
|
# This applies both to the initial source being split and to bundles
|
|
# produced from it.
|
|
"source": { # A source that records can be read and decoded from. # Specification of the source to be split.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
"options": { # Hints for splitting a Source into bundles (parts for parallel # Hints for tuning the splitting process.
|
|
# processing) using SourceSplitRequest.
|
|
"desiredShardSizeBytes": "A String", # DEPRECATED in favor of desired_bundle_size_bytes.
|
|
"desiredBundleSizeBytes": "A String", # The source should be split into a set of bundles where the estimated size
|
|
# of each is approximately this many bytes.
|
|
},
|
|
},
|
|
"originalName": "A String", # System-defined name for the Read instruction for this source
|
|
# in the original workflow graph.
|
|
},
|
|
"streamingSetupTask": { # A task which initializes part of a streaming Dataflow job. # Additional information for StreamingSetupTask WorkItems.
|
|
"snapshotConfig": { # Streaming appliance snapshot configuration. # Configures streaming appliance snapshot.
|
|
"snapshotId": "A String", # If set, indicates the snapshot id for the snapshot being performed.
|
|
"importStateEndpoint": "A String", # Indicates which endpoint is used to import appliance state.
|
|
},
|
|
"workerHarnessPort": 42, # The TCP port used by the worker to communicate with the Dataflow
|
|
# worker harness.
|
|
"drain": True or False, # The user has requested drain.
|
|
"streamingComputationTopology": { # Global topology of the streaming Dataflow job, including all # The global topology of the streaming Dataflow job.
|
|
# computations and their sharded locations.
|
|
"computations": [ # The computations associated with a streaming Dataflow job.
|
|
{ # All configuration data for a particular Computation.
|
|
"inputs": [ # The inputs to the computation.
|
|
{ # Describes a stream of data, either as input to be processed or as
|
|
# output of a streaming Dataflow job.
|
|
"streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current
|
|
# streaming Dataflow job.
|
|
# stage-to-stage communication.
|
|
"streamId": "A String", # Identifies the particular stream within the streaming Dataflow
|
|
# job.
|
|
},
|
|
"pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream.
|
|
# out of a streaming Dataflow job.
|
|
"idLabel": "A String", # If set, contains a pubsub label from which to extract record ids.
|
|
# If left empty, record deduplication will be strictly best effort.
|
|
"timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps.
|
|
# If left empty, record timestamps will be generated upon arrival.
|
|
"dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data.
|
|
"topic": "A String", # A pubsub topic, in the form of
|
|
# "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
|
|
"trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking
|
|
# custom time timestamps for watermark estimation.
|
|
"withAttributes": True or False, # If true, then the client has requested to get pubsub attributes.
|
|
"subscription": "A String", # A pubsub subscription, in the form of
|
|
# "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
|
|
},
|
|
"customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source.
|
|
"stateful": True or False, # Whether this source is stateful.
|
|
},
|
|
"sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input.
|
|
"stateFamily": "A String", # Identifies the state family where this side input is stored.
|
|
"tag": "A String", # Identifies the particular side input within the streaming Dataflow job.
|
|
},
|
|
},
|
|
],
|
|
"outputs": [ # The outputs from the computation.
|
|
{ # Describes a stream of data, either as input to be processed or as
|
|
# output of a streaming Dataflow job.
|
|
"streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current
|
|
# streaming Dataflow job.
|
|
# stage-to-stage communication.
|
|
"streamId": "A String", # Identifies the particular stream within the streaming Dataflow
|
|
# job.
|
|
},
|
|
"pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream.
|
|
# out of a streaming Dataflow job.
|
|
"idLabel": "A String", # If set, contains a pubsub label from which to extract record ids.
|
|
# If left empty, record deduplication will be strictly best effort.
|
|
"timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps.
|
|
# If left empty, record timestamps will be generated upon arrival.
|
|
"dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data.
|
|
"topic": "A String", # A pubsub topic, in the form of
|
|
# "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
|
|
"trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking
|
|
# custom time timestamps for watermark estimation.
|
|
"withAttributes": True or False, # If true, then the client has requested to get pubsub attributes.
|
|
"subscription": "A String", # A pubsub subscription, in the form of
|
|
# "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
|
|
},
|
|
"customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source.
|
|
"stateful": True or False, # Whether this source is stateful.
|
|
},
|
|
"sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input.
|
|
"stateFamily": "A String", # Identifies the state family where this side input is stored.
|
|
"tag": "A String", # Identifies the particular side input within the streaming Dataflow job.
|
|
},
|
|
},
|
|
],
|
|
"keyRanges": [ # The key ranges processed by the computation.
|
|
{ # Location information for a specific key-range of a sharded computation.
|
|
# Currently we only support UTF-8 character splits to simplify encoding into
|
|
# JSON.
|
|
"deprecatedPersistentDirectory": "A String", # DEPRECATED. The location of the persistent state for this range, as a
|
|
# persistent directory in the worker local filesystem.
|
|
"start": "A String", # The start (inclusive) of the key range.
|
|
"deliveryEndpoint": "A String", # The physical location of this range assignment to be used for
|
|
# streaming computation cross-worker message delivery.
|
|
"end": "A String", # The end (exclusive) of the key range.
|
|
"dataDisk": "A String", # The name of the data disk where data for this range is stored.
|
|
# This name is local to the Google Cloud Platform project and uniquely
|
|
# identifies the disk within that project, for example
|
|
# "myproject-1014-104817-4c2-harness-0-disk-1".
|
|
},
|
|
],
|
|
"computationId": "A String", # The ID of the computation.
|
|
"systemStageName": "A String", # The system stage name.
|
|
"stateFamilies": [ # The state family values.
|
|
{ # State family configuration.
|
|
"stateFamily": "A String", # The state family value.
|
|
"isRead": True or False, # If true, this family corresponds to a read operation.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
"dataDiskAssignments": [ # The disks assigned to a streaming Dataflow job.
|
|
{ # Data disk assignment for a given VM instance.
|
|
"vmInstance": "A String", # VM instance name the data disks mounted to, for example
|
|
# "myproject-1014-104817-4c2-harness-0".
|
|
"dataDisks": [ # Mounted data disks. The order is important a data disk's 0-based index in
|
|
# this list defines which persistent directory the disk is mounted to, for
|
|
# example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" },
|
|
# { "myproject-1014-104817-4c2-harness-0-disk-1" }.
|
|
"A String",
|
|
],
|
|
},
|
|
],
|
|
"persistentStateVersion": 42, # Version number for persistent state.
|
|
"userStageToComputationNameMap": { # Maps user stage names to stable computation names.
|
|
"a_key": "A String",
|
|
},
|
|
"forwardingKeyBits": 42, # The size (in bits) of keys that will be assigned to source messages.
|
|
},
|
|
"receiveWorkPort": 42, # The TCP port on which the worker should listen for messages from
|
|
# other streaming computation workers.
|
|
},
|
|
"streamingConfigTask": { # A task that carries configuration information for streaming computations. # Additional information for StreamingConfigTask WorkItems.
|
|
"userStepToStateFamilyNameMap": { # Map from user step names to state families.
|
|
"a_key": "A String",
|
|
},
|
|
"windmillServicePort": "A String", # If present, the worker must use this port to communicate with Windmill
|
|
# Service dispatchers. Only applicable when windmill_service_endpoint is
|
|
# specified.
|
|
"streamingComputationConfigs": [ # Set of computation configuration information.
|
|
{ # Configuration information for a single streaming computation.
|
|
"transformUserNameToStateFamily": { # Map from user name of stateful transforms in this stage to their state
|
|
# family.
|
|
"a_key": "A String",
|
|
},
|
|
"computationId": "A String", # Unique identifier for this computation.
|
|
"systemName": "A String", # System defined name for this computation.
|
|
"stageName": "A String", # Stage name of this computation.
|
|
"instructions": [ # Instructions that comprise the computation.
|
|
{ # Describes a particular operation comprising a MapTask.
|
|
"name": "A String", # User-provided name of this operation.
|
|
"read": { # An instruction that reads records. # Additional information for Read instructions.
|
|
# Takes no inputs, produces one output.
|
|
"source": { # A source that records can be read and decoded from. # The source to read from.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
"outputs": [ # Describes the outputs of the instruction.
|
|
{ # An output of an instruction.
|
|
"name": "A String", # The user-provided name of this output.
|
|
"onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions
|
|
# should only report the key size.
|
|
"codec": { # The codec to use to encode data being written via this output.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"systemName": "A String", # System-defined name of this output.
|
|
# Unique across the workflow.
|
|
"originalName": "A String", # System-defined name for this output in the original workflow graph.
|
|
# Outputs that do not contribute to an original instruction do not set this.
|
|
"onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions
|
|
# should only report the value size.
|
|
},
|
|
],
|
|
"partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions.
|
|
# One input and one output.
|
|
"sideInputs": [ # Zero or more side inputs.
|
|
{ # Information about a side input of a DoFn or an input of a SeqDoFn.
|
|
"sources": [ # The source(s) to read element(s) from to get the value of this side input.
|
|
# If more than one source, then the elements are taken from the
|
|
# sources, in the specified order if order matters.
|
|
# At least one source is required.
|
|
{ # A source that records can be read and decoded from.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
],
|
|
"kind": { # How to interpret the source element(s) as a side input value.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"tag": "A String", # The id of the tag the user code will access this side input by;
|
|
# this should correspond to the tag of some MultiOutputInfo.
|
|
},
|
|
],
|
|
"originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the
|
|
# intermediate store between the GBK and the CombineValues.
|
|
"originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the
|
|
# CombineValues instruction lifted into this instruction.
|
|
"valueCombiningFn": { # The value combining function to invoke.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction.
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
"inputElementCodec": { # The codec to use for interpreting an element in the input PTable.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
},
|
|
"write": { # An instruction that writes records. # Additional information for Write instructions.
|
|
# Takes one input, produces no outputs.
|
|
"input": { # An input of an instruction, as a reference to an output of a # The input.
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
"sink": { # A sink that records can be encoded and written to. # The sink to write to.
|
|
"codec": { # The codec to use to encode data written to the sink.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"spec": { # The sink to write to, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
},
|
|
},
|
|
"systemName": "A String", # System-defined name of this operation.
|
|
# Unique across the workflow.
|
|
"flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions.
|
|
"inputs": [ # Describes the inputs to the flatten instruction.
|
|
{ # An input of an instruction, as a reference to an output of a
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
],
|
|
},
|
|
"originalName": "A String", # System-defined name for the operation in the original workflow graph.
|
|
"parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions.
|
|
# Takes one main input and zero or more side inputs, and produces
|
|
# zero or more outputs.
|
|
# Runs user code.
|
|
"sideInputs": [ # Zero or more side inputs.
|
|
{ # Information about a side input of a DoFn or an input of a SeqDoFn.
|
|
"sources": [ # The source(s) to read element(s) from to get the value of this side input.
|
|
# If more than one source, then the elements are taken from the
|
|
# sources, in the specified order if order matters.
|
|
# At least one source is required.
|
|
{ # A source that records can be read and decoded from.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
],
|
|
"kind": { # How to interpret the source element(s) as a side input value.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"tag": "A String", # The id of the tag the user code will access this side input by;
|
|
# this should correspond to the tag of some MultiOutputInfo.
|
|
},
|
|
],
|
|
"input": { # An input of an instruction, as a reference to an output of a # The input.
|
|
# producer instruction.
|
|
"outputNum": 42, # The output index (origin zero) within the producer.
|
|
"producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces
|
|
# the output to be consumed by this input. This index is relative
|
|
# to the list of instructions in this input's instruction's
|
|
# containing MapTask.
|
|
},
|
|
"multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn.
|
|
{ # Information about an output of a multi-output DoFn.
|
|
"tag": "A String", # The id of the tag the user code will emit to this output by; this
|
|
# should correspond to the tag of some SideInputInfo.
|
|
},
|
|
],
|
|
"numOutputs": 42, # The number of outputs.
|
|
"userFn": { # The user function to invoke.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
},
|
|
},
|
|
],
|
|
},
|
|
],
|
|
"maxWorkItemCommitBytes": "A String", # Maximum size for work item commit supported windmill storage layer.
|
|
"windmillServiceEndpoint": "A String", # If present, the worker must use this endpoint to communicate with Windmill
|
|
# Service dispatchers, otherwise the worker must continue to use whatever
|
|
# endpoint it had been using.
|
|
},
|
|
"configuration": "A String", # Work item-specific configuration as an opaque blob.
|
|
"shellTask": { # A task which consists of a shell command for the worker to execute. # Additional information for ShellTask WorkItems.
|
|
"command": "A String", # The shell command to run.
|
|
"exitCode": 42, # Exit code for the task.
|
|
},
|
|
"id": "A String", # Identifies this WorkItem.
|
|
},
|
|
],
|
|
"unifiedWorkerResponse": { # Untranslated bag-of-bytes WorkResponse for UnifiedWorker.
|
|
"a_key": "", # Properties of the object. Contains field @type with type URL.
|
|
},
|
|
}</pre>
</div>
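<p>The methods on this page are invoked through the standard discovery-based resource chain (projects &gt; locations &gt; jobs &gt; workItems). The snippet below is a minimal, illustrative sketch rather than generated reference material: it assumes google-api-python-client with Application Default Credentials, and every identifier value (project, region, job, worker, work item) is a placeholder.</p>
<pre>
# Minimal, illustrative sketch (assumes google-api-python-client and
# Application Default Credentials; all identifier values are placeholders).
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')

report_body = {
    'workerId': 'worker-0',                       # placeholder worker VM id
    'currentWorkerTime': '2021-01-01T00:00:00Z',
    'workItemStatuses': [
        {
            'workItemId': '1234',                 # placeholder WorkItem id
            'reportIndex': '0',                   # index issued with the lease
            'completed': False,
            'reportedProgress': {'fractionConsumed': 0.25},
        },
    ],
}

response = dataflow.projects().locations().jobs().workItems().reportStatus(
    projectId='my-project',
    location='us-central1',
    jobId='my-job-id',
    body=report_body,
).execute()

for state in response.get('workItemServiceStates', []):
    print(state.get('nextReportIndex'), state.get('leaseExpireTime'))
</pre>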
<div class="method">
    <code class="details" id="reportStatus">reportStatus(projectId, location, jobId, body, x__xgafv=None)</code>
  <pre>Reports the status of dataflow WorkItems leased by a worker.

Args:
  projectId: string, The project which owns the WorkItem's job. (required)
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
contains the WorkItem's job. (required)
  jobId: string, The job which the WorkItem is part of. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Request to report the status of WorkItems.
|
|
"workerId": "A String", # The ID of the worker reporting the WorkItem status. If this
|
|
# does not match the ID of the worker which the Dataflow service
|
|
# believes currently has the lease on the WorkItem, the report
|
|
# will be dropped (with an error response).
|
|
"unifiedWorkerRequest": { # Untranslated bag-of-bytes WorkProgressUpdateRequest from UnifiedWorker.
|
|
"a_key": "", # Properties of the object. Contains field @type with type URL.
|
|
},
|
|
"currentWorkerTime": "A String", # The current timestamp at the worker.
|
|
"workItemStatuses": [ # The order is unimportant, except that the order of the
|
|
# WorkItemServiceState messages in the ReportWorkItemStatusResponse
|
|
# corresponds to the order of WorkItemStatus messages here.
|
|
{ # Conveys a worker's progress through the work described by a WorkItem.
|
|
"reportIndex": "A String", # The report index. When a WorkItem is leased, the lease will
|
|
# contain an initial report index. When a WorkItem's status is
|
|
# reported to the system, the report should be sent with
|
|
# that report index, and the response will contain the index the
|
|
# worker should use for the next report. Reports received with
|
|
# unexpected index values will be rejected by the service.
|
|
#
|
|
# In order to preserve idempotency, the worker should not alter the
|
|
# contents of a report, even if the worker must submit the same
|
|
# report multiple times before getting back a response. The worker
|
|
# should not submit a subsequent report until the response for the
|
|
          # previous report has been received from the service.
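          # Illustrative note (not part of the schema): if the lease carried
          # reportIndex "0", the first status report uses "0"; the response's
          # nextReportIndex (e.g. "1") is used for the following report, and a
          # retried report must be resent unchanged, with the same index.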
|
|
"errors": [ # Specifies errors which occurred during processing. If errors are
|
|
# provided, and completed = true, then the WorkItem is considered
|
|
# to have failed.
|
|
{ # The `Status` type defines a logical error model that is suitable for
|
|
# different programming environments, including REST APIs and RPC APIs. It is
|
|
# used by [gRPC](https://github.com/grpc). The error model is designed to be:
|
|
#
|
|
# - Simple to use and understand for most users
|
|
# - Flexible enough to meet unexpected needs
|
|
#
|
|
# # Overview
|
|
#
|
|
# The `Status` message contains three pieces of data: error code, error
|
|
# message, and error details. The error code should be an enum value of
|
|
# google.rpc.Code, but it may accept additional error codes if needed. The
|
|
# error message should be a developer-facing English message that helps
|
|
# developers *understand* and *resolve* the error. If a localized user-facing
|
|
# error message is needed, put the localized message in the error details or
|
|
# localize it in the client. The optional error details may contain arbitrary
|
|
# information about the error. There is a predefined set of error detail types
|
|
# in the package `google.rpc` that can be used for common error conditions.
|
|
#
|
|
# # Language mapping
|
|
#
|
|
# The `Status` message is the logical representation of the error model, but it
|
|
# is not necessarily the actual wire format. When the `Status` message is
|
|
# exposed in different client libraries and different wire protocols, it can be
|
|
# mapped differently. For example, it will likely be mapped to some exceptions
|
|
# in Java, but more likely mapped to some error codes in C.
|
|
#
|
|
# # Other uses
|
|
#
|
|
# The error model and the `Status` message can be used in a variety of
|
|
# environments, either with or without APIs, to provide a
|
|
# consistent developer experience across different environments.
|
|
#
|
|
# Example uses of this error model include:
|
|
#
|
|
# - Partial errors. If a service needs to return partial errors to the client,
|
|
# it may embed the `Status` in the normal response to indicate the partial
|
|
# errors.
|
|
#
|
|
# - Workflow errors. A typical workflow has multiple steps. Each step may
|
|
# have a `Status` message for error reporting.
|
|
#
|
|
# - Batch operations. If a client uses batch request and batch response, the
|
|
# `Status` message should be used directly inside batch response, one for
|
|
# each error sub-response.
|
|
#
|
|
# - Asynchronous operations. If an API call embeds asynchronous operation
|
|
# results in its response, the status of those operations should be
|
|
# represented directly using the `Status` message.
|
|
#
|
|
# - Logging. If some API errors are stored in logs, the message `Status` could
|
|
# be used directly after any stripping needed for security/privacy reasons.
|
|
"message": "A String", # A developer-facing error message, which should be in English. Any
|
|
# user-facing error message should be localized and sent in the
|
|
# google.rpc.Status.details field, or localized by the client.
|
|
"code": 42, # The status code, which should be an enum value of google.rpc.Code.
|
|
"details": [ # A list of messages that carry the error details. There is a common set of
|
|
# message types for APIs to use.
|
|
{
|
|
"a_key": "", # Properties of the object. Contains field @type with type URL.
|
|
},
|
|
],
|
|
},
|
|
],
|
|
"sourceOperationResponse": { # The result of a SourceOperationRequest, specified in # If the work item represented a SourceOperationRequest, and the work
|
|
# is completed, contains the result of the operation.
|
|
# ReportWorkItemStatusRequest.source_operation when the work item
|
|
# is completed.
|
|
"getMetadata": { # The result of a SourceGetMetadataOperation. # A response to a request to get metadata about a source.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # The computed metadata.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
"split": { # The response to a SourceSplitRequest. # A response to a request to split a source.
|
|
"outcome": "A String", # Indicates whether splitting happened and produced a list of bundles.
|
|
# If this is USE_CURRENT_SOURCE_AS_IS, the current source should
|
|
# be processed "as is" without splitting. "bundles" is ignored in this case.
|
|
# If this is SPLITTING_HAPPENED, then "bundles" contains a list of
|
|
# bundles into which the source was split.
|
|
"bundles": [ # If outcome is SPLITTING_HAPPENED, then this is a list of bundles
|
|
# into which the source was split. Otherwise this field is ignored.
|
|
# This list can be empty, which means the source represents an empty input.
|
|
{ # Specification of one of the bundles produced as a result of splitting
|
|
# a Source (e.g. when executing a SourceSplitRequest, or when
|
|
# splitting an active task using WorkItemStatus.dynamic_source_split),
|
|
# relative to the source being split.
|
|
"derivationMode": "A String", # What source to base the produced source on (if any).
|
|
"source": { # A source that records can be read and decoded from. # Specification of the source.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
],
|
|
"shards": [ # DEPRECATED in favor of bundles.
|
|
{ # DEPRECATED in favor of DerivedSource.
|
|
"derivationMode": "A String", # DEPRECATED
|
|
"source": { # A source that records can be read and decoded from. # DEPRECATED
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
],
|
|
},
|
|
},
|
|
"stopPosition": { # Position defines a position within a collection of data. The value # A worker may split an active map task in two parts, "primary" and
|
|
# "residual", continuing to process the primary part and returning the
|
|
# residual part into the pool of available work.
|
|
# This event is called a "dynamic split" and is critical to the dynamic
|
|
# work rebalancing feature. The two obtained sub-tasks are called
|
|
# "parts" of the split.
|
|
# The parts, if concatenated, must represent the same input as would
|
|
# be read by the current task if the split did not happen.
|
|
# The exact way in which the original task is decomposed into the two
|
|
# parts is specified either as a position demarcating them
|
|
# (stop_position), or explicitly as two DerivedSources, if this
|
|
# task consumes a user-defined source type (dynamic_source_split).
|
|
#
|
|
# The "current" task is adjusted as a result of the split: after a task
|
|
# with range [A, B) sends a stop_position update at C, its range is
|
|
# considered to be [A, C), e.g.:
|
|
# * Progress should be interpreted relative to the new range, e.g.
|
|
# "75% completed" means "75% of [A, C) completed"
|
|
# * The worker should interpret proposed_stop_position relative to the
|
|
# new range, e.g. "split at 68%" should be interpreted as
|
|
# "split at 68% of [A, C)".
|
|
# * If the worker chooses to split again using stop_position, only
|
|
# stop_positions in [A, C) will be accepted.
|
|
# * Etc.
|
|
# dynamic_source_split has similar semantics: e.g., if a task with
|
|
# source S splits using dynamic_source_split into {P, R}
|
|
# (where P and R must be together equivalent to S), then subsequent
|
|
# progress and proposed_stop_position should be interpreted relative
|
|
# to P, and in a potential subsequent dynamic_source_split into {P', R'},
|
|
# P' and R' must be together equivalent to P, etc.
|
|
# can be either the end position, a key (used with ordered
|
|
# collections), a byte offset, or a record index.
|
|
"end": True or False, # Position is past all other positions. Also useful for the end
|
|
# position of an unbounded range.
|
|
"recordIndex": "A String", # Position is a record index.
|
|
"byteOffset": "A String", # Position is a byte offset.
|
|
"key": "A String", # Position is a string key, ordered lexicographically.
|
|
"concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position.
|
|
# position. A ConcatPosition can be used by a reader of a source that
|
|
# encapsulates a set of other sources.
|
|
"position": # Object with schema name: Position # Position within the inner source.
|
|
"index": 42, # Index of the inner source.
|
|
},
|
|
"shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED
|
|
# sharding).
|
|
},
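      # Illustrative note (not part of the schema): a task that owned the byte
      # range [0, 1000) and reports a stopPosition with byteOffset 600 now owns
      # [0, 600); the bytes [600, 1000) return to the pool. A subsequent
      # fractionConsumed of 0.5 then means half of [0, 600), i.e. offset 300.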
|
|
"sourceFork": { # DEPRECATED in favor of DynamicSourceSplit. # DEPRECATED in favor of dynamic_source_split.
|
|
"residualSource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED
|
|
# a Source (e.g. when executing a SourceSplitRequest, or when
|
|
# splitting an active task using WorkItemStatus.dynamic_source_split),
|
|
# relative to the source being split.
|
|
"derivationMode": "A String", # What source to base the produced source on (if any).
|
|
"source": { # A source that records can be read and decoded from. # Specification of the source.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
"primarySource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED
|
|
# a Source (e.g. when executing a SourceSplitRequest, or when
|
|
# splitting an active task using WorkItemStatus.dynamic_source_split),
|
|
# relative to the source being split.
|
|
"derivationMode": "A String", # What source to base the produced source on (if any).
|
|
"source": { # A source that records can be read and decoded from. # Specification of the source.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
"primary": { # DEPRECATED in favor of DerivedSource. # DEPRECATED
|
|
"derivationMode": "A String", # DEPRECATED
|
|
"source": { # A source that records can be read and decoded from. # DEPRECATED
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
"residual": { # DEPRECATED in favor of DerivedSource. # DEPRECATED
|
|
"derivationMode": "A String", # DEPRECATED
|
|
"source": { # A source that records can be read and decoded from. # DEPRECATED
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"requestedLeaseDuration": "A String", # Amount of time the worker requests for its lease.
|
|
"completed": True or False, # True if the WorkItem was completed (successfully or unsuccessfully).
|
|
"workItemId": "A String", # Identifies the WorkItem.
|
|
"dynamicSourceSplit": { # When a task splits using WorkItemStatus.dynamic_source_split, this # See documentation of stop_position.
|
|
# message describes the two parts of the split relative to the
|
|
# description of the current task's input.
|
|
"residual": { # Specification of one of the bundles produced as a result of splitting # Residual part (returned to the pool of work).
|
|
# Specified relative to the previously-current source.
|
|
# a Source (e.g. when executing a SourceSplitRequest, or when
|
|
# splitting an active task using WorkItemStatus.dynamic_source_split),
|
|
# relative to the source being split.
|
|
"derivationMode": "A String", # What source to base the produced source on (if any).
|
|
"source": { # A source that records can be read and decoded from. # Specification of the source.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
"primary": { # Specification of one of the bundles produced as a result of splitting # Primary part (continued to be processed by worker).
|
|
# Specified relative to the previously-current source.
|
|
# Becomes current.
|
|
# a Source (e.g. when executing a SourceSplitRequest, or when
|
|
# splitting an active task using WorkItemStatus.dynamic_source_split),
|
|
# relative to the source being split.
|
|
"derivationMode": "A String", # What source to base the produced source on (if any).
|
|
"source": { # A source that records can be read and decoded from. # Specification of the source.
|
|
"codec": { # The codec to use to decode data read from the source.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"baseSpecs": [ # While splitting, sources may specify the produced bundles
|
|
# as differences against another source, in order to save backend-side
|
|
# memory and allow bigger jobs. For details, see SourceSplitRequest.
|
|
# To support this use case, the full set of parameters of the source
|
|
# is logically obtained by taking the latest explicitly specified value
|
|
# of each parameter in the order:
|
|
# base_specs (later items win), spec (overrides anything in base_specs).
|
|
{
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
],
|
|
"spec": { # The source to read from, plus its parameters.
|
|
"a_key": "", # Properties of the object.
|
|
},
|
|
"doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source
|
|
# doesn't need splitting, and using SourceSplitRequest on it would
|
|
# yield SOURCE_SPLIT_OUTCOME_USE_CURRENT.
|
|
#
|
|
# E.g. a file splitter may set this to true when splitting a single file
|
|
# into a set of byte ranges of appropriate size, and set this
|
|
# to false when splitting a filepattern into individual files.
|
|
# However, for efficiency, a file splitter may decide to produce
|
|
# file subranges directly from the filepattern to avoid a splitting
|
|
# round-trip.
|
|
#
|
|
# See SourceSplitRequest for an overview of the splitting process.
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
"metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away,
|
|
# avoiding a SourceGetMetadataOperation roundtrip
|
|
# (see SourceOperationRequest).
|
|
#
|
|
# This field is meaningful only in the Source objects populated
|
|
# by the user (e.g. when filling in a DerivedSource).
|
|
# Source objects supplied by the framework to the user don't have
|
|
# this field populated.
|
|
# and tuning the pipeline, etc.
|
|
"infinite": True or False, # Specifies that the size of this source is known to be infinite
|
|
# (this is a streaming source).
|
|
"estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be
|
|
# read from this source. This estimate is in terms of external storage
|
|
# size, before any decompression or other processing done by the reader.
|
|
"producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with
|
|
# the (encoded) keys in lexicographically sorted order.
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"totalThrottlerWaitTimeSeconds": 3.14, # Total time the worker spent being throttled by external systems.
|
|
"counterUpdates": [ # Worker output counters for this WorkItem.
|
|
{ # An update to a Counter sent from a worker.
|
|
"floatingPointList": { # A metric value representing a list of floating point numbers. # List of floating point numbers, for Set.
|
|
"elements": [ # Elements of the list.
|
|
3.14,
|
|
],
|
|
},
|
|
"floatingPoint": 3.14, # Floating point value for Sum, Max, Min.
|
|
"integerMean": { # A representation of an integer mean metric contribution. # Integer mean aggregation value for Mean.
|
|
"count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
"sum": { # A representation of an int64, n, that is immune to precision loss when # The sum of all values being aggregated.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
},
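          # Illustrative note (not part of the schema): the int64 value
          # n = 5000000000 is carried as lowBits = n & 0xffffffff = 705032704
          # and highBits = n >> 32 = 1, so that n = (highBits << 32) | lowBits.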
|
|
"boolean": True or False, # Boolean value for And, Or.
|
|
"integerList": { # A metric value representing a list of integers. # List of integers, for Set.
|
|
"elements": [ # Elements of the list.
|
|
{ # A representation of an int64, n, that is immune to precision loss when
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
],
|
|
},
|
|
"cumulative": True or False, # True if this counter is reported as the total cumulative aggregate
|
|
# value accumulated since the worker started working on this WorkItem.
|
|
# By default this is false, indicating that this counter is reported
|
|
# as a delta.
|
|
"shortId": "A String", # The service-generated short identifier for this counter.
|
|
# The short_id -> (name, metadata) mapping is constant for the lifetime of
|
|
# a job.
|
|
"integerGauge": { # A metric value representing temporal values of a variable. # Gauge data
|
|
"timestamp": "A String", # The time at which this value was measured. Measured as msecs from epoch.
|
|
"value": { # A representation of an int64, n, that is immune to precision loss when # The value of the variable represented by this gauge.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
},
|
|
"floatingPointMean": { # A representation of a floating point mean metric contribution. # Floating point mean aggregation value for Mean.
|
|
"count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
"sum": 3.14, # The sum of all values being aggregated.
|
|
},
|
|
"internal": "", # Value for internally-defined counters used by the Dataflow service.
|
|
"structuredNameAndMetadata": { # A single message which encapsulates structured name and metadata for a given # Counter structured name and metadata.
|
|
# counter.
|
|
"name": { # Identifies a counter within a per-job namespace. Counters whose structured # Structured name of the counter.
|
|
# names are the same get merged into a single value for the job.
|
|
"origin": "A String", # One of the standard Origins defined above.
|
|
"executionStepName": "A String", # Name of the stage. An execution step contains multiple component steps.
|
|
"name": "A String", # Counter name. Not necessarily globally-unique, but unique within the
|
|
# context of the other fields.
|
|
# Required.
|
|
"workerId": "A String", # ID of a particular worker.
|
|
"inputIndex": 42, # Index of an input collection that's being read from/written to as a side
|
|
# input.
|
|
          # The index identifies a step's side inputs starting from 1 (e.g. the first
|
|
# side input has input_index 1, the third has input_index 3).
|
|
# Side inputs are identified by a pair of (original_step_name, input_index).
|
|
# This field helps uniquely identify them.
|
|
"originNamespace": "A String", # A string containing a more specific namespace of the counter's origin.
|
|
"originalRequestingStepName": "A String", # The step name requesting an operation, such as GBK.
|
|
# I.e. the ParDo causing a read/write from shuffle to occur, or a
|
|
# read from side inputs.
|
|
"portion": "A String", # Portion of this counter, either key or value.
|
|
"componentStepName": "A String", # Name of the optimized step being executed by the workers.
|
|
"originalStepName": "A String", # System generated name of the original step in the user's graph, before
|
|
# optimization.
|
|
},
|
|
"metadata": { # CounterMetadata includes all static non-name non-value counter attributes. # Metadata associated with a counter
|
|
"standardUnits": "A String", # System defined Units, see above enum.
|
|
"kind": "A String", # Counter aggregation kind.
|
|
"description": "A String", # Human-readable description of the counter semantics.
|
|
"otherUnits": "A String", # A string referring to the unit type.
|
|
},
|
|
},
|
|
"nameAndKind": { # Basic metadata about a counter. # Counter name and aggregation type.
|
|
"kind": "A String", # Counter aggregation kind.
|
|
"name": "A String", # Name of the counter.
|
|
},
|
|
"integer": { # A representation of an int64, n, that is immune to precision loss when # Integer value for Sum, Max, Min.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
"distribution": { # A metric value representing a distribution. # Distribution data
|
|
"count": { # A representation of an int64, n, that is immune to precision loss when # The count of the number of elements present in the distribution.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
"min": { # A representation of an int64, n, that is immune to precision loss when # The minimum value present in the distribution.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
"max": { # A representation of an int64, n, that is immune to precision loss when # The maximum value present in the distribution.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
"sum": { # A representation of an int64, n, that is immune to precision loss when # Use an int64 since we'd prefer the added precision. If overflow is a common
|
|
# problem we can detect it and use an additional int64 or a double.
|
|
# encoded in JSON.
|
|
"lowBits": 42, # The low order bits: n & 0xffffffff.
|
|
"highBits": 42, # The high order bits, including the sign: n >> 32.
|
|
},
|
|
"histogram": { # Histogram of value counts for a distribution. # (Optional) Histogram of value counts for the distribution.
|
|
#
|
|
# Buckets have an inclusive lower bound and exclusive upper bound and use
|
|
# "1,2,5 bucketing": The first bucket range is from [0,1) and all subsequent
|
|
# bucket boundaries are powers of ten multiplied by 1, 2, or 5. Thus, bucket
|
|
# boundaries are 0, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, ...
|
|
# Negative values are not supported.
|
|
"firstBucketOffset": 42, # Starting index of first stored bucket. The non-inclusive upper-bound of
|
|
# the ith bucket is given by:
|
|
# pow(10,(i-first_bucket_offset)/3) * (1,2,5)[(i-first_bucket_offset)%3]
|
|
"bucketCounts": [ # Counts of values in each bucket. For efficiency, prefix and trailing
|
|
# buckets with count = 0 are elided. Buckets can store the full range of
|
|
# values of an unsigned long, with ULLONG_MAX falling into the 59th bucket
|
|
# with range [1e19, 2e19).
|
|
"A String",
|
|
],
|
|
},
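          # Illustrative note (not part of the schema): bucket boundaries run
          # 0, 1, 2, 5, 10, 20, 50, 100, ...; with firstBucketOffset = 4 and
          # bucketCounts = ["3", "7"], the stored counts cover [10, 20) and
          # [20, 50).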
|
|
"sumOfSquares": 3.14, # Use a double since the sum of squares is likely to overflow int64.
|
|
},
|
|
"stringList": { # A metric value representing a list of strings. # List of strings, for Set.
|
|
"elements": [ # Elements of the list.
|
|
"A String",
|
|
],
|
|
},
|
|
},
|
|
],
|
|
"progress": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of reported_progress.
|
|
"position": { # Position defines a position within a collection of data. The value # Obsolete.
|
|
# can be either the end position, a key (used with ordered
|
|
# collections), a byte offset, or a record index.
|
|
"end": True or False, # Position is past all other positions. Also useful for the end
|
|
# position of an unbounded range.
|
|
"recordIndex": "A String", # Position is a record index.
|
|
"byteOffset": "A String", # Position is a byte offset.
|
|
"key": "A String", # Position is a string key, ordered lexicographically.
|
|
"concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position.
|
|
# position. A ConcatPosition can be used by a reader of a source that
|
|
# encapsulates a set of other sources.
|
|
"position": # Object with schema name: Position # Position within the inner source.
|
|
"index": 42, # Index of the inner source.
|
|
},
|
|
"shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED
|
|
# sharding).
|
|
},
|
|
"remainingTime": "A String", # Obsolete.
|
|
"percentComplete": 3.14, # Obsolete.
|
|
},
|
|
"metricUpdates": [ # DEPRECATED in favor of counter_updates.
|
|
{ # Describes the state of a metric.
|
|
"meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
|
|
# This holds the count of the aggregated values and is used in combination
|
|
# with mean_sum above to obtain the actual mean aggregate value.
|
|
# The only possible value type is Long.
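      # Illustrative note (not part of the schema): a "Mean" metric reported
      # with meanSum = 30 and meanCount = 4 denotes an observed mean of 7.5.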
|
|
"kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
|
|
# "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
|
|
# The specified aggregation kind is case-insensitive.
|
|
#
|
|
# If omitted, this is not an aggregated value but instead
|
|
# a single metric sample value.
|
|
"set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
|
|
# possible value type is a list of Values whose type can be Long, Double,
|
|
# or String, according to the metric's type. All Values in the list must
|
|
# be of the same type.
|
|
"name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
|
|
# metric.
|
|
"origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics;
|
|
# will be "dataflow" for metrics defined by the Dataflow service or SDK.
|
|
"name": "A String", # Worker-defined metric name.
|
|
"context": { # Zero or more labeled fields which identify the part of the job this
|
|
# metric is associated with, such as the name of a step or collection.
|
|
#
|
|
# For example, built-in counters associated with steps will have
|
|
# context['step'] = <step-name>. Counters associated with PCollections
|
|
# in the SDK will have context['pcollection'] = <pcollection-name>.
|
|
"a_key": "A String",
|
|
},
|
|
},
|
|
"meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
|
|
# This holds the sum of the aggregated values and is used in combination
|
|
# with mean_count below to obtain the actual mean aggregate value.
|
|
# The only possible value types are Long and Double.
|
|
"cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
|
|
# value accumulated since the worker started working on this WorkItem.
|
|
# By default this is false, indicating that this metric is reported
|
|
# as a delta that is not associated with any WorkItem.
|
|
"updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
|
|
# reporting work progress; it will be filled in responses from the
|
|
# metrics API.
|
|
"scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
|
|
# "And", and "Or". The possible value types are Long, Double, and Boolean.
|
|
"internal": "", # Worker-computed aggregate value for internal use by the Dataflow
|
|
# service.
|
|
"gauge": "", # A struct value describing properties of a Gauge.
|
|
          # Metrics of gauge type show the value of a metric across time, and are
|
|
# aggregated based on the newest value.
|
|
"distribution": "", # A struct value describing properties of a distribution of numeric values.
|
|
},
|
|
],
|
|
"reportedProgress": { # A progress measurement of a WorkItem by a worker. # The worker's progress through this WorkItem.
|
|
"fractionConsumed": 3.14, # Completion as fraction of the input consumed, from 0.0 (beginning, nothing
|
|
# consumed), to 1.0 (end of the input, entire input consumed).
|
|
"position": { # Position defines a position within a collection of data. The value # A Position within the work to represent a progress.
|
|
# can be either the end position, a key (used with ordered
|
|
# collections), a byte offset, or a record index.
|
|
"end": True or False, # Position is past all other positions. Also useful for the end
|
|
# position of an unbounded range.
|
|
"recordIndex": "A String", # Position is a record index.
|
|
"byteOffset": "A String", # Position is a byte offset.
|
|
"key": "A String", # Position is a string key, ordered lexicographically.
|
|
"concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position.
|
|
# position. A ConcatPosition can be used by a reader of a source that
|
|
# encapsulates a set of other sources.
|
|
"position": # Object with schema name: Position # Position within the inner source.
|
|
"index": 42, # Index of the inner source.
|
|
},
|
|
"shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED
|
|
# sharding).
|
|
},
|
|
"remainingParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the input of this task that remains,
|
|
# (i.e. can be delegated to this task and any new tasks via dynamic
|
|
# splitting). Always at least 1 for non-finished work items and 0 for
|
|
# finished.
|
|
#
|
|
# "Amount of parallelism" refers to how many non-empty parts of the input
|
|
# can be read in parallel. This does not necessarily equal number
|
|
# of records. An input that can be read in parallel down to the
|
|
# individual records is called "perfectly splittable".
|
|
# An example of non-perfectly parallelizable input is a block-compressed
|
|
# file format where a block of records has to be read as a whole,
|
|
# but different blocks can be read in parallel.
|
|
#
|
|
# Examples:
|
|
# * If we are processing record #30 (starting at 1) out of 50 in a perfectly
|
|
# splittable 50-record input, this value should be 21 (20 remaining + 1
|
|
# current).
|
|
# * If we are reading through block 3 in a block-compressed file consisting
|
|
# of 5 blocks, this value should be 3 (since blocks 4 and 5 can be
|
|
# processed in parallel by new tasks via dynamic splitting and the current
|
|
# task remains processing block 3).
|
|
# * If we are reading through the last block in a block-compressed file,
|
|
# or reading or processing the last record in a perfectly splittable
|
|
# input, this value should be 1, because apart from the current task, no
|
|
# additional remainder can be split off.
|
|
# reported by the worker.
|
|
"isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is
|
|
# ignored.
|
|
# Infinite parallelism means the service will assume that the work item
|
|
# can always be split into more non-empty work items by dynamic splitting.
|
|
# This is a work-around for lack of support for infinity by the current
|
|
# JSON-based Java RPC stack.
|
|
"value": 3.14, # Specifies the level of parallelism in case it is finite.
|
|
},
|
|
"consumedParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the portion of input of this task that has
|
|
# already been consumed and is no longer active. In the first two examples
|
|
# above (see remaining_parallelism), the value should be 29 or 2
|
|
# respectively. The sum of remaining_parallelism and consumed_parallelism
|
|
# should equal the total amount of parallelism in this work item. If
|
|
# specified, must be finite.
|
|
# reported by the worker.
|
|
"isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is
|
|
# ignored.
|
|
# Infinite parallelism means the service will assume that the work item
|
|
# can always be split into more non-empty work items by dynamic splitting.
|
|
# This is a work-around for lack of support for infinity by the current
|
|
# JSON-based Java RPC stack.
|
|
"value": 3.14, # Specifies the level of parallelism in case it is finite.
|
|
},
|
|
},
|
|
},
|
|
],
|
|
"location": "A String", # The [regional endpoint]
|
|
# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
|
|
# contains the WorkItem's job.
|
|
}
|
|
|
|
x__xgafv: string, V1 error format.
|
|
Allowed values
|
|
1 - v1 error format
|
|
2 - v2 error format
|
|
|
|
Returns:
|
|
An object of the form:
|
|
|
|
    { # Response from a request to report the status of WorkItems.
    "workItemServiceStates": [ # A set of messages indicating the service-side state for each
        # WorkItem whose status was reported, in the same order as the
        # WorkItemStatus messages in the ReportWorkItemStatusRequest which
        # resulted in this response.
      { # The Dataflow service's idea of the current state of a WorkItem
          # being processed by a worker.
        "reportStatusInterval": "A String", # New recommended reporting interval.
        "suggestedStopPosition": { # Position defines a position within a collection of data. The value # Obsolete, always empty.
            # can be either the end position, a key (used with ordered
            # collections), a byte offset, or a record index.
          "end": True or False, # Position is past all other positions. Also useful for the end
              # position of an unbounded range.
          "recordIndex": "A String", # Position is a record index.
          "byteOffset": "A String", # Position is a byte offset.
          "key": "A String", # Position is a string key, ordered lexicographically.
          "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position.
              # position. A ConcatPosition can be used by a reader of a source that
              # encapsulates a set of other sources.
            "position": # Object with schema name: Position # Position within the inner source.
            "index": 42, # Index of the inner source.
          },
          "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED
              # sharding).
        },
        "leaseExpireTime": "A String", # Time at which the current lease will expire.
        "nextReportIndex": "A String", # The index value to use for the next report sent by the worker.
            # Note: If the report call fails for whatever reason, the worker should
            # reuse this index for subsequent report attempts.
        "harnessData": { # Other data returned by the service, specific to the particular
            # worker harness.
          "a_key": "", # Properties of the object.
        },
"metricShortId": [ # The short ids that workers should use in subsequent metric updates.
|
|
# Workers should strive to use short ids whenever possible, but it is ok
|
|
# to request the short_id again if a worker lost track of it
|
|
# (e.g. if the worker is recovering from a crash).
|
|
# NOTE: it is possible that the response may have short ids for a subset
|
|
# of the metrics.
|
|
{ # The metric short id is returned to the user alongside an offset into
|
|
# ReportWorkItemStatusRequest
|
|
"shortId": "A String", # The service-generated short identifier for the metric.
|
|
"metricIndex": 42, # The index of the corresponding metric in
|
|
# the ReportWorkItemStatusRequest. Required.
|
|
},
|
|
],
|
|
"splitRequest": { # A suggestion by the service to the worker to dynamically split the WorkItem. # The progress point in the WorkItem where the Dataflow service
|
|
# suggests that the worker truncate the task.
|
|
"fractionConsumed": 3.14, # A fraction at which to split the work item, from 0.0 (beginning of the
|
|
# input) to 1.0 (end of the input).
|
|
"position": { # Position defines a position within a collection of data. The value # A Position at which to split the work item.
|
|
# can be either the end position, a key (used with ordered
|
|
# collections), a byte offset, or a record index.
|
|
"end": True or False, # Position is past all other positions. Also useful for the end
|
|
# position of an unbounded range.
|
|
"recordIndex": "A String", # Position is a record index.
|
|
"byteOffset": "A String", # Position is a byte offset.
|
|
"key": "A String", # Position is a string key, ordered lexicographically.
|
|
"concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position.
|
|
# position. A ConcatPosition can be used by a reader of a source that
|
|
# encapsulates a set of other sources.
|
|
"position": # Object with schema name: Position # Position within the inner source.
|
|
"index": 42, # Index of the inner source.
|
|
},
|
|
"shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED
|
|
# sharding).
|
|
},
|
|
"fractionOfRemainder": 3.14, # The fraction of the remainder of work to split the work item at, from 0.0
|
|
# (split at the current position) to 1.0 (end of the input).
|
|
},
|
|
"suggestedStopPoint": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of split_request.
|
|
"position": { # Position defines a position within a collection of data. The value # Obsolete.
|
|
# can be either the end position, a key (used with ordered
|
|
# collections), a byte offset, or a record index.
|
|
"end": True or False, # Position is past all other positions. Also useful for the end
|
|
# position of an unbounded range.
|
|
"recordIndex": "A String", # Position is a record index.
|
|
"byteOffset": "A String", # Position is a byte offset.
|
|
"key": "A String", # Position is a string key, ordered lexicographically.
|
|
"concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position.
|
|
# position. A ConcatPosition can be used by a reader of a source that
|
|
# encapsulates a set of other sources.
|
|
"position": # Object with schema name: Position # Position within the inner source.
|
|
"index": 42, # Index of the inner source.
|
|
},
|
|
"shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED
|
|
# sharding).
|
|
},
|
|
"remainingTime": "A String", # Obsolete.
|
|
"percentComplete": 3.14, # Obsolete.
|
|
},
|
|
},
|
|
],
|
|
"unifiedWorkerResponse": { # Untranslated bag-of-bytes WorkProgressUpdateResponse for UnifiedWorker.
|
|
"a_key": "", # Properties of the object. Contains field @type with type URL.
|
|
},
|
|
}</pre>
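<p>The sketch below is only an illustration of how this method can be invoked from the
google-api-python-client; in practice <code>reportStatus</code> is normally called by the
Dataflow worker harness itself rather than by user code. All identifiers and progress
values (<code>my-project</code>, <code>my-job-id</code>, the work item id, the report
index, the fraction consumed) are placeholders, not output of a real pipeline.</p>
<pre>from googleapiclient.discovery import build

# Build the Dataflow client (uses Application Default Credentials).
service = build('dataflow', 'v1b3')

# Report progress for one leased WorkItem. The field names come from the
# request schema above; the values are placeholders.
body = {
    'workerId': 'worker-0',                       # ID of the worker VM holding the lease
    'currentWorkerTime': '2020-01-01T00:00:00Z',  # the worker's current timestamp
    'workItemStatuses': [
        {
            'workItemId': '1234',        # the leased WorkItem being reported on
            'reportIndex': '5',          # must increase with every report for this item
            'completed': False,          # still working on it
            'reportedProgress': {
                'fractionConsumed': 0.6,  # 60% of the input consumed so far
            },
        },
    ],
}

response = service.projects().locations().jobs().workItems().reportStatus(
    projectId='my-project',
    location='us-central1',
    jobId='my-job-id',
    body=body,
).execute()

# One WorkItemServiceState per reported WorkItemStatus, in the same order.
for state in response.get('workItemServiceStates', []):
    print('lease expires at:', state.get('leaseExpireTime'))
    print('next report index:', state.get('nextReportIndex'))
    if 'splitRequest' in state:
        print('service suggests a dynamic split:', state['splitRequest'])</pre>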
</div>
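<p>The progress, split, and parallelism fields above are plain numbers, so their
bookkeeping can be illustrated directly. The helpers below are not part of the API;
the conversion of <code>fractionOfRemainder</code> into a fraction of the whole input
is one common reading of the field description, and the parallelism figures simply
reproduce the worked example given for <code>remainingParallelism</code>.</p>
<pre>def absolute_split_fraction(fraction_consumed, fraction_of_remainder):
    """Turn splitRequest.fractionOfRemainder into a fraction of the whole input.

    0.0 means "split at the current position" and 1.0 means "at the end of the
    input", so the requested split point is measured within the remaining work.
    """
    return fraction_consumed + fraction_of_remainder * (1.0 - fraction_consumed)


def parallelism_report(current_record, total_records):
    """Remaining/consumed parallelism for a perfectly splittable record input.

    Mirrors the example above: processing record #30 (1-based) out of 50 gives
    consumedParallelism 29 and remainingParallelism 21 (20 unread + 1 current).
    """
    consumed = current_record - 1
    remaining = total_records - consumed
    return {'consumedParallelism': {'value': float(consumed)},
            'remainingParallelism': {'value': float(remaining)}}


print(absolute_split_fraction(0.6, 0.5))  # 0.8: split halfway through the remaining 40%
print(parallelism_report(30, 50))         # consumed 29.0, remaining 21.0</pre>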
</body></html>