annotate(body, x__xgafv=None)

Performs asynchronous video annotation. Progress and results can be
retrieved through the `google.longrunning.Operations` interface.
`Operation.metadata` contains `AnnotateVideoProgress` (progress).
`Operation.response` contains `AnnotateVideoResponse` (results).

Args:
  body: object, The request body. (required)
    The object takes the form of:

{ # Video annotation request.
    "videoContext": { # Video context and/or feature-specific parameters. # Additional video context and/or feature-specific parameters.
      "shotChangeDetectionConfig": { # Config for SHOT_CHANGE_DETECTION. # Config for SHOT_CHANGE_DETECTION.
        "model": "A String", # Model to use for shot change detection.
            # Supported values: "builtin/stable" (the default if unset) and
            # "builtin/latest".
      },
      "textDetectionConfig": { # Config for TEXT_DETECTION. # Config for TEXT_DETECTION.
        "languageHints": [ # Language hints can be specified if the language to be detected is known a
            # priori. They can increase the accuracy of the detection. A language hint
            # must be a language code in BCP-47 format.
            #
            # Automatic language detection is performed if no hint is provided.
          "A String",
        ],
        "model": "A String", # Model to use for text detection.
            # Supported values: "builtin/stable" (the default if unset) and
            # "builtin/latest".
      },
      "speechTranscriptionConfig": { # Config for SPEECH_TRANSCRIPTION. # Config for SPEECH_TRANSCRIPTION.
        "languageCode": "A String", # *Required* The language of the supplied audio as a
            # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
            # Example: "en-US".
            # See [Language Support](https://cloud.google.com/speech/docs/languages)
            # for a list of the currently supported language codes.
        "filterProfanity": True or False, # *Optional* If set to `true`, the server will attempt to filter out
            # profanities, replacing all but the initial character in each filtered word
            # with asterisks, e.g. "f***". If set to `false` or omitted, profanities
            # won't be filtered out.
        "enableAutomaticPunctuation": True or False, # *Optional* If `true`, adds punctuation to recognition result hypotheses.
            # This feature is only available in select languages. Setting it for
            # requests in other languages has no effect. The default `false` value
            # does not add punctuation to result hypotheses. NOTE: "This is currently
            # offered as an experimental service, complimentary to all users. In the
            # future this may be exclusively available as a premium feature."
        "enableSpeakerDiarization": True or False, # *Optional* If `true`, enables speaker detection for each recognized word in
            # the top alternative of the recognition result, using a speaker_tag provided
            # in the WordInfo.
            # Note: When this is true, we send all the words from the beginning of the
            # audio for the top alternative in every consecutive response.
            # This is done in order to improve our speaker tags as our models learn to
            # identify the speakers in the conversation over time.
        "maxAlternatives": 42, # *Optional* Maximum number of recognition hypotheses to be returned.
            # Specifically, the maximum number of `SpeechRecognitionAlternative` messages
            # within each `SpeechTranscription`. The server may return fewer than
            # `max_alternatives`. Valid values are `0`-`30`. A value of `0` or `1` will
            # return a maximum of one. If omitted, will return a maximum of one.
        "audioTracks": [ # *Optional* For file formats, such as MXF or MKV, that support multiple audio
            # tracks, specify up to two tracks. Default: track 0.
          42,
        ],
        "diarizationSpeakerCount": 42, # *Optional*
            # If set, specifies the estimated number of speakers in the conversation.
            # If not set, defaults to `2`.
            # Ignored unless enable_speaker_diarization is set to true.
        "enableWordConfidence": True or False, # *Optional* If `true`, the top result includes a list of words and the
            # confidence for those words. If `false`, no word-level confidence
            # information is returned. The default is `false`.
        "speechContexts": [ # *Optional* A means to provide context to assist the speech recognition.
          { # Provides "hints" to the speech recognizer to favor specific words and phrases
              # in the results.
            "phrases": [ # *Optional* A list of strings containing word and phrase "hints" so that
                # the speech recognition is more likely to recognize them. This can be used
                # to improve the accuracy for specific words and phrases, for example, if
                # specific commands are typically spoken by the user. It can also be used
                # to add additional words to the vocabulary of the recognizer. See
                # [usage limits](https://cloud.google.com/speech/limits#content).
              "A String",
            ],
          },
        ],
      },
      "segments": [ # Video segments to annotate. The segments may overlap and are not required
          # to be contiguous or to span the whole video. If unspecified, each video is
          # treated as a single segment.
        { # Video segment.
          "endTimeOffset": "A String", # Time-offset, relative to the beginning of the video,
              # corresponding to the end of the segment (inclusive).
          "startTimeOffset": "A String", # Time-offset, relative to the beginning of the video,
              # corresponding to the start of the segment (inclusive).
        },
      ],
      "labelDetectionConfig": { # Config for LABEL_DETECTION. # Config for LABEL_DETECTION.
        "labelDetectionMode": "A String", # What labels should be detected with LABEL_DETECTION, in addition to
            # video-level labels or segment-level labels.
            # If unspecified, defaults to `SHOT_MODE`.
        "model": "A String", # Model to use for label detection.
            # Supported values: "builtin/stable" (the default if unset) and
            # "builtin/latest".
        "stationaryCamera": True or False, # Whether the video has been shot from a stationary (i.e. non-moving) camera.
            # When set to true, might improve detection accuracy for moving objects.
            # Should be used with `SHOT_AND_FRAME_MODE` enabled.
        "frameConfidenceThreshold": 3.14, # The confidence threshold used to filter labels from frame-level detection.
            # If not set, it defaults to 0.4. The valid range for this threshold is
            # [0.1, 0.9]; any value outside this range will be clipped.
            # Note: for best results, keep the default threshold. We will update the
            # default threshold every time we release a new model.
        "videoConfidenceThreshold": 3.14, # The confidence threshold used to filter labels from video-level and
            # shot-level detections. If not set, it defaults to 0.3. The valid range for
            # this threshold is [0.1, 0.9]; any value outside this range will be clipped.
            # Note: for best results, keep the default threshold. We will update the
            # default threshold every time we release a new model.
      },
      "explicitContentDetectionConfig": { # Config for EXPLICIT_CONTENT_DETECTION. # Config for EXPLICIT_CONTENT_DETECTION.
        "model": "A String", # Model to use for explicit content detection.
            # Supported values: "builtin/stable" (the default if unset) and
            # "builtin/latest".
      },
      "objectTrackingConfig": { # Config for OBJECT_TRACKING. # Config for OBJECT_TRACKING.
        "model": "A String", # Model to use for object tracking.
            # Supported values: "builtin/stable" (the default if unset) and
            # "builtin/latest".
      },
    },
    "features": [ # Requested video annotation features.
      "A String",
    ],
    "inputContent": "A String", # The video data bytes.
        # If unset, the input video(s) should be specified via `input_uri`.
        # If set, `input_uri` should be unset.
    "inputUri": "A String", # Input video location. Currently, only
        # [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
        # supported, which must be specified in the following format:
        # `gs://bucket-id/object-id` (other URI formats return
        # google.rpc.Code.INVALID_ARGUMENT). For more information, see
        # [Request URIs](/storage/docs/reference-uris).
        # A video URI may include wildcards in `object-id`, and thus identify
        # multiple videos. Supported wildcards: '*' to match 0 or more characters;
        # '?' to match 1 character. If unset, the input video should be embedded
        # in the request as `input_content`. If set, `input_content` should be unset.
    "locationId": "A String", # Optional cloud region where annotation should take place. Supported cloud
        # regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no region
        # is specified, a region will be determined based on video file location.
    "outputUri": "A String", # Optional location where the output (in JSON format) should be stored.
        # Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
        # URIs are supported, which must be specified in the following format:
        # `gs://bucket-id/object-id` (other URI formats return
        # google.rpc.Code.INVALID_ARGUMENT). For more information, see
        # [Request URIs](/storage/docs/reference-uris).
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # This resource represents a long-running operation that is the result of a
      # network API call.
    "metadata": { # Service-specific metadata associated with the operation. It typically
        # contains progress information and common metadata such as create time.
        # Some services might not provide such metadata. Any method that returns a
        # long-running operation should document the metadata type, if any.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "error": { # The error result of the operation in case of failure or cancellation.
        # The `Status` type defines a logical error model that is suitable for
        # different programming environments, including REST APIs and RPC APIs. It is
        # used by [gRPC](https://github.com/grpc). Each `Status` message contains
        # three pieces of data: error code, error message, and error details.
        #
        # You can find out more about this error model and how to work with it in the
        # [API Design Guide](https://cloud.google.com/apis/design/errors).
      "message": "A String", # A developer-facing error message, which should be in English. Any
          # user-facing error message should be localized and sent in the
          # google.rpc.Status.details field, or localized by the client.
      "code": 42, # The status code, which should be an enum value of google.rpc.Code.
      "details": [ # A list of messages that carry the error details. There is a common set of
          # message types for APIs to use.
        {
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
      ],
    },
    "done": True or False, # If the value is `false`, it means the operation is still in progress.
        # If `true`, the operation is completed, and either `error` or `response` is
        # available.
    "response": { # The normal response of the operation in case of success. If the original
        # method returns no data on success, such as `Delete`, the response is
        # `google.protobuf.Empty`. If the original method is standard
        # `Get`/`Create`/`Update`, the response should be the resource. For other
        # methods, the response should have the type `XxxResponse`, where `Xxx`
        # is the original method name. For example, if the original method name
        # is `TakeSnapshot()`, the inferred response type is
        # `TakeSnapshotResponse`.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "name": "A String", # The server-assigned name, which is only unique within the same service that
        # originally returns it. If you use the default HTTP mapping, the
        # `name` should be a resource name ending with `operations/{unique_id}`.
  }
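
Example: a minimal sketch of calling this method with the discovery-based Python
client. The version string, bucket path, and chosen features below are
illustrative assumptions, not fixed values; substitute the API version this page
was generated for. Recent releases of google-api-python-client fall back to
Application Default Credentials when no credentials are passed to build().

from googleapiclient.discovery import build

# Assumed version string; use the version this reference documents.
service = build('videointelligence', 'v1')

request_body = {
    'inputUri': 'gs://bucket-id/object-id',  # or set 'inputContent' and leave 'inputUri' unset
    'features': ['LABEL_DETECTION', 'SPEECH_TRANSCRIPTION'],
    'videoContext': {
        'labelDetectionConfig': {
            'labelDetectionMode': 'SHOT_MODE',  # the documented default
        },
        'speechTranscriptionConfig': {
            'languageCode': 'en-US',  # *Required* whenever SPEECH_TRANSCRIPTION is requested
            'enableAutomaticPunctuation': True,
        },
    },
}

operation = service.videos().annotate(body=request_body).execute()
print(operation['name'])  # poll this name via the google.longrunning.Operations interface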
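
Once the operation has been re-fetched through the Operations interface (the
exact operations resource path in the client varies by API version, so the
fetch step is omitted here), the structure under Returns above can be
interpreted as follows. handle_operation is a hypothetical helper, not part of
the client library.

def handle_operation(operation):
    # Interprets an Operation dict shaped as described under "Returns" above.
    if not operation.get('done'):
        return None  # still in progress; keep polling

    if 'error' in operation:
        # 'error' is a google.rpc.Status: code, message, details.
        status = operation['error']
        raise RuntimeError('Annotation failed: %s %s'
                           % (status.get('code'), status.get('message')))

    # On success, 'response' carries the AnnotateVideoResponse
    # (a dict whose @type field identifies the payload).
    return operation['response']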