@@ -67,6 +67,9 @@ class Document(proto.Message):
67
67
representations use base64.
68
68
69
69
This field is a member of `oneof`_ ``source``.
70
+ docid (str):
71
+ Optional. An internal identifier for
72
+ the document. Should be loggable (no PII).
70
73
mime_type (str):
71
74
An IANA published `media type (MIME
72
75
type) <https://www.iana.org/assignments/media-types/media-types.xhtml>`__.
@@ -108,6 +111,12 @@ class Document(proto.Message):
108
111
Parsed layout of the document.
109
112
chunked_document (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument):
110
113
Document chunked based on chunking config.
114
+ blob_assets (MutableSequence[google.cloud.documentai_v1beta3.types.Document.BlobAsset]):
115
+ Optional. The blob assets in this document.
116
+ This is used to store the content of the inline
117
+ blobs in this document, e.g. image bytes, such
118
+ that it can be referenced by other fields in the
119
+ document via asset id.
111
120
"""
112
121
113
122
class ShardInfo (proto .Message ):
@@ -1819,6 +1828,20 @@ class TextChange(proto.Message):
1819
1828
message = "Document.Provenance" ,
1820
1829
)
1821
1830
1831
class Annotations(proto.Message):
    r"""Annotation attached to a block or a chunk.

    Attributes:
        description (str):
            Description of the content that carries this
            annotation.
    """

    description: str = proto.Field(proto.STRING, number=1)
1844
+
1822
1845
class DocumentLayout (proto .Message ):
1823
1846
r"""Represents the parsed layout of a document as a collection of
1824
1847
blocks that the document is divided into.
@@ -1851,11 +1874,17 @@ class DocumentLayoutBlock(proto.Message):
1851
1874
list_block (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutListBlock):
1852
1875
Block consisting of list content/structure.
1853
1876
1877
+ This field is a member of `oneof`_ ``block``.
1878
+ image_block (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutImageBlock):
1879
+ Block consisting of image content.
1880
+
1854
1881
This field is a member of `oneof`_ ``block``.
1855
1882
block_id (str):
1856
1883
ID of the block.
1857
1884
page_span (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutPageSpan):
1858
1885
Page span of the block.
1886
+ bounding_box (google.cloud.documentai_v1beta3.types.BoundingPoly):
1887
+ Identifies the bounding box for the block.
1859
1888
"""
1860
1889
1861
1890
class LayoutPageSpan (proto .Message ):
@@ -2028,6 +2057,74 @@ class LayoutListEntry(proto.Message):
2028
2057
message = "Document.DocumentLayout.DocumentLayoutBlock" ,
2029
2058
)
2030
2059
2060
class LayoutImageBlock(proto.Message):
    r"""Represents an image type block.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        blob_asset_id (str):
            Optional. Asset id of the inline image. If set, find the
            image content in the blob_assets field.

            This field is a member of `oneof`_ ``image_source``.
        gcs_uri (str):
            Optional. Google Cloud Storage uri of the
            image.

            This field is a member of `oneof`_ ``image_source``.
        data_uri (str):
            Optional. Data uri of the image. It is composed of four
            parts: a prefix (``data:``), a MIME type indicating the type
            of data, an optional ``base64`` token if non-textual, and the
            data itself: ``data:[<mediatype>][;base64],<data>``

            This field is a member of `oneof`_ ``image_source``.
        mime_type (str):
            Mime type of the image. An IANA published `media type (MIME
            type) <https://www.iana.org/assignments/media-types/media-types.xhtml>`__.
        image_text (str):
            Text extracted from the image using OCR or
            alt text describing the image.
        annotations (google.cloud.documentai_v1beta3.types.Document.Annotations):
            Annotation of the image block.
    """

    # Members of the ``image_source`` oneof (field numbers 4-6).
    blob_asset_id: str = proto.Field(
        proto.STRING,
        number=4,
        oneof="image_source",
    )
    gcs_uri: str = proto.Field(
        proto.STRING,
        number=5,
        oneof="image_source",
    )
    data_uri: str = proto.Field(
        proto.STRING,
        number=6,
        oneof="image_source",
    )

    # Plain (non-oneof) fields (field numbers 1-3).
    mime_type: str = proto.Field(
        proto.STRING,
        number=1,
    )
    image_text: str = proto.Field(
        proto.STRING,
        number=2,
    )
    annotations: "Document.Annotations" = proto.Field(
        proto.MESSAGE,
        number=3,
        message="Document.Annotations",
    )
2127
+
2031
2128
text_block : "Document.DocumentLayout.DocumentLayoutBlock.LayoutTextBlock" = proto .Field (
2032
2129
proto .MESSAGE ,
2033
2130
number = 2 ,
@@ -2046,6 +2143,12 @@ class LayoutListEntry(proto.Message):
2046
2143
oneof = "block" ,
2047
2144
message = "Document.DocumentLayout.DocumentLayoutBlock.LayoutListBlock" ,
2048
2145
)
2146
+ image_block : "Document.DocumentLayout.DocumentLayoutBlock.LayoutImageBlock" = proto .Field (
2147
+ proto .MESSAGE ,
2148
+ number = 7 ,
2149
+ oneof = "block" ,
2150
+ message = "Document.DocumentLayout.DocumentLayoutBlock.LayoutImageBlock" ,
2151
+ )
2049
2152
block_id : str = proto .Field (
2050
2153
proto .STRING ,
2051
2154
number = 1 ,
@@ -2055,6 +2158,11 @@ class LayoutListEntry(proto.Message):
2055
2158
number = 5 ,
2056
2159
message = "Document.DocumentLayout.DocumentLayoutBlock.LayoutPageSpan" ,
2057
2160
)
2161
+ bounding_box : geometry .BoundingPoly = proto .Field (
2162
+ proto .MESSAGE ,
2163
+ number = 6 ,
2164
+ message = geometry .BoundingPoly ,
2165
+ )
2058
2166
2059
2167
blocks : MutableSequence [
2060
2168
"Document.DocumentLayout.DocumentLayoutBlock"
@@ -2088,6 +2196,8 @@ class Chunk(proto.Message):
2088
2196
Page headers associated with the chunk.
2089
2197
page_footers (MutableSequence[google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.ChunkPageFooter]):
2090
2198
Page footers associated with the chunk.
2199
+ chunk_fields (MutableSequence[google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.ChunkField]):
2200
+ Chunk fields inside this chunk.
2091
2201
"""
2092
2202
2093
2203
class ChunkPageSpan (proto .Message ):
@@ -2149,6 +2259,112 @@ class ChunkPageFooter(proto.Message):
2149
2259
message = "Document.ChunkedDocument.Chunk.ChunkPageSpan" ,
2150
2260
)
2151
2261
2262
class ImageChunkField(proto.Message):
    r"""The image chunk field in the chunk.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        blob_asset_id (str):
            Optional. Asset id of the inline image. If set, find the
            image content in the blob_assets field.

            This field is a member of `oneof`_ ``image_source``.
        gcs_uri (str):
            Optional. Google Cloud Storage uri of the
            image.

            This field is a member of `oneof`_ ``image_source``.
        data_uri (str):
            Optional. Data uri of the image. It is composed of four
            parts: a prefix (``data:``), a MIME type indicating the type
            of data, an optional ``base64`` token if non-textual, and the
            data itself: ``data:[<mediatype>][;base64],<data>``

            This field is a member of `oneof`_ ``image_source``.
        annotations (google.cloud.documentai_v1beta3.types.Document.Annotations):
            Annotation of the image chunk field.
    """

    # Members of the ``image_source`` oneof (field numbers 1-3).
    blob_asset_id: str = proto.Field(
        proto.STRING,
        number=1,
        oneof="image_source",
    )
    gcs_uri: str = proto.Field(
        proto.STRING,
        number=2,
        oneof="image_source",
    )
    data_uri: str = proto.Field(
        proto.STRING,
        number=3,
        oneof="image_source",
    )

    # Plain (non-oneof) field.
    annotations: "Document.Annotations" = proto.Field(
        proto.MESSAGE,
        number=4,
        message="Document.Annotations",
    )
2314
+
2315
class TableChunkField(proto.Message):
    r"""A table-typed chunk field within a chunk.

    Attributes:
        annotations (google.cloud.documentai_v1beta3.types.Document.Annotations):
            Annotation attached to this table chunk field.
    """

    annotations: "Document.Annotations" = proto.Field(
        proto.MESSAGE, number=1, message="Document.Annotations"
    )
2328
+
2329
class ChunkField(proto.Message):
    r"""A single field of a chunk, holding one of the supported
    field types (e.g. image, table).

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        image_chunk_field (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.ImageChunkField):
            Image variant of the chunk field.

            This field is a member of `oneof`_ ``field_type``.
        table_chunk_field (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.TableChunkField):
            Table variant of the chunk field.

            This field is a member of `oneof`_ ``field_type``.
    """

    # Both fields belong to the ``field_type`` oneof.
    image_chunk_field: "Document.ChunkedDocument.Chunk.ImageChunkField" = proto.Field(
        proto.MESSAGE,
        number=1,
        oneof="field_type",
        message="Document.ChunkedDocument.Chunk.ImageChunkField",
    )
    table_chunk_field: "Document.ChunkedDocument.Chunk.TableChunkField" = proto.Field(
        proto.MESSAGE,
        number=2,
        oneof="field_type",
        message="Document.ChunkedDocument.Chunk.TableChunkField",
    )
2367
+
2152
2368
chunk_id : str = proto .Field (
2153
2369
proto .STRING ,
2154
2370
number = 1 ,
@@ -2180,13 +2396,50 @@ class ChunkPageFooter(proto.Message):
2180
2396
number = 6 ,
2181
2397
message = "Document.ChunkedDocument.Chunk.ChunkPageFooter" ,
2182
2398
)
2399
+ chunk_fields : MutableSequence [
2400
+ "Document.ChunkedDocument.Chunk.ChunkField"
2401
+ ] = proto .RepeatedField (
2402
+ proto .MESSAGE ,
2403
+ number = 7 ,
2404
+ message = "Document.ChunkedDocument.Chunk.ChunkField" ,
2405
+ )
2183
2406
2184
2407
chunks : MutableSequence ["Document.ChunkedDocument.Chunk" ] = proto .RepeatedField (
2185
2408
proto .MESSAGE ,
2186
2409
number = 1 ,
2187
2410
message = "Document.ChunkedDocument.Chunk" ,
2188
2411
)
2189
2412
2413
class BlobAsset(proto.Message):
    r"""A blob asset holding the content of an inline blob in this
    document (e.g. image bytes), so that other fields in the
    document can refer to it by asset id.

    Attributes:
        asset_id (str):
            Optional. Identifier of the blob asset.
        content (bytes):
            Optional. Raw content of the blob asset, e.g.
            image bytes.
        mime_type (str):
            Mime type of the blob asset. An IANA published `media
            type (MIME
            type) <https://www.iana.org/assignments/media-types/media-types.xhtml>`__.
    """

    asset_id: str = proto.Field(proto.STRING, number=1)
    content: bytes = proto.Field(proto.BYTES, number=2)
    mime_type: str = proto.Field(proto.STRING, number=3)
2442
+
2190
2443
uri : str = proto .Field (
2191
2444
proto .STRING ,
2192
2445
number = 1 ,
@@ -2197,6 +2450,10 @@ class ChunkPageFooter(proto.Message):
2197
2450
number = 2 ,
2198
2451
oneof = "source" ,
2199
2452
)
2453
+ docid : str = proto .Field (
2454
+ proto .STRING ,
2455
+ number = 15 ,
2456
+ )
2200
2457
mime_type : str = proto .Field (
2201
2458
proto .STRING ,
2202
2459
number = 3 ,
@@ -2255,6 +2512,11 @@ class ChunkPageFooter(proto.Message):
2255
2512
number = 18 ,
2256
2513
message = ChunkedDocument ,
2257
2514
)
2515
+ blob_assets : MutableSequence [BlobAsset ] = proto .RepeatedField (
2516
+ proto .MESSAGE ,
2517
+ number = 19 ,
2518
+ message = BlobAsset ,
2519
+ )
2258
2520
2259
2521
2260
2522
class RevisionRef (proto .Message ):
0 commit comments