Skip to content

Commit f1fd11c

Browse files
authored
storage: use s3v4 signature for presigning urls (#2611)
Use V4 ('s3v4') signature version for all presigning URLs to support backblaze, fixes #2472
- add 'access_addressing_style' to be able to choose virtual/path addressing for access endpoint (default to 'virtual' as before)
- fix minio presigning with v4 by using 'path' addressing style for minio
- if path matches '/data/' for internal minio bucket, then always use 'path'
- also make minio access path '/data/' configurable

also simplify running in any namespace with default settings:
- don't hardcode 'local-minio.default'
- in crawlers namespace, add a 'local-minio' externalName service which maps to the main namespace service.
1 parent 4b1e416 commit f1fd11c

File tree

8 files changed

+56
-10
lines changed

8 files changed

+56
-10
lines changed

backend/btrixcloud/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,6 +1666,7 @@ class S3StorageIn(BaseModel):
16661666
endpoint_url: str
16671667
bucket: str
16681668
access_endpoint_url: Optional[str] = None
1669+
access_addressing_style: Literal["virtual", "path"] = "virtual"
16691670
region: str = ""
16701671

16711672

@@ -1680,6 +1681,7 @@ class S3Storage(BaseModel):
16801681
access_key: str
16811682
secret_key: str
16821683
access_endpoint_url: str
1684+
access_addressing_style: Literal["virtual", "path"] = "virtual"
16831685
region: str = ""
16841686

16851687

backend/btrixcloud/storages.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070

7171

7272
# ============================================================================
73-
# pylint: disable=broad-except,raise-missing-from
73+
# pylint: disable=broad-except,raise-missing-from,too-many-instance-attributes
7474
class StorageOps:
7575
"""All storage handling, download/upload operations"""
7676

@@ -104,6 +104,8 @@ def __init__(self, org_ops, crawl_manager, mdb) -> None:
104104
default_namespace = os.environ.get("DEFAULT_NAMESPACE", "default")
105105
self.frontend_origin = f"{frontend_origin}.{default_namespace}"
106106

107+
self.local_minio_access_path = os.environ.get("LOCAL_MINIO_ACCESS_PATH")
108+
107109
with open(os.environ["STORAGES_JSON"], encoding="utf-8") as fh:
108110
storage_list = json.loads(fh.read())
109111

@@ -158,13 +160,18 @@ def _create_s3_storage(self, storage: dict[str, str]) -> S3Storage:
158160

159161
access_endpoint_url = storage.get("access_endpoint_url") or endpoint_url
160162

163+
addressing_style = storage.get("access_addressing_style", "virtual")
164+
if access_endpoint_url == self.local_minio_access_path:
165+
addressing_style = "path"
166+
161167
return S3Storage(
162168
access_key=storage["access_key"],
163169
secret_key=storage["secret_key"],
164170
region=storage.get("region", ""),
165171
endpoint_url=endpoint_url,
166172
endpoint_no_bucket_url=endpoint_no_bucket_url,
167173
access_endpoint_url=access_endpoint_url,
174+
access_addressing_style=addressing_style,
168175
)
169176

170177
async def add_custom_storage(
@@ -189,6 +196,7 @@ async def add_custom_storage(
189196
endpoint_url=endpoint_url,
190197
endpoint_no_bucket_url=endpoint_no_bucket_url,
191198
access_endpoint_url=storagein.access_endpoint_url or storagein.endpoint_url,
199+
access_addressing_style=storagein.access_addressing_style,
192200
)
193201

194202
try:
@@ -291,9 +299,12 @@ async def get_s3_client(
291299

292300
session = aiobotocore.session.get_session()
293301

294-
config = None
302+
s3 = None
303+
295304
if for_presign and storage.access_endpoint_url != storage.endpoint_url:
296-
config = AioConfig(s3={"addressing_style": "virtual"})
305+
s3 = {"addressing_style": storage.access_addressing_style}
306+
307+
config = AioConfig(signature_version="s3v4", s3=s3)
297308

298309
async with session.create_client(
299310
"s3",
@@ -498,9 +509,12 @@ async def get_presigned_url(
498509
s3storage.access_endpoint_url
499510
and s3storage.access_endpoint_url != s3storage.endpoint_url
500511
):
512+
virtual = s3storage.access_addressing_style == "virtual"
501513
parts = urlsplit(s3storage.endpoint_url)
502514
host_endpoint_url = (
503515
f"{parts.scheme}://{bucket}.{parts.netloc}/{orig_key}"
516+
if virtual
517+
else f"{parts.scheme}://{parts.netloc}/{bucket}/{orig_key}"
504518
)
505519
presigned_url = presigned_url.replace(
506520
host_endpoint_url, s3storage.access_endpoint_url

chart/templates/configmap.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ data:
1414

1515
FRONTEND_ORIGIN: {{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}
1616

17-
CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
17+
CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}"
1818

1919
DEFAULT_ORG: "{{ .Values.default_org }}"
2020

@@ -53,6 +53,8 @@ data:
5353

5454
IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
5555

56+
LOCAL_MINIO_ACCESS_PATH: "{{ .Values.minio_access_path }}"
57+
5658
STORAGES_JSON: "/ops-configs/storages.json"
5759

5860
CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"

chart/templates/frontend.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ spec:
4141
value: {{ .Values.name }}-backend
4242

4343
- name: CRAWLER_FQDN_SUFFIX
44-
value: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
44+
value: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}"
4545

4646
- name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE
4747
value: "1"
@@ -60,7 +60,10 @@ spec:
6060

6161
- name: LOCAL_BUCKET
6262
value: "{{ .Values.minio_local_bucket_name }}"
63-
{{- end }}
63+
64+
- name: LOCAL_ACCESS_PATH
65+
value: "{{ .Values.minio_access_path }}"
66+
{{- end }}
6467

6568
{{- if .Values.inject_extra }}
6669
- name: INJECT_EXTRA

chart/templates/minio.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,23 @@ spec:
136136
{{- end }}
137137
name: minio
138138

139+
---
140+
apiVersion: v1
141+
kind: Service
142+
143+
metadata:
144+
namespace: {{ .Values.crawler_namespace }}
145+
name: local-minio
146+
labels:
147+
app: local-minio
148+
149+
spec:
150+
type: ExternalName
151+
externalName: "local-minio.{{ .Release.Namespace }}{{ .Values.fqdn_suffix }}"
152+
ports:
153+
- port: 9000
154+
155+
139156
{{- if .Values.minio_local_console_port }}
140157
---
141158
apiVersion: v1

chart/values.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,9 @@ minio_pull_policy: "IfNotPresent"
398398

399399
minio_local_bucket_name: &local_bucket_name "btrix-data"
400400

401+
# path for serving from local minio bucket
402+
minio_access_path: &minio_access_path "/data/"
403+
401404
minio_cpu: "10m"
402405
minio_memory: "1024Mi"
403406

@@ -413,8 +416,8 @@ storages:
413416
secret_key: "PASSW0RD"
414417
bucket_name: *local_bucket_name
415418

416-
endpoint_url: "http://local-minio.default:9000/"
417-
access_endpoint_url: "/data/"
419+
endpoint_url: "http://local-minio:9000/"
420+
access_endpoint_url: *minio_access_path
418421

419422

420423
# optional: duration in minutes for WACZ download links to be valid
@@ -495,6 +498,9 @@ signer_memory: "50Mi"
495498
# Other Settings
496499
# =========================================
497500

501+
# default FQDN suffix, shouldn't need to change
502+
fqdn_suffix: .svc.cluster.local
503+
498504
# Optional: configure load balancing annotations
499505
# service:
500506
# annotations:

frontend/00-browsertrix-nginx-init.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ if [ -z "$LOCAL_MINIO_HOST" ]; then
77
echo "no local minio, clearing out minio route"
88
echo "" >/etc/nginx/includes/minio.conf
99
else
10-
echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\""
10+
LOCAL_ACCESS_PATH=$(printf '%s\n' "$LOCAL_ACCESS_PATH" | sed -e 's/[\/&]/\\&/g')
11+
echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\", \$LOCAL_ACCESS_PATH with \"$LOCAL_ACCESS_PATH\""
12+
sed -i "s/\$LOCAL_ACCESS_PATH/$LOCAL_ACCESS_PATH/g" /etc/nginx/includes/minio.conf
1113
sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf
1214
sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf
1315
fi

frontend/minio.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
location /data/ {
1+
location $LOCAL_ACCESS_PATH {
22
proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/;
33
proxy_redirect off;
44
proxy_buffering off;

0 commit comments

Comments
 (0)