Home Download Docs Code Community
     1	/*
     2	Copyright 2018 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	package s3
    18	
    19	import (
    20		"bytes"
    21		"context"
    22		"encoding/xml"
    23		"fmt"
    24		"io"
    25		"log"
    26		"net/http"
    27		"net/url"
    28		"regexp"
    29		"strings"
    30	
    31		"github.com/aws/aws-sdk-go/aws"
    32		"github.com/aws/aws-sdk-go/aws/awserr"
    33		"github.com/aws/aws-sdk-go/aws/client"
    34		"github.com/aws/aws-sdk-go/aws/endpoints"
    35		"github.com/aws/aws-sdk-go/aws/request"
    36		"github.com/aws/aws-sdk-go/service/s3"
    37		"github.com/aws/aws-sdk-go/service/s3/s3iface"
    38		"github.com/aws/aws-sdk-go/service/s3/s3manager"
    39	)
    40	
    41	type bucketInfo struct {
    42		endpoint string
    43		region   string
    44		isAWS    bool
    45	}
    46	
    47	// normalizeBucketLocation determines the best endpoint and region to use for
    48	// the given endpoint + bucket. It performs preflight checks to determine the
    49	// appropriate region and endpoint.
    50	// This is required for backwards compatibility. Normally, using
    51	// s3manager.GetBucketRegion would be sufficient for common mistakes, but the
    52	// initial implementation of the s3 blobserver also allowed inputing the s3
    53	// website hostname of a bucket to configure it.
    54	// The AWS SDK provides no way to determine the correct bucket+endpoint from
    55	// this url because, frankly, it's not meant to be used in the way this
    56	// blobserver initially did (where it's punned on to be both an api endpoint
    57	// and the s3-website hostname).
    58	//
    59	// For a concrete example, this bucket might be given an input of
    60	// "test.s3-us-west-1.amazonaws.com", and it would return
    61	// endpoint=s3.us-west-1.amazonaws.com, isAWS=true, region=us-west-1 (assuming,
    62	// of course, the bucket is in us-west-1).
    63	func normalizeBucketLocation(ctx context.Context, cfg client.ConfigProvider, endpoint string, bucket string, configRegion string) (bucketInfo, error) {
    64		if strings.HasPrefix(endpoint, "https://") || strings.HasPrefix(endpoint, "http://") {
    65			return bucketInfo{}, fmt.Errorf("invalid s3 endpoint: must not include uri scheme")
    66		}
    67	
    68		svc := s3.New(cfg)
    69		endpoint, region, err := determineEndpoint(ctx, svc, endpoint, bucket, configRegion)
    70		if err != nil {
    71			return bucketInfo{}, err
    72		}
    73		if region != "" {
    74			svc.Config.WithRegion(region)
    75		}
    76		isAWS, endpoint, err := endpointIsOfficial(endpoint)
    77		if err != nil {
    78			return bucketInfo{}, err
    79		}
    80		// if isAWS is false, the target also supports AWS s3 API
    81		if !isAWS {
    82			return bucketInfo{
    83				endpoint: endpoint,
    84				isAWS:    isAWS,
    85				region:   region,
    86			}, nil
    87		}
    88		// the endpoint should be corrected before being used to determine a region
    89		// or else the region request can fail spuriously
    90		svc.Config.WithEndpoint(endpoint)
    91		region, err = s3manager.GetBucketRegion(ctx, cfg, bucket, region)
    92		// ignore errors if it's not an official endpoint since it's not unusual for
    93		// non-AWS S3 endpoints to not support the get bucket location API
    94		if isAWS && err != nil {
    95			return bucketInfo{}, err
    96		}
    97		return bucketInfo{
    98			endpoint: endpoint,
    99			isAWS:    isAWS,
   100			region:   region,
   101		}, nil
   102	}
   103	
   104	// determineEndpoint makes a preflight request to AWS against the provided
   105	// endpoint+bucket+region to see if they ask us to use a different endpoint
   106	// instead.
   107	// This can occur if either the endpoint, region, or pair of them is incorrect.
   108	// It returns the endpoint and region (in that order) that should be used
   109	// according to AWS.
   110	func determineEndpoint(ctx context.Context, svc s3iface.S3API, endpoint, bucket, region string) (string, string, error) {
   111		req, _ := svc.ListObjectsV2Request(&s3.ListObjectsV2Input{
   112			Bucket:  &bucket,
   113			MaxKeys: aws.Int64(1),
   114		})
   115		if region != "" {
   116			req.ClientInfo.SigningRegion = region
   117		}
   118		req.Config.S3ForcePathStyle = aws.Bool(true)
   119		req.DisableFollowRedirects = true
   120		req.SetContext(ctx)
   121	
   122		var determinedEndpoint string
   123		req.Handlers.UnmarshalError.PushFront(func(r *request.Request) {
   124			if r.HTTPResponse.StatusCode != http.StatusMovedPermanently {
   125				return
   126			}
   127			var b bytes.Buffer
   128			if _, err := io.Copy(&b, r.HTTPResponse.Body); err != nil {
   129				r.Error = fmt.Errorf("error reading body: %v", err)
   130				return
   131			}
   132	
   133			type endpointErr struct {
   134				Endpoint string `xml:"Endpoint"`
   135			}
   136	
   137			var epErr endpointErr
   138			err := xml.NewDecoder(&b).Decode(&epErr)
   139			if err != nil {
   140				r.Error = err
   141				return
   142			}
   143			determinedEndpoint = epErr.Endpoint
   144			r.HTTPResponse.Body = io.NopCloser(&b)
   145		})
   146		err := req.Send()
   147		if determinedEndpoint == "" && err != nil {
   148			if region == "" {
   149				// only recurse once; region == "" only happens with a call depth of 1 here
   150				if newRegion := regionFromMalformedAuthHeaderError(err); newRegion != "" {
   151					// retry with the correct region
   152					return determineEndpoint(ctx, svc, endpoint, bucket, newRegion)
   153				}
   154			}
   155			return "", "", fmt.Errorf("s3: could not determine endpoint: %v", err)
   156		}
   157		// this indicates the UnmarshalError handler wasn't called, and since the
   158		// above branch didn't happen there wasn't an error. That means our current
   159		// endpoint + region combo works fine.
   160		if determinedEndpoint == "" {
   161			return endpoint, region, nil
   162		}
   163		// the 'Endpoint' object needlessly includes the bucket name at the beginning
   164		determinedEndpoint = strings.TrimPrefix(determinedEndpoint, bucket+".")
   165		return determinedEndpoint, region, nil
   166	}
   167	
   168	// endpointIsOfficial determines if the endpoint is an aws one.
   169	// If it is an aws one, it will return the aws canonical endpoint as well,
   170	// otherwise it will return the endpoint unmodified.
   171	// For example, "s3.amazonaws.com" is the canonical us-east-1 endpoint,
   172	// 's3.us-west-2.amazonaws.com' is the canonical us-west-2 endpoint, and so on.
   173	// There are additional official endpoints, such as
   174	// 's3-us-west-2.amazonaws.com', but these endpoints are not exposed by the
   175	// SDK's public interfaces, so we can't test against those.
   176	// For more information on these endpoints, see AWS's list:
   177	// https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
   178	// In any case where a user has one of these endpoints set, it will not be
   179	// recognized as an AWS endpoint correctly, but should still function just fine
   180	// since this detection is for the purpose of relaxing some constraints on
   181	// non-AWS endpoints, so it's perfectly fine to have false negatives.
   182	func endpointIsOfficial(endpoint string) (bool, string, error) {
   183		for _, partition := range endpoints.DefaultPartitions() {
   184			for _, region := range partition.Regions() {
   185				s3Endpoint, err := region.ResolveEndpoint(endpoints.S3ServiceID)
   186				if err != nil {
   187					// S3 isn't available in this region yet; unlikely to ever happen
   188					continue
   189				}
   190				p, err := url.Parse(s3Endpoint.URL)
   191				if err != nil {
   192					return false, endpoint, err
   193				}
   194	
   195				if strings.HasSuffix(endpoint, p.Host) {
   196					return true, p.Host, nil
   197				}
   198			}
   199		}
   200		return false, endpoint, nil
   201	}
   202	
   203	var malformedAuthHeaderMessageRegexp = regexp.MustCompile("region '[^']+' is wrong; expecting '([^']+)'")
   204	
   205	// backwards compatibility: perkeep used to use v2 signing. v2 signing knows
   206	// nothing about regions, so users could configure their perkeep instance with
   207	// region = us-east-1, but endpoint = us-west-2.s3.amazonaws.com, and things
   208	// would work before.
   209	// In an attempt to retain that functionality, we parse the error message
   210	// telling us we hit the wrong region and auto-correct it.
   211	func regionFromMalformedAuthHeaderError(err error) string {
   212		if aerr, ok := err.(awserr.Error); ok && aerr.Code() == "AuthorizationHeaderMalformed" {
   213			matches := malformedAuthHeaderMessageRegexp.FindStringSubmatch(aerr.Message())
   214			if len(matches) == 2 {
   215				return matches[1]
   216			}
   217			log.Printf("s3: got AuthorizationHeaderMalformed, but couldn't parse message: %v", aerr.Message())
   218		}
   219		return ""
   220	}
Website layout inspired by memcached.
Content by the authors.