Home Download Docs Code Community
     1	/*
     2	Copyright 2017 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	package gphotos
    18	
    19	import (
    20		"context"
    21		"errors"
    22		"io"
    23		"log"
    24		"net/http"
    25		"strings"
    26		"time"
    27	
    28		"golang.org/x/time/rate"
    29		"google.golang.org/api/drive/v3"
    30		"google.golang.org/api/googleapi"
    31	)
    32	
    33	var scopeURLs = []string{drive.DriveReadonlyScope}
    34	
    35	const (
    36		// maximum number of results returned per response page
    37		batchSize = 1000
    38	
    39		// defaultRateLimit is the request rate limiting we start with at the beginning of an importer run.
    40		// It is the default value for the drive API (that can be adjusted in the developers console):
    41		// 1000 queries/100 seconds/user.
    42		// The rate limiting is then dynamically adjusted during the importer run.
    43		defaultRateLimit = rate.Limit(10)
    44	)
    45	
    46	// getUser returns the authenticated Google Drive user's User value,
    47	// containing their name, email address, and "permission ID",
    48	// which is the "The user's ID as visible in Permission resources" according
    49	// to https://developers.google.com/drive/v3/reference/about#resource
    50	// The permission ID becomes the "userID" (AcctAttrUserID) value on the
    51	// account's "importerAccount" permanode.
    52	func getUser(ctx context.Context, client *http.Client) (*drive.User, error) {
    53		srv, err := drive.New(client)
    54		if err != nil {
    55			return nil, err
    56		}
    57		about, err := srv.About.Get().
    58			Context(ctx).
    59			Fields("user(displayName,emailAddress,permissionId)").Do()
    60		if err != nil {
    61			return nil, err
    62		}
    63		return about.User, nil
    64	}
    65	
    66	type downloader struct {
    67		// rate is the download rate limiter.
    68		rate *rate.Limiter
    69	
    70		*drive.Service
    71	}
    72	
    73	// newDownloader returns a downloader with the given http.Client
    74	// to download photos.
    75	//
    76	// The client must be authenticated for drive.DrivePhotosReadonlyScope
    77	// ("https://www.googleapis.com/auth/drive.photos.readonly")..
    78	func newDownloader(client *http.Client) (*downloader, error) {
    79		srv, err := drive.New(client)
    80		if err != nil {
    81			return nil, err
    82		}
    83		return &downloader{
    84			rate:    rate.NewLimiter(defaultRateLimit, 1),
    85			Service: srv,
    86		}, nil
    87	}
    88	
    89	// foreachPhoto runs fn on each photo. If f returns an error, iteration
    90	// stops with that error.
    91	//
    92	// If sinceToken is provided, only photos modified or created after sinceToken are sent.
    93	// Typically, sinceToken is empty on the first importer run,
    94	// and the returned token is saved by the importer,
    95	// to be passed as the sinceToken in the next photos() call.
    96	//
    97	// Returns a new token to watch future changes.
    98	func (dl *downloader) foreachPhoto(ctx context.Context, sinceToken string, fn func(context.Context, *photo) error) (nextToken string, err error) {
    99	
   100		if sinceToken != "" {
   101			return dl.foreachPhotoFromChanges(ctx, sinceToken, fn)
   102		}
   103	
   104		// Get a start page token *before* we enumerate the world, so
   105		// if there are changes during the import, we won't miss
   106		// anything.
   107		var sr *drive.StartPageToken
   108		if err := dl.rateLimit(ctx, func() error {
   109			var err error
   110			sr, err = dl.Service.Changes.GetStartPageToken().Do()
   111			return err
   112		}); err != nil {
   113			return "", err
   114		}
   115		nextToken = sr.StartPageToken
   116		if nextToken == "" {
   117			return "", errors.New("unexpected gdrive Changes.GetStartPageToken response with empty StartPageToken")
   118		}
   119	
   120		if err := dl.foreachPhotoFromScratch(ctx, fn); err != nil {
   121			return "", err
   122		}
   123		return nextToken, nil
   124	}
   125	
   126	const fields = "id,name,size,spaces,mimeType,description,starred,properties,version,webContentLink,createdTime,modifiedTime,originalFilename,imageMediaMetadata(location,time)"
   127	
   128	func (dl *downloader) foreachPhotoFromScratch(ctx context.Context, fn func(context.Context, *photo) error) error {
   129		var token string
   130		for {
   131			select {
   132			case <-ctx.Done():
   133				return ctx.Err()
   134			default:
   135			}
   136	
   137			var r *drive.FileList
   138			if err := dl.rateLimit(ctx, func() error {
   139				var err error
   140				listCall := dl.Service.Files.List().
   141					Context(ctx).
   142					Fields("nextPageToken, files(" + fields + ")").
   143					// If users ran the Picasa importer and they hit the 10000 images limit
   144					// bug, they're missing their most recent photos, so we start by importing
   145					// the most recent ones, since they should already have the oldest ones.
   146					// However, https://developers.google.com/drive/v3/reference/files/list
   147					// states OrderBy does not work for > 1e6 files.
   148					OrderBy("createdTime desc,folder").
   149					// Apparently (as of January 2018) asking for the "photos" space does not return
   150					// anything anymore. So we just ask for all files. Fortunately, we can still
   151					// request the Spaces property of the file, and we can filter out all of the ones
   152					// not within "photos".
   153					Spaces("drive").
   154					PageSize(batchSize).
   155					PageToken(token)
   156				r, err = listCall.Do()
   157				return err
   158			}); err != nil {
   159				return err
   160			}
   161	
   162			logf("got gdrive API response of batch of %d files", len(r.Files))
   163			for _, f := range r.Files {
   164				if f == nil {
   165					// Can this happen? Was in the code before.
   166					logf("unexpected nil entry in gdrive file list response")
   167					continue
   168				}
   169				ph := dl.fileAsPhoto(f)
   170				if ph == nil {
   171					// file is not a photo
   172					continue
   173				}
   174				if err := fn(ctx, ph); err != nil {
   175					return err
   176				}
   177			}
   178			token = r.NextPageToken
   179			if token == "" {
   180				return nil
   181			}
   182		}
   183	}
   184	
   185	func (dl *downloader) foreachPhotoFromChanges(ctx context.Context, sinceToken string, fn func(context.Context, *photo) error) (nextToken string, err error) {
   186		token := sinceToken
   187		for {
   188			select {
   189			case <-ctx.Done():
   190				return "", err
   191			default:
   192			}
   193	
   194			var r *drive.ChangeList
   195			if err := dl.rateLimit(ctx, func() error {
   196				logf("importing changes from token point %q", token)
   197				var err error
   198				r, err = dl.Service.Changes.List(token).
   199					Context(ctx).
   200					Fields("nextPageToken,newStartPageToken, changes(file(" + fields + "))").
   201					// Apparently (as of January 2018) asking for the "photos" space does not return
   202					// anything anymore. So we just ask for all files. Fortunately, we can still
   203					// request the Spaces property of the file, and we can filter out all of the ones
   204					// not within "photos".
   205					Spaces("drive").
   206					PageSize(batchSize).
   207					RestrictToMyDrive(true).
   208					IncludeRemoved(false).Do()
   209				return err
   210			}); err != nil {
   211				return "", err
   212			}
   213			for _, c := range r.Changes {
   214				if c.File == nil {
   215					// Can this happen? Was in the code before.
   216					logf("unexpected nil entry in gdrive changes response")
   217					continue
   218				}
   219				ph := dl.fileAsPhoto(c.File)
   220				if ph == nil {
   221					// file is not a photo
   222					continue
   223				}
   224				if err := fn(ctx, ph); err != nil {
   225					return "", err
   226				}
   227			}
   228			token = r.NextPageToken
   229			if token == "" {
   230				nextToken = r.NewStartPageToken
   231				if nextToken == "" {
   232					return "", errors.New("unexpected gdrive changes response with both NextPageToken and NewStartPageToken empty")
   233				}
   234				return nextToken, nil
   235			}
   236		}
   237	}
   238	
   239	type photo struct {
   240		ID                          string
   241		Name, MimeType, Description string
   242		Starred                     bool
   243		Properties                  map[string]string
   244		WebContentLink              string
   245		CreatedTime, ModifiedTime   time.Time
   246		OriginalFilename            string
   247		Version                     int64
   248		drive.FileImageMediaMetadata
   249	}
   250	
   251	func (dl *downloader) openPhoto(ctx context.Context, photo photo) (io.ReadCloser, error) {
   252		logf("importing media from %v", photo.WebContentLink)
   253		var resp *http.Response
   254		err := dl.rateLimit(ctx, func() error {
   255			var err error
   256			resp, err = dl.Service.Files.Get(photo.ID).Context(ctx).Download()
   257			return err
   258		})
   259		if err != nil {
   260			return nil, err
   261		}
   262		return resp.Body, err
   263	}
   264	
   265	// TODO: works for now since the Spaces for each file are still provided, but it
   266	// probably won't last. So this will have to be rethought.
   267	func inPhotoSpace(f *drive.File) bool {
   268		for _, v := range f.Spaces {
   269			if v == "photos" {
   270				return true
   271			}
   272		}
   273		return false
   274	}
   275	
   276	// fileAsPhoto returns a photo populated with the information found about f,
   277	// or nil if f is not actually a photo from Google Photos.
   278	//
   279	// The returned photo contains only the metadata;
   280	// the content of the photo can be downloaded with dl.openPhoto.
   281	func (dl *downloader) fileAsPhoto(f *drive.File) *photo {
   282		if f == nil {
   283			return nil
   284		}
   285		if f.Size == 0 {
   286			// anything non-binary can't be a photo, so skip it.
   287			return nil
   288		}
   289		if !inPhotoSpace(f) {
   290			// not a photo
   291			return nil
   292		}
   293		p := &photo{
   294			ID:               f.Id,
   295			Name:             f.Name,
   296			Starred:          f.Starred,
   297			Version:          f.Version,
   298			MimeType:         f.MimeType,
   299			Properties:       f.Properties,
   300			Description:      f.Description,
   301			WebContentLink:   f.WebContentLink,
   302			OriginalFilename: f.OriginalFilename,
   303		}
   304		if f.ImageMediaMetadata != nil {
   305			p.FileImageMediaMetadata = *f.ImageMediaMetadata
   306		}
   307		if f.CreatedTime != "" {
   308			p.CreatedTime, _ = time.Parse(time.RFC3339, f.CreatedTime)
   309		}
   310		if f.ModifiedTime != "" {
   311			p.ModifiedTime, _ = time.Parse(time.RFC3339, f.ModifiedTime)
   312		}
   313	
   314		return p
   315	}
   316	
   317	// rateLimit calls f obeying the global Rate limit.
   318	// On "Rate Limit Exceeded" error, it sleeps and tries later.
   319	func (dl *downloader) rateLimit(ctx context.Context, f func() error) error {
   320		const (
   321			msgRateLimitExceeded          = "Rate Limit Exceeded"
   322			msgUserRateLimitExceeded      = "User Rate Limit Exceeded"
   323			msgUserRateLimitExceededShort = "userRateLimitExceeded"
   324		)
   325	
   326		// Ensure a 1 minute try limit.
   327		ctx, cancel := context.WithTimeout(ctx, time.Minute)
   328		defer cancel()
   329		for {
   330			if err := dl.rate.Wait(ctx); err != nil {
   331				log.Printf("gphotos: rate limit failure: %v", err)
   332				return err
   333			}
   334			err := f()
   335			if err == nil {
   336				return nil
   337			}
   338			ge, ok := err.(*googleapi.Error)
   339			if !ok || ge.Code != http.StatusForbidden {
   340				return err
   341			}
   342			if ge.Message == "" {
   343				var ok bool
   344				for _, e := range ge.Errors {
   345					if ok = e.Reason == msgUserRateLimitExceededShort; ok {
   346						break
   347					}
   348				}
   349				// For some cases, googleapi does not parse the returned JSON
   350				// properly, so we have to fall back to check the original text.
   351				//
   352				// Usually this is a "User Rate Limit Exceeded", but that's
   353				// also a "Rate Limit Exceeded", and we're interested just in the
   354				// fact, not the cause.
   355				if !ok && !strings.Contains(ge.Body, msgRateLimitExceeded) {
   356					return err
   357				}
   358			}
   359			// Some arbitrary sleep.
   360			log.Printf("gphotos: sleeping for 5s after 403 error, presumably due to a rate limit")
   361			time.Sleep(5 * time.Second)
   362			log.Printf("gphotos: retrying after sleep...")
   363		}
   364	}
Website layout inspired by memcached.
Content by the authors.