     1	/*
     2	Copyright 2014 The Perkeep Authors
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     8	     http://www.apache.org/licenses/LICENSE-2.0
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    17	// Package picasa implements an importer for picasa.com accounts.
    18	package picasa // import "perkeep.org/pkg/importer/picasa"
// TODO: remove the camliPath attribute from the gallery permanode when a picture is deleted from the gallery.
    22	import (
    23		"context"
    24		"errors"
    25		"fmt"
    26		"io"
    27		"log"
    28		"net/http"
    29		"net/url"
    30		"os"
    31		"strconv"
    32		"strings"
    33		"time"
    35		"github.com/tgulacsi/picago"
    36		"go4.org/ctxutil"
    37		"go4.org/syncutil"
    38		"golang.org/x/oauth2"
    39		"golang.org/x/oauth2/google"
    40		"perkeep.org/internal/httputil"
    41		"perkeep.org/pkg/blob"
    42		"perkeep.org/pkg/importer"
    43		"perkeep.org/pkg/schema"
    44		"perkeep.org/pkg/schema/nodeattr"
    45		"perkeep.org/pkg/search"
    46	)
const (
	// scopeURL is the OAuth2 scope requested by this importer; it is the
	// only entry in the Scopes list of the oauth2.Config built in auth
	// and in Run.
	scopeURL = "https://picasaweb.google.com/data/"

	// runCompleteVersion is a cache-busting version number of the
	// importer code. It should be incremented whenever the
	// behavior of this importer is updated enough to warrant a
	// complete run.  Otherwise, if the importer runs to
	// completion, this version number is recorded on the account
	// permanode and subsequent importers can stop early.
	runCompleteVersion = "4"

	// attrPicasaId is used for both picasa photo IDs and gallery IDs.
	attrPicasaId = "picasaId"

	// acctAttrOAuthToken stores access + " " + refresh + " " + expiry
	// See encodeToken and decodeToken.
	acctAttrOAuthToken = "oauthToken"

	// AttrMediaURL is an attribute set on each picasa photo permanode. It
	// is the public URL for fetching the contents of the photo file.
	AttrMediaURL = "picasaMediaURL"
)
// Compile-time assertions that imp satisfies the importer interfaces
// it is expected to implement.
var (
	_ importer.Importer            = imp{}
	_ importer.ImporterSetupHTMLer = imp{}
)
    76	func init() {
    77		importer.Register("picasa", imp{})
    78	}
// imp is the implementation of the Picasa importer.
// It has no fields of its own besides the embedded importer.OAuth2.
type imp struct {
	importer.OAuth2
}
    85	func (imp) Properties() importer.Properties {
    86		return importer.Properties{
    87			Title:               "Google Photos (via Picasa API)",
    88			Description:         "import your photos from Google Photos. (limited to 10,000 photos per Google Photos API bug for now)",
    89			SupportsIncremental: true,
    90			NeedsAPIKey:         true,
    91		}
    92	}
// userInfo holds the subset of the Picasa user record that getUserInfo
// returns and that ServeCallback stores on the account node.
type userInfo struct {
	ID   string // numeric picasa user ID ("11583474931002155675")
	Name string // "Jane Smith"
}
    99	func (imp) getUserInfo(ctx context.Context) (*userInfo, error) {
   100		u, err := picago.GetUser(ctxutil.Client(ctx), "default")
   101		if err != nil {
   102			return nil, err
   103		}
   104		return &userInfo{ID: u.ID, Name: u.Name}, nil
   105	}
   107	func (imp) IsAccountReady(acctNode *importer.Object) (ok bool, err error) {
   108		if acctNode.Attr(importer.AcctAttrUserID) != "" && acctNode.Attr(acctAttrOAuthToken) != "" {
   109			return true, nil
   110		}
   111		return false, nil
   112	}
   114	func (im imp) SummarizeAccount(acct *importer.Object) string {
   115		ok, err := im.IsAccountReady(acct)
   116		if err != nil || !ok {
   117			return ""
   118		}
   119		if acct.Attr(importer.AcctAttrGivenName) == "" && acct.Attr(importer.AcctAttrFamilyName) == "" {
   120			return fmt.Sprintf("userid %s", acct.Attr(importer.AcctAttrUserID))
   121		}
   122		return fmt.Sprintf("userid %s (%s %s)",
   123			acct.Attr(importer.AcctAttrUserID),
   124			acct.Attr(importer.AcctAttrGivenName),
   125			acct.Attr(importer.AcctAttrFamilyName))
   126	}
   128	func (im imp) ServeSetup(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) error {
   129		oauthConfig, err := im.auth(ctx)
   130		if err == nil {
   131			// we will get back this with the token, so use it for preserving account info
   132			state := "acct:" + ctx.AccountNode.PermanodeRef().String()
   133			// AccessType needs to be "offline", as the user is not here all the time;
   134			// ApprovalPrompt needs to be "force" to be able to get a RefreshToken
   135			// everytime, even for Re-logins, too.
   136			//
   137			// Source: https://developers.google.com/youtube/v3/guides/authentication#server-side-apps
   138			http.Redirect(w, r, oauthConfig.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.ApprovalForce), http.StatusFound)
   139		}
   140		return err
   141	}
   143	// CallbackURLParameters returns the needed callback parameters - empty for Google Picasa.
   144	func (im imp) CallbackURLParameters(acctRef blob.Ref) url.Values {
   145		return url.Values{}
   146	}
   148	func (im imp) ServeCallback(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) {
   149		oauthConfig, err := im.auth(ctx)
   150		if err != nil {
   151			httputil.ServeError(w, r, fmt.Errorf("Error getting oauth config: %v", err))
   152			return
   153		}
   155		if r.Method != "GET" {
   156			http.Error(w, "Expected a GET", http.StatusBadRequest)
   157			return
   158		}
   159		code := r.FormValue("code")
   160		if code == "" {
   161			http.Error(w, "Expected a code", http.StatusBadRequest)
   162			return
   163		}
   165		token, err := oauthConfig.Exchange(ctx, code)
   166		if err != nil {
   167			log.Printf("importer/picasa: token exchange error: %v", err)
   168			httputil.ServeError(w, r, fmt.Errorf("token exchange error: %v", err))
   169			return
   170		}
   172		log.Printf("importer/picasa: got exchange token.")
   173		picagoCtx := context.WithValue(ctx, ctxutil.HTTPClient, oauthConfig.Client(ctx, token))
   175		userInfo, err := im.getUserInfo(picagoCtx)
   176		if err != nil {
   177			log.Printf("Couldn't get username: %v", err)
   178			httputil.ServeError(w, r, fmt.Errorf("can't get username: %v", err))
   179			return
   180		}
   182		if err := ctx.AccountNode.SetAttrs(
   183			importer.AcctAttrUserID, userInfo.ID,
   184			importer.AcctAttrName, userInfo.Name,
   185			acctAttrOAuthToken, encodeToken(token),
   186		); err != nil {
   187			httputil.ServeError(w, r, fmt.Errorf("Error setting attribute: %v", err))
   188			return
   189		}
   190		http.Redirect(w, r, ctx.AccountURL(), http.StatusFound)
   191	}
   193	// encodeToken encodes the oauth2.Token as
   194	// AccessToken + " " + RefreshToken + " " + Expiry.Unix()
   195	func encodeToken(token *oauth2.Token) string {
   196		if token == nil {
   197			return ""
   198		}
   199		var seconds int64
   200		if !token.Expiry.IsZero() {
   201			seconds = token.Expiry.Unix()
   202		}
   203		return token.AccessToken + " " + token.RefreshToken + " " + strconv.FormatInt(seconds, 10)
   204	}
   206	// decodeToken parses an access token, refresh token, and optional
   207	// expiry unix timestamp separated by spaces into an oauth2.Token.
   208	// It returns as much as it can.
   209	func decodeToken(encoded string) *oauth2.Token {
   210		t := new(oauth2.Token)
   211		f := strings.Fields(encoded)
   212		if len(f) > 0 {
   213			t.AccessToken = f[0]
   214		}
   215		if len(f) > 1 {
   216			t.RefreshToken = f[1]
   217		}
   218		if len(f) > 2 && f[2] != "0" {
   219			sec, err := strconv.ParseInt(f[2], 10, 64)
   220			if err == nil {
   221				t.Expiry = time.Unix(sec, 0)
   222			}
   223		}
   224		return t
   225	}
   227	func (im imp) auth(ctx *importer.SetupContext) (*oauth2.Config, error) {
   228		clientID, secret, err := ctx.Credentials()
   229		if err != nil {
   230			return nil, err
   231		}
   232		conf := &oauth2.Config{
   233			Endpoint:     google.Endpoint,
   234			RedirectURL:  ctx.CallbackURL(),
   235			ClientID:     clientID,
   236			ClientSecret: secret,
   237			Scopes:       []string{scopeURL},
   238		}
   239		return conf, nil
   240	}
   242	func (imp) AccountSetupHTML(host *importer.Host) string {
   243		// Picasa doesn't allow a path in the origin. Remove it.
   244		origin := host.ImporterBaseURL()
   245		if u, err := url.Parse(origin); err == nil {
   246			u.Path = ""
   247			origin = u.String()
   248		}
   250		callback := host.ImporterBaseURL() + "picasa/callback"
   251		gphotosURL := host.ImporterBaseURL() + "gphotos"
   252		return fmt.Sprintf(`
   253	<h1>Configuring Picasa</h1>
   254	<p>Please note that because of a bug in the Picasa API, you cannot retrieve more than 10000 photos. If you have more than 10000 photos, you should use the <a href='%s'>Google Photos importer</a> instead.</p>
   255	<p>Visit <a href='https://console.developers.google.com/'>https://console.developers.google.com/</a>
   256	and click <b>"Create Project"</b>.</p>
   257	<p>Then under "APIs & Auth" in the left sidebar, click on "Credentials", then click the button <b>"Create new Client ID"</b>.</p>
   258	<p>Use the following settings:</p>
   259	<ul>
   260	  <li>Web application</li>
   261	  <li>Authorized JavaScript origins: <b>%s</b></li>
   262	  <li>Authorized Redirect URI: <b>%s</b></li>
   263	</ul>
   264	<p>Click "Create Client ID".  Copy the "Client ID" and "Client Secret" into the boxes above.</p>
   265	`, gphotosURL, origin, callback)
   266	}
// A run is our state for a given run of the importer.
type run struct {
	*importer.RunContext

	// incremental is whether we've completed a run in the past. Run sets
	// it when a full import is not forced and the account's completed
	// version equals runCompleteVersion; importAlbum uses it to skip
	// albums whose attributes and modification date are unchanged.
	incremental bool

	// photoGate bounds how many photos are imported concurrently:
	// importAlbum calls Start before launching each photo goroutine and
	// Done when it finishes.
	photoGate *syncutil.Gate
}
// forceFullImport is the boolean value of the CAMLI_PICASA_FULL_IMPORT
// environment variable as parsed by strconv.ParseBool. The parse error is
// deliberately discarded, so an unset or malformed value leaves it false.
// When true, Run never marks a run as incremental.
var forceFullImport, _ = strconv.ParseBool(os.Getenv("CAMLI_PICASA_FULL_IMPORT"))
   277	func (imp) Run(rctx *importer.RunContext) error {
   278		clientID, secret, err := rctx.Credentials()
   279		if err != nil {
   280			return err
   281		}
   282		acctNode := rctx.AccountNode()
   284		ocfg := &oauth2.Config{
   285			Endpoint:     google.Endpoint,
   286			ClientID:     clientID,
   287			ClientSecret: secret,
   288			Scopes:       []string{scopeURL},
   289		}
   291		token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
   292		baseCtx := rctx.Context()
   293		ctx := context.WithValue(baseCtx, ctxutil.HTTPClient, ocfg.Client(baseCtx, token))
   295		root := rctx.RootNode()
   296		if root.Attr(nodeattr.Title) == "" {
   297			if err := root.SetAttr(
   298				nodeattr.Title,
   299				fmt.Sprintf("%s - Google Photos", acctNode.Attr(importer.AcctAttrName)),
   300			); err != nil {
   301				return err
   302			}
   303		}
   305		r := &run{
   306			RunContext:  rctx,
   307			incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
   308			photoGate:   syncutil.NewGate(3),
   309		}
   310		if err := r.importAlbums(ctx); err != nil {
   311			return err
   312		}
   314		if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
   315			return err
   316		}
   318		return nil
   319	}
   321	func (r *run) importAlbums(ctx context.Context) error {
   322		albums, err := picago.GetAlbums(ctxutil.Client(ctx), "default")
   323		if err != nil {
   324			return fmt.Errorf("importAlbums: error listing albums: %v", err)
   325		}
   326		albumsNode, err := r.getTopLevelNode("albums", "Albums")
   327		for _, album := range albums {
   328			select {
   329			case <-ctx.Done():
   330				return ctx.Err()
   331			default:
   332			}
   333			if err := r.importAlbum(ctx, albumsNode, album); err != nil {
   334				return fmt.Errorf("picasa importer: error importing album %s: %v", album, err)
   335			}
   336		}
   337		return nil
   338	}
   340	func (r *run) importAlbum(ctx context.Context, albumsNode *importer.Object, album picago.Album) (ret error) {
   341		if album.ID == "" {
   342			return errors.New("album has no ID")
   343		}
   344		albumNode, err := albumsNode.ChildPathObject(album.ID)
   345		if err != nil {
   346			return fmt.Errorf("importAlbum: error listing album: %v", err)
   347		}
   349		dateMod := schema.RFC3339FromTime(album.Updated)
   351		// Data reference: https://developers.google.com/picasa-web/docs/2.0/reference
   352		// TODO(tgulacsi): add more album info
   353		changes, err := albumNode.SetAttrs2(
   354			attrPicasaId, album.ID,
   355			nodeattr.Type, "picasaweb.google.com:album",
   356			nodeattr.Title, album.Title,
   357			nodeattr.DatePublished, schema.RFC3339FromTime(album.Published),
   358			nodeattr.LocationText, album.Location,
   359			nodeattr.Description, album.Description,
   360			nodeattr.URL, album.URL,
   361		)
   362		if err != nil {
   363			return fmt.Errorf("error setting album attributes: %v", err)
   364		}
   365		if !changes && r.incremental && albumNode.Attr(nodeattr.DateModified) == dateMod {
   366			return nil
   367		}
   368		defer func() {
   369			// Don't update DateModified on the album node until
   370			// we've successfully imported all the photos.
   371			if ret == nil {
   372				ret = albumNode.SetAttr(nodeattr.DateModified, dateMod)
   373			}
   374		}()
   376		log.Printf("Importing album %v: %v/%v (published %v, updated %v)", album.ID, album.Name, album.Title, album.Published, album.Updated)
   378		// TODO(bradfitz): GetPhotos does multiple HTTP requests to
   379		// return a slice of all photos. My "InstantUpload/Auto
   380		// Backup" album has 6678 photos (and growing) and this
   381		// currently takes like 40 seconds. Fix.
   382		photos, err := picago.GetPhotos(ctxutil.Client(ctx), "default", album.ID)
   383		if err != nil {
   384			return err
   385		}
   387		log.Printf("Importing %d photos from album %q (%s)", len(photos), albumNode.Attr(nodeattr.Title),
   388			albumNode.PermanodeRef())
   390		var grp syncutil.Group
   391		for i := range photos {
   392			select {
   393			case <-ctx.Done():
   394				return ctx.Err()
   395			default:
   396			}
   397			photo := photos[i]
   398			r.photoGate.Start()
   399			grp.Go(func() error {
   400				defer r.photoGate.Done()
   401				return r.updatePhotoInAlbum(ctx, albumNode, photo)
   402			})
   403		}
   404		return grp.Err()
   405	}
   407	func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) {
   408		if photo.ID == "" {
   409			return errors.New("photo has no ID")
   410		}
   412		getMediaBytes := func() (io.ReadCloser, error) {
   413			log.Printf("Importing media from %v", photo.URL)
   414			resp, err := ctxutil.Client(ctx).Get(photo.URL)
   415			if err != nil {
   416				return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err)
   417			}
   418			if resp.StatusCode != http.StatusOK {
   419				resp.Body.Close()
   420				return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode)
   421			}
   422			return resp.Body, nil
   423		}
   425		var fileRefStr string
   426		idFilename := photo.ID + "-" + photo.Filename
   427		photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) {
   428			h := blob.NewHash()
   429			rc, err := getMediaBytes()
   430			if err != nil {
   431				return nil, err
   432			}
   433			fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, io.TeeReader(rc, h))
   434			if err != nil {
   435				return nil, err
   436			}
   437			fileRefStr = fileRef.String()
   438			wholeRef := blob.RefFromHash(h)
   439			if pn, err := findExistingPermanode(r.Context(), r.Host.Searcher(), wholeRef); err == nil {
   440				return r.Host.ObjectFromRef(pn)
   441			}
   442			return r.Host.NewObject()
   443		})
   444		if err != nil {
   445			return err
   446		}
   448		if fileRefStr == "" {
   449			fileRefStr = photoNode.Attr(nodeattr.CamliContent)
   450			// Only re-download the source photo if its URL has changed.
   451			// Empirically this seems to work: cropping a photo in the
   452			// photos.google.com UI causes its URL to change. And it makes
   453			// sense, looking at the ugliness of the URLs with all their
   454			// encoded/signed state.
   455			if !mediaURLsEqual(photoNode.Attr(AttrMediaURL), photo.URL) {
   456				rc, err := getMediaBytes()
   457				if err != nil {
   458					return err
   459				}
   460				fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, rc)
   461				rc.Close()
   462				if err != nil {
   463					return err
   464				}
   465				fileRefStr = fileRef.String()
   466			}
   467		}
   469		title := strings.TrimSpace(photo.Description)
   470		if strings.Contains(title, "\n") {
   471			title = title[:strings.Index(title, "\n")]
   472		}
   473		if title == "" && schema.IsInterestingTitle(photo.Filename) {
   474			title = photo.Filename
   475		}
   477		// TODO(tgulacsi): add more attrs (comments ?)
   478		// for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork
   479		attrs := []string{
   480			nodeattr.CamliContent, fileRefStr,
   481			attrPicasaId, photo.ID,
   482			nodeattr.Title, title,
   483			nodeattr.Description, photo.Description,
   484			nodeattr.LocationText, photo.Location,
   485			nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated),
   486			nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published),
   487			nodeattr.URL, photo.PageURL,
   488		}
   489		if photo.Latitude != 0 || photo.Longitude != 0 {
   490			attrs = append(attrs,
   491				nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude),
   492				nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude),
   493			)
   494		}
   495		if err := photoNode.SetAttrs(attrs...); err != nil {
   496			return err
   497		}
   498		if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil {
   499			return err
   500		}
   501		if photo.Position > 0 {
   502			if err := albumNode.SetAttr(
   503				nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1),
   504				photoNode.PermanodeRef().String()); err != nil {
   505				return err
   506			}
   507		}
   509		// Do this last, after we're sure the "camliContent" attribute
   510		// has been saved successfully, because this is the one that
   511		// causes us to do it again in the future or not.
   512		if err := photoNode.SetAttrs(AttrMediaURL, photo.URL); err != nil {
   513			return err
   514		}
   515		return nil
   516	}
// testTopLevelNode, when non-nil, is returned by getTopLevelNode instead
// of a child of the root node. Judging by its name it is meant to be set
// by tests only.
var testTopLevelNode *importer.Object
   520	func (r *run) getTopLevelNode(path string, title string) (*importer.Object, error) {
   521		if testTopLevelNode != nil {
   522			return testTopLevelNode, nil
   523		}
   524		childObject, err := r.RootNode().ChildPathObject(path)
   525		if err != nil {
   526			return nil, err
   527		}
   529		if err := childObject.SetAttr(nodeattr.Title, title); err != nil {
   530			return nil, err
   531		}
   532		return childObject, nil
   533	}
// sensitiveAttrs lists the attributes that findExistingPermanode treats
// as conflicting: a candidate permanode with a non-empty value for any of
// them is not reused by this importer.
var sensitiveAttrs = []string{
	nodeattr.Type,
	attrPicasaId,
	nodeattr.Title,
	nodeattr.DateModified,
	nodeattr.DatePublished,
	nodeattr.Latitude,
	nodeattr.Longitude,
	nodeattr.Description,
}
   546	// findExistingPermanode finds an existing permanode that has a
   547	// camliContent pointing to a file with the provided wholeRef and
   548	// doesn't have any conflicting attributes that would prevent the
   549	// picasa importer from re-using that permanode for its own use.
   550	func findExistingPermanode(ctx context.Context, qs search.QueryDescriber, wholeRef blob.Ref) (pn blob.Ref, err error) {
   551		res, err := qs.Query(ctx, &search.SearchQuery{
   552			Constraint: &search.Constraint{
   553				Permanode: &search.PermanodeConstraint{
   554					Attr: "camliContent",
   555					ValueInSet: &search.Constraint{
   556						File: &search.FileConstraint{
   557							WholeRef: wholeRef,
   558						},
   559					},
   560				},
   561			},
   562			Describe: &search.DescribeRequest{
   563				Depth: 1,
   564			},
   565		})
   566		if err != nil {
   567			return
   568		}
   569		if res.Describe == nil {
   570			return pn, os.ErrNotExist
   571		}
   572	Res:
   573		for _, resBlob := range res.Blobs {
   574			br := resBlob.Blob
   575			desBlob, ok := res.Describe.Meta[br.String()]
   576			if !ok || desBlob.Permanode == nil {
   577				continue
   578			}
   579			attrs := desBlob.Permanode.Attr
   580			for _, attr := range sensitiveAttrs {
   581				if attrs.Get(attr) != "" {
   582					continue Res
   583				}
   584			}
   585			return br, nil
   586		}
   587		return pn, os.ErrNotExist
   588	}
   590	func mediaURLsEqual(a, b string) bool {
   591		const sub = ".googleusercontent.com/"
   592		ai := strings.Index(a, sub)
   593		bi := strings.Index(b, sub)
   594		if ai >= 0 && bi >= 0 {
   595			return a[ai:] == b[bi:]
   596		}
   597		return a == b
   598	}
