Home Download Docs Code Community
     1	/*
     2	Copyright 2014 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	// Package picasa implements an importer for picasa.com accounts.
    18	package picasa // import "perkeep.org/pkg/importer/picasa"
    19	
    20	// TODO: removing camliPath from gallery permanode when pic deleted from gallery
    21	
    22	import (
    23		"context"
    24		"errors"
    25		"fmt"
    26		"io"
    27		"log"
    28		"net/http"
    29		"net/url"
    30		"os"
    31		"strconv"
    32		"strings"
    33		"time"
    34	
    35		"github.com/tgulacsi/picago"
    36		"go4.org/ctxutil"
    37		"go4.org/syncutil"
    38		"golang.org/x/oauth2"
    39		"golang.org/x/oauth2/google"
    40		"perkeep.org/internal/httputil"
    41		"perkeep.org/pkg/blob"
    42		"perkeep.org/pkg/importer"
    43		"perkeep.org/pkg/schema"
    44		"perkeep.org/pkg/schema/nodeattr"
    45		"perkeep.org/pkg/search"
    46	)
    47	
const (
	// scopeURL is the OAuth2 scope this importer requests; it is the only
	// entry in the Scopes of the oauth2.Config values built by auth and Run.
	scopeURL = "https://picasaweb.google.com/data/"

	// runCompleteVersion is a cache-busting version number of the
	// importer code. It should be incremented whenever the
	// behavior of this importer is updated enough to warrant a
	// complete run.  Otherwise, if the importer runs to
	// completion, this version number is recorded on the account
	// permanode and subsequent runs can stop early.
	runCompleteVersion = "4"

	// attrPicasaId is used for both picasa photo IDs and gallery IDs.
	attrPicasaId = "picasaId"

	// acctAttrOAuthToken stores access + " " + refresh + " " + expiry
	// on the account node. See encodeToken and decodeToken.
	acctAttrOAuthToken = "oauthToken"

	// AttrMediaURL is an attribute set on each picasa photo permanode. It
	// is the public URL for fetching the contents of the photo file.
	// updatePhotoInAlbum compares the stored value with the photo's current
	// URL to decide whether the media has to be downloaded again.
	AttrMediaURL = "picasaMediaURL"
)
    70	
// Compile-time assertions that imp implements the importer interfaces
// this package relies on.
var (
	_ importer.Importer            = imp{}
	_ importer.ImporterSetupHTMLer = imp{}
)
    75	
// init registers the importer under the name "picasa".
func init() {
	importer.Register("picasa", imp{})
}
    79	
// imp is the implementation of the Picasa importer.
// It carries no state of its own; it only embeds importer.OAuth2.
type imp struct {
	importer.OAuth2
}
    84	
    85	func (imp) Properties() importer.Properties {
    86		return importer.Properties{
    87			Title:               "Google Photos (via Picasa API)",
    88			Description:         "import your photos from Google Photos. (limited to 10,000 photos per Google Photos API bug for now)",
    89			SupportsIncremental: true,
    90			NeedsAPIKey:         true,
    91		}
    92	}
    93	
// userInfo holds the subset of the Picasa user record that the importer
// stores on the account node (see getUserInfo and ServeCallback).
type userInfo struct {
	ID   string // numeric picasa user ID ("11583474931002155675")
	Name string // "Jane Smith"
}
    98	
    99	func (imp) getUserInfo(ctx context.Context) (*userInfo, error) {
   100		u, err := picago.GetUser(ctxutil.Client(ctx), "default")
   101		if err != nil {
   102			return nil, err
   103		}
   104		return &userInfo{ID: u.ID, Name: u.Name}, nil
   105	}
   106	
   107	func (imp) IsAccountReady(acctNode *importer.Object) (ok bool, err error) {
   108		if acctNode.Attr(importer.AcctAttrUserID) != "" && acctNode.Attr(acctAttrOAuthToken) != "" {
   109			return true, nil
   110		}
   111		return false, nil
   112	}
   113	
   114	func (im imp) SummarizeAccount(acct *importer.Object) string {
   115		ok, err := im.IsAccountReady(acct)
   116		if err != nil || !ok {
   117			return ""
   118		}
   119		if acct.Attr(importer.AcctAttrGivenName) == "" && acct.Attr(importer.AcctAttrFamilyName) == "" {
   120			return fmt.Sprintf("userid %s", acct.Attr(importer.AcctAttrUserID))
   121		}
   122		return fmt.Sprintf("userid %s (%s %s)",
   123			acct.Attr(importer.AcctAttrUserID),
   124			acct.Attr(importer.AcctAttrGivenName),
   125			acct.Attr(importer.AcctAttrFamilyName))
   126	}
   127	
   128	func (im imp) ServeSetup(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) error {
   129		oauthConfig, err := im.auth(ctx)
   130		if err == nil {
   131			// we will get back this with the token, so use it for preserving account info
   132			state := "acct:" + ctx.AccountNode.PermanodeRef().String()
   133			// AccessType needs to be "offline", as the user is not here all the time;
   134			// ApprovalPrompt needs to be "force" to be able to get a RefreshToken
   135			// everytime, even for Re-logins, too.
   136			//
   137			// Source: https://developers.google.com/youtube/v3/guides/authentication#server-side-apps
   138			http.Redirect(w, r, oauthConfig.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.ApprovalForce), http.StatusFound)
   139		}
   140		return err
   141	}
   142	
   143	// CallbackURLParameters returns the needed callback parameters - empty for Google Picasa.
   144	func (im imp) CallbackURLParameters(acctRef blob.Ref) url.Values {
   145		return url.Values{}
   146	}
   147	
   148	func (im imp) ServeCallback(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) {
   149		oauthConfig, err := im.auth(ctx)
   150		if err != nil {
   151			httputil.ServeError(w, r, fmt.Errorf("Error getting oauth config: %v", err))
   152			return
   153		}
   154	
   155		if r.Method != "GET" {
   156			http.Error(w, "Expected a GET", http.StatusBadRequest)
   157			return
   158		}
   159		code := r.FormValue("code")
   160		if code == "" {
   161			http.Error(w, "Expected a code", http.StatusBadRequest)
   162			return
   163		}
   164	
   165		token, err := oauthConfig.Exchange(ctx, code)
   166		if err != nil {
   167			log.Printf("importer/picasa: token exchange error: %v", err)
   168			httputil.ServeError(w, r, fmt.Errorf("token exchange error: %v", err))
   169			return
   170		}
   171	
   172		log.Printf("importer/picasa: got exchange token.")
   173		picagoCtx := context.WithValue(ctx, ctxutil.HTTPClient, oauthConfig.Client(ctx, token))
   174	
   175		userInfo, err := im.getUserInfo(picagoCtx)
   176		if err != nil {
   177			log.Printf("Couldn't get username: %v", err)
   178			httputil.ServeError(w, r, fmt.Errorf("can't get username: %v", err))
   179			return
   180		}
   181	
   182		if err := ctx.AccountNode.SetAttrs(
   183			importer.AcctAttrUserID, userInfo.ID,
   184			importer.AcctAttrName, userInfo.Name,
   185			acctAttrOAuthToken, encodeToken(token),
   186		); err != nil {
   187			httputil.ServeError(w, r, fmt.Errorf("Error setting attribute: %v", err))
   188			return
   189		}
   190		http.Redirect(w, r, ctx.AccountURL(), http.StatusFound)
   191	}
   192	
   193	// encodeToken encodes the oauth2.Token as
   194	// AccessToken + " " + RefreshToken + " " + Expiry.Unix()
   195	func encodeToken(token *oauth2.Token) string {
   196		if token == nil {
   197			return ""
   198		}
   199		var seconds int64
   200		if !token.Expiry.IsZero() {
   201			seconds = token.Expiry.Unix()
   202		}
   203		return token.AccessToken + " " + token.RefreshToken + " " + strconv.FormatInt(seconds, 10)
   204	}
   205	
   206	// decodeToken parses an access token, refresh token, and optional
   207	// expiry unix timestamp separated by spaces into an oauth2.Token.
   208	// It returns as much as it can.
   209	func decodeToken(encoded string) *oauth2.Token {
   210		t := new(oauth2.Token)
   211		f := strings.Fields(encoded)
   212		if len(f) > 0 {
   213			t.AccessToken = f[0]
   214		}
   215		if len(f) > 1 {
   216			t.RefreshToken = f[1]
   217		}
   218		if len(f) > 2 && f[2] != "0" {
   219			sec, err := strconv.ParseInt(f[2], 10, 64)
   220			if err == nil {
   221				t.Expiry = time.Unix(sec, 0)
   222			}
   223		}
   224		return t
   225	}
   226	
   227	func (im imp) auth(ctx *importer.SetupContext) (*oauth2.Config, error) {
   228		clientID, secret, err := ctx.Credentials()
   229		if err != nil {
   230			return nil, err
   231		}
   232		conf := &oauth2.Config{
   233			Endpoint:     google.Endpoint,
   234			RedirectURL:  ctx.CallbackURL(),
   235			ClientID:     clientID,
   236			ClientSecret: secret,
   237			Scopes:       []string{scopeURL},
   238		}
   239		return conf, nil
   240	}
   241	
   242	func (imp) AccountSetupHTML(host *importer.Host) string {
   243		// Picasa doesn't allow a path in the origin. Remove it.
   244		origin := host.ImporterBaseURL()
   245		if u, err := url.Parse(origin); err == nil {
   246			u.Path = ""
   247			origin = u.String()
   248		}
   249	
   250		callback := host.ImporterBaseURL() + "picasa/callback"
   251		gphotosURL := host.ImporterBaseURL() + "gphotos"
   252		return fmt.Sprintf(`
   253	<h1>Configuring Picasa</h1>
   254	<p>Please note that because of a bug in the Picasa API, you cannot retrieve more than 10000 photos. If you have more than 10000 photos, you should use the <a href='%s'>Google Photos importer</a> instead.</p>
   255	<p>Visit <a href='https://console.developers.google.com/'>https://console.developers.google.com/</a>
   256	and click <b>"Create Project"</b>.</p>
   257	<p>Then under "APIs & Auth" in the left sidebar, click on "Credentials", then click the button <b>"Create new Client ID"</b>.</p>
   258	<p>Use the following settings:</p>
   259	<ul>
   260	  <li>Web application</li>
   261	  <li>Authorized JavaScript origins: <b>%s</b></li>
   262	  <li>Authorized Redirect URI: <b>%s</b></li>
   263	</ul>
   264	<p>Click "Create Client ID".  Copy the "Client ID" and "Client Secret" into the boxes above.</p>
   265	`, gphotosURL, origin, callback)
   266	}
   267	
// A run is our state for a given run of the importer.
type run struct {
	*importer.RunContext
	// incremental is whether we've completed a run in the past. Run sets
	// it when the account's completed version equals runCompleteVersion
	// and a full import is not being forced; importAlbum then skips
	// albums whose attributes and modification date are unchanged.
	incremental bool
	// photoGate bounds how many photos are imported concurrently;
	// Run creates it with a limit of 3.
	photoGate *syncutil.Gate
}
   274	
// forceFullImport disables incremental runs when the CAMLI_PICASA_FULL_IMPORT
// environment variable parses as true. The parse error is discarded, so an
// unset or unparsable value leaves it false.
var forceFullImport, _ = strconv.ParseBool(os.Getenv("CAMLI_PICASA_FULL_IMPORT"))
   276	
   277	func (imp) Run(rctx *importer.RunContext) error {
   278		clientID, secret, err := rctx.Credentials()
   279		if err != nil {
   280			return err
   281		}
   282		acctNode := rctx.AccountNode()
   283	
   284		ocfg := &oauth2.Config{
   285			Endpoint:     google.Endpoint,
   286			ClientID:     clientID,
   287			ClientSecret: secret,
   288			Scopes:       []string{scopeURL},
   289		}
   290	
   291		token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
   292		baseCtx := rctx.Context()
   293		ctx := context.WithValue(baseCtx, ctxutil.HTTPClient, ocfg.Client(baseCtx, token))
   294	
   295		root := rctx.RootNode()
   296		if root.Attr(nodeattr.Title) == "" {
   297			if err := root.SetAttr(
   298				nodeattr.Title,
   299				fmt.Sprintf("%s - Google Photos", acctNode.Attr(importer.AcctAttrName)),
   300			); err != nil {
   301				return err
   302			}
   303		}
   304	
   305		r := &run{
   306			RunContext:  rctx,
   307			incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
   308			photoGate:   syncutil.NewGate(3),
   309		}
   310		if err := r.importAlbums(ctx); err != nil {
   311			return err
   312		}
   313	
   314		if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
   315			return err
   316		}
   317	
   318		return nil
   319	}
   320	
   321	func (r *run) importAlbums(ctx context.Context) error {
   322		albums, err := picago.GetAlbums(ctxutil.Client(ctx), "default")
   323		if err != nil {
   324			return fmt.Errorf("importAlbums: error listing albums: %v", err)
   325		}
   326		albumsNode, err := r.getTopLevelNode("albums", "Albums")
   327		for _, album := range albums {
   328			select {
   329			case <-ctx.Done():
   330				return ctx.Err()
   331			default:
   332			}
   333			if err := r.importAlbum(ctx, albumsNode, album); err != nil {
   334				return fmt.Errorf("picasa importer: error importing album %s: %v", album, err)
   335			}
   336		}
   337		return nil
   338	}
   339	
   340	func (r *run) importAlbum(ctx context.Context, albumsNode *importer.Object, album picago.Album) (ret error) {
   341		if album.ID == "" {
   342			return errors.New("album has no ID")
   343		}
   344		albumNode, err := albumsNode.ChildPathObject(album.ID)
   345		if err != nil {
   346			return fmt.Errorf("importAlbum: error listing album: %v", err)
   347		}
   348	
   349		dateMod := schema.RFC3339FromTime(album.Updated)
   350	
   351		// Data reference: https://developers.google.com/picasa-web/docs/2.0/reference
   352		// TODO(tgulacsi): add more album info
   353		changes, err := albumNode.SetAttrs2(
   354			attrPicasaId, album.ID,
   355			nodeattr.Type, "picasaweb.google.com:album",
   356			nodeattr.Title, album.Title,
   357			nodeattr.DatePublished, schema.RFC3339FromTime(album.Published),
   358			nodeattr.LocationText, album.Location,
   359			nodeattr.Description, album.Description,
   360			nodeattr.URL, album.URL,
   361		)
   362		if err != nil {
   363			return fmt.Errorf("error setting album attributes: %v", err)
   364		}
   365		if !changes && r.incremental && albumNode.Attr(nodeattr.DateModified) == dateMod {
   366			return nil
   367		}
   368		defer func() {
   369			// Don't update DateModified on the album node until
   370			// we've successfully imported all the photos.
   371			if ret == nil {
   372				ret = albumNode.SetAttr(nodeattr.DateModified, dateMod)
   373			}
   374		}()
   375	
   376		log.Printf("Importing album %v: %v/%v (published %v, updated %v)", album.ID, album.Name, album.Title, album.Published, album.Updated)
   377	
   378		// TODO(bradfitz): GetPhotos does multiple HTTP requests to
   379		// return a slice of all photos. My "InstantUpload/Auto
   380		// Backup" album has 6678 photos (and growing) and this
   381		// currently takes like 40 seconds. Fix.
   382		photos, err := picago.GetPhotos(ctxutil.Client(ctx), "default", album.ID)
   383		if err != nil {
   384			return err
   385		}
   386	
   387		log.Printf("Importing %d photos from album %q (%s)", len(photos), albumNode.Attr(nodeattr.Title),
   388			albumNode.PermanodeRef())
   389	
   390		var grp syncutil.Group
   391		for i := range photos {
   392			select {
   393			case <-ctx.Done():
   394				return ctx.Err()
   395			default:
   396			}
   397			photo := photos[i]
   398			r.photoGate.Start()
   399			grp.Go(func() error {
   400				defer r.photoGate.Done()
   401				return r.updatePhotoInAlbum(ctx, albumNode, photo)
   402			})
   403		}
   404		return grp.Err()
   405	}
   406	
   407	func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) {
   408		if photo.ID == "" {
   409			return errors.New("photo has no ID")
   410		}
   411	
   412		getMediaBytes := func() (io.ReadCloser, error) {
   413			log.Printf("Importing media from %v", photo.URL)
   414			resp, err := ctxutil.Client(ctx).Get(photo.URL)
   415			if err != nil {
   416				return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err)
   417			}
   418			if resp.StatusCode != http.StatusOK {
   419				resp.Body.Close()
   420				return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode)
   421			}
   422			return resp.Body, nil
   423		}
   424	
   425		var fileRefStr string
   426		idFilename := photo.ID + "-" + photo.Filename
   427		photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) {
   428			h := blob.NewHash()
   429			rc, err := getMediaBytes()
   430			if err != nil {
   431				return nil, err
   432			}
   433			fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, io.TeeReader(rc, h))
   434			if err != nil {
   435				return nil, err
   436			}
   437			fileRefStr = fileRef.String()
   438			wholeRef := blob.RefFromHash(h)
   439			if pn, err := findExistingPermanode(r.Context(), r.Host.Searcher(), wholeRef); err == nil {
   440				return r.Host.ObjectFromRef(pn)
   441			}
   442			return r.Host.NewObject()
   443		})
   444		if err != nil {
   445			return err
   446		}
   447	
   448		if fileRefStr == "" {
   449			fileRefStr = photoNode.Attr(nodeattr.CamliContent)
   450			// Only re-download the source photo if its URL has changed.
   451			// Empirically this seems to work: cropping a photo in the
   452			// photos.google.com UI causes its URL to change. And it makes
   453			// sense, looking at the ugliness of the URLs with all their
   454			// encoded/signed state.
   455			if !mediaURLsEqual(photoNode.Attr(AttrMediaURL), photo.URL) {
   456				rc, err := getMediaBytes()
   457				if err != nil {
   458					return err
   459				}
   460				fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, rc)
   461				rc.Close()
   462				if err != nil {
   463					return err
   464				}
   465				fileRefStr = fileRef.String()
   466			}
   467		}
   468	
   469		title := strings.TrimSpace(photo.Description)
   470		if strings.Contains(title, "\n") {
   471			title = title[:strings.Index(title, "\n")]
   472		}
   473		if title == "" && schema.IsInterestingTitle(photo.Filename) {
   474			title = photo.Filename
   475		}
   476	
   477		// TODO(tgulacsi): add more attrs (comments ?)
   478		// for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork
   479		attrs := []string{
   480			nodeattr.CamliContent, fileRefStr,
   481			attrPicasaId, photo.ID,
   482			nodeattr.Title, title,
   483			nodeattr.Description, photo.Description,
   484			nodeattr.LocationText, photo.Location,
   485			nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated),
   486			nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published),
   487			nodeattr.URL, photo.PageURL,
   488		}
   489		if photo.Latitude != 0 || photo.Longitude != 0 {
   490			attrs = append(attrs,
   491				nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude),
   492				nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude),
   493			)
   494		}
   495		if err := photoNode.SetAttrs(attrs...); err != nil {
   496			return err
   497		}
   498		if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil {
   499			return err
   500		}
   501		if photo.Position > 0 {
   502			if err := albumNode.SetAttr(
   503				nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1),
   504				photoNode.PermanodeRef().String()); err != nil {
   505				return err
   506			}
   507		}
   508	
   509		// Do this last, after we're sure the "camliContent" attribute
   510		// has been saved successfully, because this is the one that
   511		// causes us to do it again in the future or not.
   512		if err := photoNode.SetAttrs(AttrMediaURL, photo.URL); err != nil {
   513			return err
   514		}
   515		return nil
   516	}
   517	
// testTopLevelNode, when non-nil, is returned by getTopLevelNode in place
// of a child of the root node. Presumably it is a hook set by tests;
// nothing visible in this file assigns it.
var testTopLevelNode *importer.Object
   519	
   520	func (r *run) getTopLevelNode(path string, title string) (*importer.Object, error) {
   521		if testTopLevelNode != nil {
   522			return testTopLevelNode, nil
   523		}
   524		childObject, err := r.RootNode().ChildPathObject(path)
   525		if err != nil {
   526			return nil, err
   527		}
   528	
   529		if err := childObject.SetAttr(nodeattr.Title, title); err != nil {
   530			return nil, err
   531		}
   532		return childObject, nil
   533	}
   534	
// sensitiveAttrs lists the permanode attributes that findExistingPermanode
// treats as conflicting: a candidate permanode with a non-empty value for
// any of them is not reused.
var sensitiveAttrs = []string{
	nodeattr.Type,
	attrPicasaId,
	nodeattr.Title,
	nodeattr.DateModified,
	nodeattr.DatePublished,
	nodeattr.Latitude,
	nodeattr.Longitude,
	nodeattr.Description,
}
   545	
   546	// findExistingPermanode finds an existing permanode that has a
   547	// camliContent pointing to a file with the provided wholeRef and
   548	// doesn't have any conflicting attributes that would prevent the
   549	// picasa importer from re-using that permanode for its own use.
   550	func findExistingPermanode(ctx context.Context, qs search.QueryDescriber, wholeRef blob.Ref) (pn blob.Ref, err error) {
   551		res, err := qs.Query(ctx, &search.SearchQuery{
   552			Constraint: &search.Constraint{
   553				Permanode: &search.PermanodeConstraint{
   554					Attr: "camliContent",
   555					ValueInSet: &search.Constraint{
   556						File: &search.FileConstraint{
   557							WholeRef: wholeRef,
   558						},
   559					},
   560				},
   561			},
   562			Describe: &search.DescribeRequest{
   563				Depth: 1,
   564			},
   565		})
   566		if err != nil {
   567			return
   568		}
   569		if res.Describe == nil {
   570			return pn, os.ErrNotExist
   571		}
   572	Res:
   573		for _, resBlob := range res.Blobs {
   574			br := resBlob.Blob
   575			desBlob, ok := res.Describe.Meta[br.String()]
   576			if !ok || desBlob.Permanode == nil {
   577				continue
   578			}
   579			attrs := desBlob.Permanode.Attr
   580			for _, attr := range sensitiveAttrs {
   581				if attrs.Get(attr) != "" {
   582					continue Res
   583				}
   584			}
   585			return br, nil
   586		}
   587		return pn, os.ErrNotExist
   588	}
   589	
   590	func mediaURLsEqual(a, b string) bool {
   591		const sub = ".googleusercontent.com/"
   592		ai := strings.Index(a, sub)
   593		bi := strings.Index(b, sub)
   594		if ai >= 0 && bi >= 0 {
   595			return a[ai:] == b[bi:]
   596		}
   597		return a == b
   598	}
Website layout inspired by memcached.
Content by the authors.