Home Download Docs Code Community
     1	/*
     2	Copyright 2013 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	// Package flickr implements an importer for flickr.com accounts.
    18	package flickr // import "perkeep.org/pkg/importer/flickr"
    19	
    20	import (
    21		"bytes"
    22		"encoding/json"
    23		"errors"
    24		"fmt"
    25		"log"
    26		"net/http"
    27		"net/url"
    28		"strconv"
    29		"time"
    30	
    31		"perkeep.org/internal/httputil"
    32		"perkeep.org/pkg/importer"
    33		"perkeep.org/pkg/schema"
    34		"perkeep.org/pkg/schema/nodeattr"
    35	
    36		"github.com/garyburd/go-oauth/oauth"
    37	
    38		"go4.org/ctxutil"
    39	)
    40	
    41	const (
    42		apiURL                        = "https://api.flickr.com/services/rest/"
    43		temporaryCredentialRequestURL = "https://www.flickr.com/services/oauth/request_token"
    44		resourceOwnerAuthorizationURL = "https://www.flickr.com/services/oauth/authorize"
    45		tokenRequestURL               = "https://www.flickr.com/services/oauth/access_token"
    46	
    47		photosetsAPIPath = "flickr.photosets.getList"
    48		photosetAPIPath  = "flickr.photosets.getPhotos"
    49		photosAPIPath    = "flickr.people.getPhotos"
    50	
    51		attrFlickrId = "flickrId"
    52	)
    53	
    54	var oAuthURIs = importer.OAuthURIs{
    55		TemporaryCredentialRequestURI: temporaryCredentialRequestURL,
    56		ResourceOwnerAuthorizationURI: resourceOwnerAuthorizationURL,
    57		TokenRequestURI:               tokenRequestURL,
    58	}
    59	
    60	func init() {
    61		importer.Register("flickr", imp{})
    62	}
    63	
    64	var _ importer.ImporterSetupHTMLer = imp{}
    65	
    66	type imp struct {
    67		importer.OAuth1 // for CallbackRequestAccount and CallbackURLParameters
    68	}
    69	
    70	func (imp) Properties() importer.Properties {
    71		return importer.Properties{
    72			Title:               "Flickr",
    73			Description:         "import your photos from Flickr.com",
    74			SupportsIncremental: false,
    75			NeedsAPIKey:         true,
    76		}
    77	}
    78	
    79	func (imp) IsAccountReady(acctNode *importer.Object) (ok bool, err error) {
    80		return acctNode.Attr(importer.AcctAttrUserName) != "" && acctNode.Attr(importer.AcctAttrAccessToken) != "", nil
    81	}
    82	
    83	func (im imp) SummarizeAccount(acct *importer.Object) string {
    84		ok, err := im.IsAccountReady(acct)
    85		if err != nil || !ok {
    86			return ""
    87		}
    88		return acct.Attr(importer.AcctAttrUserName)
    89	}
    90	
    91	func (imp) AccountSetupHTML(host *importer.Host) string {
    92		base := host.ImporterBaseURL() + "flickr"
    93		return fmt.Sprintf(`
    94	<h1>Configuring Flickr</h1>
    95	<p>Visit <a href='http://www.flickr.com/services/apps/create/noncommercial/'>http://www.flickr.com/services/apps/create/noncommercial/</a>, fill out whatever's needed, and click on SUBMIT.</p>
    96	<p>From your newly created app's main page, go to "Edit the authentication flow", use the following settings:</p>
    97	<ul>
    98	  <li>App Type: Web Application</li>
    99	  <li>Callback URL: <b>%s</b></li>
   100	</ul>
   101	<p> and SAVE CHANGES </p>
   102	<p>Then go to "View the API Key for this app", and copy the "Key" and "Secret" into the "Client ID" and "Client Secret" boxes above.</p>
   103	`, base+"/callback")
   104	}
   105	
   106	// A run is our state for a given run of the importer.
   107	type run struct {
   108		userID string
   109		*importer.RunContext
   110		oauthClient *oauth.Client      // No need to guard, used read-only.
   111		accessCreds *oauth.Credentials // No need to guard, used read-only.
   112	
   113		// primaryPhoto maps an album id to the id of its primary photo.
   114		// If some concurrency is added to some of the importing routines,
   115		// it will need some guarding.
   116		primaryPhoto map[string]string
   117	}
   118	
   119	func (imp) Run(ctx *importer.RunContext) error {
   120		clientID, secret, err := ctx.Credentials()
   121		if err != nil {
   122			return fmt.Errorf("no API credentials: %v", err)
   123		}
   124		accountNode := ctx.AccountNode()
   125		accessToken := accountNode.Attr(importer.AcctAttrAccessToken)
   126		accessSecret := accountNode.Attr(importer.AcctAttrAccessTokenSecret)
   127		if accessToken == "" || accessSecret == "" {
   128			return errors.New("access credentials not found")
   129		}
   130		userID := ctx.AccountNode().Attr(importer.AcctAttrUserID)
   131		if userID == "" {
   132			return errors.New("userID hasn't been set by account setup")
   133		}
   134		r := &run{
   135			userID:     userID,
   136			RunContext: ctx,
   137			oauthClient: &oauth.Client{
   138				TemporaryCredentialRequestURI: temporaryCredentialRequestURL,
   139				ResourceOwnerAuthorizationURI: resourceOwnerAuthorizationURL,
   140				TokenRequestURI:               tokenRequestURL,
   141				Credentials: oauth.Credentials{
   142					Token:  clientID,
   143					Secret: secret,
   144				},
   145			},
   146			accessCreds: &oauth.Credentials{
   147				Token:  accessToken,
   148				Secret: accessSecret,
   149			},
   150			primaryPhoto: make(map[string]string),
   151		}
   152	
   153		if err := r.importPhotosets(); err != nil {
   154			return err
   155		}
   156		if err := r.importPhotos(); err != nil {
   157			return err
   158		}
   159		return nil
   160	}
   161	
   162	type photosetList struct {
   163		Page     jsonInt
   164		Pages    jsonInt
   165		PerPage  jsonInt
   166		Photoset []*photosetInfo
   167	}
   168	
   169	type photosetInfo struct {
   170		Id             string `json:"id"`
   171		PrimaryPhotoId string `json:"primary"`
   172		Title          contentString
   173		Description    contentString
   174	}
   175	
   176	type photosetItems struct {
   177		Id    string `json:"id"`
   178		Page  jsonInt
   179		Pages jsonInt
   180		Photo []struct {
   181			Id             string
   182			OriginalFormat string
   183		}
   184	}
   185	
   186	func (r *run) importPhotosets() error {
   187		resp := struct {
   188			Photosets photosetList
   189		}{}
   190		if err := r.flickrAPIRequest(&resp,
   191			photosetsAPIPath, "user_id", r.userID); err != nil {
   192			return err
   193		}
   194	
   195		setsNode, err := r.getTopLevelNode("sets", "Sets")
   196		if err != nil {
   197			return err
   198		}
   199		log.Printf("Importing %d sets", len(resp.Photosets.Photoset))
   200	
   201		for _, item := range resp.Photosets.Photoset {
   202			select {
   203			case <-r.Context().Done():
   204				log.Printf("Flickr importer: interrupted")
   205				return r.Context().Err()
   206			default:
   207			}
   208			for page := 1; page >= 1; {
   209				page, err = r.importPhotoset(setsNode, item, page)
   210				if err != nil {
   211					log.Printf("Flickr importer: error importing photoset %s: %s", item.Id, err)
   212					continue
   213				}
   214			}
   215		}
   216		return nil
   217	}
   218	
   219	func (r *run) importPhotoset(parent *importer.Object, photoset *photosetInfo, page int) (int, error) {
   220		photosetNode, err := parent.ChildPathObject(photoset.Id)
   221		if err != nil {
   222			return 0, err
   223		}
   224	
   225		if err := photosetNode.SetAttrs(
   226			attrFlickrId, photoset.Id,
   227			nodeattr.Title, photoset.Title.Content,
   228			nodeattr.Description, photoset.Description.Content); err != nil {
   229			return 0, err
   230		}
   231		// keep track of primary photo so we can set the fileRef of the photo as CamliContentImage
   232		// on photosetNode when we eventually know that fileRef.
   233		r.primaryPhoto[photoset.Id] = photoset.PrimaryPhotoId
   234	
   235		resp := struct {
   236			Photoset photosetItems
   237		}{}
   238		if err := r.flickrAPIRequest(&resp, photosetAPIPath, "user_id", r.userID,
   239			"page", fmt.Sprintf("%d", page), "photoset_id", photoset.Id, "extras", "original_format"); err != nil {
   240			return 0, err
   241		}
   242	
   243		log.Printf("Importing page %d from photoset %s", page, photoset.Id)
   244	
   245		photosNode, err := r.getPhotosNode()
   246		if err != nil {
   247			return 0, err
   248		}
   249	
   250		for _, item := range resp.Photoset.Photo {
   251			filename := fmt.Sprintf("%s.%s", item.Id, item.OriginalFormat)
   252			photoNode, err := photosNode.ChildPathObject(filename)
   253			if err != nil {
   254				log.Printf("Flickr importer: error finding photo node %s for addition to photoset %s: %s",
   255					item.Id, photoset.Id, err)
   256				continue
   257			}
   258			if err := photosetNode.SetAttr("camliPath:"+filename, photoNode.PermanodeRef().String()); err != nil {
   259				log.Printf("Flickr importer: error adding photo %s to photoset %s: %s",
   260					item.Id, photoset.Id, err)
   261			}
   262		}
   263	
   264		if resp.Photoset.Page < resp.Photoset.Pages {
   265			return page + 1, nil
   266		}
   267		return 0, nil
   268	}
   269	
   270	type photosSearch struct {
   271		Photos struct {
   272			Page    jsonInt
   273			Pages   jsonInt
   274			Perpage jsonInt
   275			Total   jsonInt
   276			Photo   []*photosSearchItem
   277		}
   278	
   279		Stat string
   280	}
   281	
   282	type photosSearchItem struct {
   283		Id             string `json:"id"`
   284		Title          string
   285		IsPublic       jsonInt
   286		IsFriend       jsonInt
   287		IsFamily       jsonInt
   288		Description    contentString
   289		DateUpload     string // Unix timestamp, in GMT.
   290		DateTaken      string // formatted as "2006-01-02 15:04:05", so no timezone info.
   291		OriginalFormat string
   292		LastUpdate     string // Unix timestamp.
   293		Latitude       jsonFloat
   294		Longitude      jsonFloat
   295		Tags           string
   296		MachineTags    string `json:"machine_tags"`
   297		Views          string
   298		Media          string
   299		URL            string `json:"url_o"`
   300	}
   301	
// contentString decodes a JSON object of the form {"_content": "..."},
// keeping the text in Content.
type contentString struct {
	Content string `json:"_content"`
}
   305	
   306	// jsonInt is for unmarshaling quoted and unquoted integers ("0" and 0), too.
   307	type jsonInt int
   308	
   309	func (jf jsonInt) MarshalJSON() ([]byte, error) {
   310		return json.Marshal(int(jf))
   311	}
   312	func (jf *jsonInt) UnmarshalJSON(p []byte) error {
   313		return json.Unmarshal(bytes.Trim(p, `"`), (*int)(jf))
   314	}
   315	
   316	// jsonFloat is for unmarshaling quoted and unquoted numbers ("0" and 0), too.
   317	type jsonFloat float32
   318	
   319	func (jf jsonFloat) MarshalJSON() ([]byte, error) {
   320		return json.Marshal(float32(jf))
   321	}
   322	func (jf *jsonFloat) UnmarshalJSON(p []byte) error {
   323		if len(p) == 1 && p[0] == '0' { // shortcut
   324			*jf = 0
   325			return nil
   326		}
   327		return json.Unmarshal(bytes.Trim(p, `"`), (*float32)(jf))
   328	}
   329	
   330	func (r *run) importPhotos() error {
   331		for page := 1; page >= 1; {
   332			var err error
   333			page, err = r.importPhotosPage(page)
   334			if err != nil {
   335				return err
   336			}
   337		}
   338		return nil
   339	}
   340	
   341	func (r *run) importPhotosPage(page int) (int, error) {
   342		resp := photosSearch{}
   343		if err := r.flickrAPIRequest(&resp, photosAPIPath, "user_id", r.userID, "page", fmt.Sprintf("%d", page),
   344			"extras", "description,date_upload,date_taken,original_format,last_update,geo,tags,machine_tags,views,media,url_o"); err != nil {
   345			return 0, err
   346		}
   347	
   348		photosNode, err := r.getPhotosNode()
   349		if err != nil {
   350			return 0, err
   351		}
   352		log.Printf("Importing %d photos on page %d of %d", len(resp.Photos.Photo), page, resp.Photos.Pages)
   353	
   354		for _, item := range resp.Photos.Photo {
   355			if err := r.importPhoto(photosNode, item); err != nil {
   356				log.Printf("Flickr importer: error importing %s: %s", item.Id, err)
   357				continue
   358			}
   359		}
   360	
   361		if resp.Photos.Pages > resp.Photos.Page {
   362			return page + 1, nil
   363		}
   364		return 0, nil
   365	}
   366	
   367	// TODO(aa):
   368	// * Parallelize: http://golang.org/doc/effective_go.html#concurrency
   369	// * Do more than one "page" worth of results
   370	// * Report progress and errors back through host interface
   371	// * All the rest of the metadata (see photoMeta)
   372	// * Conflicts: For all metadata changes, prefer any non-imported claims
   373	// * Test!
   374	func (r *run) importPhoto(parent *importer.Object, photo *photosSearchItem) error {
   375		filename := fmt.Sprintf("%s.%s", photo.Id, photo.OriginalFormat)
   376		photoNode, err := parent.ChildPathObject(filename)
   377		if err != nil {
   378			return err
   379		}
   380	
   381		// https://www.flickr.com/services/api/misc.dates.html
   382		dateTaken, err := time.ParseInLocation("2006-01-02 15:04:05", photo.DateTaken, schema.UnknownLocation)
   383		if err != nil {
   384			// default to the published date otherwise
   385			log.Printf("Flickr importer: problem with date taken of photo %v, defaulting to published date instead.", photo.Id)
   386			seconds, err := strconv.ParseInt(photo.DateUpload, 10, 64)
   387			if err != nil {
   388				return fmt.Errorf("could not parse date upload time %q for image %v: %v", photo.DateUpload, photo.Id, err)
   389			}
   390			dateTaken = time.Unix(seconds, 0)
   391		}
   392	
   393		attrs := []string{
   394			attrFlickrId, photo.Id,
   395			nodeattr.DateCreated, schema.RFC3339FromTime(dateTaken),
   396			nodeattr.Description, photo.Description.Content,
   397		}
   398		if schema.IsInterestingTitle(photo.Title) {
   399			attrs = append(attrs, nodeattr.Title, photo.Title)
   400		}
   401		// Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run.
   402		// And this way if we add more things to import, they will get picked up.
   403		if err := photoNode.SetAttrs(attrs...); err != nil {
   404			return err
   405		}
   406	
   407		// Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed.
   408		// lastupdate is a Unix timestamp according to https://www.flickr.com/services/api/flickr.photos.getInfo.html
   409		seconds, err := strconv.ParseInt(photo.LastUpdate, 10, 64)
   410		if err != nil {
   411			return fmt.Errorf("could not parse lastupdate time for image %v: %v", photo.Id, err)
   412		}
   413		lastUpdate := time.Unix(seconds, 0)
   414		if lastUpdateString := photoNode.Attr(nodeattr.DateModified); lastUpdateString != "" {
   415			oldLastUpdate, err := time.Parse(time.RFC3339, lastUpdateString)
   416			if err != nil {
   417				return fmt.Errorf("could not parse last stored update time for image %v: %v", photo.Id, err)
   418			}
   419			if lastUpdate.Equal(oldLastUpdate) {
   420				if err := r.updatePrimaryPhoto(photoNode); err != nil {
   421					return err
   422				}
   423				return nil
   424			}
   425		}
   426		form := url.Values{}
   427		form.Set("user_id", r.userID)
   428		res, err := r.fetch(photo.URL, form)
   429		if err != nil {
   430			log.Printf("Flickr importer: Could not fetch %s: %s", photo.URL, err)
   431			return err
   432		}
   433		defer res.Body.Close()
   434	
   435		fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), filename, res.Body)
   436		if err != nil {
   437			return err
   438		}
   439		if err := photoNode.SetAttr(nodeattr.CamliContent, fileRef.String()); err != nil {
   440			return err
   441		}
   442		if err := r.updatePrimaryPhoto(photoNode); err != nil {
   443			return err
   444		}
   445		// Write lastupdate last, so that if any of the preceding fails, we will try again next time.
   446		if err := photoNode.SetAttr(nodeattr.DateModified, schema.RFC3339FromTime(lastUpdate)); err != nil {
   447			return err
   448		}
   449	
   450		return nil
   451	}
   452	
   453	// updatePrimaryPhoto uses the camliContent of photoNode to set the
   454	// camliContentImage of any album for which photoNode is the primary photo.
   455	func (r *run) updatePrimaryPhoto(photoNode *importer.Object) error {
   456		photoId := photoNode.Attr(attrFlickrId)
   457		for album, photo := range r.primaryPhoto {
   458			if photoId != photo {
   459				continue
   460			}
   461			setsNode, err := r.getTopLevelNode("sets", "Sets")
   462			if err != nil {
   463				return fmt.Errorf("could not set %v as primary photo of %v, no root sets: %v", photoId, album, err)
   464			}
   465			setNode, err := setsNode.ChildPathObject(album)
   466			if err != nil {
   467				return fmt.Errorf("could not set %v as primary photo of %v, no album: %v", photoId, album, err)
   468			}
   469			fileRef := photoNode.Attr(nodeattr.CamliContent)
   470			if fileRef == "" {
   471				return fmt.Errorf("could not set %v as primary photo of %v: fileRef of photo is unknown", photoId, album)
   472			}
   473			if err := setNode.SetAttr(nodeattr.CamliContentImage, fileRef); err != nil {
   474				return fmt.Errorf("could not set %v as primary photo of %v: %v", photoId, album, err)
   475			}
   476			delete(r.primaryPhoto, album)
   477		}
   478		return nil
   479	}
   480	
   481	func (r *run) getPhotosNode() (*importer.Object, error) {
   482		return r.getTopLevelNode("photos", "Photos")
   483	}
   484	
   485	func (r *run) getTopLevelNode(path string, title string) (*importer.Object, error) {
   486		photos, err := r.RootNode().ChildPathObject(path)
   487		if err != nil {
   488			return nil, err
   489		}
   490	
   491		if err := photos.SetAttr(nodeattr.Title, title); err != nil {
   492			return nil, err
   493		}
   494		return photos, nil
   495	}
   496	
   497	func (r *run) flickrAPIRequest(result interface{}, method string, keyval ...string) error {
   498		keyval = append([]string{"method", method, "format", "json", "nojsoncallback", "1"}, keyval...)
   499		return importer.OAuthContext{
   500			Ctx:    r.Context(),
   501			Client: r.oauthClient,
   502			Creds:  r.accessCreds,
   503		}.PopulateJSONFromURL(result, http.MethodGet, apiURL, keyval...)
   504	}
   505	
   506	func (r *run) fetch(url string, form url.Values) (*http.Response, error) {
   507		return importer.OAuthContext{
   508			Ctx:    r.Context(),
   509			Client: r.oauthClient,
   510			Creds:  r.accessCreds,
   511		}.Get(url, form)
   512	}
   513	
   514	// TODO(mpl): same in twitter. refactor. Except for the additional perms in AuthorizationURL call.
   515	func (imp) ServeSetup(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) error {
   516		oauthClient, err := ctx.NewOAuthClient(oAuthURIs)
   517		if err != nil {
   518			err = fmt.Errorf("error getting OAuth client: %v", err)
   519			httputil.ServeError(w, r, err)
   520			return err
   521		}
   522		tempCred, err := oauthClient.RequestTemporaryCredentials(ctxutil.Client(ctx), ctx.CallbackURL(), nil)
   523		if err != nil {
   524			err = fmt.Errorf("Error getting temp cred: %v", err)
   525			httputil.ServeError(w, r, err)
   526			return err
   527		}
   528		if err := ctx.AccountNode.SetAttrs(
   529			importer.AcctAttrTempToken, tempCred.Token,
   530			importer.AcctAttrTempSecret, tempCred.Secret,
   531		); err != nil {
   532			err = fmt.Errorf("Error saving temp creds: %v", err)
   533			httputil.ServeError(w, r, err)
   534			return err
   535		}
   536	
   537		authURL := oauthClient.AuthorizationURL(tempCred, url.Values{"perms": {"read"}})
   538		http.Redirect(w, r, authURL, http.StatusFound)
   539		return nil
   540	}
   541	
   542	func (imp) ServeCallback(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) {
   543		tempToken := ctx.AccountNode.Attr(importer.AcctAttrTempToken)
   544		tempSecret := ctx.AccountNode.Attr(importer.AcctAttrTempSecret)
   545		if tempToken == "" || tempSecret == "" {
   546			log.Printf("flicker: no temp creds in callback")
   547			httputil.BadRequestError(w, "no temp creds in callback")
   548			return
   549		}
   550		if tempToken != r.FormValue("oauth_token") {
   551			log.Printf("unexpected oauth_token: got %v, want %v", r.FormValue("oauth_token"), tempToken)
   552			httputil.BadRequestError(w, "unexpected oauth_token")
   553			return
   554		}
   555		oauthClient, err := ctx.NewOAuthClient(oAuthURIs)
   556		if err != nil {
   557			err = fmt.Errorf("error getting OAuth client: %v", err)
   558			httputil.ServeError(w, r, err)
   559			return
   560		}
   561		tokenCred, vals, err := oauthClient.RequestToken(
   562			ctxutil.Client(ctx),
   563			&oauth.Credentials{
   564				Token:  tempToken,
   565				Secret: tempSecret,
   566			},
   567			r.FormValue("oauth_verifier"),
   568		)
   569		if err != nil {
   570			httputil.ServeError(w, r, fmt.Errorf("Error getting request token: %v ", err))
   571			return
   572		}
   573		userID := vals.Get("user_nsid")
   574		if userID == "" {
   575			httputil.ServeError(w, r, fmt.Errorf("Couldn't get user id: %v", err))
   576			return
   577		}
   578		username := vals.Get("username")
   579		if username == "" {
   580			httputil.ServeError(w, r, fmt.Errorf("Couldn't get user name: %v", err))
   581			return
   582		}
   583	
   584		// TODO(mpl): get a few more bits of info (first name, last name etc) like I did for twitter, if possible.
   585		if err := ctx.AccountNode.SetAttrs(
   586			importer.AcctAttrAccessToken, tokenCred.Token,
   587			importer.AcctAttrAccessTokenSecret, tokenCred.Secret,
   588			importer.AcctAttrUserID, userID,
   589			importer.AcctAttrUserName, username,
   590		); err != nil {
   591			httputil.ServeError(w, r, fmt.Errorf("Error setting basic account attributes: %v", err))
   592			return
   593		}
   594		http.Redirect(w, r, ctx.AccountURL(), http.StatusFound)
   595	}
Website layout inspired by memcached.
Content by the authors.