Home Download Docs Code Community
     1	/*
     2	Copyright 2018 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	// Package mastodon provides an importer for servers using the Mastodon API.
    18	package mastodon // import "perkeep.org/pkg/importer/mastodon"
    19	
    20	import (
    21		"context"
    22		"errors"
    23		"fmt"
    24		"html/template"
    25		"log"
    26		"net/http"
    27		"net/url"
    28		"os"
    29		"path"
    30		"strconv"
    31		"strings"
    32		"sync"
    33	
    34		"perkeep.org/internal/httputil"
    35		"perkeep.org/pkg/importer"
    36		"perkeep.org/pkg/schema"
    37		"perkeep.org/pkg/schema/nodeattr"
    38	
    39		"github.com/mattn/go-mastodon"
    40		"go4.org/ctxutil"
    41		"go4.org/syncutil"
    42		"golang.org/x/oauth2"
    43	)
    44	
const (
	// clientName is the name we report to the server when registering an app
	clientName = "Perkeep"

	// runCompleteVersion is a cache-busting version number of the
	// importer code. It should be incremented whenever the
	// behavior of this importer is updated enough to warrant a
	// complete run.  Otherwise, if the importer runs to
	// completion, this version number is recorded on the account
	// permanode and subsequent importers can stop early.
	runCompleteVersion = "0"

	// OAuth2 endpoint paths. They are joined onto the path of the
	// user-supplied instance URL when building the oauth2.Config.
	authorizationPath = "/oauth/authorize"
	tokenPath         = "/oauth/token"

	// Account node attribute names under which the instance URL and the
	// credentials of the app registered during setup are stored.
	acctAttrInstanceURL  = "instanceURL"
	acctAttrClientID     = "oauthClientID"
	acctAttrClientSecret = "oauthClientSecret"

	// status URI. This is an ActivityPub globally unique identifier. May or may
	// not be the same as the URL for the human-readable version of the status.
	attrURI = "uri"

	// content warnings in Mastodon UI, represented as 'summary' in ActivityPub
	attrSpoilerText = "spoilerText"

	// Name of the child node which contains references to all the statuses
	nodeStatuses = "statuses"

	// importAtOnce is the size of the gate that bounds how many statuses of a
	// batch are imported concurrently.
	importAtOnce = 10 // number of statuses to import at once

)
    77	
// imp is the Mastodon importer implementation registered with the importer
// package. It embeds importer.OAuth2; the RedirectURL and RedirectState
// methods called during setup are presumably provided by that embedded type.
type imp struct {
	importer.OAuth2
}
    81	
// init registers this importer under the name "mastodon".
func init() {
	importer.Register("mastodon", &imp{})
}
    85	
    86	func (*imp) Properties() importer.Properties {
    87		return importer.Properties{
    88			Title:       "Mastodon",
    89			Description: "import posts from a Mastodon or Pleroma account",
    90	
    91			// While the API does use client_id and client_secret, there is an
    92			// API endpoint for obtaining these automatically
    93			NeedsAPIKey:         false,
    94			SupportsIncremental: true,
    95		}
    96	}
    97	
    98	func (im *imp) IsAccountReady(acctNode *importer.Object) (ok bool, err error) {
    99		if acctNode.Attr(importer.AcctAttrAccessToken) != "" &&
   100			acctNode.Attr(acctAttrInstanceURL) != "" &&
   101			acctNode.Attr(acctAttrClientID) != "" &&
   102			acctNode.Attr(acctAttrClientSecret) != "" {
   103			return true, nil
   104		}
   105		return false, nil
   106	}
   107	
   108	func (im *imp) SummarizeAccount(acct *importer.Object) string {
   109		ok, err := im.IsAccountReady(acct)
   110		if err != nil {
   111			return "Not configured; error = " + err.Error()
   112		}
   113		if !ok {
   114			return "Not configured"
   115		}
   116	
   117		expandedAddr, err := getExpandedAddress(acct.Attr(importer.AcctAttrUserName), acct.Attr(acctAttrInstanceURL))
   118		if err != nil {
   119			return "Misconfigured; error = " + err.Error()
   120		}
   121	
   122		// "display name (@username@example.com)"
   123		return fmt.Sprintf("%s (%s)", acct.Attr(importer.AcctAttrName), expandedAddr)
   124	}
   125	
// promptURLTmpl holds the "promptURL" template: the form that asks the user
// for the base URL of their instance. The form posts back to .AccountURL with
// mode=login and the entered value in the instanceURL field, which ServeSetup
// reads. template.Must panics at package initialization if parsing fails.
var promptURLTmpl = template.Must(template.New("root").Parse(`
{{define "promptURL"}}
<h1>Configuring Mastodon or Pleroma account</h1>
<p>Enter the base URL of your instance.</p>
<form method="post" action="{{ .AccountURL }}">
	<input type="hidden" name="mode" value="login">
	<label>Instance URL <input type="url" name="instanceURL" size="40" placeholder="https://example.com"></label>
	<input type="submit" value="Add">
</form>
{{end}}
`))
   137	
   138	func (im *imp) ServeSetup(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) error {
   139		// Since this importer works with arbitrary servers, it needs the user to
   140		// input an URL before it can send the user off to the OAuth authorization
   141		// endpoint. To accomplish this, this method is invoked twice during setup.
   142	
   143		instanceURL := r.FormValue("instanceURL")
   144		if instanceURL == "" {
   145			// First step: the user hasn't provided an instance URL yet, so we ask
   146			// them for it and send them back to this method. The template includes
   147			// mode=login, so that the importer code redirects us back here.
   148	
   149			return promptURLTmpl.ExecuteTemplate(w, "promptURL", ctx)
   150		}
   151	
   152		// Second step: User just typed in their instance URL
   153	
   154		app, err := mastodon.RegisterApp(ctx, &mastodon.AppConfig{
   155			Server:       instanceURL,
   156			ClientName:   clientName,
   157			Scopes:       "read",
   158			RedirectURIs: im.RedirectURL(im, ctx),
   159		})
   160		if err != nil {
   161			httputil.ServeError(w, r, err)
   162			return err
   163		}
   164	
   165		// These aren't enough to log in. We fill in the rest with ServeCallback()
   166		if err := ctx.AccountNode.SetAttrs(
   167			acctAttrInstanceURL, instanceURL,
   168			acctAttrClientID, app.ClientID,
   169			acctAttrClientSecret, app.ClientSecret,
   170		); err != nil {
   171			httputil.ServeError(w, r, err)
   172			return err
   173		}
   174	
   175		authConfig, err := im.auth(ctx)
   176		if err != nil {
   177			httputil.ServeError(w, r, err)
   178			return err
   179		}
   180	
   181		state, err := im.RedirectState(im, ctx)
   182		if err != nil {
   183			httputil.ServeError(w, r, err)
   184			return err
   185		}
   186	
   187		http.Redirect(w, r, authConfig.AuthCodeURL(state), http.StatusFound)
   188		return nil
   189	}
   190	
   191	func (im *imp) ServeCallback(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) {
   192	
   193		code := r.FormValue("code")
   194		if code == "" {
   195			http.Error(w, "request contained no code", http.StatusBadRequest)
   196			return
   197		}
   198	
   199		auth, err := im.auth(ctx)
   200		if err != nil {
   201			httputil.ServeError(w, r, err)
   202			return
   203		}
   204	
   205		authToken, err := auth.Exchange(ctx, code)
   206		if err != nil {
   207			http.Error(w, "failed to obtain oauth token", http.StatusInternalServerError)
   208			log.Printf("Mastodon token exchange failed with error: %s", err)
   209			return
   210		}
   211	
   212		if err := ctx.AccountNode.SetAttr(importer.AcctAttrAccessToken, authToken.AccessToken); err != nil {
   213			httputil.ServeError(w, r, err)
   214			return
   215		}
   216	
   217		cl := createMastodonClient(ctx.Context, ctx.AccountNode)
   218		mastoAccount, err := cl.GetAccountCurrentUser(ctx)
   219		if err != nil {
   220			http.Error(w, "failed to fetch account info", http.StatusInternalServerError)
   221			return
   222		}
   223	
   224		userAddress, err := getExpandedAddress(mastoAccount.Acct, ctx.AccountNode.Attr(acctAttrInstanceURL))
   225		if err != nil {
   226			http.Error(w, "failed to determine user's address", http.StatusInternalServerError)
   227			log.Printf("failed to determine user's address: %s", err)
   228			return
   229		}
   230	
   231		acctTitle := fmt.Sprintf("%s's Mastodon account", userAddress)
   232	
   233		if err := ctx.AccountNode.SetAttrs(
   234			importer.AcctAttrUserID, string(mastoAccount.ID),
   235			importer.AcctAttrUserName, mastoAccount.Acct,
   236			importer.AcctAttrName, mastoAccount.DisplayName,
   237			nodeattr.Title, acctTitle,
   238		); err != nil {
   239			httputil.ServeError(w, r, err)
   240			return
   241		}
   242	
   243		http.Redirect(w, r, ctx.AccountURL(), http.StatusFound)
   244	}
   245	
// run holds the state of a single import run for one account.
type run struct {
	*importer.RunContext

	// incremental is set by Run when no full import is forced and the
	// account's recorded completed version equals runCompleteVersion.
	incremental bool // true if importing only part
	cl          *mastodon.Client // API client configured for the account

	userAddress string // address in the form of user@example.com, used in logs
}
   254	
// fullImportOverride is read once from the PERKEEP_MASTODON_FULL_IMPORT
// environment variable. When true, Run never selects incremental mode. The
// parse error is deliberately discarded: an unset or unparsable value leaves
// the override false.
var fullImportOverride, _ = strconv.ParseBool(os.Getenv("PERKEEP_MASTODON_FULL_IMPORT"))
   256	
   257	func (im *imp) Run(ctx *importer.RunContext) error {
   258		acct := ctx.AccountNode()
   259		userAddress, err := getExpandedAddress(acct.Attr(importer.AcctAttrUserName), acct.Attr(acctAttrInstanceURL))
   260		if err != nil {
   261			return err
   262		}
   263	
   264		r := &run{
   265			RunContext:  ctx,
   266			incremental: !fullImportOverride && acct.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
   267			cl:          createMastodonClient(ctx.Context(), acct),
   268			userAddress: userAddress,
   269		}
   270	
   271		rootTitle := fmt.Sprintf("%s's Mastodon data", userAddress)
   272		if err := r.RootNode().SetAttr(nodeattr.Title, rootTitle); err != nil {
   273			return err
   274		}
   275	
   276		userID := mastodon.ID(acct.Attr(importer.AcctAttrUserID))
   277		if userID == "" {
   278			return errors.New("missing user ID")
   279		}
   280	
   281		if err := r.importStatuses(userID); err != nil {
   282			return err
   283		}
   284	
   285		if err := acct.SetAttr(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
   286			return err
   287		}
   288	
   289		return nil
   290	}
   291	
   292	// importStatuses imports statuses for the given user into the store
   293	func (r *run) importStatuses(userID mastodon.ID) error {
   294		statusesNode, err := r.RootNode().ChildPathObject(nodeStatuses)
   295		if err != nil {
   296			return err
   297		}
   298	
   299		nodeTitle := fmt.Sprintf("Mastodon statuses for %s", r.userAddress)
   300		if err := statusesNode.SetAttr(nodeattr.Title, nodeTitle); err != nil {
   301			return err
   302		}
   303	
   304		log.Printf("mastodon: Beginning statuses import for %s", r.userAddress)
   305	
   306		var pg mastodon.Pagination
   307	
   308		for {
   309			select {
   310			case <-r.Context().Done():
   311				return r.Context().Err()
   312			default:
   313			}
   314	
   315			if pg.MaxID != "" {
   316				log.Printf("mastodon: fetching batch for %s, from %s", r.userAddress, pg.MaxID)
   317			} else {
   318				log.Printf("mastodon: fetching batch for %s", r.userAddress)
   319			}
   320	
   321			batch, err := r.cl.GetAccountStatuses(r.Context(), userID, &pg)
   322			if err != nil {
   323				return err
   324			}
   325	
   326			if len(batch) == 0 {
   327				log.Printf("mastodon: got empty batch, assuming end of statuses for %s", r.userAddress)
   328				return nil
   329			}
   330	
   331			gate := syncutil.NewGate(importAtOnce)
   332			var grp syncutil.Group
   333			allReblogs := true
   334			anyNew := false
   335			var anyNewMu sync.Mutex
   336	
   337			for i := range batch {
   338				st := batch[i]
   339	
   340				// If an entry is a reblog, we ignore it and move on. However, the
   341				// whole batch being all reblogs does not mean there is nothing new
   342				// on the next page. If everything on this page was a reblog, we
   343				// move on to the next page regardless.
   344				if st.Reblog != nil {
   345					continue
   346				}
   347	
   348				allReblogs = false
   349	
   350				gate.Start()
   351				grp.Go(func() error {
   352					defer gate.Done()
   353					alreadyHad, err := r.importStatus(statusesNode, st)
   354					if err != nil {
   355						return fmt.Errorf("error importing status %s: %v", st.URI, err)
   356					}
   357	
   358					if !alreadyHad {
   359						anyNewMu.Lock()
   360						anyNew = true
   361						anyNewMu.Unlock()
   362					}
   363	
   364					return nil
   365	
   366				})
   367			}
   368	
   369			if err := grp.Err(); err != nil {
   370				return err
   371			}
   372	
   373			if !anyNew && !allReblogs {
   374				log.Printf("mastodon: reached the end for incremental import for %s", r.userAddress)
   375				return nil
   376			}
   377	
   378			if pg.MaxID == "" {
   379				log.Printf("mastodon: reached the end of statuses for %s", r.userAddress)
   380				return nil
   381			}
   382	
   383		}
   384	}
   385	
   386	// importStatus imports a single status, also adding it to the statuses node.
   387	// Returns true if we already had the status in the database.
   388	func (r *run) importStatus(listNode *importer.Object, st *mastodon.Status) (bool, error) {
   389		select {
   390		case <-r.Context().Done():
   391			return false, r.Context().Err()
   392		default:
   393		}
   394	
   395		// We store child nodes by their URI, since the URI is supposed to be an
   396		// unchanging, globally unique identifier for the status
   397		statusNode, err := listNode.ChildPathObject(st.URI)
   398		if err != nil {
   399			return false, err
   400		}
   401	
   402		if r.incremental && statusNode.Attr(attrURI) == st.URI {
   403			return true, nil
   404		}
   405	
   406		attrs := []string{
   407			nodeattr.Type, "mastodon:status",
   408			attrURI, st.URI,
   409			nodeattr.URL, st.URL,
   410			nodeattr.Content, st.Content,
   411			nodeattr.StartDate, schema.RFC3339FromTime(st.CreatedAt),
   412		}
   413	
   414		if st.SpoilerText != "" {
   415			attrs = append(attrs, attrSpoilerText, st.SpoilerText)
   416		}
   417	
   418		filenames := make(map[string]int)
   419	
   420		for i, att := range st.MediaAttachments {
   421			// All media for a local user will be local
   422			resp, err := ctxutil.Client(r.Context()).Get(att.URL)
   423			if err != nil {
   424				return false, err
   425			}
   426	
   427			if resp.StatusCode != http.StatusOK {
   428				return false, fmt.Errorf("failed fetching attachment %s with HTTP status %s", att.URL, resp.Status)
   429			}
   430	
   431			fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), "", resp.Body)
   432			resp.Body.Close()
   433			if err != nil {
   434				return false, err
   435			}
   436	
   437			filename := path.Base(att.URL)
   438			filenames[filename]++
   439	
   440			// A status can have several different attachments with the same
   441			// filename. We add numbers to the path to diffirentiate them if that's
   442			// the case
   443			if filenames[filename] > 1 {
   444				ext := path.Ext(filename)
   445				filename = fmt.Sprintf("%s%d%s", strings.TrimSuffix(filename, ext), filenames[filename], ext)
   446			}
   447	
   448			attrs = append(attrs, fmt.Sprintf("camliPath:%v", filename), fileRef.String())
   449	
   450			// The first image gets to be the preview image for the node
   451			if i == 0 {
   452				attrs = append(attrs, "camliContentImage", fileRef.String())
   453			}
   454	
   455			log.Printf("mastodon: adding attachment %s to permanode %s for status %s", fileRef.String(), statusNode.PermanodeRef(), st.URI)
   456	
   457		}
   458	
   459		changed, err := statusNode.SetAttrs2(attrs...)
   460		if err == nil && changed {
   461			log.Printf("mastodon: Imported status %s to %s", st.URI, statusNode.PermanodeRef())
   462		}
   463	
   464		return !changed, err
   465	
   466	}
   467	
   468	// auth returns the appropriate oauth2.Config for this account
   469	func (im *imp) auth(ctx *importer.SetupContext) (*oauth2.Config, error) {
   470		baseURL, err := url.Parse(ctx.AccountNode.Attr(acctAttrInstanceURL))
   471		if err != nil {
   472			return nil, err
   473		}
   474	
   475		tokenURL := *baseURL
   476		tokenURL.Path = path.Join(tokenURL.Path, tokenPath)
   477	
   478		authURL := *baseURL
   479		authURL.Path = path.Join(authURL.Path, authorizationPath)
   480	
   481		return &oauth2.Config{
   482			ClientID:     ctx.AccountNode.Attr(acctAttrClientID),
   483			ClientSecret: ctx.AccountNode.Attr(acctAttrClientSecret),
   484			RedirectURL:  im.RedirectURL(im, ctx),
   485			Endpoint: oauth2.Endpoint{
   486				AuthURL:  authURL.String(),
   487				TokenURL: tokenURL.String(),
   488			},
   489		}, nil
   490	}
   491	
   492	// createMastodonClient returns a new Client configured for the provided
   493	// account. It does not check if the account has the needed fields filled.
   494	func createMastodonClient(ctx context.Context, acct *importer.Object) *mastodon.Client {
   495	
   496		// Although the client can take client_id and client_secret, we won't need
   497		// those for token auth
   498		cl := mastodon.NewClient(&mastodon.Config{
   499			Server:      acct.Attr(acctAttrInstanceURL),
   500			AccessToken: acct.Attr(importer.AcctAttrAccessToken),
   501		})
   502	
   503		cl.Client = *ctxutil.Client(ctx)
   504		return cl
   505	}
   506	
   507	// getExpandedAddress returns the address for the account in the @user@example.com form
   508	func getExpandedAddress(user, instanceURL string) (string, error) {
   509	
   510		if user == "" || instanceURL == "" {
   511			return "", errors.New("some required account data is missing")
   512		}
   513	
   514		parsedURL, err := url.Parse(instanceURL)
   515		if err != nil {
   516			return "", err
   517		}
   518	
   519		return fmt.Sprintf("@%s@%s", user, parsedURL.Host), nil
   520	}
Website layout inspired by memcached.
Content by the authors.