1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
16
17
18 package twitter
19
20 import (
21 "archive/zip"
22 "bufio"
23 "bytes"
24 "encoding/json"
25 "errors"
26 "fmt"
27 "html"
28 "io"
29 "log"
30 "net/http"
31 "net/url"
32 "os"
33 "path"
34 "regexp"
35 "strconv"
36 "strings"
37 "sync"
38 "time"
39
40 "perkeep.org/internal/httputil"
41 "perkeep.org/pkg/blob"
42 "perkeep.org/pkg/importer"
43 "perkeep.org/pkg/schema"
44 "perkeep.org/pkg/schema/nodeattr"
45
46 "github.com/garyburd/go-oauth/oauth"
47
48 "go4.org/ctxutil"
49 "go4.org/syncutil"
50 )
51
const (
	// Base URL of the Twitter REST API and the three OAuth 1.0a endpoints.
	apiURL                        = "https://api.twitter.com/1.1/"
	temporaryCredentialRequestURL = "https://api.twitter.com/oauth/request_token"
	resourceOwnerAuthorizationURL = "https://api.twitter.com/oauth/authorize"
	tokenRequestURL               = "https://api.twitter.com/oauth/access_token"
	// API paths; they are appended to apiURL when issuing requests.
	userInfoAPIPath     = "account/verify_credentials.json"
	userTimeLineAPIPath = "statuses/user_timeline.json"
	userLikesAPIPath    = "favorites/list.json"

	// runCompleteVersion is the value written to the account's
	// importer.AcctAttrCompletedVersion attribute when a run finishes
	// without any logged error. A later run is incremental only if the
	// stored value still equals this constant (and a full import is not
	// forced), so changing it makes the next run a full one.
	runCompleteVersion = "5"

	// acctAttrTweetZip is the account attribute holding the file ref of
	// an uploaded Twitter archive zip. When it is set, Run also imports
	// the tweets found in that zip.
	acctAttrTweetZip = "twitterArchiveZipFileRef"

	// acctAttrImportLikes is the account attribute that enables importing
	// likes. Its value is parsed with strconv.ParseBool.
	acctAttrImportLikes = "twitterImportLikes"

	// acctAttrZipDoneVersion is the account attribute set to
	// "<zipRef>:<runCompleteVersion>" after a zip import succeeds, so that
	// an incremental run can skip a zip it has already processed.
	acctAttrZipDoneVersion = "twitterZipDoneVersion"

	// attrImportMethod is the tweet attribute recording how the tweet was
	// imported; it is set to either "api" or "zip".
	attrImportMethod = "twitterImportMethod"

	tweetRequestLimit = 200 // sent as the "count" parameter of each API request
	tweetsAtOnce      = 20  // size of the gate bounding concurrent importTweet calls

	// nodeTypeTweet is the nodeattr.Type value given to imported tweets.
	nodeTypeTweet = "twitter.com:tweet"

	// nodeTypeLike is the nodeattr.Type value given to tweets reported as liked.
	nodeTypeLike = "twitter.com:like"
)
102
// oAuthURIs bundles the Twitter OAuth endpoints; it is handed to
// SetupContext.NewOAuthClient during account setup.
var oAuthURIs = importer.OAuthURIs{
	TemporaryCredentialRequestURI: temporaryCredentialRequestURL,
	ResourceOwnerAuthorizationURI: resourceOwnerAuthorizationURL,
	TokenRequestURI:               tokenRequestURL,
}
108
// init registers this importer under the name "twitter".
func init() {
	importer.Register("twitter", &imp{})
}
112
// Compile-time check that *imp provides AccountSetupHTML.
var _ importer.ImporterSetupHTMLer = (*imp)(nil)

// imp is the Twitter importer. It embeds importer.OAuth1.
type imp struct {
	importer.OAuth1
}
118
119 func (*imp) Properties() importer.Properties {
120 return importer.Properties{
121 Title: "Twitter",
122 Description: "import tweets and media from tweets",
123
124 SupportsIncremental: true,
125 NeedsAPIKey: true,
126 }
127 }
128
129 func (im *imp) IsAccountReady(acctNode *importer.Object) (ok bool, err error) {
130 if acctNode.Attr(importer.AcctAttrUserID) != "" && acctNode.Attr(importer.AcctAttrAccessToken) != "" {
131 return true, nil
132 }
133 return false, nil
134 }
135
136 func (im *imp) SummarizeAccount(acct *importer.Object) string {
137 ok, err := im.IsAccountReady(acct)
138 if err != nil {
139 return "Not configured; error = " + err.Error()
140 }
141 if !ok {
142 return "Not configured"
143 }
144 s := fmt.Sprintf("@%s (%s), twitter id %s",
145 acct.Attr(importer.AcctAttrUserName),
146 acct.Attr(importer.AcctAttrName),
147 acct.Attr(importer.AcctAttrUserID),
148 )
149 if acct.Attr(acctAttrTweetZip) != "" {
150 s += " + zip file"
151 }
152 return s
153 }
154
155 func (im *imp) AccountSetupHTML(host *importer.Host) string {
156 base := host.ImporterBaseURL() + "twitter"
157 return fmt.Sprintf(`
158 <h1>Configuring Twitter</h1>
159 <p>Visit <a href='https://apps.twitter.com/'>https://apps.twitter.com/</a> and click "Create New App".</p>
160 <p>Use the following settings:</p>
161 <ul>
162 <li>Name: Does not matter. (camlistore-importer).</li>
163 <li>Description: Does not matter. (imports twitter data into camlistore).</li>
164 <li>Website: <b>%s</b></li>
165 <li>Callback URL: <b>%s</b></li>
166 </ul>
167 <!-- TODO(mpl): use CSS to style it to 80 chars wide instead of doing it in source -->
168 <p>
169 Click "Create your Twitter application".You should be redirected to the</br>
170 Application Management page of your newly created application.</br>
171 Go to the "Keys and Access Tokens" tab. Copy the "Consumer Key (API Key)" and</br>
172 "Consumer Secret (API Secret)" into the "Client ID" and "Client Secret" boxes</br>
173 above.
174 </p>
175 <p>
176 Note that the twitter API prevents us from getting more than 3200 tweets<br>
177 (including retweets) through your user timeline. So if you have more than that<br>
178 limit (and want to get them all), after you have configured this account, you<br>
179 need to download all your data as a zip first. Which you can do on your twitter<br>
180 page, at: "Settings and Privacy", "Your Twitter data", "Download your Twitter<br>
181 data". Then upload it to your instance with "pk-put file tweets.zip" (this will<br>
182 return the zip-fileref), and signal the twitter importer that you have it, with<br>
183 "pk-put attr <acct-permanode> twitterArchiveZipFileRef <zip-fileref>".<br>
184 Then you can start running the importer.
185 </p>
186 <p>
187 If you want to import likes as well, please run <br>
188 "pk-put attr <acct-permanode> twitterImportLikes true" to enable it.
189 </p>
190 `, base, base+"/callback")
191 }
192
193
// run holds the state of one import run for a single account.
type run struct {
	*importer.RunContext
	im          *imp
	incremental bool // true when a previous run completed at runCompleteVersion and a full import is not forced

	// OAuth client (application credentials) and the user's access
	// credentials, used to sign API requests in doAPI.
	oauthClient *oauth.Client
	accessCreds *oauth.Credentials

	mu     sync.Mutex // guards anyErr
	anyErr bool       // set by errorf; when true, Run does not record the run as complete
}
205
// forceFullImport is the boolean value of the CAMLI_TWITTER_FULL_IMPORT
// environment variable; a parse error is ignored and leaves it false.
// When true, Run never treats a run as incremental.
var forceFullImport, _ = strconv.ParseBool(os.Getenv("CAMLI_TWITTER_FULL_IMPORT"))
207
// Run performs one import for the account in ctx. In order, it:
//
//  1. imports the user timeline through the API, unless the
//     CAMLI_TWITTER_SKIP_API_IMPORT environment variable parses as true;
//  2. imports likes through the API if the account's twitterImportLikes
//     attribute parses as true;
//  3. imports tweets from the archive zip referenced by the account's
//     twitterArchiveZipFileRef attribute, unless this is an incremental
//     run and that same zip was already processed at this version;
//  4. records runCompleteVersion on the account if no error was logged
//     through errorf during the run.
//
// It returns an error if credentials or the user ID are missing, or if
// any of the steps fails.
func (im *imp) Run(ctx *importer.RunContext) error {
	clientId, secret, err := ctx.Credentials()
	if err != nil {
		return fmt.Errorf("no API credentials: %v", err)
	}
	acctNode := ctx.AccountNode()
	accessToken := acctNode.Attr(importer.AcctAttrAccessToken)
	accessSecret := acctNode.Attr(importer.AcctAttrAccessTokenSecret)
	if accessToken == "" || accessSecret == "" {
		return errors.New("access credentials not found")
	}
	r := &run{
		RunContext: ctx,
		im:         im,
		// Incremental only when the previous completed run used the
		// current version and a full import has not been forced.
		incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,

		oauthClient: &oauth.Client{
			TemporaryCredentialRequestURI: temporaryCredentialRequestURL,
			ResourceOwnerAuthorizationURI: resourceOwnerAuthorizationURL,
			TokenRequestURI:               tokenRequestURL,
			Credentials: oauth.Credentials{
				Token:  clientId,
				Secret: secret,
			},
		},
		accessCreds: &oauth.Credentials{
			Token:  accessToken,
			Secret: accessSecret,
		},
	}

	userID := acctNode.Attr(importer.AcctAttrUserID)
	if userID == "" {
		return errors.New("userID hasn't been set by account setup")
	}

	// A parse error is ignored, so an unset or malformed variable means
	// "do not skip".
	skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT"))
	if !skipAPITweets {
		if err := r.importTweets(userID, userTimeLineAPIPath); err != nil {
			return err
		}
	}

	// Reload the account node before reading the remaining attributes.
	// NOTE(review): presumably so that attribute changes made while the
	// timeline import was running are seen — confirm.
	acctNode, err = ctx.Host.ObjectFromRef(acctNode.PermanodeRef())
	if err != nil {
		return fmt.Errorf("error reloading account node: %v", err)
	}
	// An unset or unparsable attribute is treated as "likes disabled".
	importLikes, err := strconv.ParseBool(acctNode.Attr(acctAttrImportLikes))
	if err == nil && importLikes {
		if err := r.importTweets(userID, userLikesAPIPath); err != nil {
			return err
		}
	}

	zipRef := acctNode.Attr(acctAttrTweetZip)
	zipDoneVal := zipRef + ":" + runCompleteVersion
	// Process the zip unless this is an incremental run and the very same
	// zip ref was already imported at the current version.
	if zipRef != "" && !(r.incremental && acctNode.Attr(acctAttrZipDoneVersion) == zipDoneVal) {
		zipbr, ok := blob.Parse(zipRef)
		if !ok {
			return fmt.Errorf("invalid zip file blobref %q", zipRef)
		}
		fr, err := schema.NewFileReader(r.Context(), r.Host.BlobSource(), zipbr)
		if err != nil {
			return fmt.Errorf("error opening zip %v: %v", zipbr, err)
		}
		defer fr.Close()
		zr, err := zip.NewReader(fr, fr.Size())
		if err != nil {
			return fmt.Errorf("Error opening twitter zip file %v: %v", zipRef, err)
		}
		if err := r.importTweetsFromZip(userID, zr); err != nil {
			return err
		}
		if err := acctNode.SetAttrs(acctAttrZipDoneVersion, zipDoneVal); err != nil {
			return err
		}
	}

	r.mu.Lock()
	anyErr := r.anyErr
	r.mu.Unlock()

	// Only mark the run complete when nothing was reported via errorf, so
	// that the next run is a full one otherwise.
	if !anyErr {
		if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
			return err
		}
	}

	return nil
}
298
299 var _ importer.LongPoller = (*imp)(nil)
300
301 func (im *imp) LongPoll(rctx *importer.RunContext) error {
302 clientId, secret, err := rctx.Credentials()
303 if err != nil {
304 return err
305 }
306
307 acctNode := rctx.AccountNode()
308 accessToken := acctNode.Attr(importer.AcctAttrAccessToken)
309 accessSecret := acctNode.Attr(importer.AcctAttrAccessTokenSecret)
310 if accessToken == "" || accessSecret == "" {
311 return errors.New("access credentials not found")
312 }
313 oauthClient := &oauth.Client{
314 TemporaryCredentialRequestURI: temporaryCredentialRequestURL,
315 ResourceOwnerAuthorizationURI: resourceOwnerAuthorizationURL,
316 TokenRequestURI: tokenRequestURL,
317 Credentials: oauth.Credentials{
318 Token: clientId,
319 Secret: secret,
320 },
321 }
322 accessCreds := &oauth.Credentials{
323 Token: accessToken,
324 Secret: accessSecret,
325 }
326
327 form := url.Values{"with": {"user"}}
328 req, _ := http.NewRequest("GET", "https://userstream.twitter.com/1.1/user.json", nil)
329 req.Header.Set("Authorization", oauthClient.AuthorizationHeader(accessCreds, "GET", req.URL, form))
330 req.URL.RawQuery = form.Encode()
331 req.Cancel = rctx.Context().Done()
332
333 log.Printf("twitter: beginning long poll, awaiting new tweets...")
334 res, err := http.DefaultClient.Do(req)
335 if err != nil {
336 return err
337 }
338 defer res.Body.Close()
339 if res.StatusCode != 200 {
340 return errors.New(res.Status)
341 }
342 bs := bufio.NewScanner(res.Body)
343 for bs.Scan() {
344 line := strings.TrimSpace(bs.Text())
345 if line == "" || strings.HasPrefix(line, `{"friends`) {
346 continue
347 }
348 log.Printf("twitter: long poll saw activity")
349 return nil
350 }
351 if err := bs.Err(); err != nil {
352 return err
353 }
354 return errors.New("twitter: got EOF without a tweet")
355 }
356
357 func (r *run) errorf(format string, args ...interface{}) {
358 log.Printf("twitter: "+format, args...)
359 r.mu.Lock()
360 defer r.mu.Unlock()
361 r.anyErr = true
362 }
363
364 func (r *run) doAPI(result interface{}, apiPath string, keyval ...string) error {
365 return importer.OAuthContext{
366 Ctx: r.Context(),
367 Client: r.oauthClient,
368 Creds: r.accessCreds,
369 }.PopulateJSONFromURL(result, http.MethodGet, apiURL+apiPath, keyval...)
370 }
371
372
373
374
375 func (r *run) importTweets(userID string, apiPath string) error {
376 maxId := ""
377 continueRequests := true
378
379 var tweetsNode *importer.Object
380 var err error
381 var importType string
382 if apiPath == userLikesAPIPath {
383 importType = "likes"
384 } else {
385 importType = "tweets"
386 }
387 tweetsNode, err = r.getTopLevelNode(importType)
388 if err != nil {
389 return err
390 }
391
392 numTweets := 0
393 sawTweet := map[string]bool{}
394
395
396
397 attrs := []string{
398 "user_id", userID,
399 "count", strconv.Itoa(tweetRequestLimit),
400 }
401 for continueRequests {
402 select {
403 case <-r.Context().Done():
404 r.errorf("interrupted")
405 return r.Context().Err()
406 default:
407 }
408
409 var resp []*apiTweetItem
410 var err error
411 if maxId == "" {
412 log.Printf("twitter: fetching %s for userid %s", importType, userID)
413 err = r.doAPI(&resp, apiPath, attrs...)
414 } else {
415 log.Printf("twitter: fetching %s for userid %s with max ID %s", userID, importType, maxId)
416 err = r.doAPI(&resp, apiPath,
417 append(attrs, "max_id", maxId)...)
418 }
419 if err != nil {
420 return err
421 }
422
423 var (
424 newThisBatch = 0
425 allDupMu sync.Mutex
426 allDups = true
427 gate = syncutil.NewGate(tweetsAtOnce)
428 grp syncutil.Group
429 )
430 for i := range resp {
431 tweet := resp[i]
432
433
434 if sawTweet[tweet.Id] {
435 continue
436 }
437 sawTweet[tweet.Id] = true
438 newThisBatch++
439 maxId = tweet.Id
440
441 gate.Start()
442 grp.Go(func() error {
443 defer gate.Done()
444 dup, err := r.importTweet(tweetsNode, tweet, true)
445 if !dup {
446 allDupMu.Lock()
447 allDups = false
448 allDupMu.Unlock()
449 }
450 if err != nil {
451 r.errorf("error importing tweet %s %v", tweet.Id, err)
452 }
453 return err
454 })
455 }
456 if err := grp.Err(); err != nil {
457 return err
458 }
459 numTweets += newThisBatch
460 log.Printf("twitter: imported %d %s this batch; %d total.", newThisBatch, importType, numTweets)
461 if r.incremental && allDups {
462 log.Printf("twitter: incremental import found end batch")
463 break
464 }
465 continueRequests = newThisBatch > 0
466 }
467 log.Printf("twitter: successfully did full run of importing %d %s", numTweets, importType)
468 return nil
469 }
470
471 func tweetsFromZipFile(zf *zip.File) (tweets []*zipTweetItem, err error) {
472 rc, err := zf.Open()
473 if err != nil {
474 return nil, err
475 }
476 slurp, err := io.ReadAll(rc)
477 rc.Close()
478 if err != nil {
479 return nil, err
480 }
481 i := bytes.IndexByte(slurp, '[')
482 if i < 0 {
483 return nil, errors.New("No '[' found in zip file")
484 }
485 slurp = slurp[i:]
486 if err := json.Unmarshal(slurp, &tweets); err != nil {
487 return nil, fmt.Errorf("JSON error: %v", err)
488 }
489 return
490 }
491
492 func (r *run) importTweetsFromZip(userID string, zr *zip.Reader) error {
493 log.Printf("twitter: processing zip file with %d files", len(zr.File))
494
495 tweetsNode, err := r.getTopLevelNode("tweets")
496 if err != nil {
497 return err
498 }
499
500 var (
501 gate = syncutil.NewGate(tweetsAtOnce)
502 grp syncutil.Group
503 )
504 total := 0
505 for _, zf := range zr.File {
506 if !(strings.HasPrefix(zf.Name, "data/js/tweets/2") && strings.HasSuffix(zf.Name, ".js")) {
507 continue
508 }
509 tweets, err := tweetsFromZipFile(zf)
510 if err != nil {
511 return fmt.Errorf("error reading tweets from %s: %v", zf.Name, err)
512 }
513
514 for i := range tweets {
515 total++
516 tweet := tweets[i]
517 gate.Start()
518 grp.Go(func() error {
519 defer gate.Done()
520 _, err := r.importTweet(tweetsNode, tweet, false)
521 return err
522 })
523 }
524 }
525 err = grp.Err()
526 log.Printf("zip import of tweets: %d total, err = %v", total, err)
527 return err
528 }
529
// timeParseFirstFormat parses timeStr with each layout in format, in order,
// and returns the result of the first layout that succeeds. If none does,
// it returns the error from the last layout tried. It panics when called
// without any layout.
func timeParseFirstFormat(timeStr string, format ...string) (t time.Time, err error) {
	if len(format) < 1 {
		panic("need more than 1 format")
	}
	for i := 0; i < len(format); i++ {
		if t, err = time.Parse(format[i], timeStr); err == nil {
			return t, nil
		}
	}
	return t, err
}
542
543
544 func (r *run) importTweet(parent *importer.Object, tweet tweetItem, viaAPI bool) (dup bool, err error) {
545 select {
546 case <-r.Context().Done():
547 r.errorf("Twitter importer: interrupted")
548 return false, r.Context().Err()
549 default:
550 }
551 id := tweet.ID()
552 tweetNode, err := parent.ChildPathObject(id)
553 if err != nil {
554 return false, err
555 }
556
557
558
559
560
561
562
563 if tweetNode.Attr(attrImportMethod) == "api" && !viaAPI {
564 return true, nil
565 }
566
567
568 createdTime, err := timeParseFirstFormat(tweet.CreatedAt(), time.RubyDate, "2006-01-02 15:04:05 -0700")
569 if err != nil {
570 return false, fmt.Errorf("could not parse time %q: %v", tweet.CreatedAt(), err)
571 }
572
573 url := fmt.Sprintf("https://twitter.com/%s/status/%v",
574 r.AccountNode().Attr(importer.AcctAttrUserName),
575 id)
576
577 nodeType := nodeTypeTweet
578 if tweet.Liked() {
579 nodeType = nodeTypeLike
580 }
581
582 attrs := []string{
583 "twitterId", id,
584 nodeattr.Type, nodeType,
585 nodeattr.StartDate, schema.RFC3339FromTime(createdTime),
586 nodeattr.Content, tweet.Text(),
587 nodeattr.URL, url,
588 }
589 if lat, long, ok := tweet.LatLong(); ok {
590 attrs = append(attrs,
591 nodeattr.Latitude, fmt.Sprint(lat),
592 nodeattr.Longitude, fmt.Sprint(long),
593 )
594 }
595 if viaAPI {
596 attrs = append(attrs, attrImportMethod, "api")
597 } else {
598 attrs = append(attrs, attrImportMethod, "zip")
599 }
600
601 for i, m := range tweet.Media() {
602 filename := m.BaseFilename()
603 if tweetNode.Attr("camliPath:"+filename) != "" && (i > 0 || tweetNode.Attr("camliContentImage") != "") {
604
605 continue
606 }
607 tried, gotMedia := 0, false
608 for _, mediaURL := range m.URLs() {
609 tried++
610 res, err := ctxutil.Client(r.Context()).Get(mediaURL)
611 if err != nil {
612 return false, fmt.Errorf("Error fetching %s for tweet %s : %v", mediaURL, url, err)
613 }
614 if res.StatusCode == http.StatusNotFound {
615 continue
616 }
617 if res.StatusCode != 200 {
618 return false, fmt.Errorf("HTTP status %d fetching %s for tweet %s", res.StatusCode, mediaURL, url)
619 }
620 if !viaAPI {
621 log.Printf("twitter: for zip tweet %s, reading %v", url, mediaURL)
622 }
623 fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), filename, res.Body)
624 res.Body.Close()
625 if err != nil {
626 return false, fmt.Errorf("Error fetching media %s for tweet %s: %v", mediaURL, url, err)
627 }
628 attrs = append(attrs, "camliPath:"+filename, fileRef.String())
629 if i == 0 {
630 attrs = append(attrs, "camliContentImage", fileRef.String())
631 }
632 log.Printf("twitter: slurped %s as %s for tweet %s (%v)", mediaURL, fileRef.String(), url, tweetNode.PermanodeRef())
633 gotMedia = true
634 break
635 }
636 if !gotMedia && tried > 0 {
637 return false, fmt.Errorf("All media URLs 404s for tweet %s", url)
638 }
639 }
640
641 changes, err := tweetNode.SetAttrs2(attrs...)
642 if err == nil && changes {
643 log.Printf("twitter: imported tweet %s", url)
644 }
645 return !changes, err
646 }
647
648
649 func (r *run) getTopLevelNode(path string) (*importer.Object, error) {
650 acctNode := r.AccountNode()
651
652 root := r.RootNode()
653 rootTitle := fmt.Sprintf("%s's Twitter Data", acctNode.Attr(importer.AcctAttrUserName))
654 if err := root.SetAttr(nodeattr.Title, rootTitle); err != nil {
655 return nil, err
656 }
657
658 obj, err := root.ChildPathObject(path)
659 if err != nil {
660 return nil, err
661 }
662 var title string
663 switch path {
664 case "tweets":
665 title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName))
666 case "likes":
667 title = fmt.Sprintf("%s's Likes", acctNode.Attr(importer.AcctAttrUserName))
668 }
669 return obj, obj.SetAttr(nodeattr.Title, title)
670 }
671
// userInfo is the subset of the account/verify_credentials.json response
// that the importer stores on the account node.
type userInfo struct {
	ID         string `json:"id_str"`
	ScreenName string `json:"screen_name"`
	Name       string `json:"name,omitempty"`
}
677
678 func getUserInfo(ctx importer.OAuthContext) (userInfo, error) {
679 var ui userInfo
680 if err := ctx.PopulateJSONFromURL(&ui, http.MethodGet, apiURL+userInfoAPIPath); err != nil {
681 return ui, err
682 }
683 if ui.ID == "" {
684 return ui, fmt.Errorf("No userid returned")
685 }
686 return ui, nil
687 }
688
689 func (im *imp) ServeSetup(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) error {
690 oauthClient, err := ctx.NewOAuthClient(oAuthURIs)
691 if err != nil {
692 err = fmt.Errorf("error getting OAuth client: %v", err)
693 httputil.ServeError(w, r, err)
694 return err
695 }
696 tempCred, err := oauthClient.RequestTemporaryCredentials(ctxutil.Client(ctx), ctx.CallbackURL(), nil)
697 if err != nil {
698 err = fmt.Errorf("Error getting temp cred: %v", err)
699 httputil.ServeError(w, r, err)
700 return err
701 }
702 if err := ctx.AccountNode.SetAttrs(
703 importer.AcctAttrTempToken, tempCred.Token,
704 importer.AcctAttrTempSecret, tempCred.Secret,
705 ); err != nil {
706 err = fmt.Errorf("Error saving temp creds: %v", err)
707 httputil.ServeError(w, r, err)
708 return err
709 }
710
711 authURL := oauthClient.AuthorizationURL(tempCred, nil)
712 http.Redirect(w, r, authURL, http.StatusFound)
713 return nil
714 }
715
716 func (im *imp) ServeCallback(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) {
717 tempToken := ctx.AccountNode.Attr(importer.AcctAttrTempToken)
718 tempSecret := ctx.AccountNode.Attr(importer.AcctAttrTempSecret)
719 if tempToken == "" || tempSecret == "" {
720 log.Printf("twitter: no temp creds in callback")
721 httputil.BadRequestError(w, "no temp creds in callback")
722 return
723 }
724 if tempToken != r.FormValue("oauth_token") {
725 log.Printf("twitter: unexpected oauth_token: got %v, want %v", r.FormValue("oauth_token"), tempToken)
726 httputil.BadRequestError(w, "unexpected oauth_token")
727 return
728 }
729 oauthClient, err := ctx.NewOAuthClient(oAuthURIs)
730 if err != nil {
731 err = fmt.Errorf("error getting OAuth client: %v", err)
732 httputil.ServeError(w, r, err)
733 return
734 }
735 tokenCred, vals, err := oauthClient.RequestToken(
736 ctxutil.Client(ctx),
737 &oauth.Credentials{
738 Token: tempToken,
739 Secret: tempSecret,
740 },
741 r.FormValue("oauth_verifier"),
742 )
743 if err != nil {
744 httputil.ServeError(w, r, fmt.Errorf("Error getting request token: %v ", err))
745 return
746 }
747 userid := vals.Get("user_id")
748 if userid == "" {
749 httputil.ServeError(w, r, fmt.Errorf("Couldn't get user id: %v", err))
750 return
751 }
752 if err := ctx.AccountNode.SetAttrs(
753 importer.AcctAttrAccessToken, tokenCred.Token,
754 importer.AcctAttrAccessTokenSecret, tokenCred.Secret,
755 ); err != nil {
756 httputil.ServeError(w, r, fmt.Errorf("Error setting token attributes: %v", err))
757 return
758 }
759
760 u, err := getUserInfo(importer.OAuthContext{Ctx: ctx.Context, Client: oauthClient, Creds: tokenCred})
761 if err != nil {
762 httputil.ServeError(w, r, fmt.Errorf("Couldn't get user info: %v", err))
763 return
764 }
765 if err := ctx.AccountNode.SetAttrs(
766 importer.AcctAttrUserID, u.ID,
767 importer.AcctAttrName, u.Name,
768 importer.AcctAttrUserName, u.ScreenName,
769 nodeattr.Title, fmt.Sprintf("%s's Twitter Account", u.ScreenName),
770 ); err != nil {
771 httputil.ServeError(w, r, fmt.Errorf("Error setting attribute: %v", err))
772 return
773 }
774 http.Redirect(w, r, ctx.AccountURL(), http.StatusFound)
775 }
776
// tweetItem is the view of a tweet that importTweet needs. It is
// implemented by both *apiTweetItem and *zipTweetItem.
type tweetItem interface {
	ID() string
	LatLong() (lat, long float64, ok bool) // ok is false when no usable location is present
	CreatedAt() string                     // creation time as a string; parsed by importTweet
	Text() string
	Media() []tweetMedia
	Liked() bool
}

// tweetMedia is one downloadable media item attached to a tweet.
type tweetMedia interface {
	URLs() []string       // candidate download URLs, tried in order
	BaseFilename() string // name under which the media is stored on the tweet node
}
790
// apiTweetItem is a tweet as decoded from an API response.
type apiTweetItem struct {
	Id           string   `json:"id_str"`
	TextStr      string   `json:"text"`
	CreatedAtStr string   `json:"created_at"`
	Entities     entities `json:"entities"`
	Favorited    bool     `json:"favorited"`

	// Two location fields; latLong consults Geo first, then Coordinates.
	Geo         *geo    `json:"geo"`
	Coordinates *coords `json:"coordinates"`
}
802
803
// zipTweetItem is a tweet as decoded from a file of the archive zip.
type zipTweetItem struct {
	Id           string `json:"id_str"`
	TextStr      string `json:"text"`
	CreatedAtStr string `json:"created_at"`

	// Two location fields; latLong consults Geo first, then Coordinates.
	Geo         *geo        `json:"geo"`
	Coordinates *coords     `json:"coordinates"`
	Entities    zipEntities `json:"entities"`
}
814
815 func (t *apiTweetItem) ID() string {
816 if t.Id == "" {
817 panic("empty id")
818 }
819 return t.Id
820 }
821
822 func (t *zipTweetItem) ID() string {
823 if t.Id == "" {
824 panic("empty id")
825 }
826 return t.Id
827 }
828
829 func (t *apiTweetItem) CreatedAt() string { return t.CreatedAtStr }
830 func (t *zipTweetItem) CreatedAt() string { return t.CreatedAtStr }
831
832 func (t *apiTweetItem) Text() string { return html.UnescapeString(t.TextStr) }
833 func (t *zipTweetItem) Text() string { return html.UnescapeString(t.TextStr) }
834
835 func (t *apiTweetItem) LatLong() (lat, long float64, ok bool) {
836 return latLong(t.Geo, t.Coordinates)
837 }
838
839 func (t *zipTweetItem) LatLong() (lat, long float64, ok bool) {
840 return latLong(t.Geo, t.Coordinates)
841 }
842
843 func latLong(g *geo, c *coords) (lat, long float64, ok bool) {
844 if g != nil && len(g.Coordinates) == 2 {
845 co := g.Coordinates
846 if co[0] != 0 && co[1] != 0 {
847 return co[0], co[1], true
848 }
849 }
850 if c != nil && len(c.Coordinates) == 2 {
851 co := c.Coordinates
852 if co[0] != 0 && co[1] != 0 {
853 return co[1], co[0], true
854 }
855 }
856 return
857 }
858
859 func (t *zipTweetItem) Media() (ret []tweetMedia) {
860 for _, m := range t.Entities.Media {
861 ret = append(ret, m)
862 }
863 ret = append(ret, getImagesFromURLs(t.Entities.URLs)...)
864 return
865 }
866
867 func (t *apiTweetItem) Media() (ret []tweetMedia) {
868 for _, m := range t.Entities.Media {
869 ret = append(ret, m)
870 }
871 ret = append(ret, getImagesFromURLs(t.Entities.URLs)...)
872 return
873 }
874
875 func (t *apiTweetItem) Liked() bool { return t.Favorited }
876 func (t *zipTweetItem) Liked() bool { return false }
877
// geo is the "geo" field of a tweet. latLong returns its two values in
// stored order as (lat, long).
type geo struct {
	Coordinates []float64 `json:"coordinates"`
}

// coords is the "coordinates" field of a tweet. latLong returns its two
// values swapped, i.e. it treats them as stored in (long, lat) order.
type coords struct {
	Coordinates []float64 `json:"coordinates"`
}
885
// entities is the "entities" field of a tweet decoded from the API.
type entities struct {
	Media []*media     `json:"media"`
	URLs  []*urlEntity `json:"urls"`
}

// zipEntities is the "entities" field of a tweet decoded from the archive
// zip; it differs from entities only in the type of its media items.
type zipEntities struct {
	Media []*zipMedia  `json:"media"`
	URLs  []*urlEntity `json:"urls"`
}
895
896
897
898
899
900
901
// urlEntity is one entry of a tweet's "urls" entities. Only DisplayURL is
// inspected by getImagesFromURLs.
type urlEntity struct {
	URL         string `json:"url"`
	ExpandedURL string `json:"expanded_url"`
	DisplayURL  string `json:"display_url"`
}

// imgurRx matches "imgur.com/" followed by at least three word characters
// and captures that run of word characters.
var imgurRx = regexp.MustCompile(`\bimgur\.com/(\w\w\w+)`)
909
910 func getImagesFromURLs(urls []*urlEntity) (ret []tweetMedia) {
911
912
913 for _, u := range urls {
914 if strings.HasPrefix(u.DisplayURL, "twitpic.com") {
915 ret = append(ret, twitpicImage(strings.TrimPrefix(u.DisplayURL, "twitpic.com/")))
916 continue
917 }
918 if m := imgurRx.FindStringSubmatch(u.DisplayURL); m != nil {
919 ret = append(ret, imgurImage(m[1]))
920 continue
921 }
922 }
923 return
924 }
925
926
// media is a media entity of a tweet decoded from the API. Sizes is keyed
// by size name; largestMediaSuffix appends ":"+name to the media URL.
type media struct {
	Id            string               `json:"id_str"`
	IdNum         int64                `json:"id"`
	MediaURL      string               `json:"media_url"`
	MediaURLHTTPS string               `json:"media_url_https"`
	Sizes         map[string]mediaSize `json:"sizes"`
	Type_         string               `json:"type"`
}
935
936
937
// zipMedia is a media entity of a tweet decoded from the archive zip.
// Unlike media, its Sizes field is a list rather than a map.
type zipMedia struct {
	Id            string      `json:"id_str"`
	IdNum         int64       `json:"id"`
	MediaURL      string      `json:"media_url"`
	MediaURLHTTPS string      `json:"media_url_https"`
	Sizes         []mediaSize `json:"sizes"`
}
945
946 func (m *media) URLs() []string {
947 u := m.baseURL()
948 if u == "" {
949 return nil
950 }
951 return []string{u + m.largestMediaSuffix(), u}
952 }
953
954 func (m *zipMedia) URLs() []string {
955
956
957 u := m.baseURL()
958 if u == "" {
959 return nil
960 }
961 return []string{
962 u + ":large",
963 u,
964 }
965 }
966
967 func (m *media) baseURL() string {
968 if v := m.MediaURLHTTPS; v != "" {
969 return v
970 }
971 return m.MediaURL
972 }
973
974 func (m *zipMedia) baseURL() string {
975 if v := m.MediaURLHTTPS; v != "" {
976 return v
977 }
978 return m.MediaURL
979 }
980
981 func (m *media) BaseFilename() string {
982 return path.Base(m.baseURL())
983 }
984
985 func (m *zipMedia) BaseFilename() string {
986 return path.Base(m.baseURL())
987 }
988
989 func (m *media) largestMediaSuffix() string {
990 bestPixels := 0
991 bestSuffix := ""
992 for k, sz := range m.Sizes {
993 if px := sz.W * sz.H; px > bestPixels {
994 bestPixels = px
995 bestSuffix = ":" + k
996 }
997 }
998 return bestSuffix
999 }
1000
// mediaSize describes one available size of a media item.
type mediaSize struct {
	W      int    `json:"w"` // width; multiplied with H to rank sizes by area
	H      int    `json:"h"` // height
	Resize string `json:"resize"`
}
1006
1007
// twitpicImage is a tweetMedia identified by the part of a display URL
// that follows "twitpic.com/".
type twitpicImage string

// BaseFilename returns the image identifier itself.
func (im twitpicImage) BaseFilename() string {
	return string(im)
}

// URLs returns the single large-size twitpic URL for the image.
func (im twitpicImage) URLs() []string {
	const prefix = "https://twitpic.com/show/large/"
	return []string{prefix + string(im)}
}
1015
1016
// imgurImage is a tweetMedia identified by the ID captured from an imgur
// display URL.
type imgurImage string

// BaseFilename returns the image ID itself.
func (im imgurImage) BaseFilename() string {
	return string(im)
}

// URLs returns the single i.imgur.com URL for the image, using a ".gif"
// extension.
func (im imgurImage) URLs() []string {
	const (
		host = "https://i.imgur.com/"
		ext  = ".gif"
	)
	return []string{host + string(im) + ext}
}
1025 }