1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
16
17 package server
18
19 import (
20 "archive/zip"
21 "bytes"
22 "context"
23 "errors"
24 "fmt"
25 "io"
26 "log"
27 "net/http"
28 "os"
29 "path/filepath"
30 "regexp"
31 "strings"
32 "time"
33 "unicode/utf8"
34
35 "go4.org/readerutil"
36 "perkeep.org/internal/httputil"
37 "perkeep.org/internal/magic"
38 "perkeep.org/pkg/blob"
39 "perkeep.org/pkg/blobserver"
40 "perkeep.org/pkg/cacher"
41 "perkeep.org/pkg/schema"
42 "perkeep.org/pkg/search"
43 )
44
// Constants shared by the single-file and zip download paths.
const (
	// oneYear is how far into the future the Expires header of a served
	// file is set.
	oneYear = 365 * 24 * time.Hour

	// downloadTimeLayout is the time.Format layout used to timestamp the
	// file name of a generated zip archive.
	downloadTimeLayout = "20060102150405"
)
49
// debugPack enables extra logging about the packed-file fast path. It is
// true when the CAMLI_DEBUG_X environment variable contains "packserve".
var debugPack = strings.Contains(os.Getenv("CAMLI_DEBUG_X"), "packserve")

// downloadPattern matches the URL path suffix handled by ServeHTTP:
//
//	$1: the blobref (validated by the handler)
//	$2: an optional "/..." remainder, which ServeHTTP does not use
var downloadPattern = regexp.MustCompile(`^download/([^/]+)(/.*)?$`)
59
// DownloadHandler is an http.Handler that serves file schema blobs, either
// one at a time (GET/HEAD, see ServeFile) or bundled into a zip archive
// (POST, see serveZip).
//
// NOTE(review): the unexported fields r and pathByRef hold per-request state
// on the handler itself; this looks unsafe if one handler value serves
// concurrent requests. Confirm against how the handler is instantiated.
type DownloadHandler struct {
	// Fetcher is the source of every blob read by the handler.
	Fetcher blob.Fetcher

	// Search, if non-nil, is queried by fileInfoPacked to find a file's
	// whole-content ref. When it is nil the packed fast path is skipped.
	Search *search.Handler

	// ForceMIME, if non-empty, is used as the file's MIME type instead of
	// sniffing it from the content (non-packed path only).
	ForceMIME string
	// forceInline makes ServeFile answer with "Content-Disposition: inline"
	// even when the request does not carry inline=1.
	forceInline bool

	// pathByRef maps a file's blob ref to its path inside the zip archive.
	// It is (re)created by serveZip, filled by checkFiles and read by zipFile.
	pathByRef map[blob.Ref]string

	// r is the request being served. It is set by ServeFile and serveZip
	// and handed to fileInfoPacked, which inspects its Range header.
	r *http.Request
}
81
82 type fileInfo struct {
83 mime string
84 name string
85 size int64
86 modtime time.Time
87 mode os.FileMode
88 rs io.ReadSeeker
89 close func() error
90 whyNot string
91 isDir bool
92 children []blob.Ref
93 }
94
// errNotDir is returned by dirInfo when the requested blob is a schema blob
// of any type other than directory. zipFile compares against it to fall back
// to treating the blob as a file.
var errNotDir = errors.New("not a directory")
96
97
98
99
100 func (dh *DownloadHandler) dirInfo(ctx context.Context, dir blob.Ref) (fi fileInfo, err error) {
101 rc, _, err := dh.Fetcher.Fetch(ctx, dir)
102 if err != nil {
103 return fi, fmt.Errorf("could not fetch %v: %v", dir, err)
104 }
105 b, err := schema.BlobFromReader(dir, rc)
106 rc.Close()
107 if err != nil {
108 return fi, fmt.Errorf("could not read %v as blob: %v", dir, err)
109 }
110 tp := b.Type()
111 if tp != "directory" {
112 return fi, errNotDir
113 }
114 dr, err := schema.NewDirReader(ctx, dh.Fetcher, dir)
115 if err != nil {
116 return fi, fmt.Errorf("could not open %v as directory: %v", dir, err)
117 }
118 children, err := dr.StaticSet(ctx)
119 if err != nil {
120 return fi, fmt.Errorf("could not get dir entries of %v: %v", dir, err)
121 }
122 return fileInfo{
123 isDir: true,
124 name: b.FileName(),
125 modtime: b.ModTime(),
126 children: children,
127 }, nil
128 }
129
130 func (dh *DownloadHandler) fileInfo(ctx context.Context, file blob.Ref) (fi fileInfo, packed bool, err error) {
131
132
133 rc, _, err := dh.Fetcher.Fetch(ctx, file)
134 if err != nil {
135 return fi, false, fmt.Errorf("could not fetch %v: %v", file, err)
136 }
137 b, err := schema.BlobFromReader(file, rc)
138 rc.Close()
139 if err != nil {
140 return fi, false, fmt.Errorf("could not read %v as blob: %v", file, err)
141 }
142 tp := b.Type()
143 if tp != schema.TypeFile {
144
145 var contents string
146 if tp == schema.TypeSymlink {
147 sf, _ := b.AsStaticFile()
148 sl, _ := sf.AsStaticSymlink()
149 contents = sl.SymlinkTargetString()
150 }
151 size := int64(len(contents))
152
153 rd := strings.NewReader(contents)
154 fi = fileInfo{
155 size: size,
156 modtime: b.ModTime(),
157 name: b.FileName(),
158 mode: b.FileMode(),
159 rs: readerutil.NewFakeSeeker(rd, size),
160 close: io.NopCloser(rd).Close,
161 }
162 return fi, false, nil
163 }
164
165
166 fi, ok := fileInfoPacked(ctx, dh.Search, dh.Fetcher, dh.r, file)
167 if debugPack {
168 log.Printf("download.go: fileInfoPacked: ok=%v, %+v", ok, fi)
169 }
170 if ok {
171 return fi, true, nil
172 }
173
174 fr, err := schema.NewFileReader(ctx, dh.Fetcher, file)
175 if err != nil {
176 return
177 }
178 mime := dh.ForceMIME
179 if mime == "" {
180 mime = magic.MIMETypeFromReaderAt(fr)
181 }
182 if mime == "" {
183 mime = "application/octet-stream"
184 }
185 return fileInfo{
186 mime: mime,
187 name: fr.FileName(),
188 size: fr.Size(),
189 modtime: fr.ModTime(),
190 mode: fr.FileMode(),
191 rs: fr,
192 close: fr.Close,
193 }, false, nil
194 }
195
196
197 func fileInfoPacked(ctx context.Context, sh *search.Handler, src blob.Fetcher, r *http.Request, file blob.Ref) (packFileInfo fileInfo, ok bool) {
198 if sh == nil {
199 return fileInfo{whyNot: "no search"}, false
200 }
201 wf, ok := src.(blobserver.WholeRefFetcher)
202 if !ok {
203 return fileInfo{whyNot: "fetcher type"}, false
204 }
205 if r != nil && r.Header.Get("Range") != "" {
206
207
208 return fileInfo{whyNot: "range header"}, false
209 }
210 des, err := sh.Describe(ctx, &search.DescribeRequest{BlobRef: file})
211 if err != nil {
212 log.Printf("ui: fileInfoPacked: skipping fast path due to error from search: %v", err)
213 return fileInfo{whyNot: "search error"}, false
214 }
215 db, ok := des.Meta[file.String()]
216 if !ok || db.File == nil {
217 return fileInfo{whyNot: "search index doesn't know file"}, false
218 }
219 fi := db.File
220 if !fi.WholeRef.Valid() {
221 return fileInfo{whyNot: "no wholeref from search index"}, false
222 }
223
224 offset := int64(0)
225 rc, wholeSize, err := wf.OpenWholeRef(fi.WholeRef, offset)
226 if err == os.ErrNotExist {
227 return fileInfo{whyNot: "WholeRefFetcher returned ErrNotexist"}, false
228 }
229 if wholeSize != fi.Size {
230 log.Printf("ui: fileInfoPacked: OpenWholeRef size %d != index size %d; ignoring fast path", wholeSize, fi.Size)
231 return fileInfo{whyNot: "WholeRefFetcher and index don't agree"}, false
232 }
233 if err != nil {
234 log.Printf("ui: fileInfoPacked: skipping fast path due to error from WholeRefFetcher (%T): %v", src, err)
235 return fileInfo{whyNot: "WholeRefFetcher error"}, false
236 }
237
238 var modtime time.Time
239 if !fi.ModTime.IsAnyZero() {
240 modtime = fi.ModTime.Time()
241 } else if !fi.Time.IsAnyZero() {
242 modtime = fi.Time.Time()
243 }
244
245
246
247
248 fr, err := schema.NewFileReader(ctx, src, file)
249 fr.Close()
250 if err != nil {
251 return fileInfo{whyNot: fmt.Sprintf("cannot open a file reader: %v", err)}, false
252 }
253 return fileInfo{
254 mime: fi.MIMEType,
255 name: fi.FileName,
256 size: fi.Size,
257 modtime: modtime,
258 mode: fr.FileMode(),
259 rs: readerutil.NewFakeSeeker(rc, fi.Size-offset),
260 close: rc.Close,
261 }, true
262 }
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279 func (dh *DownloadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
280 if r.Method == "POST" {
281 dh.serveZip(w, r)
282 return
283 }
284
285 suffix := httputil.PathSuffix(r)
286 m := downloadPattern.FindStringSubmatch(suffix)
287 if m == nil {
288 httputil.ErrorRouting(w, r)
289 return
290 }
291 file, ok := blob.Parse(m[1])
292 if !ok {
293 http.Error(w, "Invalid blobref", http.StatusBadRequest)
294 return
295 }
296
297 dh.ServeFile(w, r, file)
298 }
299
300 func (dh *DownloadHandler) ServeFile(w http.ResponseWriter, r *http.Request, file blob.Ref) {
301 ctx := r.Context()
302 if r.Method != "GET" && r.Method != "HEAD" {
303 http.Error(w, "Invalid download method", http.StatusBadRequest)
304 return
305 }
306
307 if r.Header.Get("If-Modified-Since") != "" {
308
309 w.WriteHeader(http.StatusNotModified)
310 return
311 }
312
313 dh.r = r
314 fi, packed, err := dh.fileInfo(ctx, file)
315 if err != nil {
316 http.Error(w, "Can't serve file: "+err.Error(), http.StatusInternalServerError)
317 return
318 }
319 if !fi.mode.IsRegular() {
320 http.Error(w, "Not a regular file", http.StatusBadRequest)
321 return
322 }
323 defer fi.close()
324
325 h := w.Header()
326 h.Set("Content-Length", fmt.Sprint(fi.size))
327 h.Set("Expires", time.Now().Add(oneYear).Format(http.TimeFormat))
328 if packed {
329 h.Set("X-Camlistore-Packed", "1")
330 }
331
332 fileName := func(ext string) string {
333 if fi.name != "" {
334 return fi.name
335 }
336 return "file-" + file.String() + ext
337 }
338
339 if r.FormValue("inline") == "1" || dh.forceInline {
340
341 if fi.mime == "application/octet-stream" {
342
343
344
345 text, err := isText(fi.rs)
346 if err != nil {
347
348 httputil.ServeError(w, r, fmt.Errorf("cannot verify MIME type of file: %v", err))
349 return
350 }
351 if text {
352 fi.mime = "text/plain"
353 }
354 }
355 h.Set("Content-Disposition", "inline")
356 } else {
357 w.Header().Set("Content-Disposition", "attachment; filename="+fileName(".dat"))
358 }
359 h.Set("Content-Type", fi.mime)
360
361 if r.Method == "HEAD" && r.FormValue("verifycontents") != "" {
362 vbr, ok := blob.Parse(r.FormValue("verifycontents"))
363 if !ok {
364 return
365 }
366 hash := vbr.Hash()
367 if hash == nil {
368 return
369 }
370 io.Copy(hash, fi.rs)
371 if vbr.HashMatches(hash) {
372 w.Header().Set("X-Camli-Contents", vbr.String())
373 }
374 return
375 }
376
377 http.ServeContent(w, r, "", time.Now(), fi.rs)
378 }
379
380
// isText reports whether the leading bytes of rs (at most 1e6) are valid
// UTF-8. An empty input counts as text.
//
// rs is always rewound to its start before returning; a failure to rewind is
// reported in err unless an earlier read error is already being returned.
//
// When the input is longer than the sample, the cut may fall in the middle
// of a multi-byte rune. That trailing partial rune is ignored so that large
// text files are not misclassified as binary. A side effect is that an input
// of exactly 1e6 bytes ending in a truncated sequence is also accepted.
func isText(rs io.ReadSeeker) (ok bool, err error) {
	const sniffLen = 1e6
	defer func() {
		if _, seekErr := rs.Seek(0, io.SeekStart); seekErr != nil && err == nil {
			err = seekErr
		}
	}()
	var buf bytes.Buffer
	n, copyErr := io.CopyN(&buf, rs, sniffLen)
	// io.EOF only means the input is shorter than the sample size.
	if copyErr != nil && copyErr != io.EOF {
		return false, copyErr
	}
	sample := buf.Bytes()
	if n == sniffLen {
		sample = trimPartialRune(sample)
	}
	return utf8.Valid(sample), nil
}

// trimPartialRune returns b without a trailing incomplete UTF-8 sequence, if
// there is one. Complete runes and invalid bytes are left untouched.
func trimPartialRune(b []byte) []byte {
	// An incomplete sequence is at most utf8.UTFMax-1 bytes long, so only
	// that many trailing bytes need to be examined for a lead byte.
	for i := len(b) - 1; i >= 0 && len(b)-i < utf8.UTFMax; i-- {
		if !utf8.RuneStart(b[i]) {
			continue
		}
		if utf8.FullRune(b[i:]) {
			return b
		}
		return b[:i]
	}
	return b
}
397
398
399
400
401
402 func (dh *DownloadHandler) statFiles(refs []blob.Ref) error {
403 statter, ok := dh.Fetcher.(blobserver.BlobStatter)
404 if !ok {
405 return fmt.Errorf("DownloadHandler.Fetcher %T is not a BlobStatter", dh.Fetcher)
406 }
407 statted := make(map[blob.Ref]bool)
408
409 err := statter.StatBlobs(context.TODO(), refs, func(sb blob.SizedRef) error {
410 statted[sb.Ref] = true
411 return nil
412 })
413 if err != nil {
414 log.Printf("Error statting blob files for download archive: %v", err)
415 return fmt.Errorf("error looking for files")
416 }
417 for _, v := range refs {
418 if _, ok := statted[v]; !ok {
419 return fmt.Errorf("%q was not found", v)
420 }
421 }
422 return nil
423 }
424
// allowedFileTypes is the set of non-directory schema types that checkFiles
// accepts for inclusion in a zip archive. Directories are not listed here;
// checkFiles handles them separately by recursing into their entries.
var allowedFileTypes = map[schema.CamliType]bool{
	schema.TypeFile:    true,
	schema.TypeSymlink: true,
	schema.TypeFIFO:    true,
	schema.TypeSocket:  true,
}
431
432
433
434
435
436 func (dh *DownloadHandler) checkFiles(ctx context.Context, parentPath string, fileRefs []blob.Ref) error {
437
438 for _, br := range fileRefs {
439 rc, _, err := dh.Fetcher.Fetch(ctx, br)
440 if err != nil {
441 return fmt.Errorf("could not fetch %v: %v", br, err)
442 }
443 b, err := schema.BlobFromReader(br, rc)
444 rc.Close()
445 if err != nil {
446 return fmt.Errorf("could not read %v as blob: %v", br, err)
447 }
448 tp := b.Type()
449 if _, ok := allowedFileTypes[tp]; !ok && tp != schema.TypeDirectory {
450 return fmt.Errorf("%v not a supported file or directory type: %q", br, tp)
451 }
452 if tp == schema.TypeDirectory {
453 dr, err := b.NewDirReader(ctx, dh.Fetcher)
454 if err != nil {
455 return fmt.Errorf("could not open %v as directory: %v", br, err)
456 }
457 children, err := dr.StaticSet(ctx)
458 if err != nil {
459 return fmt.Errorf("could not get dir entries of %v: %v", br, err)
460 }
461 if err := dh.checkFiles(ctx, filepath.Join(parentPath, b.FileName()), children); err != nil {
462 return err
463 }
464 continue
465 }
466 if tp != schema.TypeFile {
467
468
469 dh.pathByRef[br] = filepath.Join(parentPath, b.FileName())
470 continue
471 }
472 fr, err := b.NewFileReader(dh.Fetcher)
473 if err != nil {
474 return fmt.Errorf("could not open %v: %v", br, err)
475 }
476 _, err = io.Copy(io.Discard, fr)
477 fr.Close()
478 if err != nil {
479 return fmt.Errorf("could not read %v: %v", br, err)
480 }
481 dh.pathByRef[br] = filepath.Join(parentPath, b.FileName())
482 }
483 return nil
484 }
485
486
487
488 func (dh *DownloadHandler) serveZip(w http.ResponseWriter, r *http.Request) {
489 ctx := r.Context()
490 if r.Method != "POST" {
491 http.Error(w, "Invalid download method", http.StatusBadRequest)
492 return
493 }
494
495 filesValue := r.FormValue("files")
496 if filesValue == "" {
497 http.Error(w, "No file blobRefs specified", http.StatusBadRequest)
498 return
499 }
500 files := strings.Split(filesValue, ",")
501
502 var refs []blob.Ref
503 for _, file := range files {
504 br, ok := blob.Parse(file)
505 if !ok {
506 http.Error(w, fmt.Sprintf("%q is not a valid blobRef", file), http.StatusBadRequest)
507 return
508 }
509 refs = append(refs, br)
510 }
511
512
513
514 var allRefs map[blob.Ref]string
515 _, ok := (dh.Fetcher).(*cacher.CachingFetcher)
516 if ok {
517
518
519
520 dh.pathByRef = make(map[blob.Ref]string, len(refs))
521 err := dh.checkFiles(ctx, "", refs)
522 if err != nil {
523 http.Error(w, err.Error(), http.StatusInternalServerError)
524 return
525 }
526 allRefs = dh.pathByRef
527 } else {
528 _, ok := dh.Fetcher.(blobserver.BlobStatter)
529 if ok {
530 if err := dh.statFiles(refs); err != nil {
531 http.Error(w, err.Error(), http.StatusInternalServerError)
532 return
533 }
534 }
535
536
537
538
539
540 allRefs = make(map[blob.Ref]string, len(refs))
541 for _, v := range refs {
542 allRefs[v] = ""
543 }
544 }
545
546 h := w.Header()
547 h.Set("Content-Type", "application/zip")
548 zipName := "camli-download-" + time.Now().Format(downloadTimeLayout) + ".zip"
549 h.Set("Content-Disposition", "attachment; filename="+zipName)
550 zw := zip.NewWriter(w)
551 dh.r = r
552 for br := range allRefs {
553 if err := dh.zipFile(ctx, "", br, zw); err != nil {
554 log.Printf("error zipping %v: %v", br, err)
555
556 panic(http.ErrAbortHandler)
557 }
558 }
559 if err := zw.Close(); err != nil {
560 log.Printf("error closing zip stream: %v", err)
561 panic(http.ErrAbortHandler)
562 }
563 }
564
565
566
567
568
569 func (dh *DownloadHandler) zipFile(ctx context.Context, parentPath string, br blob.Ref, zw *zip.Writer) error {
570 if len(dh.pathByRef) == 0 {
571
572
573 di, err := dh.dirInfo(ctx, br)
574 if err != nil && err != errNotDir {
575 return err
576 }
577 if di.isDir {
578 for _, v := range di.children {
579 if err := dh.zipFile(ctx, filepath.Join(parentPath, di.name), v, zw); err != nil {
580 return err
581 }
582 }
583 return nil
584 }
585 }
586 fi, _, err := dh.fileInfo(ctx, br)
587 if err != nil {
588 return err
589 }
590 defer fi.close()
591 filename, ok := dh.pathByRef[br]
592 if !ok {
593
594 filename = filepath.Join(parentPath, fi.name)
595 }
596 zh := &zip.FileHeader{
597 Name: filename,
598 Method: zip.Store,
599 }
600 zh.SetModTime(fi.modtime)
601 zh.SetMode(fi.mode)
602 zfh, err := zw.CreateHeader(zh)
603 if err != nil {
604 return err
605 }
606 _, err = io.Copy(zfh, fi.rs)
607 if err != nil {
608 return err
609 }
610 return nil
611 }