     1	/*
     2	Copyright 2011 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	// Package schema manipulates Perkeep schema blobs.
    18	//
    19	// A schema blob is a JSON-encoded blob that describes other blobs.
    20	// See documentation in Perkeep's doc/schema/ directory.
    21	package schema // import "perkeep.org/pkg/schema"
    22	
    23	import (
    24		"bytes"
    25		"context"
    26		"crypto/rand"
    27		"encoding/base64"
    28		"encoding/json"
    29		"errors"
    30		"fmt"
    31		"hash"
    32		"io"
    33		"log"
    34		"os"
    35		"regexp"
    36		"strconv"
    37		"strings"
    38		"sync"
    39		"time"
    40		"unicode/utf8"
    41	
    42		"github.com/bradfitz/latlong"
    43		"perkeep.org/pkg/blob"
    44	
    45		"github.com/rwcarlsen/goexif/exif"
    46		"github.com/rwcarlsen/goexif/tiff"
    47		"go4.org/strutil"
    48		"go4.org/types"
    49	)
    50	
    51	func init() {
    52		// Intern common strings used by schema blobs (camliType values) to reduce
    53		// memory usage in the index, which looks them up via strutil.StringFromBytes.
    54		strutil.RegisterCommonString(
    55			"bytes",
    56			"claim",
    57			"directory",
    58			"file",
    59			"permanode",
    60			"share",
    61			"static-set",
    62			"symlink",
    63		)
    64	}
    65	
    66	// MaxSchemaBlobSize represents the upper bound for how large
    67	// a schema blob may be.
    68	const MaxSchemaBlobSize = 1 << 20
    69	
    70	var (
    71		ErrNoCamliVersion = errors.New("schema: no camliVersion key in map")
    72	)
    73	
    74	var clockNow = time.Now
    75	
    76	type StatHasher interface {
    77		Lstat(fileName string) (os.FileInfo, error)
    78		Hash(fileName string) (blob.Ref, error)
    79	}
    80	
    81	// File is the interface returned when opening a DirectoryEntry that
    82	// is a regular file.
    83	type File interface {
    84		io.Closer
    85		io.ReaderAt
    86		io.Reader
    87		Size() int64
    88	}
    89	
    90	// Directory is a read-only interface to a "directory" schema blob.
    91	type Directory interface {
    92		// Readdir reads the contents of the directory associated with dr
    93		// and returns a slice of up to n DirectoryEntry structures.
    94		// Subsequent calls on the same directory will yield further
    95		// DirectoryEntries.
    96		// If n > 0, Readdir returns at most n DirectoryEntry structures. In
    97		// this case, if Readdir returns an empty slice, it will return
    98		// a non-nil error explaining why. At the end of a directory,
    99		// the error is io.EOF.
   100		// If n <= 0, Readdir returns all the DirectoryEntries from the
   101		// directory in a single slice. In this case, if Readdir succeeds
   102		// (reads all the way to the end of the directory), it returns the
   103		// slice and a nil error. If it encounters an error before the
   104		// end of the directory, Readdir returns the DirectoryEntries read
   105		// until that point and a non-nil error.
   106		Readdir(ctx context.Context, n int) ([]DirectoryEntry, error)
   107	}
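
// Editor's sketch (not part of the original file): under the Readdir contract
// documented above, a caller might page through a Directory in fixed-size
// batches like this. The helper name and batch size are hypothetical.
func exampleReadDirInBatches(ctx context.Context, dir Directory) ([]DirectoryEntry, error) {
	const batchSize = 100 // hypothetical page size
	var all []DirectoryEntry
	for {
		ents, err := dir.Readdir(ctx, batchSize)
		all = append(all, ents...)
		if err == io.EOF {
			return all, nil // reached the end of the directory
		}
		if err != nil {
			return all, err
		}
	}
}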
   108	
   109	type Symlink interface {
   110		// .. TODO
   111	}
   112	
   113	// FIFO is the read-only interface to a "fifo" schema blob.
   114	type FIFO interface {
   115		// .. TODO
   116	}
   117	
   118	// Socket is the read-only interface to a "socket" schema blob.
   119	type Socket interface {
   120		// .. TODO
   121	}
   122	
   123	// DirectoryEntry is a read-only interface to an entry in a (static)
   124	// directory.
   125	type DirectoryEntry interface {
   126		// CamliType returns the schema blob's "camliType" field.
   127		// This may be "file", "directory", "symlink", or other more
   128		// obscure types added in the future.
   129		CamliType() string
   130	
   131		FileName() string
   132		BlobRef() blob.Ref
   133	
   134		File(ctx context.Context) (File, error)           // if camliType is "file"
   135		Directory(ctx context.Context) (Directory, error) // if camliType is "directory"
   136		Symlink() (Symlink, error)                        // if camliType is "symlink"
   137		FIFO() (FIFO, error)                              // if camliType is "fifo"
   138		Socket() (Socket, error)                          // If camliType is "socket"
   139	}
   140	
   141	// dirEntry is the default implementation of DirectoryEntry
   142	type dirEntry struct {
   143		ss      superset
   144		fetcher blob.Fetcher
   145		fr      *FileReader // or nil if not a file
   146		dr      *DirReader  // or nil if not a directory
   147	}
   148	
   149	// A SearchQuery must be of type *search.SearchQuery.
   150	// This type breaks an otherwise-circular dependency.
   151	type SearchQuery interface{}
   152	
   153	func (de *dirEntry) CamliType() string {
   154		return de.ss.Type
   155	}
   156	
   157	func (de *dirEntry) FileName() string {
   158		return de.ss.FileNameString()
   159	}
   160	
   161	func (de *dirEntry) BlobRef() blob.Ref {
   162		return de.ss.BlobRef
   163	}
   164	
   165	func (de *dirEntry) File(ctx context.Context) (File, error) {
   166		if de.fr == nil {
   167			if de.ss.Type != "file" {
   168				return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "file")
   169			}
   170			fr, err := NewFileReader(ctx, de.fetcher, de.ss.BlobRef)
   171			if err != nil {
   172				return nil, err
   173			}
   174			de.fr = fr
   175		}
   176		return de.fr, nil
   177	}
   178	
   179	func (de *dirEntry) Directory(ctx context.Context) (Directory, error) {
   180		if de.dr == nil {
   181			if de.ss.Type != "directory" {
   182				return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "directory")
   183			}
   184			dr, err := NewDirReader(ctx, de.fetcher, de.ss.BlobRef)
   185			if err != nil {
   186				return nil, err
   187			}
   188			de.dr = dr
   189		}
   190		return de.dr, nil
   191	}
   192	
   193	func (de *dirEntry) Symlink() (Symlink, error) {
   194		return 0, errors.New("TODO: Symlink not implemented")
   195	}
   196	
   197	func (de *dirEntry) FIFO() (FIFO, error) {
   198		return 0, errors.New("TODO: FIFO not implemented")
   199	}
   200	
   201	func (de *dirEntry) Socket() (Socket, error) {
   202		return 0, errors.New("TODO: Socket not implemented")
   203	}
   204	
   205	// newDirectoryEntry takes a superset and returns a DirectoryEntry if
   206	// the superset is valid and represents an entry in a directory. It
   207	// must be of type "file", "directory", "symlink", "fifo" or "socket".
   208	// TODO: "char", "block", probably. later.
   209	func newDirectoryEntry(fetcher blob.Fetcher, ss *superset) (DirectoryEntry, error) {
   210		if ss == nil {
   211			return nil, errors.New("ss was nil")
   212		}
   213		if !ss.BlobRef.Valid() {
   214			return nil, errors.New("ss.BlobRef was invalid")
   215		}
   216		switch ss.Type {
   217		case "file", "directory", "symlink", "fifo", "socket":
   218			// Okay
   219		default:
   220			return nil, fmt.Errorf("invalid DirectoryEntry camliType of %q", ss.Type)
   221		}
   222		de := &dirEntry{ss: *ss, fetcher: fetcher} // defensive copy
   223		return de, nil
   224	}
   225	
   226	// NewDirectoryEntryFromBlobRef takes a BlobRef and returns a
   227	// DirectoryEntry if the BlobRef contains a type "file", "directory",
   228	// "symlink", "fifo" or "socket".
   229	// TODO: "char", "block", probably. later.
   230	func NewDirectoryEntryFromBlobRef(ctx context.Context, fetcher blob.Fetcher, blobRef blob.Ref) (DirectoryEntry, error) {
   231		ss := new(superset)
   232		err := ss.setFromBlobRef(ctx, fetcher, blobRef)
   233		if err != nil {
   234			return nil, fmt.Errorf("schema/filereader: can't fill superset: %v", err)
   235		}
   236		return newDirectoryEntry(fetcher, ss)
   237	}
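
// Editor's sketch (not part of the original file): one way to combine
// NewDirectoryEntryFromBlobRef with the DirectoryEntry accessors above,
// branching on the entry's camliType. The function name and the string
// formatting are hypothetical.
func exampleDescribeEntry(ctx context.Context, fetcher blob.Fetcher, ref blob.Ref) (string, error) {
	de, err := NewDirectoryEntryFromBlobRef(ctx, fetcher, ref)
	if err != nil {
		return "", err
	}
	switch de.CamliType() {
	case "file":
		f, err := de.File(ctx)
		if err != nil {
			return "", err
		}
		defer f.Close()
		return fmt.Sprintf("file %q (%d bytes)", de.FileName(), f.Size()), nil
	case "directory":
		if _, err := de.Directory(ctx); err != nil {
			return "", err
		}
		return fmt.Sprintf("directory %q", de.FileName()), nil
	default:
		return fmt.Sprintf("%s %q", de.CamliType(), de.FileName()), nil
	}
}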
   238	
   239	// superset represents the superset of common Perkeep JSON schema
   240	// keys as a convenient json.Unmarshal target.
   241	// TODO(bradfitz): unexport this type. Getting too gross. Move to schema.Blob
   242	type superset struct {
   243		// BlobRef isn't for a particular metadata blob field, but included
   244		// for convenience.
   245		BlobRef blob.Ref
   246	
   247		Version int    `json:"camliVersion"`
   248		Type    string `json:"camliType"`
   249	
   250		Signer blob.Ref `json:"camliSigner"`
   251		Sig    string   `json:"camliSig"`
   252	
   253		ClaimType string         `json:"claimType"`
   254		ClaimDate types.Time3339 `json:"claimDate"`
   255	
   256		Permanode blob.Ref `json:"permaNode"`
   257		Attribute string   `json:"attribute"`
   258		Value     string   `json:"value"`
   259	
   260		// FileName and FileNameBytes represent one of the two
   261		// representations of file names in schema blobs.  They should
   262		// not be accessed directly.  Use the FileNameString accessor
   263		// instead, which also sanitizes malicious values.
   264		FileName      string        `json:"fileName"`
   265		FileNameBytes []interface{} `json:"fileNameBytes"`
   266	
   267		SymlinkTarget      string        `json:"symlinkTarget"`
   268		SymlinkTargetBytes []interface{} `json:"symlinkTargetBytes"`
   269	
   270		UnixPermission string `json:"unixPermission"`
   271		UnixOwnerId    int    `json:"unixOwnerId"`
   272		UnixOwner      string `json:"unixOwner"`
   273		UnixGroupId    int    `json:"unixGroupId"`
   274		UnixGroup      string `json:"unixGroup"`
   275		UnixMtime      string `json:"unixMtime"`
   276		UnixCtime      string `json:"unixCtime"`
   277		UnixAtime      string `json:"unixAtime"`
   278	
   279		// Parts are references to the data chunks of a regular file (or a "bytes" schema blob).
   280		// See doc/schema/bytes.txt and doc/schema/files/file.txt.
   281		Parts []*BytesPart `json:"parts"`
   282	
   283		Entries   blob.Ref   `json:"entries"`   // for directories, a blobref to a static-set
   284		Members   []blob.Ref `json:"members"`   // for static sets (for directory static-sets: blobrefs to child dirs/files)
   285		MergeSets []blob.Ref `json:"mergeSets"` // each is a "sub static-set", that has either Members or MergeSets. For large dirs.
   286	
   287		// Search allows a "share" blob to share an entire search. Contrast with "target".
   288		Search SearchQuery `json:"search"`
   289		// Target is a "share" blob's target (the thing being shared)
   290		// Or it is the object being deleted in a DeleteClaim claim.
   291		Target blob.Ref `json:"target"`
   292		// Transitive is a property of a "share" blob.
   293		Transitive bool `json:"transitive"`
   294		// AuthType is a "share" blob's authentication type that is required.
   295		// Currently (2013-01-02) just "haveref" (if you know the share's blobref,
   296		// you get access: the secret URL model)
   297		AuthType string         `json:"authType"`
   298		Expires  types.Time3339 `json:"expires"` // or zero for no expiration
   299	}
   300	
   301	func parseSuperset(r io.Reader) (*superset, error) {
   302		var ss superset
   303		if err := json.NewDecoder(io.LimitReader(r, MaxSchemaBlobSize)).Decode(&ss); err != nil {
   304			return nil, err
   305		}
   306		return &ss, nil
   307	}
   308	
   309	// BlobFromReader returns a new Blob from the provided Reader r,
   310	// which should be the body of the provided blobref.
   311	// Note: the hash checksum is not verified.
   312	func BlobFromReader(ref blob.Ref, r io.Reader) (*Blob, error) {
   313		if !ref.Valid() {
   314			return nil, errors.New("schema.BlobFromReader: invalid blobref")
   315		}
   316		var buf bytes.Buffer
   317		tee := io.TeeReader(r, &buf)
   318		ss, err := parseSuperset(tee)
   319		if err != nil {
   320			return nil, err
   321		}
   322		var wb [16]byte
   323		afterObj := 0
   324		for {
   325			n, err := tee.Read(wb[:])
   326			afterObj += n
   327			for i := 0; i < n; i++ {
   328				if !isASCIIWhite(wb[i]) {
   329					return nil, fmt.Errorf("invalid bytes after JSON schema blob in %v", ref)
   330				}
   331			}
   332			if afterObj > MaxSchemaBlobSize {
   333				break
   334			}
   335			if err == io.EOF {
   336				break
   337			}
   338			if err != nil {
   339				return nil, err
   340			}
   341		}
   342		json := buf.String()
   343		if len(json) > MaxSchemaBlobSize {
   344			return nil, fmt.Errorf("schema: metadata blob %v is over expected limit; size=%d", ref, len(json))
   345		}
   346		return &Blob{ref, json, ss}, nil
   347	}
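
// Editor's sketch (not part of the original file): feeding BlobFromReader from
// an in-memory schema blob. The JSON literal is hypothetical, and
// blob.RefFromString is assumed to compute the blobref of the given contents.
func exampleBlobFromString() (*Blob, error) {
	const contents = `{"camliVersion": 1,
  "camliType": "permanode",
  "random": "example"
}`
	ref := blob.RefFromString(contents) // note: BlobFromReader does not verify the checksum
	return BlobFromReader(ref, strings.NewReader(contents))
}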
   348	
   349	func isASCIIWhite(b byte) bool {
   350		switch b {
   351		case ' ', '\t', '\r', '\n':
   352			return true
   353		}
   354		return false
   355	}
   356	
   357	// BytesPart is the type representing one of the "parts" in a "file"
   358	// or "bytes" JSON schema.
   359	//
   360	// See doc/schema/bytes.txt and doc/schema/files/file.txt.
   361	type BytesPart struct {
   362		// Size is the number of bytes that this part contributes to the overall segment.
   363		Size uint64 `json:"size"`
   364	
   365		// At most one of BlobRef or BytesRef may be non-zero
   366		// (Valid); it's illegal for both to be set.
   367		// If neither is set, this BytesPart represents Size zero bytes.
   368		// BlobRef refers to raw bytes. BytesRef references a "bytes" schema blob.
   369		BlobRef  blob.Ref `json:"blobRef,omitempty"`
   370		BytesRef blob.Ref `json:"bytesRef,omitempty"`
   371	
   372		// Offset optionally specifies the offset into BlobRef to skip
   373		// when reading Size bytes.
   374		Offset uint64 `json:"offset,omitempty"`
   375	}
   376	
   377	// stringFromMixedArray joins a slice of either strings or float64
   378	// values (as retrieved from JSON decoding) into a string.  These are
   379	// used for non-UTF8 filenames in "fileNameBytes" fields.  The strings
   380	// are UTF-8 segments and the float64s (actually uint8 values) are
   381	// byte values.
   382	func stringFromMixedArray(parts []interface{}) string {
   383		var buf bytes.Buffer
   384		for _, part := range parts {
   385			if s, ok := part.(string); ok {
   386				buf.WriteString(s)
   387				continue
   388			}
   389			if num, ok := part.(float64); ok {
   390				buf.WriteByte(byte(num))
   391				continue
   392			}
   393		}
   394		return buf.String()
   395	}
   396	
   397	// mixedArrayFromString is the inverse of stringFromMixedArray. It
   398	// splits a string into a series of UTF-8 string segments and
   399	// individual non-UTF-8 bytes.
   400	func mixedArrayFromString(s string) (parts []interface{}) {
   401		for len(s) > 0 {
   402			if n := utf8StrLen(s); n > 0 {
   403				parts = append(parts, s[:n])
   404				s = s[n:]
   405			} else {
   406				parts = append(parts, s[0])
   407				s = s[1:]
   408			}
   409		}
   410		return parts
   411	}
   412	
   413	// utf8StrLen returns how many prefix bytes of s are valid UTF-8.
   414	func utf8StrLen(s string) int {
   415		for i, r := range s {
   416			if r == utf8.RuneError {
   417				// The RuneError value can be an error
   418				// sentinel value (if it's size 1) or the same
   419				// value encoded properly. Decode it to see if
   420				// it's the 1 byte sentinel value.
   421				_, size := utf8.DecodeRuneInString(s[i:])
   422				if size == 1 {
   423					return i
   424				}
   425			}
   426		}
   427		return len(s)
   428	}
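
// Editor's sketch (not part of the original file): the two helpers above are
// inverses across a JSON encode/decode cycle, which is how "fileNameBytes"
// values actually travel; a non-UTF-8 byte is stored as a JSON number and
// decodes back as a float64.
func exampleMixedArrayRoundTrip() (bool, error) {
	name := "photo-" + string([]byte{0xff}) + ".jpg" // invalid UTF-8 byte in the middle
	encoded, err := json.Marshal(mixedArrayFromString(name))
	if err != nil {
		return false, err
	}
	var decoded []interface{}
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		return false, err
	}
	return stringFromMixedArray(decoded) == name, nil // true: lossless after the JSON cycle
}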
   429	
   430	func (ss *superset) SumPartsSize() (size uint64) {
   431		for _, part := range ss.Parts {
   432			size += uint64(part.Size)
   433		}
   434		return size
   435	}
   436	
   437	func (ss *superset) SymlinkTargetString() string {
   438		if ss.SymlinkTarget != "" {
   439			return ss.SymlinkTarget
   440		}
   441		return stringFromMixedArray(ss.SymlinkTargetBytes)
   442	}
   443	
   444	// FileNameString returns the schema blob's base filename.
   445	//
   446	// If the fileName field of the blob accidentally or maliciously
   447	// contains a slash, this function returns an empty string instead.
   448	func (ss *superset) FileNameString() string {
   449		v := ss.FileName
   450		if v == "" {
   451			v = stringFromMixedArray(ss.FileNameBytes)
   452		}
   453		if v != "" {
   454			if strings.Contains(v, "/") {
   455				// Bogus schema blob; ignore.
   456				return ""
   457			}
   458			if strings.Contains(v, "\\") {
   459				// Bogus schema blob; ignore.
   460				return ""
   461			}
   462		}
   463		return v
   464	}
   465	
   466	func (ss *superset) HasFilename(name string) bool {
   467		return ss.FileNameString() == name
   468	}
   469	
   470	func (b *Blob) FileMode() os.FileMode {
   471		// TODO: move this to a different type, off *Blob
   472		return b.ss.FileMode()
   473	}
   474	
   475	func (ss *superset) FileMode() os.FileMode {
   476		var mode os.FileMode
   477		hasPerm := ss.UnixPermission != ""
   478		if hasPerm {
   479			m64, err := strconv.ParseUint(ss.UnixPermission, 8, 64)
   480			if err == nil {
   481				mode = mode | os.FileMode(m64)
   482			}
   483		}
   484	
   485		// TODO: add other types (block, char, etc)
   486		switch ss.Type {
   487		case "directory":
   488			mode = mode | os.ModeDir
   489		case "file":
   490			// No extra bit.
   491		case "symlink":
   492			mode = mode | os.ModeSymlink
   493		case "fifo":
   494			mode = mode | os.ModeNamedPipe
   495		case "socket":
   496			mode = mode | os.ModeSocket
   497		}
   498		if !hasPerm {
   499			switch ss.Type {
   500			case "directory":
   501				mode |= 0755
   502			default:
   503				mode |= 0644
   504			}
   505		}
   506		return mode
   507	}
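
// Editor's sketch (not part of the original file): how the FileMode mapping
// above combines the octal "unixPermission" string with the camliType-derived
// mode bits. The field values are hypothetical.
func exampleFileModeForSymlink() os.FileMode {
	ss := &superset{
		Type:           "symlink",
		UnixPermission: "0777",
	}
	return ss.FileMode() // os.ModeSymlink | 0777
}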
   508	
   509	// MapUid returns the most appropriate mapping from this file's owner
   510	// to the local machine's owner, trying first a match by name,
   511	// followed by just mapping the number through directly.
   512	func (b *Blob) MapUid() int { return b.ss.MapUid() }
   513	
   514	// MapGid returns the most appropriate mapping from this file's group
   515	// to the local machine's group, trying first a match by name,
   516	// followed by just mapping the number through directly.
   517	func (b *Blob) MapGid() int { return b.ss.MapGid() }
   518	
   519	func (ss *superset) MapUid() int {
   520		if ss.UnixOwner != "" {
   521			uid, ok := getUidFromName(ss.UnixOwner)
   522			if ok {
   523				return uid
   524			}
   525		}
   526		return ss.UnixOwnerId // TODO: will be 0 if unset, which isn't ideal
   527	}
   528	
   529	func (ss *superset) MapGid() int {
   530		if ss.UnixGroup != "" {
   531			gid, ok := getGidFromName(ss.UnixGroup)
   532			if ok {
   533				return gid
   534			}
   535		}
   536		return ss.UnixGroupId // TODO: will be 0 if unset, which isn't ideal
   537	}
   538	
   539	func (ss *superset) ModTime() time.Time {
   540		if ss.UnixMtime == "" {
   541			return time.Time{}
   542		}
   543		t, err := time.Parse(time.RFC3339, ss.UnixMtime)
   544		if err != nil {
   545			return time.Time{}
   546		}
   547		return t
   548	}
   549	
   550	var DefaultStatHasher = &defaultStatHasher{}
   551	
   552	type defaultStatHasher struct{}
   553	
   554	func (d *defaultStatHasher) Lstat(fileName string) (os.FileInfo, error) {
   555		return os.Lstat(fileName)
   556	}
   557	
   558	func (d *defaultStatHasher) Hash(fileName string) (blob.Ref, error) {
   559		h := blob.NewHash()
   560		file, err := os.Open(fileName)
   561		if err != nil {
   562			return blob.Ref{}, err
   563		}
   564		defer file.Close()
   565		_, err = io.Copy(h, file)
   566		if err != nil {
   567			return blob.Ref{}, err
   568		}
   569		return blob.RefFromHash(h), nil
   570	}
   571	
   572	// maximum number of static-set members in a static-set schema. As noted in
   573	// https://github.com/camlistore/camlistore/issues/924 , 33k members result in a
   574	// 1.7MB blob, so 10k members seems reasonable to stay under the MaxSchemaBlobSize (1MB)
   575	// limit. This is not a const, so we can lower it during tests and test the logic
   576	// without having to create thousands of blobs.
   577	var maxStaticSetMembers = 10000
   578	
   579	// NewStaticSet returns the "static-set" schema for a directory. Its members
   580	// should be populated with SetStaticSetMembers.
   581	func NewStaticSet() *Builder {
   582		return base(1, "static-set")
   583	}
   584	
   585	// SetStaticSetMembers sets the given members as the static-set members of this
   586	// builder. If the members are so numerous that they would not fit on a schema
   587	// blob, they are spread (recursively, if needed) onto sub static-sets, in which
   588	// case these subsets are set as "mergeSets" of this builder. All the created
   589	// subsets are returned, so the caller can upload them along with the top
   590	// static-set created from this builder.
   591	// SetStaticSetMembers panics if bb's type is not "static-set".
   592	func (bb *Builder) SetStaticSetMembers(members []blob.Ref) []*Blob {
   593		if bb.Type() != "static-set" {
   594			panic("called SetStaticSetMembers on non static-set")
   595		}
   596	
   597		if len(members) <= maxStaticSetMembers {
   598			ms := make([]string, len(members))
   599			for i := range members {
   600				ms[i] = members[i].String()
   601			}
   602			bb.m["members"] = ms
   603			return nil
   604		}
   605	
   606		// too many members to fit in one static-set, so we spread them in
   607		// several sub static-sets.
   608		subsetsNumber := len(members) / maxStaticSetMembers
   609		var perSubset int
   610		if subsetsNumber < maxStaticSetMembers {
   611			// this means we can fill each subset up to maxStaticSetMembers,
   612			// and stash the rest in one last subset.
   613			perSubset = maxStaticSetMembers
   614		} else {
   615			// otherwise we need to divide the members evenly in
   616			// (maxStaticSetMembers - 1) subsets, and each of these subsets
   617			// will also (recursively) have subsets of its own. There might
   618			// also be a rest in one last subset, as above.
   619			subsetsNumber = maxStaticSetMembers - 1
   620			perSubset = len(members) / subsetsNumber
   621		}
   622		// only the subsets at this level
   623		subsets := make([]*Blob, 0, subsetsNumber)
   624		// subsets at this level, plus all the children subsets.
   625		allSubsets := make([]*Blob, 0, subsetsNumber)
   626		for i := 0; i < subsetsNumber; i++ {
   627			ss := NewStaticSet()
   628			subss := ss.SetStaticSetMembers(members[i*perSubset : (i+1)*perSubset])
   629			subsets = append(subsets, ss.Blob())
   630			allSubsets = append(allSubsets, ss.Blob())
   631			for _, v := range subss {
   632				allSubsets = append(allSubsets, v)
   633			}
   634		}
   635	
   636		// Deal with the remainder (of the Euclidean division)
   637		if perSubset*subsetsNumber < len(members) {
   638			ss := NewStaticSet()
   639			ss.SetStaticSetMembers(members[perSubset*subsetsNumber:])
   640			allSubsets = append(allSubsets, ss.Blob())
   641			subsets = append(subsets, ss.Blob())
   642		}
   643	
   644		mss := make([]string, len(subsets))
   645		for i := range subsets {
   646			mss[i] = subsets[i].BlobRef().String()
   647		}
   648		bb.m["mergeSets"] = mss
   649		return allSubsets
   650	}
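
// Editor's sketch (not part of the original file): the calling pattern implied
// by the comment above. Build the top static-set, then upload both it and any
// returned sub static-sets; the upload step itself lives outside this file.
func exampleBuildStaticSet(members []blob.Ref) (top *Blob, subsets []*Blob) {
	bb := NewStaticSet()
	subsets = bb.SetStaticSetMembers(members) // nil unless members overflow one schema blob
	return bb.Blob(), subsets                 // callers upload top and every subset
}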
   651	
   652	func base(version int, ctype string) *Builder {
   653		return &Builder{map[string]interface{}{
   654			"camliVersion": version,
   655			"camliType":    ctype,
   656		}}
   657	}
   658	
   659	// NewUnsignedPermanode returns a new random permanode, not yet signed.
   660	func NewUnsignedPermanode() *Builder {
   661		bb := base(1, "permanode")
   662		chars := make([]byte, 20)
   663		_, err := io.ReadFull(rand.Reader, chars)
   664		if err != nil {
   665			panic("error reading random bytes: " + err.Error())
   666		}
   667		bb.m["random"] = base64.StdEncoding.EncodeToString(chars)
   668		return bb
   669	}
   670	
   671	// NewPlannedPermanode returns a permanode with a fixed key.  Like
   672	// NewUnsignedPermanode, this builder is also not yet signed.  Callers of
   673	// NewPlannedPermanode must sign the map with a fixed claimDate and
   674	// GPG date to create consistent JSON encodings of the map (and thus
   675	// a consistent blobref) between runs.
   676	func NewPlannedPermanode(key string) *Builder {
   677		bb := base(1, "permanode")
   678		bb.m["key"] = key
   679		return bb
   680	}
   681	
   682	// NewHashPlannedPermanode returns a planned permanode using the
   683	// blobref of the hash's sum as the key.
   684	func NewHashPlannedPermanode(h hash.Hash) *Builder {
   685		return NewPlannedPermanode(blob.RefFromHash(h).String())
   686	}
   687	
   688	// mapJSON returns the map m encoded as JSON in its
   689	// recommended canonical form. The canonical form is readable with newlines and indentation,
   690	// and always starts with the header bytes:
   691	//
   692	//   {"camliVersion":
   693	//
   694	func mapJSON(m map[string]interface{}) (string, error) {
   695		version, hasVersion := m["camliVersion"]
   696		if !hasVersion {
   697			return "", ErrNoCamliVersion
   698		}
   699		delete(m, "camliVersion")
   700		jsonBytes, err := json.MarshalIndent(m, "", "  ")
   701		if err != nil {
   702			return "", err
   703		}
   704		m["camliVersion"] = version
   705		var buf bytes.Buffer
   706		fmt.Fprintf(&buf, "{\"camliVersion\": %v,\n", version)
   707		buf.Write(jsonBytes[2:])
   708		return buf.String(), nil
   709	}
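
// Editor's sketch (not part of the original file): the canonical serialization
// produced by mapJSON for a minimal map built with base. The output always
// starts with the {"camliVersion": header shown above.
func exampleCanonicalJSON() (string, error) {
	bb := base(1, "permanode")
	s, err := mapJSON(bb.m)
	if err != nil {
		return "", err
	}
	// s begins with:
	//   {"camliVersion": 1,
	// followed by the remaining keys, indented two spaces.
	return s, nil
}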
   710	
   711	// NewFileMap returns a new builder of a type "file" schema for the provided fileName.
   712	// The chunk parts of the file are not populated.
   713	func NewFileMap(fileName string) *Builder {
   714		return newCommonFilenameMap(fileName).SetType("file")
   715	}
   716	
   717	// NewDirMap returns a new builder of a type "directory" schema for the provided fileName.
   718	func NewDirMap(fileName string) *Builder {
   719		return newCommonFilenameMap(fileName).SetType("directory")
   720	}
   721	
   722	func newCommonFilenameMap(fileName string) *Builder {
   723		bb := base(1, "" /* no type yet */)
   724		if fileName != "" {
   725			bb.SetFileName(fileName)
   726		}
   727		return bb
   728	}
   729	
   730	var populateSchemaStat []func(schemaMap map[string]interface{}, fi os.FileInfo)
   731	
   732	func NewCommonFileMap(fileName string, fi os.FileInfo) *Builder {
   733		bb := newCommonFilenameMap(fileName)
   734		// Common elements (from file-common.txt)
   735		if fi.Mode()&os.ModeSymlink == 0 {
   736			bb.m["unixPermission"] = fmt.Sprintf("0%o", fi.Mode().Perm())
   737		}
   738	
   739		// OS-specific population; defined in schema_posix.go, etc. (not on App Engine)
   740		for _, f := range populateSchemaStat {
   741			f(bb.m, fi)
   742		}
   743	
   744		if mtime := fi.ModTime(); !mtime.IsZero() {
   745			bb.m["unixMtime"] = RFC3339FromTime(mtime)
   746		}
   747		return bb
   748	}
   749	
   750	// PopulateParts sets the "parts" field of the blob with the provided
   751	// parts.  The sum of the sizes of parts must match the provided size
   752	// or an error is returned.  Also, each BytesPart may only contain either
   753	// a BytesRef or a BlobRef, but not both.
   754	func (bb *Builder) PopulateParts(size int64, parts []BytesPart) error {
   755		return populateParts(bb.m, size, parts)
   756	}
   757	
   758	func populateParts(m map[string]interface{}, size int64, parts []BytesPart) error {
   759		sumSize := int64(0)
   760		mparts := make([]map[string]interface{}, len(parts))
   761		for idx, part := range parts {
   762			mpart := make(map[string]interface{})
   763			mparts[idx] = mpart
   764			switch {
   765			case part.BlobRef.Valid() && part.BytesRef.Valid():
   766				return errors.New("schema: part contains both BlobRef and BytesRef")
   767			case part.BlobRef.Valid():
   768				mpart["blobRef"] = part.BlobRef.String()
   769			case part.BytesRef.Valid():
   770				mpart["bytesRef"] = part.BytesRef.String()
   771			default:
   772				return errors.New("schema: part must contain either a BlobRef or BytesRef")
   773			}
   774			mpart["size"] = part.Size
   775			sumSize += int64(part.Size)
   776			if part.Offset != 0 {
   777				mpart["offset"] = part.Offset
   778			}
   779		}
   780		if sumSize != size {
   781			return fmt.Errorf("schema: declared size %d doesn't match sum of parts size %d", size, sumSize)
   782		}
   783		m["parts"] = mparts
   784		return nil
   785	}
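
// Editor's sketch (not part of the original file): PopulateParts requires the
// part sizes to sum to the declared size, and each part to carry exactly one
// of a BlobRef or a BytesRef. The file name and sizes are hypothetical.
func exampleFileWithParts(chunk1, chunk2 blob.Ref) (*Builder, error) {
	bb := NewFileMap("example.dat")
	err := bb.PopulateParts(12, []BytesPart{
		{BlobRef: chunk1, Size: 5},
		{BlobRef: chunk2, Size: 7},
	})
	if err != nil {
		return nil, err
	}
	return bb, nil
}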
   786	
   787	func newBytes() *Builder {
   788		return base(1, "bytes")
   789	}
   790	
   791	// ClaimType is one of the valid "claimType" fields in a "claim" schema blob. See doc/schema/claims/.
   792	type ClaimType string
   793	
   794	const (
   795		SetAttributeClaim ClaimType = "set-attribute"
   796		AddAttributeClaim ClaimType = "add-attribute"
   797		DelAttributeClaim ClaimType = "del-attribute"
   798		ShareClaim        ClaimType = "share"
   799		// DeleteClaim deletes a permanode or another claim.
   800		// A delete claim can itself be deleted, and so on.
   801		DeleteClaim ClaimType = "delete"
   802	)
   803	
   804	// claimParam is used to populate a claim map when building a new claim
   805	type claimParam struct {
   806		claimType ClaimType
   807	
   808		// Params specific to *Attribute claims:
   809		permanode blob.Ref // modified permanode
   810		attribute string   // required
   811		value     string   // optional if Type == DelAttributeClaim
   812	
   813		// Params specific to ShareClaim claims:
   814		authType   string
   815		transitive bool
   816	
   817		// Params specific to ShareClaim and DeleteClaim claims.
   818		target blob.Ref
   819	}
   820	
   821	func newClaim(claims ...*claimParam) *Builder {
   822		bb := base(1, "claim")
   823		bb.SetClaimDate(clockNow())
   824		if len(claims) == 1 {
   825			cp := claims[0]
   826			populateClaimMap(bb.m, cp)
   827			return bb
   828		}
   829		var claimList []interface{}
   830		for _, cp := range claims {
   831			m := map[string]interface{}{}
   832			populateClaimMap(m, cp)
   833			claimList = append(claimList, m)
   834		}
   835		bb.m["claimType"] = "multi"
   836		bb.m["claims"] = claimList
   837		return bb
   838	}
   839	
   840	func populateClaimMap(m map[string]interface{}, cp *claimParam) {
   841		m["claimType"] = string(cp.claimType)
   842		switch cp.claimType {
   843		case ShareClaim:
   844			m["authType"] = cp.authType
   845			m["transitive"] = cp.transitive
   846		case DeleteClaim:
   847			m["target"] = cp.target.String()
   848		default:
   849			m["permaNode"] = cp.permanode.String()
   850			m["attribute"] = cp.attribute
   851			if !(cp.claimType == DelAttributeClaim && cp.value == "") {
   852				m["value"] = cp.value
   853			}
   854		}
   855	}
   856	
   857	// NewShareRef creates a *Builder for a "share" claim.
   858	func NewShareRef(authType string, transitive bool) *Builder {
   859		return newClaim(&claimParam{
   860			claimType:  ShareClaim,
   861			authType:   authType,
   862			transitive: transitive,
   863		})
   864	}
   865	
   866	func NewSetAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
   867		return newClaim(&claimParam{
   868			permanode: permaNode,
   869			claimType: SetAttributeClaim,
   870			attribute: attr,
   871			value:     value,
   872		})
   873	}
   874	
   875	func NewAddAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
   876		return newClaim(&claimParam{
   877			permanode: permaNode,
   878			claimType: AddAttributeClaim,
   879			attribute: attr,
   880			value:     value,
   881		})
   882	}
   883	
   884	// NewDelAttributeClaim creates a new claim to remove value from the
   885	// values set for the attribute attr of permaNode. If value is empty then
   886	// all the values for attribute are cleared.
   887	func NewDelAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
   888		return newClaim(&claimParam{
   889			permanode: permaNode,
   890			claimType: DelAttributeClaim,
   891			attribute: attr,
   892			value:     value,
   893		})
   894	}
   895	
   896	// NewDeleteClaim creates a new claim to delete a target claim or permanode.
   897	func NewDeleteClaim(target blob.Ref) *Builder {
   898		return newClaim(&claimParam{
   899			target:    target,
   900			claimType: DeleteClaim,
   901		})
   902	}
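
// Editor's sketch (not part of the original file): how the claim constructors
// above are typically paired; set an attribute on a permanode, then clear it
// later. The attribute name and value are hypothetical, and signing the
// resulting builders happens elsewhere in this package.
func exampleTagClaims(pn blob.Ref) (set, del *Builder) {
	set = NewSetAttributeClaim(pn, "tag", "vacation")
	del = NewDelAttributeClaim(pn, "tag", "") // empty value clears all "tag" values
	return set, del
}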
   903	
   904	// ShareHaveRef is the auth type specifying that if you "have the
   905	// reference" (know the blobref to the haveref share blob), then you
   906	// have access to the referenced object from that share blob.
   907	// This is the "send a link to a friend" access model.
   908	const ShareHaveRef = "haveref"
   909	
   910	// UnknownLocation is a magic timezone value used when the actual location
   911	// of a time is unknown. For instance, EXIF files commonly have a time without
   912	// a corresponding location or timezone offset.
   913	var UnknownLocation = time.FixedZone("Unknown", -60) // 1 minute west
   914	
   915	// IsZoneKnown reports whether t is in a known timezone.
   916	// Perkeep uses the magic timezone offset of 1 minute west of UTC
   917	// to mean that the timezone wasn't known.
   918	func IsZoneKnown(t time.Time) bool {
   919		if t.Location() == UnknownLocation {
   920			return false
   921		}
   922		if _, off := t.Zone(); off == -60 {
   923			return false
   924		}
   925		return true
   926	}
   927	
   928	// RFC3339FromTime returns an RFC3339-formatted time.
   929	//
   930	// If the timezone is known, the time will be converted to UTC and
   931	// returned with a "Z" suffix. For unknown zones, the timezone will be
   932	// "-00:01" (1 minute west of UTC).
   933	//
   934	// Fractional seconds are only included if the time has fractional
   935	// seconds.
   936	func RFC3339FromTime(t time.Time) string {
   937		if IsZoneKnown(t) {
   938			t = t.UTC()
   939		}
   940		if t.UnixNano()%1e9 == 0 {
   941			return t.Format(time.RFC3339)
   942		}
   943		return t.Format(time.RFC3339Nano)
   944	}
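
// Editor's sketch (not part of the original file): RFC3339FromTime on a time
// in a known zone (converted to UTC, "Z" suffix) versus the same instant in
// UnknownLocation (kept at the magic "-00:01" offset).
func exampleRFC3339Times() (known, unknown string) {
	t := time.Date(2011, 7, 4, 12, 0, 0, 0, time.UTC)
	known = RFC3339FromTime(t)                       // "2011-07-04T12:00:00Z"
	unknown = RFC3339FromTime(t.In(UnknownLocation)) // "2011-07-04T11:59:00-00:01"
	return known, unknown
}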
   945	
   946	var bytesCamliVersion = []byte("camliVersion")
   947	
   948	// LikelySchemaBlob returns quickly whether buf likely contains (or is
   949	// the prefix of) a schema blob.
   950	func LikelySchemaBlob(buf []byte) bool {
   951		if len(buf) == 0 || buf[0] != '{' {
   952			return false
   953		}
   954		return bytes.Contains(buf, bytesCamliVersion)
   955	}
   956	
   957	// findSize checks if v is an *os.File or if it has
   958	// a Size() int64 method, to find its size.
   959	// It returns 0, false otherwise.
   960	func findSize(v interface{}) (size int64, ok bool) {
   961		if fi, ok := v.(*os.File); ok {
   962			v, _ = fi.Stat()
   963		}
   964		if sz, ok := v.(interface {
   965			Size() int64
   966		}); ok {
   967			return sz.Size(), true
   968		}
   969		// For bytes.Reader, strings.Reader, etc:
   970		if li, ok := v.(interface {
   971			Len() int
   972		}); ok {
   973			ln := int64(li.Len()) // unread portion, typically
   974			// If it's also a seeker, add any current seek offset:
   975			if sk, ok := v.(io.Seeker); ok {
   976				if cur, err := sk.Seek(0, 1); err == nil {
   977					ln += cur
   978				}
   979			}
   980			return ln, true
   981		}
   982		return 0, false
   983	}
   984	
   985	// FileTime returns the best guess of the file's creation time (or modtime).
   986	// If the file doesn't have its own metadata indicating the creation time (such as in EXIF),
   987	// FileTime uses the modification time from the file system.
   988	// If there was a valid EXIF but an error while trying to get a date from it,
   989	// it logs the error and tries the other methods.
   990	func FileTime(f io.ReaderAt) (time.Time, error) {
   991		var ct time.Time
   992		defaultTime := func() (time.Time, error) {
   993			if osf, ok := f.(*os.File); ok {
   994				fi, err := osf.Stat()
   995				if err != nil {
   996					return ct, fmt.Errorf("Failed to find a modtime: stat: %v", err)
   997				}
   998				return fi.ModTime(), nil
   999			}
  1000			return ct, errors.New("all methods failed to find a creation time or modtime")
  1001		}
  1002	
  1003		size, ok := findSize(f)
  1004		if !ok {
  1005			size = 256 << 10 // enough to get the EXIF
  1006		}
  1007		r := io.NewSectionReader(f, 0, size)
  1008		var tiffErr error
  1009		ex, err := exif.Decode(r)
  1010		if err != nil {
  1011			tiffErr = err
  1012			if exif.IsShortReadTagValueError(err) {
  1013				return ct, io.ErrUnexpectedEOF
  1014			}
  1015			if exif.IsCriticalError(err) || exif.IsExifError(err) {
  1016				return defaultTime()
  1017			}
  1018		}
  1019		ct, err = ex.DateTime()
  1020		if err != nil {
  1021			return defaultTime()
  1022		}
  1023		// If the EXIF file only had local timezone, but it did have
  1024		// GPS, then look up the timezone and correct the time.
  1025		if ct.Location() == time.Local {
  1026			if exif.IsGPSError(tiffErr) {
  1027				log.Printf("Invalid EXIF GPS data: %v", tiffErr)
  1028				return ct, nil
  1029			}
  1030			if lat, long, err := ex.LatLong(); err == nil {
  1031				if loc := lookupLocation(latlong.LookupZoneName(lat, long)); loc != nil {
  1032					if t, err := exifDateTimeInLocation(ex, loc); err == nil {
  1033						return t, nil
  1034					}
  1035				}
  1036			} else if !exif.IsTagNotPresentError(err) {
  1037				log.Printf("Invalid EXIF GPS data: %v", err)
  1038			}
  1039		}
  1040		return ct, nil
  1041	}
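
// Editor's sketch (not part of the original file): the typical call pattern
// for FileTime on a local file. The path is hypothetical.
func exampleFileTime() (time.Time, error) {
	f, err := os.Open("photo.jpg")
	if err != nil {
		return time.Time{}, err
	}
	defer f.Close()
	return FileTime(f) // EXIF date if present, otherwise the file's modtime
}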
  1042	
  1043	// This is basically a copy of the exif.Exif.DateTime() method, except:
  1044	//   * it takes a *time.Location to assume
  1045	//   * the caller already assumes there's no timezone offset or GPS time
  1046	//     in the EXIF, so any of that code can be ignored.
  1047	func exifDateTimeInLocation(x *exif.Exif, loc *time.Location) (time.Time, error) {
  1048		tag, err := x.Get(exif.DateTimeOriginal)
  1049		if err != nil {
  1050			tag, err = x.Get(exif.DateTime)
  1051			if err != nil {
  1052				return time.Time{}, err
  1053			}
  1054		}
  1055		if tag.Format() != tiff.StringVal {
  1056			return time.Time{}, errors.New("DateTime[Original] not in string format")
  1057		}
  1058		const exifTimeLayout = "2006:01:02 15:04:05"
  1059		dateStr := strings.TrimRight(string(tag.Val), "\x00")
  1060		return time.ParseInLocation(exifTimeLayout, dateStr, loc)
  1061	}
  1062	
  1063	var zoneCache struct {
  1064		sync.RWMutex
  1065		m map[string]*time.Location
  1066	}
  1067	
  1068	func lookupLocation(zone string) *time.Location {
  1069		if zone == "" {
  1070			return nil
  1071		}
  1072		zoneCache.RLock()
  1073		l, ok := zoneCache.m[zone]
  1074		zoneCache.RUnlock()
  1075		if ok {
  1076			return l
  1077		}
  1078		// could use singleflight here, but doesn't really
  1079		// matter if two callers both do this.
  1080		loc, err := time.LoadLocation(zone)
  1081	
  1082		zoneCache.Lock()
  1083		if zoneCache.m == nil {
  1084			zoneCache.m = make(map[string]*time.Location)
  1085		}
  1086		zoneCache.m[zone] = loc // even if nil
  1087		zoneCache.Unlock()
  1088	
  1089		if err != nil {
  1090			log.Printf("failed to lookup timezone %q: %v", zone, err)
  1091			return nil
  1092		}
  1093		return loc
  1094	}
  1095	
  1096	var boringTitlePattern = regexp.MustCompile(`^(?:IMG_|DSC|PANO_|ESR_).*$`)
  1097	
  1098	// IsInterestingTitle returns whether title would be interesting information as
  1099	// a title for a permanode. For example, filenames automatically created by
  1100	// cameras, such as IMG_XXXX.JPG, do not add any interesting value.
  1101	func IsInterestingTitle(title string) bool {
  1102		return !boringTitlePattern.MatchString(title)
  1103	}