     1	/*
     2	Copyright 2011 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	// Package schema manipulates Perkeep schema blobs.
    18	//
    19	// A schema blob is a JSON-encoded blob that describes other blobs.
    20	// See documentation in Perkeep's doc/schema/ directory.
    21	package schema // import "perkeep.org/pkg/schema"
    22	
    23	import (
    24		"bytes"
    25		"context"
    26		"crypto/rand"
    27		"encoding/base64"
    28		"encoding/json"
    29		"errors"
    30		"fmt"
    31		"hash"
    32		"io"
    33		"log"
    34		"os"
    35		"regexp"
    36		"strconv"
    37		"strings"
    38		"sync"
    39		"time"
    40		"unicode/utf8"
    41	
    42		"github.com/bradfitz/latlong"
    43		"perkeep.org/pkg/blob"
    44	
    45		"github.com/rwcarlsen/goexif/exif"
    46		"github.com/rwcarlsen/goexif/tiff"
    47		"go4.org/strutil"
    48		"go4.org/types"
    49	)
    50	
    51	func init() {
    52		// Intern common strings used by schema blobs (camliType values) to reduce
    53		// memory usage in the index, which looks them up via strutil.StringFromBytes.
    54		strutil.RegisterCommonString(
    55			"bytes",
    56			"claim",
    57			"directory",
    58			"file",
    59			"permanode",
    60			"share",
    61			"static-set",
    62			"symlink",
    63		)
    64	}
    65	
    66	// MaxSchemaBlobSize represents the upper bound for how large
    67	// a schema blob may be.
    68	const MaxSchemaBlobSize = 1 << 20
    69	
    70	var (
    71		ErrNoCamliVersion = errors.New("schema: no camliVersion key in map")
    72	)
    73	
    74	var clockNow = time.Now
    75	
    76	type StatHasher interface {
    77		Lstat(fileName string) (os.FileInfo, error)
    78		Hash(fileName string) (blob.Ref, error)
    79	}
    80	
    81	// File is the interface returned when opening a DirectoryEntry that
    82	// is a regular file.
    83	type File interface {
    84		io.Closer
    85		io.ReaderAt
    86		io.Reader
    87		Size() int64
    88	}
    89	
    90	// Directory is a read-only interface to a "directory" schema blob.
    91	type Directory interface {
    92		// Readdir reads the contents of the directory associated with dr
    93		// and returns a slice of up to n DirectoryEntry structures.
    94		// Subsequent calls on the same directory will yield further
    95		// DirectoryEntries.
    96		// If n > 0, Readdir returns at most n DirectoryEntry structures. In
    97		// this case, if Readdir returns an empty slice, it will return
    98		// a non-nil error explaining why. At the end of a directory,
    99		// the error is io.EOF.
   100		// If n <= 0, Readdir returns all the DirectoryEntries from the
   101		// directory in a single slice. In this case, if Readdir succeeds
   102		// (reads all the way to the end of the directory), it returns the
   103		// slice and a nil error. If it encounters an error before the
   104		// end of the directory, Readdir returns the DirectoryEntries read
   105		// until that point and a non-nil error.
   106		Readdir(ctx context.Context, n int) ([]DirectoryEntry, error)
   107	}
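
// Editor's sketch (not part of the original file): under the Readdir contract
// documented above, a caller might page through a Directory in fixed-size
// batches like this. The helper name and batch size are hypothetical.
func exampleReadDirInBatches(ctx context.Context, dir Directory) ([]DirectoryEntry, error) {
	const batchSize = 100 // hypothetical page size
	var all []DirectoryEntry
	for {
		ents, err := dir.Readdir(ctx, batchSize)
		all = append(all, ents...)
		if err == io.EOF {
			return all, nil // reached the end of the directory
		}
		if err != nil {
			return all, err
		}
	}
}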
   108	
   109	type Symlink interface {
   110		// .. TODO
   111	}
   112	
   113	// FIFO is the read-only interface to a "fifo" schema blob.
   114	type FIFO interface {
   115		// .. TODO
   116	}
   117	
   118	// Socket is the read-only interface to a "socket" schema blob.
   119	type Socket interface {
   120		// .. TODO
   121	}
   122	
   123	// DirectoryEntry is a read-only interface to an entry in a (static)
   124	// directory.
   125	type DirectoryEntry interface {
   126		// CamliType returns the schema blob's "camliType" field.
   127		// This may be "file", "directory", "symlink", or other more
   128		// obscure types added in the future.
   129		CamliType() string
   130	
   131		FileName() string
   132		BlobRef() blob.Ref
   133	
   134		File(ctx context.Context) (File, error)           // if camliType is "file"
   135		Directory(ctx context.Context) (Directory, error) // if camliType is "directory"
   136		Symlink() (Symlink, error)                        // if camliType is "symlink"
   137		FIFO() (FIFO, error)                              // if camliType is "fifo"
   138		Socket() (Socket, error)                          // If camliType is "socket"
   139	}
   140	
   141	// dirEntry is the default implementation of DirectoryEntry
   142	type dirEntry struct {
   143		ss      superset
   144		fetcher blob.Fetcher
   145		fr      *FileReader // or nil if not a file
   146		dr      *DirReader  // or nil if not a directory
   147	}
   148	
   149	// A SearchQuery must be of type *search.SearchQuery.
   150	// This type breaks an otherwise-circular dependency.
   151	type SearchQuery interface{}
   152	
   153	func (de *dirEntry) CamliType() string {
   154		return de.ss.Type
   155	}
   156	
   157	func (de *dirEntry) FileName() string {
   158		return de.ss.FileNameString()
   159	}
   160	
   161	func (de *dirEntry) BlobRef() blob.Ref {
   162		return de.ss.BlobRef
   163	}
   164	
   165	func (de *dirEntry) File(ctx context.Context) (File, error) {
   166		if de.fr == nil {
   167			if de.ss.Type != "file" {
   168				return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "file")
   169			}
   170			fr, err := NewFileReader(ctx, de.fetcher, de.ss.BlobRef)
   171			if err != nil {
   172				return nil, err
   173			}
   174			de.fr = fr
   175		}
   176		return de.fr, nil
   177	}
   178	
   179	func (de *dirEntry) Directory(ctx context.Context) (Directory, error) {
   180		if de.dr == nil {
   181			if de.ss.Type != "directory" {
   182				return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "directory")
   183			}
   184			dr, err := NewDirReader(ctx, de.fetcher, de.ss.BlobRef)
   185			if err != nil {
   186				return nil, err
   187			}
   188			de.dr = dr
   189		}
   190		return de.dr, nil
   191	}
   192	
   193	func (de *dirEntry) Symlink() (Symlink, error) {
   194		return 0, errors.New("TODO: Symlink not implemented")
   195	}
   196	
   197	func (de *dirEntry) FIFO() (FIFO, error) {
   198		return 0, errors.New("TODO: FIFO not implemented")
   199	}
   200	
   201	func (de *dirEntry) Socket() (Socket, error) {
   202		return 0, errors.New("TODO: Socket not implemented")
   203	}
   204	
   205	// newDirectoryEntry takes a superset and returns a DirectoryEntry if
   206	// the superset is valid and represents an entry in a directory. It
   207	// must be of type "file", "directory", "symlink", "fifo" or "socket".
   208	// TODO: "char", "block", probably. later.
   209	func newDirectoryEntry(fetcher blob.Fetcher, ss *superset) (DirectoryEntry, error) {
   210		if ss == nil {
   211			return nil, errors.New("ss was nil")
   212		}
   213		if !ss.BlobRef.Valid() {
   214			return nil, errors.New("ss.BlobRef was invalid")
   215		}
   216		switch ss.Type {
   217		case "file", "directory", "symlink", "fifo", "socket":
   218			// Okay
   219		default:
   220			return nil, fmt.Errorf("invalid DirectoryEntry camliType of %q", ss.Type)
   221		}
   222		de := &dirEntry{ss: *ss, fetcher: fetcher} // defensive copy
   223		return de, nil
   224	}
   225	
   226	// NewDirectoryEntryFromBlobRef takes a BlobRef and returns a
   227	// DirectoryEntry if the BlobRef contains a type "file", "directory",
   228	// "symlink", "fifo" or "socket".
   229	// TODO: "char", "block", probably. later.
   230	func NewDirectoryEntryFromBlobRef(ctx context.Context, fetcher blob.Fetcher, blobRef blob.Ref) (DirectoryEntry, error) {
   231		ss := new(superset)
   232		err := ss.setFromBlobRef(ctx, fetcher, blobRef)
   233		if err != nil {
   234			return nil, fmt.Errorf("schema/filereader: can't fill superset: %v", err)
   235		}
   236		return newDirectoryEntry(fetcher, ss)
   237	}
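
// Editor's sketch (not part of the original file): one way to combine
// NewDirectoryEntryFromBlobRef with the DirectoryEntry accessors above,
// branching on the entry's camliType. The function name and the string
// formatting are hypothetical.
func exampleDescribeEntry(ctx context.Context, fetcher blob.Fetcher, ref blob.Ref) (string, error) {
	de, err := NewDirectoryEntryFromBlobRef(ctx, fetcher, ref)
	if err != nil {
		return "", err
	}
	switch de.CamliType() {
	case "file":
		f, err := de.File(ctx)
		if err != nil {
			return "", err
		}
		defer f.Close()
		return fmt.Sprintf("file %q (%d bytes)", de.FileName(), f.Size()), nil
	case "directory":
		if _, err := de.Directory(ctx); err != nil {
			return "", err
		}
		return fmt.Sprintf("directory %q", de.FileName()), nil
	default:
		return fmt.Sprintf("%s %q", de.CamliType(), de.FileName()), nil
	}
}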
   238	
   239	// superset represents the superset of common Perkeep JSON schema
   240	// keys as a convenient json.Unmarshal target.
   241	// TODO(bradfitz): unexport this type. Getting too gross. Move to schema.Blob
   242	type superset struct {
   243		// BlobRef isn't for a particular metadata blob field, but included
   244		// for convenience.
   245		BlobRef blob.Ref
   246	
   247		Version int    `json:"camliVersion"`
   248		Type    string `json:"camliType"`
   249	
   250		Signer blob.Ref `json:"camliSigner"`
   251		Sig    string   `json:"camliSig"`
   252	
   253		ClaimType string         `json:"claimType"`
   254		ClaimDate types.Time3339 `json:"claimDate"`
   255	
   256		Permanode blob.Ref `json:"permaNode"`
   257		Attribute string   `json:"attribute"`
   258		Value     string   `json:"value"`
   259	
   260		// FileName and FileNameBytes represent one of the two
   261		// representations of file names in schema blobs.  They should
   262		// not be accessed directly.  Use the FileNameString accessor
   263		// instead, which also sanitizes malicious values.
   264		FileName      string        `json:"fileName"`
   265		FileNameBytes []interface{} `json:"fileNameBytes"`
   266	
   267		SymlinkTarget      string        `json:"symlinkTarget"`
   268		SymlinkTargetBytes []interface{} `json:"symlinkTargetBytes"`
   269	
   270		UnixPermission string `json:"unixPermission"`
   271		UnixOwnerId    int    `json:"unixOwnerId"`
   272		UnixOwner      string `json:"unixOwner"`
   273		UnixGroupId    int    `json:"unixGroupId"`
   274		UnixGroup      string `json:"unixGroup"`
   275		UnixMtime      string `json:"unixMtime"`
   276		UnixCtime      string `json:"unixCtime"`
   277		UnixAtime      string `json:"unixAtime"`
   278	
   279		// Parts are references to the data chunks of a regular file (or a "bytes" schema blob).
   280		// See doc/schema/bytes.txt and doc/schema/files/file.txt.
   281		Parts []*BytesPart `json:"parts"`
   282	
   283		Entries   blob.Ref   `json:"entries"`   // for directories, a blobref to a static-set
   284		Members   []blob.Ref `json:"members"`   // for static sets (for directory static-sets: blobrefs to child dirs/files)
   285		MergeSets []blob.Ref `json:"mergeSets"` // each is a "sub static-set", that has either Members or MergeSets. For large dirs.
   286	
   287		// Search allows a "share" blob to share an entire search. Contrast with "target".
   288		Search SearchQuery `json:"search"`
   289		// Target is a "share" blob's target (the thing being shared)
   290		// Or it is the object being deleted in a DeleteClaim claim.
   291		Target blob.Ref `json:"target"`
   292		// Transitive is a property of a "share" blob.
   293		Transitive bool `json:"transitive"`
   294		// AuthType is a "share" blob's authentication type that is required.
   295		// Currently (2013-01-02) just "haveref" (if you know the share's blobref,
   296		// you get access: the secret URL model)
   297		AuthType string         `json:"authType"`
   298		Expires  types.Time3339 `json:"expires"` // or zero for no expiration
   299	}
   300	
   301	func parseSuperset(r io.Reader) (*superset, error) {
   302		var ss superset
   303		if err := json.NewDecoder(io.LimitReader(r, MaxSchemaBlobSize)).Decode(&ss); err != nil {
   304			return nil, err
   305		}
   306		return &ss, nil
   307	}
   308	
   309	// BlobFromReader returns a new Blob from the provided Reader r,
   310	// which should be the body of the provided blobref.
   311	// Note: the hash checksum is not verified.
   312	func BlobFromReader(ref blob.Ref, r io.Reader) (*Blob, error) {
   313		if !ref.Valid() {
   314			return nil, errors.New("schema.BlobFromReader: invalid blobref")
   315		}
   316		var buf bytes.Buffer
   317		tee := io.TeeReader(r, &buf)
   318		ss, err := parseSuperset(tee)
   319		if err != nil {
   320			return nil, err
   321		}
   322		var wb [16]byte
   323		afterObj := 0
   324		for {
   325			n, err := tee.Read(wb[:])
   326			afterObj += n
   327			for i := 0; i < n; i++ {
   328				if !isASCIIWhite(wb[i]) {
   329					return nil, fmt.Errorf("invalid bytes after JSON schema blob in %v", ref)
   330				}
   331			}
   332			if afterObj > MaxSchemaBlobSize {
   333				break
   334			}
   335			if err == io.EOF {
   336				break
   337			}
   338			if err != nil {
   339				return nil, err
   340			}
   341		}
   342		json := buf.String()
   343		if len(json) > MaxSchemaBlobSize {
   344			return nil, fmt.Errorf("schema: metadata blob %v is over expected limit; size=%d", ref, len(json))
   345		}
   346		return &Blob{ref, json, ss}, nil
   347	}
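
// Editor's sketch (not part of the original file): feeding BlobFromReader from
// an in-memory schema blob. The JSON literal is hypothetical, and
// blob.RefFromString is assumed to compute the blobref of the given contents.
func exampleBlobFromString() (*Blob, error) {
	const contents = `{"camliVersion": 1,
  "camliType": "permanode",
  "random": "example"
}`
	ref := blob.RefFromString(contents) // note: BlobFromReader does not verify the checksum
	return BlobFromReader(ref, strings.NewReader(contents))
}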
   348	
   349	func isASCIIWhite(b byte) bool {
   350		switch b {
   351		case ' ', '\t', '\r', '\n':
   352			return true
   353		}
   354		return false
   355	}
   356	
   357	// BytesPart is the type representing one of the "parts" in a "file"
   358	// or "bytes" JSON schema.
   359	//
   360	// See doc/schema/bytes.txt and doc/schema/files/file.txt.
   361	type BytesPart struct {
   362		// Size is the number of bytes that this part contributes to the overall segment.
   363		Size uint64 `json:"size"`
   364	
   365		// At most one of BlobRef or BytesRef may be non-zero
   366		// (Valid); it's illegal for both to be set.
   367		// If neither is set, this BytesPart represents Size zero bytes.
   368		// BlobRef refers to raw bytes. BytesRef references a "bytes" schema blob.
   369		BlobRef  blob.Ref `json:"blobRef,omitempty"`
   370		BytesRef blob.Ref `json:"bytesRef,omitempty"`
   371	
   372		// Offset optionally specifies the offset into BlobRef to skip
   373		// when reading Size bytes.
   374		Offset uint64 `json:"offset,omitempty"`
   375	}
   376	
   377	// stringFromMixedArray joins a slice of either strings or float64
   378	// values (as retrieved from JSON decoding) into a string.  These are
   379	// used for non-UTF8 filenames in "fileNameBytes" fields.  The strings
   380	// are UTF-8 segments and the float64s (actually uint8 values) are
   381	// byte values.
   382	func stringFromMixedArray(parts []interface{}) string {
   383		var buf bytes.Buffer
   384		for _, part := range parts {
   385			if s, ok := part.(string); ok {
   386				buf.WriteString(s)
   387				continue
   388			}
   389			if num, ok := part.(float64); ok {
   390				buf.WriteByte(byte(num))
   391				continue
   392			}
   393		}
   394		return buf.String()
   395	}
   396	
   397	// mixedArrayFromString is the inverse of stringFromMixedArray. It
   398	// splits a string into a series of UTF-8 string segments and
   399	// individual non-UTF-8 bytes.
   400	func mixedArrayFromString(s string) (parts []interface{}) {
   401		for len(s) > 0 {
   402			if n := utf8StrLen(s); n > 0 {
   403				parts = append(parts, s[:n])
   404				s = s[n:]
   405			} else {
   406				parts = append(parts, s[0])
   407				s = s[1:]
   408			}
   409		}
   410		return parts
   411	}
   412	
   413	// utf8StrLen returns how many prefix bytes of s are valid UTF-8.
   414	func utf8StrLen(s string) int {
   415		for i, r := range s {
   416			if r == utf8.RuneError {
   417				// The RuneError value can be an error
   418				// sentinel value (if it's size 1) or the same
   419				// value encoded properly. Decode it to see if
   420				// it's the 1 byte sentinel value.
   421				_, size := utf8.DecodeRuneInString(s[i:])
   422				if size == 1 {
   423					return i
   424				}
   425			}
   426		}
   427		return len(s)
   428	}
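
// Editor's sketch (not part of the original file): the two helpers above are
// inverses across a JSON encode/decode cycle, which is how "fileNameBytes"
// values actually travel; a non-UTF-8 byte is stored as a JSON number and
// decodes back as a float64.
func exampleMixedArrayRoundTrip() (bool, error) {
	name := "photo-" + string([]byte{0xff}) + ".jpg" // invalid UTF-8 byte in the middle
	encoded, err := json.Marshal(mixedArrayFromString(name))
	if err != nil {
		return false, err
	}
	var decoded []interface{}
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		return false, err
	}
	return stringFromMixedArray(decoded) == name, nil // true: lossless after the JSON cycle
}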
   429	
   430	func (ss *superset) SumPartsSize() (size uint64) {
   431		for _, part := range ss.Parts {
   432			size += uint64(part.Size)
   433		}
   434		return size
   435	}
   436	
   437	func (ss *superset) SymlinkTargetString() string {
   438		if ss.SymlinkTarget != "" {
   439			return ss.SymlinkTarget
   440		}
   441		return stringFromMixedArray(ss.SymlinkTargetBytes)
   442	}
   443	
   444	// FileNameString returns the schema blob's base filename.
   445	//
   446	// If the fileName field of the blob accidentally or maliciously
   447	// contains a slash, this function returns an empty string instead.
   448	func (ss *superset) FileNameString() string {
   449		v := ss.FileName
   450		if v == "" {
   451			v = stringFromMixedArray(ss.FileNameBytes)
   452		}
   453		if v != "" {
   454			if strings.Contains(v, "/") {
   455				// Bogus schema blob; ignore.
   456				return ""
   457			}
   458			if strings.Contains(v, "\\") {
   459				// Bogus schema blob; ignore.
   460				return ""
   461			}
   462		}
   463		return v
   464	}
   465	
   466	func (ss *superset) HasFilename(name string) bool {
   467		return ss.FileNameString() == name
   468	}
   469	
   470	func (b *Blob) FileMode() os.FileMode {
   471		// TODO: move this to a different type, off *Blob
   472		return b.ss.FileMode()
   473	}
   474	
   475	func (ss *superset) FileMode() os.FileMode {
   476		var mode os.FileMode
   477		hasPerm := ss.UnixPermission != ""
   478		if hasPerm {
   479			m64, err := strconv.ParseUint(ss.UnixPermission, 8, 64)
   480			if err == nil {
   481				mode = mode | os.FileMode(m64)
   482			}
   483		}
   484	
   485		// TODO: add other types (block, char, etc)
   486		switch ss.Type {
   487		case "directory":
   488			mode = mode | os.ModeDir
   489		case "file":
   490			// No extra bit.
   491		case "symlink":
   492			mode = mode | os.ModeSymlink
   493		case "fifo":
   494			mode = mode | os.ModeNamedPipe
   495		case "socket":
   496			mode = mode | os.ModeSocket
   497		}
   498		if !hasPerm {
   499			switch ss.Type {
   500			case "directory":
   501				mode |= 0755
   502			default:
   503				mode |= 0644
   504			}
   505		}
   506		return mode
   507	}
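
// Editor's sketch (not part of the original file): how the FileMode mapping
// above combines the octal "unixPermission" string with the camliType-derived
// mode bits. The field values are hypothetical.
func exampleFileModeForSymlink() os.FileMode {
	ss := &superset{
		Type:           "symlink",
		UnixPermission: "0777",
	}
	return ss.FileMode() // os.ModeSymlink | 0777
}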
   508	
   509	// MapUid returns the most appropriate mapping from this file's owner
   510	// to the local machine's owner, trying first a match by name,
   511	// followed by just mapping the number through directly.
   512	func (b *Blob) MapUid() int { return b.ss.MapUid() }
   513	
   514	// MapGid returns the most appropriate mapping from this file's group
   515	// to the local machine's group, trying first a match by name,
   516	// followed by just mapping the number through directly.
   517	func (b *Blob) MapGid() int { return b.ss.MapGid() }
   518	
   519	func (ss *superset) MapUid() int {
   520		if ss.UnixOwner != "" {
   521			uid, ok := getUidFromName(ss.UnixOwner)
   522			if ok {
   523				return uid
   524			}
   525		}
   526		return ss.UnixOwnerId // TODO: will be 0 if unset, which isn't ideal
   527	}
   528	
   529	func (ss *superset) MapGid() int {
   530		if ss.UnixGroup != "" {
   531			gid, ok := getGidFromName(ss.UnixGroup)
   532			if ok {
   533				return gid
   534			}
   535		}
   536		return ss.UnixGroupId // TODO: will be 0 if unset, which isn't ideal
   537	}
   538	
   539	func (ss *superset) ModTime() time.Time {
   540		if ss.UnixMtime == "" {
   541			return time.Time{}
   542		}
   543		t, err := time.Parse(time.RFC3339, ss.UnixMtime)
   544		if err != nil {
   545			return time.Time{}
   546		}
   547		return t
   548	}
   549	
   550	var DefaultStatHasher = &defaultStatHasher{}
   551	
   552	type defaultStatHasher struct{}
   553	
   554	func (d *defaultStatHasher) Lstat(fileName string) (os.FileInfo, error) {
   555		return os.Lstat(fileName)
   556	}
   557	
   558	func (d *defaultStatHasher) Hash(fileName string) (blob.Ref, error) {
   559		h := blob.NewHash()
   560		file, err := os.Open(fileName)
   561		if err != nil {
   562			return blob.Ref{}, err
   563		}
   564		defer file.Close()
   565		_, err = io.Copy(h, file)
   566		if err != nil {
   567			return blob.Ref{}, err
   568		}
   569		return blob.RefFromHash(h), nil
   570	}
   571	
   572	// maximum number of static-set members in a static-set schema. As noted in
   573	// https://github.com/camlistore/camlistore/issues/924 , 33k members result in a
   574	// 1.7MB blob, so 10k members seems reasonable to stay under the MaxSchemaBlobSize (1MB)
   575	// limit. This is not a const, so we can lower it during tests and test the logic
   576	// without having to create thousands of blobs.
   577	var maxStaticSetMembers = 10000
   578	
   579	// NewStaticSet returns the "static-set" schema for a directory. Its members
   580	// should be populated with SetStaticSetMembers.
   581	func NewStaticSet() *Builder {
   582		return base(1, "static-set")
   583	}
   584	
   585	// SetStaticSetMembers sets the given members as the static-set members of this
   586	// builder. If the members are so numerous that they would not fit on a schema
   587	// blob, they are spread (recursively, if needed) onto sub static-sets, in which
   588	// case these subsets are set as "mergeSets" of this builder. All the created
   589	// subsets are returned, so the caller can upload them along with the top
   590	// static-set created from this builder.
   591	// SetStaticSetMembers panics if bb's type is not "static-set".
   592	func (bb *Builder) SetStaticSetMembers(members []blob.Ref) []*Blob {
   593		if bb.Type() != "static-set" {
   594			panic("called SetStaticSetMembers on non static-set")
   595		}
   596	
   597		if len(members) <= maxStaticSetMembers {
   598			ms := make([]string, len(members))
   599			for i := range members {
   600				ms[i] = members[i].String()
   601			}
   602			bb.m["members"] = ms
   603			return nil
   604		}
   605	
   606		// too many members to fit in one static-set, so we spread them in
   607		// several sub static-sets.
   608		subsetsNumber := len(members) / maxStaticSetMembers
   609		var perSubset int
   610		if subsetsNumber < maxStaticSetMembers {
   611			// this means we can fill each subset up to maxStaticSetMembers,
   612			// and stash the rest in one last subset.
   613			perSubset = maxStaticSetMembers
   614		} else {
   615			// otherwise we need to divide the members evenly in
   616			// (maxStaticSetMembers - 1) subsets, and each of these subsets
   617			// will also (recursively) have subsets of its own. There might
   618			// also be a rest in one last subset, as above.
   619			subsetsNumber = maxStaticSetMembers - 1
   620			perSubset = len(members) / subsetsNumber
   621		}
   622		// only the subsets at this level
   623		subsets := make([]*Blob, 0, subsetsNumber)
   624		// subsets at this level, plus all the children subsets.
   625		allSubsets := make([]*Blob, 0, subsetsNumber)
   626		for i := 0; i < subsetsNumber; i++ {
   627			ss := NewStaticSet()
   628			subss := ss.SetStaticSetMembers(members[i*perSubset : (i+1)*perSubset])
   629			subsets = append(subsets, ss.Blob())
   630			allSubsets = append(allSubsets, ss.Blob())
   631			for _, v := range subss {
   632				allSubsets = append(allSubsets, v)
   633			}
   634		}
   635	
   636		// Deal with the remainder (of the Euclidean division)
   637		if perSubset*subsetsNumber < len(members) {
   638			ss := NewStaticSet()
   639			ss.SetStaticSetMembers(members[perSubset*subsetsNumber:])
   640			allSubsets = append(allSubsets, ss.Blob())
   641			subsets = append(subsets, ss.Blob())
   642		}
   643	
   644		mss := make([]string, len(subsets))
   645		for i := range subsets {
   646			mss[i] = subsets[i].BlobRef().String()
   647		}
   648		bb.m["mergeSets"] = mss
   649		return allSubsets
   650	}
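
// Editor's sketch (not part of the original file): the calling pattern implied
// by the comment above. Build the top static-set, then upload both it and any
// returned sub static-sets; the upload step itself lives outside this file.
func exampleBuildStaticSet(members []blob.Ref) (top *Blob, subsets []*Blob) {
	bb := NewStaticSet()
	subsets = bb.SetStaticSetMembers(members) // nil unless members overflow one schema blob
	return bb.Blob(), subsets                 // callers upload top and every subset
}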
   651	
   652	func base(version int, ctype string) *Builder {
   653		return &Builder{map[string]interface{}{
   654			"camliVersion": version,
   655			"camliType":    ctype,
   656		}}
   657	}
   658	
   659	// NewUnsignedPermanode returns a new random permanode, not yet signed.
   660	func NewUnsignedPermanode() *Builder {
   661		bb := base(1, "permanode")
   662		chars := make([]byte, 20)
   663		_, err := io.ReadFull(rand.Reader, chars)
   664		if err != nil {
   665			panic("error reading random bytes: " + err.Error())
   666		}
   667		bb.m["random"] = base64.StdEncoding.EncodeToString(chars)
   668		return bb
   669	}
   670	
   671	// NewPlannedPermanode returns a permanode with a fixed key.  Like
   672	// NewUnsignedPermanode, this builder is also not yet signed.  Callers of
   673	// NewPlannedPermanode must sign the map with a fixed claimDate and
   674	// GPG date to create consistent JSON encodings of the map (and thus
   675	// a consistent blobref) between runs.
   676	func NewPlannedPermanode(key string) *Builder {
   677		bb := base(1, "permanode")
   678		bb.m["key"] = key
   679		return bb
   680	}
   681	
   682	// NewHashPlannedPermanode returns a planned permanode using the
   683	// blobref of the hash's sum as the key.
   684	func NewHashPlannedPermanode(h hash.Hash) *Builder {
   685		return NewPlannedPermanode(blob.RefFromHash(h).String())
   686	}
   687	
   688	// mapJSON returns the map m encoded as JSON in its
   689	// recommended canonical form. The canonical form is readable with newlines and indentation,
   690	// and always starts with the header bytes:
   691	//
   692	//   {"camliVersion":
   693	//
   694	func mapJSON(m map[string]interface{}) (string, error) {
   695		version, hasVersion := m["camliVersion"]
   696		if !hasVersion {
   697			return "", ErrNoCamliVersion
   698		}
   699		delete(m, "camliVersion")
   700		jsonBytes, err := json.MarshalIndent(m, "", "  ")
   701		if err != nil {
   702			return "", err
   703		}
   704		m["camliVersion"] = version
   705		var buf bytes.Buffer
   706		fmt.Fprintf(&buf, "{\"camliVersion\": %v,\n", version)
   707		buf.Write(jsonBytes[2:])
   708		return buf.String(), nil
   709	}
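
// Editor's sketch (not part of the original file): the canonical serialization
// produced by mapJSON for a minimal map built with base. The output always
// starts with the {"camliVersion": header shown above.
func exampleCanonicalJSON() (string, error) {
	bb := base(1, "permanode")
	s, err := mapJSON(bb.m)
	if err != nil {
		return "", err
	}
	// s begins with:
	//   {"camliVersion": 1,
	// followed by the remaining keys, indented two spaces.
	return s, nil
}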
   710	
   711	// NewFileMap returns a new builder of a type "file" schema for the provided fileName.
   712	// The chunk parts of the file are not populated.
   713	func NewFileMap(fileName string) *Builder {
   714		return newCommonFilenameMap(fileName).SetType("file")
   715	}
   716	
   717	// NewDirMap returns a new builder of a type "directory" schema for the provided fileName.
   718	func NewDirMap(fileName string) *Builder {
   719		return newCommonFilenameMap(fileName).SetType("directory")
   720	}
   721	
   722	func newCommonFilenameMap(fileName string) *Builder {
   723		bb := base(1, "" /* no type yet */)
   724		if fileName != "" {
   725			bb.SetFileName(fileName)
   726		}
   727		return bb
   728	}
   729	
   730	var populateSchemaStat []func(schemaMap map[string]interface{}, fi os.FileInfo)
   731	
   732	func NewCommonFileMap(fileName string, fi os.FileInfo) *Builder {
   733		bb := newCommonFilenameMap(fileName)
   734		// Common elements (from file-common.txt)
   735		if fi.Mode()&os.ModeSymlink == 0 {
   736			bb.m["unixPermission"] = fmt.Sprintf("0%o", fi.Mode().Perm())
   737		}
   738	
   739		// OS-specific population; defined in schema_posix.go, etc. (not on App Engine)
   740		for _, f := range populateSchemaStat {
   741			f(bb.m, fi)
   742		}
   743	
   744		if mtime := fi.ModTime(); !mtime.IsZero() {
   745			bb.m["unixMtime"] = RFC3339FromTime(mtime)
   746		}
   747		return bb
   748	}
   749	
   750	// PopulateParts sets the "parts" field of the blob with the provided
   751	// parts.  The sum of the sizes of parts must match the provided size
   752	// or an error is returned.  Also, each BytesPart may only contain either
   753	// a BytesRef or a BlobRef, but not both.
   754	func (bb *Builder) PopulateParts(size int64, parts []BytesPart) error {
   755		return populateParts(bb.m, size, parts)
   756	}
   757	
   758	func populateParts(m map[string]interface{}, size int64, parts []BytesPart) error {
   759		sumSize := int64(0)
   760		mparts := make([]map[string]interface{}, len(parts))
   761		for idx, part := range parts {
   762			mpart := make(map[string]interface{})
   763			mparts[idx] = mpart
   764			switch {
   765			case part.BlobRef.Valid() && part.BytesRef.Valid():
   766				return errors.New("schema: part contains both BlobRef and BytesRef")
   767			case part.BlobRef.Valid():
   768				mpart["blobRef"] = part.BlobRef.String()
   769			case part.BytesRef.Valid():
   770				mpart["bytesRef"] = part.BytesRef.String()
   771			default:
   772				return errors.New("schema: part must contain either a BlobRef or BytesRef")
   773			}
   774			mpart["size"] = part.Size
   775			sumSize += int64(part.Size)
   776			if part.Offset != 0 {
   777				mpart["offset"] = part.Offset
   778			}
   779		}
   780		if sumSize != size {
   781			return fmt.Errorf("schema: declared size %d doesn't match sum of parts size %d", size, sumSize)
   782		}
   783		m["parts"] = mparts
   784		return nil
   785	}
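
// Editor's sketch (not part of the original file): PopulateParts requires the
// part sizes to sum to the declared size, and each part to carry exactly one
// of a BlobRef or a BytesRef. The file name and sizes are hypothetical.
func exampleFileWithParts(chunk1, chunk2 blob.Ref) (*Builder, error) {
	bb := NewFileMap("example.dat")
	err := bb.PopulateParts(12, []BytesPart{
		{BlobRef: chunk1, Size: 5},
		{BlobRef: chunk2, Size: 7},
	})
	if err != nil {
		return nil, err
	}
	return bb, nil
}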
   786	
   787	func newBytes() *Builder {
   788		return base(1, "bytes")
   789	}
   790	
   791	// ClaimType is one of the valid "claimType" fields in a "claim" schema blob. See doc/schema/claims/.
   792	type ClaimType string
   793	
   794	const (
   795		SetAttributeClaim ClaimType = "set-attribute"
   796		AddAttributeClaim ClaimType = "add-attribute"
   797		DelAttributeClaim ClaimType = "del-attribute"
   798		ShareClaim        ClaimType = "share"
   799		// DeleteClaim deletes a permanode or another claim.
   800		// A delete claim can itself be deleted, and so on.
   801		DeleteClaim ClaimType = "delete"
   802	)
   803	
   804	// claimParam is used to populate a claim map when building a new claim
   805	type claimParam struct {
   806		claimType ClaimType
   807	
   808		// Params specific to *Attribute claims:
   809		permanode blob.Ref // modified permanode
   810		attribute string   // required
   811		value     string   // optional if Type == DelAttributeClaim
   812	
   813		// Params specific to ShareClaim claims:
   814		authType   string
   815		transitive bool
   816	
   817		// Params specific to ShareClaim and DeleteClaim claims.
   818		target blob.Ref
   819	}
   820	
   821	func newClaim(claims ...*claimParam) *Builder {
   822		bb := base(1, "claim")
   823		bb.SetClaimDate(clockNow())
   824		if len(claims) == 1 {
   825			cp := claims[0]
   826			populateClaimMap(bb.m, cp)
   827			return bb
   828		}
   829		var claimList []interface{}
   830		for _, cp := range claims {
   831			m := map[string]interface{}{}
   832			populateClaimMap(m, cp)
   833			claimList = append(claimList, m)
   834		}
   835		bb.m["claimType"] = "multi"
   836		bb.m["claims"] = claimList
   837		return bb
   838	}
   839	
   840	func populateClaimMap(m map[string]interface{}, cp *claimParam) {
   841		m["claimType"] = string(cp.claimType)
   842		switch cp.claimType {
   843		case ShareClaim:
   844			m["authType"] = cp.authType
   845			m["transitive"] = cp.transitive
   846		case DeleteClaim:
   847			m["target"] = cp.target.String()
   848		default:
   849			m["permaNode"] = cp.permanode.String()
   850			m["attribute"] = cp.attribute
   851			if !(cp.claimType == DelAttributeClaim && cp.value == "") {
   852				m["value"] = cp.value
   853			}
   854		}
   855	}
   856	
   857	// NewShareRef creates a *Builder for a "share" claim.
   858	func NewShareRef(authType string, transitive bool) *Builder {
   859		return newClaim(&claimParam{
   860			claimType:  ShareClaim,
   861			authType:   authType,
   862			transitive: transitive,
   863		})
   864	}
   865	
   866	func NewSetAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
   867		return newClaim(&claimParam{
   868			permanode: permaNode,
   869			claimType: SetAttributeClaim,
   870			attribute: attr,
   871			value:     value,
   872		})
   873	}
   874	
   875	func NewAddAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
   876		return newClaim(&claimParam{
   877			permanode: permaNode,
   878			claimType: AddAttributeClaim,
   879			attribute: attr,
   880			value:     value,
   881		})
   882	}
   883	
   884	// NewDelAttributeClaim creates a new claim to remove value from the
   885	// values set for the attribute attr of permaNode. If value is empty then
   886	// all the values for attribute are cleared.
   887	func NewDelAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
   888		return newClaim(&claimParam{
   889			permanode: permaNode,
   890			claimType: DelAttributeClaim,
   891			attribute: attr,
   892			value:     value,
   893		})
   894	}
   895	
   896	// NewDeleteClaim creates a new claim to delete a target claim or permanode.
   897	func NewDeleteClaim(target blob.Ref) *Builder {
   898		return newClaim(&claimParam{
   899			target:    target,
   900			claimType: DeleteClaim,
   901		})
   902	}
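
// Editor's sketch (not part of the original file): how the claim constructors
// above are typically paired; set an attribute on a permanode, then clear it
// later. The attribute name and value are hypothetical, and signing the
// resulting builders happens elsewhere in this package.
func exampleTagClaims(pn blob.Ref) (set, del *Builder) {
	set = NewSetAttributeClaim(pn, "tag", "vacation")
	del = NewDelAttributeClaim(pn, "tag", "") // empty value clears all "tag" values
	return set, del
}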
   903	
   904	// ShareHaveRef is the auth type specifying that if you "have the
   905	// reference" (know the blobref to the haveref share blob), then you
   906	// have access to the referenced object from that share blob.
   907	// This is the "send a link to a friend" access model.
   908	const ShareHaveRef = "haveref"
   909	
   910	// UnknownLocation is a magic timezone value used when the actual location
   911	// of a time is unknown. For instance, EXIF files commonly have a time without
   912	// a corresponding location or timezone offset.
   913	var UnknownLocation = time.FixedZone("Unknown", -60) // 1 minute west
   914	
   915	// IsZoneKnown reports whether t is in a known timezone.
   916	// Perkeep uses the magic timezone offset of 1 minute west of UTC
   917	// to mean that the timezone wasn't known.
   918	func IsZoneKnown(t time.Time) bool {
   919		if t.Location() == UnknownLocation {
   920			return false
   921		}
   922		if _, off := t.Zone(); off == -60 {
   923			return false
   924		}
   925		return true
   926	}
   927	
   928	// RFC3339FromTime returns an RFC3339-formatted time.
   929	//
   930	// If the timezone is known, the time will be converted to UTC and
   931	// returned with a "Z" suffix. For unknown zones, the timezone will be
   932	// "-00:01" (1 minute west of UTC).
   933	//
   934	// Fractional seconds are only included if the time has fractional
   935	// seconds.
   936	func RFC3339FromTime(t time.Time) string {
   937		if IsZoneKnown(t) {
   938			t = t.UTC()
   939		}
   940		if t.UnixNano()%1e9 == 0 {
   941			return t.Format(time.RFC3339)
   942		}
   943		return t.Format(time.RFC3339Nano)
   944	}
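
// Editor's sketch (not part of the original file): RFC3339FromTime on a time
// in a known zone (converted to UTC, "Z" suffix) versus the same instant in
// UnknownLocation (kept at the magic "-00:01" offset).
func exampleRFC3339Times() (known, unknown string) {
	t := time.Date(2011, 7, 4, 12, 0, 0, 0, time.UTC)
	known = RFC3339FromTime(t)                       // "2011-07-04T12:00:00Z"
	unknown = RFC3339FromTime(t.In(UnknownLocation)) // "2011-07-04T11:59:00-00:01"
	return known, unknown
}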
   945	
   946	var bytesCamliVersion = []byte("camliVersion")
   947	
   948	// LikelySchemaBlob returns quickly whether buf likely contains (or is
   949	// the prefix of) a schema blob.
   950	func LikelySchemaBlob(buf []byte) bool {
   951		if len(buf) == 0 || buf[0] != '{' {
   952			return false
   953		}
   954		return bytes.Contains(buf, bytesCamliVersion)
   955	}
   956	
   957	// findSize checks if v is an *os.File or if it has
   958	// a Size() int64 method, to find its size.
   959	// It returns 0, false otherwise.
   960	func findSize(v interface{}) (size int64, ok bool) {
   961		if fi, ok := v.(*os.File); ok {
   962			v, _ = fi.Stat()
   963		}
   964		if sz, ok := v.(interface {
   965			Size() int64
   966		}); ok {
   967			return sz.Size(), true
   968		}
   969		// For bytes.Reader, strings.Reader, etc:
   970		if li, ok := v.(interface {
   971			Len() int
   972		}); ok {
   973			ln := int64(li.Len()) // unread portion, typically
   974			// If it's also a seeker, add any current seek offset:
   975			if sk, ok := v.(io.Seeker); ok {
   976				if cur, err := sk.Seek(0, 1); err == nil {
   977					ln += cur
   978				}
   979			}
   980			return ln, true
   981		}
   982		return 0, false
   983	}
   984	
   985	// FileTime returns the best guess of the file's creation time (or modtime).
   986	// If the file doesn't have its own metadata indicating the creation time (such as in EXIF),
   987	// FileTime uses the modification time from the file system.
   988	// If there was a valid EXIF but an error while trying to get a date from it,
   989	// it logs the error and tries the other methods.
   990	func FileTime(f io.ReaderAt) (time.Time, error) {
   991		var ct time.Time
   992		defaultTime := func() (time.Time, error) {
   993			if osf, ok := f.(*os.File); ok {
   994				fi, err := osf.Stat()
   995				if err != nil {
   996					return ct, fmt.Errorf("Failed to find a modtime: stat: %v", err)
   997				}
   998				return fi.ModTime(), nil
   999			}
  1000			return ct, errors.New("all methods failed to find a creation time or modtime")
  1001		}
  1002	
  1003		size, ok := findSize(f)
  1004		if !ok {
  1005			size = 256 << 10 // enough to get the EXIF
  1006		}
  1007		r := io.NewSectionReader(f, 0, size)
  1008		var tiffErr error
  1009		ex, err := exif.Decode(r)
  1010		if err != nil {
  1011			tiffErr = err
  1012			if exif.IsShortReadTagValueError(err) {
  1013				return ct, io.ErrUnexpectedEOF
  1014			}
  1015			if exif.IsCriticalError(err) || exif.IsExifError(err) {
  1016				return defaultTime()
  1017			}
  1018		}
  1019		ct, err = ex.DateTime()
  1020		if err != nil {
  1021			return defaultTime()
  1022		}
  1023		// If the EXIF file only had local timezone, but it did have
  1024		// GPS, then look up the timezone and correct the time.
  1025		if ct.Location() == time.Local {
  1026			if exif.IsGPSError(tiffErr) {
  1027				log.Printf("Invalid EXIF GPS data: %v", tiffErr)
  1028				return ct, nil
  1029			}
  1030			if lat, long, err := ex.LatLong(); err == nil {
  1031				if loc := lookupLocation(latlong.LookupZoneName(lat, long)); loc != nil {
  1032					if t, err := exifDateTimeInLocation(ex, loc); err == nil {
  1033						return t, nil
  1034					}
  1035				}
  1036			} else if !exif.IsTagNotPresentError(err) {
  1037				log.Printf("Invalid EXIF GPS data: %v", err)
  1038			}
  1039		}
  1040		return ct, nil
  1041	}
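
// Editor's sketch (not part of the original file): the typical call pattern
// for FileTime on a local file. The path is hypothetical.
func exampleFileTime() (time.Time, error) {
	f, err := os.Open("photo.jpg")
	if err != nil {
		return time.Time{}, err
	}
	defer f.Close()
	return FileTime(f) // EXIF date if present, otherwise the file's modtime
}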
  1042	
  1043	// This is basically a copy of the exif.Exif.DateTime() method, except:
  1044	//   * it takes a *time.Location to assume
  1045	//   * the caller already assumes there's no timezone offset or GPS time
  1046	//     in the EXIF, so any of that code can be ignored.
  1047	func exifDateTimeInLocation(x *exif.Exif, loc *time.Location) (time.Time, error) {
  1048		tag, err := x.Get(exif.DateTimeOriginal)
  1049		if err != nil {
  1050			tag, err = x.Get(exif.DateTime)
  1051			if err != nil {
  1052				return time.Time{}, err
  1053			}
  1054		}
  1055		if tag.Format() != tiff.StringVal {
  1056			return time.Time{}, errors.New("DateTime[Original] not in string format")
  1057		}
  1058		const exifTimeLayout = "2006:01:02 15:04:05"
  1059		dateStr := strings.TrimRight(string(tag.Val), "\x00")
  1060		return time.ParseInLocation(exifTimeLayout, dateStr, loc)
  1061	}
  1062	
  1063	var zoneCache struct {
  1064		sync.RWMutex
  1065		m map[string]*time.Location
  1066	}
  1067	
  1068	func lookupLocation(zone string) *time.Location {
  1069		if zone == "" {
  1070			return nil
  1071		}
  1072		zoneCache.RLock()
  1073		l, ok := zoneCache.m[zone]
  1074		zoneCache.RUnlock()
  1075		if ok {
  1076			return l
  1077		}
  1078		// could use singleflight here, but doesn't really
  1079		// matter if two callers both do this.
  1080		loc, err := time.LoadLocation(zone)
  1081	
  1082		zoneCache.Lock()
  1083		if zoneCache.m == nil {
  1084			zoneCache.m = make(map[string]*time.Location)
  1085		}
  1086		zoneCache.m[zone] = loc // even if nil
  1087		zoneCache.Unlock()
  1088	
  1089		if err != nil {
  1090			log.Printf("failed to lookup timezone %q: %v", zone, err)
  1091			return nil
  1092		}
  1093		return loc
  1094	}
  1095	
  1096	var boringTitlePattern = regexp.MustCompile(`^(?:IMG_|DSC|PANO_|ESR_).*$`)
  1097	
  1098	// IsInterestingTitle returns whether title would be interesting information as
  1099	// a title for a permanode. For example, filenames automatically created by
  1100	// cameras, such as IMG_XXXX.JPG, do not add any interesting value.
  1101	func IsInterestingTitle(title string) bool {
  1102		return !boringTitlePattern.MatchString(title)
  1103	}