Home Download Docs Code Community
     1	/*
     2	Copyright 2011 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	package index
    18	
    19	import (
    20		"bytes"
    21		"fmt"
    22		"strings"
    23	
    24		"perkeep.org/pkg/blob"
    25	)
    26	
// requiredSchemaVersion is incremented every time
// an index key type is added, changed, or removed.
//
// History:
//   - Version 4: EXIF tags + GPS
//   - Version 5: wholeRef added to keyFileInfo
const requiredSchemaVersion = 5
    32	
    33	// type of key returns the identifier in k before the first ":" or "|".
    34	// (Originally we packed keys by hand and there are a mix of styles)
    35	func typeOfKey(k string) string {
    36		c := strings.Index(k, ":")
    37		p := strings.Index(k, "|")
    38		if c < 0 && p < 0 {
    39			return ""
    40		}
    41		if c < 0 {
    42			return k[:p]
    43		}
    44		if p < 0 {
    45			return k[:c]
    46		}
    47		min := c
    48		if p < min {
    49			min = p
    50		}
    51		return k[:min]
    52	}
    53	
// keyType describes one kind of index row: the name that starts each of
// its keys and the ordered, typed parts making up its key and its value.
// The field order matters: the key definitions in this file are written
// as positional (unkeyed) literals.
type keyType struct {
	name     string // written at the start of every key and prefix (see build)
	keyParts []part // components of the key, in order
	valParts []part // components of the value, in order
}
    59	
    60	func (k *keyType) Prefix(args ...interface{}) string {
    61		return k.build(true, true, k.keyParts, args...)
    62	}
    63	
    64	func (k *keyType) Key(args ...interface{}) string {
    65		return k.build(false, true, k.keyParts, args...)
    66	}
    67	
    68	func (k *keyType) Val(args ...interface{}) string {
    69		return k.build(false, false, k.valParts, args...)
    70	}
    71	
    72	func (k *keyType) build(isPrefix, isKey bool, parts []part, args ...interface{}) string {
    73		var buf bytes.Buffer
    74		if isKey {
    75			buf.WriteString(k.name)
    76		}
    77		if !isPrefix && len(args) != len(parts) {
    78			panic("wrong number of arguments")
    79		}
    80		if len(args) > len(parts) {
    81			panic("too many arguments")
    82		}
    83		for i, arg := range args {
    84			if isKey || i > 0 {
    85				buf.WriteString("|")
    86			}
    87			asStr := func() string {
    88				s, ok := arg.(string)
    89				if !ok {
    90					s = arg.(fmt.Stringer).String()
    91				}
    92				return s
    93			}
    94			switch parts[i].typ {
    95			case typeIntStr:
    96				switch arg.(type) {
    97				case int, int64, uint64:
    98					buf.WriteString(fmt.Sprintf("%d", arg))
    99				default:
   100					panic("bogus int type")
   101				}
   102			case typeStr:
   103				buf.WriteString(urle(asStr()))
   104			case typeRawStr:
   105				buf.WriteString(asStr())
   106			case typeReverseTime:
   107				s := asStr()
   108				const example = "2011-01-23T05:23:12"
   109				if len(s) < len(example) || s[4] != '-' && s[10] != 'T' {
   110					panic("doesn't look like a time: " + s)
   111				}
   112				buf.WriteString(reverseTimeString(s))
   113			case typeBlobRef:
   114				if br, ok := arg.(blob.Ref); ok {
   115					if br.Valid() {
   116						buf.WriteString(br.String())
   117					}
   118					break
   119				}
   120				fallthrough
   121			default:
   122				if s, ok := arg.(string); ok {
   123					buf.WriteString(s)
   124				} else {
   125					buf.WriteString(arg.(fmt.Stringer).String())
   126				}
   127			}
   128		}
   129		if isPrefix {
   130			buf.WriteString("|")
   131		}
   132		return buf.String()
   133	}
   134	
// part describes one component of an index key or value.
type part struct {
	name string   // label for the component; build does not consult it
	typ  partType // selects how build encodes the matching argument
}
   139	
// partType identifies how keyType.build encodes the argument supplied
// for a part.
type partType int

const (
	typeKeyId partType = iota // PGP key id
	typeTime                  // written as given (a string or fmt.Stringer)
	typeReverseTime           // time prepended with "rt" + each numeric digit reversed from '9'
	typeBlobRef               // a blob.Ref (written only if valid), or a string / fmt.Stringer
	typeStr                   // URL-escaped
	typeIntStr                // integer as string
	typeRawStr                // not URL-escaped
)
   151	
// The index key types. Each keyType literal below is positional: the
// name, then the key parts, then the value parts.
var (
	// keySchemaVersion indexes the index schema version.
	// It has no key parts, so its key is just its name.
	keySchemaVersion = &keyType{
		"schemaversion",
		nil,
		[]part{
			{"version", typeIntStr},
		},
	}

	// keyMissing is keyed by two blobrefs, "have" and "needed".
	// NOTE(review): presumably records that the blob "have" depends on a
	// blob "needed" that is not yet available — confirm against callers.
	keyMissing = &keyType{
		"missing",
		[]part{
			{"have", typeBlobRef},
			{"needed", typeBlobRef},
		},
		[]part{
			{"1", typeStr},
		},
	}

	// keySignerKeyID maps a blobref ("hashsum") to a key ID string.
	keySignerKeyID = &keyType{
		"signerkeyid",
		[]part{
			{"hashsum", typeBlobRef},
		},
		[]part{
			{"keyID", typeStr},
		},
	}

	// keyPermanodeClaim indexes when a permanode is modified (or deleted) by a claim.
	// It ties the affected permanode to the date of the modification, the responsible
	// claim, and the nature of the modification.
	keyPermanodeClaim = &keyType{
		"claim",
		[]part{
			{"permanode", typeBlobRef}, // modified permanode
			{"signer", typeKeyId},
			{"claimDate", typeTime},
			{"claim", typeBlobRef},
		},
		[]part{
			{"claimType", typeStr},
			{"attr", typeStr},
			{"value", typeStr},
			// And the signerRef, which seems redundant
			// with the signer keyId in the key, but the
			// Claim struct needs this, and there's 1:m
			// for keyId:blobRef, so:
			{"signerRef", typeBlobRef},
		},
	}

	// keyRecentPermanode is keyed by owner key ID, reversed modtime, and
	// claim blobref. Its rows carry no value parts.
	keyRecentPermanode = &keyType{
		"recpn",
		[]part{
			{"owner", typeKeyId},
			{"modtime", typeReverseTime},
			{"claim", typeBlobRef},
		},
		nil,
	}

	// keyPathBackward is keyed by signer and target (plus the claim, for
	// uniqueness). Its value holds the claim date, base, active flag, and
	// suffix.
	keyPathBackward = &keyType{
		"signertargetpath",
		[]part{
			{"signer", typeKeyId},
			{"target", typeBlobRef},
			{"claim", typeBlobRef}, // for key uniqueness
		},
		[]part{
			{"claimDate", typeTime},
			{"base", typeBlobRef},
			{"active", typeStr}, // 'Y', or 'N' for deleted
			{"suffix", typeStr},
		},
	}

	// keyPathForward is keyed by signer, base, and suffix, followed by the
	// reversed claim date and the claim. Its value holds the active flag
	// and the target.
	keyPathForward = &keyType{
		"path",
		[]part{
			{"signer", typeKeyId},
			{"base", typeBlobRef},
			{"suffix", typeStr},
			{"claimDate", typeReverseTime},
			{"claim", typeBlobRef}, // for key uniqueness
		},
		[]part{
			{"active", typeStr}, // 'Y', or 'N' for deleted
			{"target", typeBlobRef},
		},
	}

	// keyWholeToFileRef is keyed by a "whole" blobref and a "schema" blobref.
	// NOTE(review): presumably maps a file's whole-content ref to the file
	// schema blobs describing it — confirm against callers.
	keyWholeToFileRef = &keyType{
		"wholetofile",
		[]part{
			{"whole", typeBlobRef},
			{"schema", typeBlobRef}, // for key uniqueness
		},
		[]part{
			{"1", typeStr},
		},
	}

	// keyFileInfo maps a file blobref to its size, filename, MIME type,
	// and whole ref (the last was added in schema version 5).
	keyFileInfo = &keyType{
		"fileinfo",
		[]part{
			{"file", typeBlobRef},
		},
		[]part{
			{"size", typeIntStr},
			{"filename", typeStr},
			{"mimetype", typeStr},
			{"whole", typeBlobRef},
		},
	}

	// keyFileTimes maps a file blobref to a string of its times.
	keyFileTimes = &keyType{
		"filetimes",
		[]part{
			{"file", typeBlobRef},
		},
		[]part{
			// 0, 1, or 2 comma-separated types.Time3339
			// strings for creation/mod times. Oldest,
			// then newest. See FileInfo docs.
			{"time3339s", typeStr},
		},
	}

	// keySignerAttrValue is keyed by signer, attr, value, reversed claim
	// date, and claim ref. Its value is a permanode blobref.
	keySignerAttrValue = &keyType{
		"signerattrvalue",
		[]part{
			{"signer", typeKeyId},
			{"attr", typeStr},
			{"value", typeStr},
			{"claimdate", typeReverseTime},
			{"claimref", typeBlobRef},
		},
		[]part{
			{"permanode", typeBlobRef},
		},
	}

	// keyDeleted indexes a claim that deletes an entity. It ties the deleted
	// entity to the date it was deleted, and to the deleter claim.
	keyDeleted = &keyType{
		"deleted",
		[]part{
			{"deleted", typeBlobRef}, // the deleted entity (a permanode or another claim)
			{"claimdate", typeReverseTime},
			{"deleter", typeBlobRef}, // the deleter claim blobref
		},
		nil,
	}

	// Given a blobref (permanode or static file or directory), provide a mapping
	// to potential parents (they may no longer be parents, in the case of permanodes).
	// In the case of permanodes, camliMember or camliContent constitutes a forward
	// edge.  In the case of static directories, the forward path is dir->static set->file,
	// and that's what's indexed here, inverted.
	keyEdgeBackward = &keyType{
		"edgeback",
		[]part{
			{"child", typeBlobRef},  // the edge target; thing we want to find parent(s) of
			{"parent", typeBlobRef}, // the parent / edge source (e.g. permanode blobref)
			// the blobref is the blob establishing the relationship
			// (for a permanode: the claim; for static: often same as parent)
			{"blobref", typeBlobRef},
		},
		[]part{
			{"parenttype", typeStr}, // either "permanode" or the camliType ("file", "static-set", etc)
			{"name", typeStr},       // the name, if static.
		},
	}

	// Width and height after any EXIF rotation.
	keyImageSize = &keyType{
		"imagesize",
		[]part{
			{"fileref", typeBlobRef}, // blobref of "file" schema blob
		},
		[]part{
			{"width", typeStr},
			{"height", typeStr},
		},
	}

	// child of a directory
	keyStaticDirChild = &keyType{
		"dirchild",
		[]part{
			{"dirref", typeBlobRef}, // blobref of "directory" schema blob
			{"child", typeStr},      // blobref of the child
		},
		[]part{
			{"1", typeStr},
		},
	}

	// Media attributes (e.g. ID3 tags). Uses generic terms like
	// "artist", "title", "album", etc.
	keyMediaTag = &keyType{
		"mediatag",
		[]part{
			{"wholeRef", typeBlobRef}, // wholeRef for song
			{"tag", typeStr},
		},
		[]part{
			{"value", typeStr},
		},
	}

	// EXIF tags
	keyEXIFTag = &keyType{
		"exiftag",
		[]part{
			{"wholeRef", typeBlobRef}, // of entire file, not fileref
			{"tag", typeStr},          // uint16 tag number as hex: xxxx
		},
		[]part{
			{"type", typeStr},    // "int", "rat", "float", "string"
			{"n", typeIntStr},    // n components of type
			{"vals", typeRawStr}, // pipe-separated; rats are n/d. strings are URL-escaped.
		},
	}

	// Redundant version of keyEXIFTag. TODO: maybe get rid of this.
	// Easier to process as one row instead of 4, though.
	keyEXIFGPS = &keyType{
		"exifgps",
		[]part{
			{"wholeRef", typeBlobRef}, // of entire file, not fileref
		},
		[]part{
			{"lat", typeRawStr},
			{"long", typeRawStr},
		},
	}
)
   393	
   394	func containsUnsafeRawStrByte(s string) bool {
   395		for _, r := range s {
   396			if r >= 'z' || r < ' ' {
   397				// pipe ('|) and non-ASCII are above 'z'.
   398				return true
   399			}
   400			if r == '%' || r == '+' {
   401				// Could be interpreted as URL-encoded
   402				return true
   403			}
   404		}
   405		return false
   406	}
Website layout inspired by memcached.
Content by the authors.