1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
16
17
18 package blob
19
20 import (
21 "bytes"
22 "crypto/sha1"
23 "crypto/sha256"
24 "errors"
25 "fmt"
26 "hash"
27 "io"
28 "reflect"
29 "strings"
30
31 "perkeep.org/internal/testhooks"
32 )
33
34
35
// Pattern is a regular expression source matching a blobref-shaped token:
// a lowercase alphanumeric name that starts with a letter, a '-', and one
// or more lowercase hexadecimal digits, delimited by word boundaries.
// The name and the hex digits are captured as groups 1 and 2.
const Pattern = `\b([a-z][a-z0-9]*)-([a-f0-9]+)\b`
37
38
39
40
// Ref is a reference to a blob, held as a digest value.
// The zero Ref has a nil digest and reports false from Valid.
type Ref struct {
	digest digestType
}
44
45
46
// SizedRef pairs a Ref with a size. In JSON the fields use the keys
// "blobRef" and "size"; String prints the size as a byte count.
type SizedRef struct {
	Ref  Ref    `json:"blobRef"`
	Size uint32 `json:"size"`
}
51
52
53 func (sr SizedRef) Less(o SizedRef) bool {
54 return sr.Ref.Less(o.Ref)
55 }
56
57 func (sr SizedRef) Valid() bool { return sr.Ref.Valid() }
58
59 func (sr SizedRef) HashMatches(h hash.Hash) bool { return sr.Ref.HashMatches(h) }
60
61 func (sr SizedRef) String() string {
62 return fmt.Sprintf("[%s; %d bytes]", sr.Ref.String(), sr.Size)
63 }
64
65
66
67
// digestType is the internal representation of a blobref's digest.
// The implementations in this file are sha1Digest, sha224Digest and
// otherDigest.
type digestType interface {
	// bytes returns the raw (binary) digest bytes.
	bytes() []byte
	// digestName returns the digest's name, i.e. the part of the string
	// form that precedes the '-'.
	digestName() string
	// newHash returns a new hash.Hash for this digest type.
	// otherDigest returns nil.
	newHash() hash.Hash
	// equalString reports whether s is exactly the digest's string form
	// ("name-hex").
	equalString(string) bool
	// hasPrefix reports whether s is a prefix of the digest's string form
	// that includes at least one hex digit after the '-'.
	hasPrefix(string) bool
}
75
76 func (r Ref) String() string {
77 if r.digest == nil {
78 return "<invalid-blob.Ref>"
79 }
80 dname := r.digest.digestName()
81 bs := r.digest.bytes()
82 buf := getBuf(len(dname) + 1 + len(bs)*2)[:0]
83 defer putBuf(buf)
84 return string(r.appendString(buf))
85 }
86
87
88 func (r Ref) StringMinusOne() string {
89 if r.digest == nil {
90 return "<invalid-blob.Ref>"
91 }
92 dname := r.digest.digestName()
93 bs := r.digest.bytes()
94 buf := getBuf(len(dname) + 1 + len(bs)*2)[:0]
95 defer putBuf(buf)
96 buf = r.appendString(buf)
97 buf[len(buf)-1]--
98 return string(buf)
99 }
100
101
102
103 func (r Ref) EqualString(s string) bool { return r.digest.equalString(s) }
104
105
106
107
108
109 func (r Ref) HasPrefix(s string) bool { return r.digest.hasPrefix(s) }
110
111 func (r Ref) appendString(buf []byte) []byte {
112 dname := r.digest.digestName()
113 bs := r.digest.bytes()
114 buf = append(buf, dname...)
115 buf = append(buf, '-')
116 for _, b := range bs {
117 buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf])
118 }
119 if o, ok := r.digest.(otherDigest); ok && o.odd {
120 buf = buf[:len(buf)-1]
121 }
122 return buf
123 }
124
125
126
127 func (r Ref) HashName() string {
128 if r.digest == nil {
129 panic("HashName called on invalid Ref")
130 }
131 return r.digest.digestName()
132 }
133
134
135
136 func (r Ref) Digest() string {
137 if r.digest == nil {
138 panic("Digest called on invalid Ref")
139 }
140 bs := r.digest.bytes()
141 buf := getBuf(len(bs) * 2)[:0]
142 defer putBuf(buf)
143 for _, b := range bs {
144 buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf])
145 }
146 if o, ok := r.digest.(otherDigest); ok && o.odd {
147 buf = buf[:len(buf)-1]
148 }
149 return string(buf)
150 }
151
152 func (r Ref) DigestPrefix(digits int) string {
153 v := r.Digest()
154 if len(v) < digits {
155 return v
156 }
157 return v[:digits]
158 }
159
160 func (r Ref) DomID() string {
161 if !r.Valid() {
162 return ""
163 }
164 return "camli-" + r.String()
165 }
166
167 func (r Ref) Sum32() uint32 {
168 var v uint32
169 for _, b := range r.digest.bytes()[:4] {
170 v = v<<8 | uint32(b)
171 }
172 return v
173 }
174
175 func (r Ref) Sum64() uint64 {
176 var v uint64
177 for _, b := range r.digest.bytes()[:8] {
178 v = v<<8 | uint64(b)
179 }
180 return v
181 }
182
183
184
185 func (r Ref) Hash() hash.Hash {
186 return r.digest.newHash()
187 }
188
189 func (r Ref) HashMatches(h hash.Hash) bool {
190 if r.digest == nil {
191 return false
192 }
193 return bytes.Equal(h.Sum(nil), r.digest.bytes())
194 }
195
// hexDigit maps a 4-bit value (0-15) to its lowercase hexadecimal character.
const hexDigit = "0123456789abcdef"
197
198 func (r Ref) Valid() bool { return r.digest != nil }
199
200 func (r Ref) IsSupported() bool {
201 if !r.Valid() {
202 return false
203 }
204 _, ok := metaFromString[r.digest.digestName()]
205 return ok
206 }
207
208
209
210
211 func ParseKnown(s string) (ref Ref, ok bool) {
212 return parse(s, false)
213 }
214
215
216
217 func Parse(s string) (ref Ref, ok bool) {
218 return parse(s, true)
219 }
220
221 func parse(s string, allowAll bool) (ref Ref, ok bool) {
222 i := strings.Index(s, "-")
223 if i < 0 {
224 return
225 }
226 name := s[:i]
227 hex := s[i+1:]
228 meta, ok := metaFromString[name]
229 if !ok {
230 if allowAll || testRefType[name] {
231 return parseUnknown(name, hex)
232 }
233 return
234 }
235 if len(hex) != meta.size*2 {
236 ok = false
237 return
238 }
239 dt, ok := meta.ctors(hex)
240 if !ok {
241 return
242 }
243 return Ref{dt}, true
244 }
245
// testRefType lists digest names that parse hands to parseUnknown even when
// allowAll is false.
var testRefType = map[string]bool{
	"fakeref": true,
	"testref": true,
	"perma":   true,
}
251
252
253 func ParseBytes(s []byte) (ref Ref, ok bool) {
254 i := bytes.IndexByte(s, '-')
255 if i < 0 {
256 return
257 }
258 name := s[:i]
259 hex := s[i+1:]
260 meta, ok := metaFromBytes(name)
261 if !ok {
262 return parseUnknown(string(name), string(hex))
263 }
264 if len(hex) != meta.size*2 {
265 ok = false
266 return
267 }
268 dt, ok := meta.ctorb(hex)
269 if !ok {
270 return
271 }
272 return Ref{dt}, true
273 }
274
275
276
277 func ParseOrZero(s string) Ref {
278 ref, ok := Parse(s)
279 if !ok {
280 return Ref{}
281 }
282 return ref
283 }
284
285
286 func MustParse(s string) Ref {
287 ref, ok := Parse(s)
288 if !ok {
289 panic("Invalid blobref " + s)
290 }
291 return ref
292 }
293
294
// hexVal returns the numeric value of the lowercase hex character b.
// For any other byte it sets *bad to true and returns 0. *bad is never
// reset, so one flag can accumulate failures across many calls.
func hexVal(b byte, bad *bool) byte {
	switch {
	case b >= '0' && b <= '9':
		return b - '0'
	case b >= 'a' && b <= 'f':
		return b - 'a' + 10
	default:
		*bad = true
		return 0
	}
}
305
// validDigestName reports whether name is non-empty and consists solely of
// the characters a-z and 0-9.
func validDigestName(name string) bool {
	if len(name) == 0 {
		return false
	}
	for _, c := range name {
		isLower := c >= 'a' && c <= 'z'
		isDigit := c >= '0' && c <= '9'
		if !isLower && !isDigit {
			return false
		}
	}
	return true
}
321
322
323
324 func parseUnknown(digest, hex string) (ref Ref, ok bool) {
325 if !validDigestName(digest) {
326 return
327 }
328
329
330 odd := false
331 if len(hex)%2 != 0 {
332 hex += "0"
333 odd = true
334 }
335
336 if len(hex) < 2 || len(hex)%2 != 0 || len(hex) > maxOtherDigestLen*2 {
337 return
338 }
339 o := otherDigest{
340 name: digest,
341 sumLen: len(hex) / 2,
342 odd: odd,
343 }
344 bad := false
345 for i := 0; i < len(hex); i += 2 {
346 o.sum[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
347 }
348 if bad {
349 return
350 }
351 return Ref{o}, true
352 }
353
354 func sha1FromBinary(b []byte) digestType {
355 var d sha1Digest
356 if len(d) != len(b) {
357 panic("bogus sha-1 length")
358 }
359 copy(d[:], b)
360 return d
361 }
362
363 func sha1FromHexString(hex string) (digestType, bool) {
364 var d sha1Digest
365 var bad bool
366 for i := 0; i < len(hex); i += 2 {
367 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
368 }
369 if bad {
370 return nil, false
371 }
372 return d, true
373 }
374
375
376 func sha1FromHexBytes(hex []byte) (digestType, bool) {
377 var d sha1Digest
378 var bad bool
379 for i := 0; i < len(hex); i += 2 {
380 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
381 }
382 if bad {
383 return nil, false
384 }
385 return d, true
386 }
387
388 func sha224FromBinary(b []byte) digestType {
389 var d sha224Digest
390 if len(d) != len(b) {
391 panic("bogus sha-224 length")
392 }
393 copy(d[:], b)
394 return d
395 }
396
397 func sha224FromHexString(hex string) (digestType, bool) {
398 var d sha224Digest
399 var bad bool
400 for i := 0; i < len(hex); i += 2 {
401 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
402 }
403 if bad {
404 return nil, false
405 }
406 return d, true
407 }
408
409
410 func sha224FromHexBytes(hex []byte) (digestType, bool) {
411 var d sha224Digest
412 var bad bool
413 for i := 0; i < len(hex); i += 2 {
414 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
415 }
416 if bad {
417 return nil, false
418 }
419 return d, true
420 }
421
422
423
424 func RefFromHash(h hash.Hash) Ref {
425 meta, ok := metaFromType[hashSig{reflect.TypeOf(h), h.Size()}]
426 if !ok {
427 panic(fmt.Sprintf("Currently-unsupported hash type %T", h))
428 }
429 return Ref{meta.ctor(h.Sum(nil))}
430 }
431
432
433
434 func RefFromString(s string) Ref {
435 h := NewHash()
436 io.WriteString(h, s)
437 return RefFromHash(h)
438 }
439
440
441
442 func RefFromBytes(b []byte) Ref {
443 h := NewHash()
444 h.Write(b)
445 return RefFromHash(h)
446 }
447
// sha1Digest is the digestType for "sha1" blobrefs: the raw SHA-1 sum.
type sha1Digest [sha1.Size]byte

// digestName returns the constant name "sha1".
func (d sha1Digest) digestName() string {
	return "sha1"
}

// bytes returns the digest bytes as a slice over the receiver's copy.
func (d sha1Digest) bytes() []byte {
	return d[:]
}

// newHash returns a fresh SHA-1 hash.
func (d sha1Digest) newHash() hash.Hash {
	return sha1.New()
}
453 func (d sha1Digest) equalString(s string) bool {
454 if len(s) != 45 {
455 return false
456 }
457 if !strings.HasPrefix(s, "sha1-") {
458 return false
459 }
460 s = s[len("sha1-"):]
461 for i, b := range d[:] {
462 if s[i*2] != hexDigit[b>>4] || s[i*2+1] != hexDigit[b&0xf] {
463 return false
464 }
465 }
466 return true
467 }
468
469 func (d sha1Digest) hasPrefix(s string) bool {
470 if len(s) > 45 {
471 return false
472 }
473 if len(s) == 45 {
474 return d.equalString(s)
475 }
476 if !strings.HasPrefix(s, "sha1-") {
477 return false
478 }
479 s = s[len("sha1-"):]
480 if len(s) == 0 {
481
482 return false
483 }
484 for i, b := range d[:] {
485 even := i * 2
486 if even == len(s) {
487 break
488 }
489 if s[even] != hexDigit[b>>4] {
490 return false
491 }
492 odd := i*2 + 1
493 if odd == len(s) {
494 break
495 }
496 if s[odd] != hexDigit[b&0xf] {
497 return false
498 }
499 }
500 return true
501 }
502
// sha224Digest is the digestType for "sha224" blobrefs: the raw SHA-224 sum.
type sha224Digest [sha256.Size224]byte

// sha224StrLen is the length of a full sha224 blobref string:
// the "sha224-" prefix plus two hex digits per digest byte (63 in total).
const sha224StrLen = len("sha224-") + 2*sha256.Size224

// digestName returns the constant name "sha224".
func (d sha224Digest) digestName() string {
	return "sha224"
}

// bytes returns the digest bytes as a slice over the receiver's copy.
func (d sha224Digest) bytes() []byte {
	return d[:]
}

// newHash returns a fresh SHA-224 hash.
func (d sha224Digest) newHash() hash.Hash {
	return sha256.New224()
}
510 func (d sha224Digest) equalString(s string) bool {
511 if len(s) != sha224StrLen {
512 return false
513 }
514 if !strings.HasPrefix(s, "sha224-") {
515 return false
516 }
517 s = s[len("sha224-"):]
518 for i, b := range d[:] {
519 if s[i*2] != hexDigit[b>>4] || s[i*2+1] != hexDigit[b&0xf] {
520 return false
521 }
522 }
523 return true
524 }
525
526 func (d sha224Digest) hasPrefix(s string) bool {
527 if len(s) > sha224StrLen {
528 return false
529 }
530 if len(s) == sha224StrLen {
531 return d.equalString(s)
532 }
533 if !strings.HasPrefix(s, "sha224-") {
534 return false
535 }
536 s = s[len("sha224-"):]
537 if len(s) == 0 {
538
539 return false
540 }
541 for i, b := range d[:] {
542 even := i * 2
543 if even == len(s) {
544 break
545 }
546 if s[even] != hexDigit[b>>4] {
547 return false
548 }
549 odd := i*2 + 1
550 if odd == len(s) {
551 break
552 }
553 if s[odd] != hexDigit[b&0xf] {
554 return false
555 }
556 }
557 return true
558 }
559
// maxOtherDigestLen is the largest digest, in bytes, that an otherDigest
// can hold.
const maxOtherDigestLen = 128

// otherDigest is the digestType used for digest names that have no
// dedicated type. Only the first sumLen bytes of sum are meaningful. odd
// records that the hex text had an odd number of digits, so the low nibble
// of the last byte is padding.
type otherDigest struct {
	name   string
	sum    [maxOtherDigestLen]byte
	sumLen int
	odd    bool
}

// digestName returns the name the digest was created with.
func (d otherDigest) digestName() string {
	return d.name
}

// bytes returns the first sumLen bytes of the stored sum.
func (d otherDigest) bytes() []byte {
	return d.sum[0:d.sumLen]
}

// newHash returns nil: no hash implementation is associated with an
// otherDigest.
func (d otherDigest) newHash() hash.Hash {
	return nil
}
572 func (d otherDigest) equalString(s string) bool {
573 wantLen := len(d.name) + len("-") + 2*d.sumLen
574 if d.odd {
575 wantLen--
576 }
577 if len(s) != wantLen || !strings.HasPrefix(s, d.name) || s[len(d.name)] != '-' {
578 return false
579 }
580 s = s[len(d.name)+1:]
581 for i, b := range d.sum[:d.sumLen] {
582 if s[i*2] != hexDigit[b>>4] {
583 return false
584 }
585 if i == d.sumLen-1 && d.odd {
586 break
587 }
588 if s[i*2+1] != hexDigit[b&0xf] {
589 return false
590 }
591 }
592 return true
593 }
594
595 func (d otherDigest) hasPrefix(s string) bool {
596 maxLen := len(d.name) + len("-") + 2*d.sumLen
597 if d.odd {
598 maxLen--
599 }
600 if len(s) > maxLen || !strings.HasPrefix(s, d.name) || s[len(d.name)] != '-' {
601 return false
602 }
603 if len(s) == maxLen {
604 return d.equalString(s)
605 }
606 s = s[len(d.name)+1:]
607 if len(s) == 0 {
608
609 return false
610 }
611 for i, b := range d.sum[:d.sumLen] {
612 even := i * 2
613 if even == len(s) {
614 break
615 }
616 if s[even] != hexDigit[b>>4] {
617 return false
618 }
619 odd := i*2 + 1
620 if odd == len(s) {
621 break
622 }
623 if i == d.sumLen-1 && d.odd {
624 break
625 }
626 if s[odd] != hexDigit[b&0xf] {
627 return false
628 }
629 }
630 return true
631 }
632
// Descriptors for the digest types with dedicated implementations.
var (
	// sha1Meta describes the sha1 digest: its three constructors and its
	// size in bytes.
	sha1Meta = &digestMeta{
		ctor:  sha1FromBinary,
		ctors: sha1FromHexString,
		ctorb: sha1FromHexBytes,
		size:  sha1.Size,
	}
	// sha224Meta describes the sha224 digest: its three constructors and
	// its size in bytes.
	sha224Meta = &digestMeta{
		ctor:  sha224FromBinary,
		ctors: sha224FromHexString,
		ctorb: sha224FromHexBytes,
		size:  sha256.Size224,
	}
)
647
// metaFromString maps a digest name, as it appears before the '-' in a
// blobref string, to its descriptor.
var metaFromString = map[string]*digestMeta{
	"sha1":   sha1Meta,
	"sha224": sha224Meta,
}
652
// blobTypeAndMeta pairs a digest name, held as bytes, with its descriptor.
type blobTypeAndMeta struct {
	name []byte
	meta *digestMeta
}

// metas holds one blobTypeAndMeta per metaFromString entry. It is filled by
// init and searched linearly by metaFromBytes.
var metas []blobTypeAndMeta
659
660 func metaFromBytes(name []byte) (meta *digestMeta, ok bool) {
661 for _, bm := range metas {
662 if bytes.Equal(name, bm.name) {
663 return bm.meta, true
664 }
665 }
666 return
667 }
668
669 func init() {
670 for name, meta := range metaFromString {
671 metas = append(metas, blobTypeAndMeta{
672 name: []byte(name),
673 meta: meta,
674 })
675 }
676 }
677
678
679 func HashFuncs() []string {
680 hashes := make([]string, len(metas))
681 for i, m := range metas {
682 hashes[i] = string(m.name)
683 }
684 return hashes
685 }
686
// Dynamic types of the hash values returned by sha1.New and sha256.New224.
// They are used in the keys of metaFromType.
var (
	sha1Type   = reflect.TypeOf(sha1.New())
	sha224Type = reflect.TypeOf(sha256.New224())
)
691
692
693
694
// hashSig is the key type of metaFromType: a hash implementation's dynamic
// type together with the value reported by its Size method.
type hashSig struct {
	rt   reflect.Type
	size int
}
699
// metaFromType maps a hash implementation, identified by its type and sum
// size, to the matching digest descriptor. RefFromHash consults it.
var metaFromType = map[hashSig]*digestMeta{
	{sha1Type, sha1.Size}:        sha1Meta,
	{sha224Type, sha256.Size224}: sha224Meta,
}
704
// digestMeta bundles the constructors and the size of one digest type.
type digestMeta struct {
	// ctor builds a digest from its raw binary form.
	ctor func(binary []byte) digestType
	// ctors builds a digest from a hex string; the bool reports success.
	ctors func(hex string) (digestType, bool)
	// ctorb builds a digest from hex bytes; the bool reports success.
	ctorb func(hex []byte) (digestType, bool)
	// size is the digest length in bytes; the hex form is twice as long.
	size int
}
711
// bufPool is a small free list of byte slices, holding at most 20 buffers.
var bufPool = make(chan []byte, 20)

// getBuf returns a byte slice of length size. It reuses a pooled buffer
// when one with enough capacity is available. Pooled buffers that are too
// small are discarded along the way. When the pool is empty, a fresh slice
// is allocated.
func getBuf(size int) []byte {
	for {
		var candidate []byte
		select {
		case candidate = <-bufPool:
		default:
			return make([]byte, size)
		}
		if cap(candidate) >= size {
			return candidate[:size]
		}
	}
}

// putBuf offers b back to the pool without blocking.
func putBuf(b []byte) {
	select {
	case bufPool <- b:
	default:
		// The pool is full; b is left for the garbage collector.
	}
}
733
734
735
736 func NewHash() hash.Hash {
737 if testhooks.UseSHA1() {
738 return sha1.New()
739 }
740 return sha256.New224()
741 }
742
743 func ValidRefString(s string) bool {
744
745 return ParseOrZero(s).Valid()
746 }
747
// null is the JSON literal produced when marshaling an invalid Ref and
// accepted as a no-op when unmarshaling.
var null = []byte(`null`)
749
750 func (r *Ref) UnmarshalJSON(d []byte) error {
751 if r.digest != nil {
752 return errors.New("Can't UnmarshalJSON into a non-zero Ref")
753 }
754 if len(d) == 0 || bytes.Equal(d, null) {
755 return nil
756 }
757 if len(d) < 2 || d[0] != '"' || d[len(d)-1] != '"' {
758 return fmt.Errorf("blob: expecting a JSON string to unmarshal, got %q", d)
759 }
760 d = d[1 : len(d)-1]
761 p, ok := ParseBytes(d)
762 if !ok {
763 return fmt.Errorf("blobref: invalid blobref %q (%d)", d, len(d))
764 }
765 *r = p
766 return nil
767 }
768
769 func (r Ref) MarshalJSON() ([]byte, error) {
770 if !r.Valid() {
771 return null, nil
772 }
773 dname := r.digest.digestName()
774 bs := r.digest.bytes()
775 buf := make([]byte, 0, 3+len(dname)+len(bs)*2)
776 buf = append(buf, '"')
777 buf = r.appendString(buf)
778 buf = append(buf, '"')
779 return buf, nil
780 }
781
782
783 func (r Ref) MarshalBinary() (data []byte, err error) {
784 dname := r.digest.digestName()
785 bs := r.digest.bytes()
786 data = make([]byte, 0, len(dname)+1+len(bs))
787 data = append(data, dname...)
788 data = append(data, '-')
789 data = append(data, bs...)
790 return
791 }
792
793
794 func (r *Ref) UnmarshalBinary(data []byte) error {
795 if r.digest != nil {
796 return errors.New("Can't UnmarshalBinary into a non-zero Ref")
797 }
798 i := bytes.IndexByte(data, '-')
799 if i < 1 {
800 return errors.New("no digest name")
801 }
802
803 digName := string(data[:i])
804 buf := data[i+1:]
805
806 meta, ok := metaFromString[digName]
807 if !ok {
808 r2, ok := parseUnknown(digName, fmt.Sprintf("%x", buf))
809 if !ok {
810 return errors.New("invalid blobref binary data")
811 }
812 *r = r2
813 return nil
814 }
815 if len(buf) != meta.size {
816 return errors.New("wrong size of data for digest " + digName)
817 }
818 r.digest = meta.ctor(buf)
819 return nil
820 }
821
822
823 func (r Ref) Less(o Ref) bool {
824 if r.Valid() != o.Valid() {
825 return o.Valid()
826 }
827 if !r.Valid() {
828 return false
829 }
830 if n1, n2 := r.digest.digestName(), o.digest.digestName(); n1 != n2 {
831 return n1 < n2
832 }
833 return bytes.Compare(r.digest.bytes(), o.digest.bytes()) < 0
834 }
835
836
837 type ByRef []Ref
838
839 func (s ByRef) Len() int { return len(s) }
840 func (s ByRef) Less(i, j int) bool { return s[i].Less(s[j]) }
841 func (s ByRef) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
842
843
844 type SizedByRef []SizedRef
845
846 func (s SizedByRef) Len() int { return len(s) }
847 func (s SizedByRef) Less(i, j int) bool { return s[i].Less(s[j]) }
848 func (s SizedByRef) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
849
850
851
852 func TypeAlphabet(typ string) string {
853 switch typ {
854 case "sha1":
855 return hexDigit
856 case "sha224":
857 return hexDigit
858 }
859 return ""
860 }