Skip to content

Commit

Permalink
internal/repocache: new package
Browse files Browse the repository at this point in the history
Proof of concept that populates a SQLite database with objects from a repository.
  • Loading branch information
zombiezen committed Dec 2, 2023
1 parent b990193 commit 0a5c431
Show file tree
Hide file tree
Showing 9 changed files with 801 additions and 1 deletion.
2 changes: 1 addition & 1 deletion default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ in buildGoModule {
"-X" "main.buildCommit=${commit}"
];

vendorHash = "sha256-56Sah030xbWsoOu8r3c3nN2UGHvQORheavebP+Z1Wc8=";
vendorHash = "sha256-S8g7BAdGyt4RznioaRO5jmLDMX4O6JZqt1ul3RKAuDQ=";

nativeBuildInputs = [ pandoc installShellFiles makeWrapper ];
nativeCheckInputs = [ bash coreutils git ];
Expand Down
12 changes: 12 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,16 @@ require (
golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691
golang.org/x/sys v0.1.0
golang.org/x/tools v0.2.0
zombiezen.com/go/sqlite v0.13.1
)

require (
github.com/dustin/go-humanize v1.0.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/mattn/go-isatty v0.0.16 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
modernc.org/libc v1.22.3 // indirect
modernc.org/mathutil v1.5.0 // indirect
modernc.org/memory v1.5.0 // indirect
modernc.org/sqlite v1.21.1 // indirect
)
20 changes: 20 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
Expand Down Expand Up @@ -102,6 +104,8 @@ github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hf
github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
Expand All @@ -113,8 +117,13 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
Expand Down Expand Up @@ -233,6 +242,7 @@ golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down Expand Up @@ -375,6 +385,16 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
modernc.org/libc v1.22.3 h1:D/g6O5ftAfavceqlLOFwaZuA5KYafKwmr30A6iSqoyY=
modernc.org/libc v1.22.3/go.mod h1:MQrloYP209xa2zHome2a8HLiLm6k0UT8CoHpV74tOFw=
modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds=
modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU=
modernc.org/sqlite v1.21.1 h1:GyDFqNnESLOhwwDRaHGdp2jKLDzpyT/rNLglX3ZkMSU=
modernc.org/sqlite v1.21.1/go.mod h1:XwQ0wZPIh1iKb5mkvCJ3szzbhk+tykC8ZWqTRTgYRwI=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
zombiezen.com/go/sqlite v0.13.1 h1:qDzxyWWmMtSSEH5qxamqBFmqA2BLSSbtODi3ojaE02o=
zombiezen.com/go/sqlite v0.13.1/go.mod h1:Ht/5Rg3Ae2hoyh1I7gbWtWAl89CNocfqeb/aAMTkJr4=
12 changes: 12 additions & 0 deletions internal/repocache/objects/find.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Looks up at most one row of "objects" whose "sha1" equals :sha1.
-- Only rows that are fully populated are considered: the type must be
-- present, the size must be non-negative, and the content must be present.
-- Result columns: the row's oid, its type, the stored "size"
-- (exposed as uncompressed_size), and length("content")
-- (exposed as compressed_size).
select
"oid" as "oid",
"type" as "type",
"size" as "uncompressed_size",
length("content") as "compressed_size"
from "objects"
where
"sha1" = :sha1 and
"type" is not null and
"size" >= 0 and
"content" is not null
limit 1;
18 changes: 18 additions & 0 deletions internal/repocache/objects/insert.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Creates the row for the object identified by :sha1, recording :type and
-- :uncompressed_size and reserving :compressed_size zero bytes in "content"
-- via zeroblob().
-- If a row with the same "sha1" already exists, it is overwritten only when
-- that row is incomplete (negative size, null type, or null content).
-- Yields the oid of the inserted or updated row.
-- NOTE(review): when the conflicting row is already complete the update is
-- skipped; presumably no row is returned in that case -- confirm with callers.
insert into "objects" (
"sha1",
"type",
"size",
"content"
) values (
:sha1,
:type,
:uncompressed_size,
zeroblob(:compressed_size)
)
on conflict ("sha1") do
update set
"type" = :type,
"size" = :uncompressed_size,
"content" = zeroblob(:compressed_size)
where "size" < 0 or "type" is null or "content" is null
returning "oid" as "oid";
269 changes: 269 additions & 0 deletions internal/repocache/repocache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
// Copyright 2023 The gg Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0

// Package repocache provides optimized queries over a Git repository
// using an on-disk index.
package repocache

import (
"compress/zlib"
"context"
"crypto/sha1"
"embed"
"errors"
"fmt"
"io"

"gg-scm.io/pkg/git/githash"
"gg-scm.io/pkg/git/object"
"zombiezen.com/go/sqlite"
"zombiezen.com/go/sqlite/ext/refunc"
"zombiezen.com/go/sqlite/sqlitex"
)

// Names of the table and column that hold an object's compressed content.
// They are passed to OpenBlob when an object is streamed out of the cache.
const (
objectsTable = "objects"
contentColumn = "content"
)

// sqlFiles holds the SQL embedded into the binary:
// the schema script and the query files under objects/.
//
//go:embed schema.sql
//go:embed objects/*.sql
var sqlFiles embed.FS

// appID is the value this package expects in (and writes to)
// the database's PRAGMA application_id.
const appID int32 = 0x40a9233d

// currentUserVersion is the schema version written to PRAGMA user_version.
// A database reporting any other version has its tables and views dropped
// and its schema recreated during migration.
const currentUserVersion = 1

// Cache represents an open connection to a cache database.
// Obtain one with Open and release it with Close.
type Cache struct {
// conn is the single SQLite connection used for every operation.
conn *sqlite.Conn
}

// Open returns a Cache backed by the SQLite database at path,
// creating the database file if it does not exist yet.
//
// While the schema is being migrated, cancelling ctx interrupts the
// connection; the interrupt is cleared again before Open returns successfully.
// On any failure after the connection has been opened, the connection is
// closed and the error is wrapped with the path.
func Open(ctx context.Context, path string) (*Cache, error) {
	annotate := func(err error) error {
		return fmt.Errorf("open git repo cache %s: %w", path, err)
	}

	conn, err := sqlite.OpenConn(path, sqlite.OpenCreate|sqlite.OpenReadWrite)
	if err != nil {
		return nil, annotate(err)
	}
	// Every failure past this point must release the connection.
	abandon := func(err error) (*Cache, error) {
		conn.Close()
		return nil, annotate(err)
	}

	if err := refunc.Register(conn); err != nil {
		return abandon(err)
	}
	if err := sqlitex.ExecuteTransient(conn, "PRAGMA page_size = 8192;", nil); err != nil {
		return abandon(err)
	}

	// Only the migration and the foreign_keys pragma run under the
	// caller's cancellation signal.
	conn.SetInterrupt(ctx.Done())
	if err := migrate(conn); err != nil {
		return abandon(err)
	}
	if err := sqlitex.ExecuteTransient(conn, `PRAGMA foreign_keys = on;`, nil); err != nil {
		return abandon(err)
	}
	conn.SetInterrupt(nil)

	return &Cache{conn: conn}, nil
}

// migrate brings the database to currentUserVersion.
// All work happens inside one immediate transaction, which is ended
// according to the function's final error value.
func migrate(conn *sqlite.Conn) (err error) {
	endTxn, err := sqlitex.ImmediateTransaction(conn)
	if err != nil {
		return err
	}
	defer endTxn(&err)

	foundVersion, err := ensureAppID(conn)
	if err != nil {
		return err
	}
	if foundVersion != currentUserVersion {
		// The stored version is not the one this code expects:
		// discard the existing tables and views before recreating the schema.
		if err = dropAllTables(conn); err != nil {
			return err
		}
	}
	if err = sqlitex.ExecuteScriptFS(conn, sqlFiles, "schema.sql", nil); err != nil {
		return err
	}

	// PRAGMA values cannot be bound as parameters, hence the formatted statement.
	setVersion := fmt.Sprintf("PRAGMA user_version = %d;", currentUserVersion)
	return sqlitex.ExecuteTransient(conn, setVersion, nil)
}

// Cat writes the content of the object identified by id to dst
// and returns the object's type.
// An object that is missing from the cache yields an error
// that wraps [ErrObjectNotFound].
// A nil error guarantees that the bytes written to dst hash to id.
//
// Cancelling ctx interrupts the underlying connection for the duration
// of the call.
func (c *Cache) Cat(ctx context.Context, dst io.Writer, id githash.SHA1) (_ object.Type, err error) {
	c.conn.SetInterrupt(ctx.Done())
	defer c.conn.SetInterrupt(nil)

	// The transaction is ended (based on err) before the interrupt is cleared,
	// because deferred calls run in reverse order.
	endTxn := sqlitex.Transaction(c.conn)
	defer endTxn(&err)

	tp, err := c.cat(dst, id)
	return tp, err
}

// stat runs objects/find.sql for id and reports the matching row's oid,
// object type, and uncompressed size.
// If the query yields no row, the returned error wraps ErrObjectNotFound.
// A failure of the query itself is formatted into the message with %v,
// so it is not part of the returned error's chain.
func (c *Cache) stat(id githash.SHA1) (oid int64, tp object.Type, uncompressedSize int64, err error) {
	// A negative size doubles as the "no row was seen" sentinel:
	// the query only returns rows whose size is non-negative.
	uncompressedSize = -1

	opts := &sqlitex.ExecOptions{
		Named: map[string]any{":sha1": id[:]},
		ResultFunc: func(row *sqlite.Stmt) error {
			oid = row.GetInt64("oid")
			tp = object.Type(row.GetText("type"))
			uncompressedSize = row.GetInt64("uncompressed_size")
			return nil
		},
	}
	err = sqlitex.ExecuteTransientFS(c.conn, sqlFiles, "objects/find.sql", opts)

	switch {
	case err != nil:
		return 0, "", 0, fmt.Errorf("read git object %v: %v", id, err)
	case uncompressedSize < 0:
		return 0, "", 0, fmt.Errorf("read git object %v: %w", id, ErrObjectNotFound)
	}
	return oid, tp, uncompressedSize, nil
}

// cat streams the zlib-compressed content of object id from the cache,
// writing the decompressed bytes to dst, and returns the object's type.
// It runs inside a savepoint that is ended according to the final error.
//
// The decompressed bytes are hashed together with the object prefix as
// they are written; a size or hash mismatch is reported as corrupted
// content. Bytes may already have been written to dst when an error
// is returned.
func (c *Cache) cat(dst io.Writer, id githash.SHA1) (_ object.Type, err error) {
	defer sqlitex.Save(c.conn)(&err)

	rowID, objType, wantSize, err := c.stat(id)
	if err != nil {
		return "", err
	}

	// Open the content column of the row for reading (write flag is false).
	blob, err := c.conn.OpenBlob("", objectsTable, contentColumn, rowID, false)
	if err != nil {
		return "", fmt.Errorf("read git object %v: %v", id, err)
	}
	defer blob.Close()

	// Seed the hash with the object prefix built from the advertised
	// type and size, then feed it the same bytes that go to dst.
	hasher := sha1.New()
	hasher.Write(object.AppendPrefix(nil, objType, wantSize))

	inflater, err := zlib.NewReader(blob)
	if err != nil {
		return "", fmt.Errorf("read git object %v: %v", id, err)
	}
	written, err := io.Copy(io.MultiWriter(hasher, dst), inflater)
	inflater.Close()
	if err != nil {
		return "", fmt.Errorf("read git object %v: %v", id, err)
	}

	if written != wantSize {
		return "", fmt.Errorf("read git object %v: corrupted content (advertised size was %d bytes; found %d bytes)", id, wantSize, written)
	}
	var sum githash.SHA1
	hasher.Sum(sum[:0])
	if sum != id {
		return "", fmt.Errorf("read git object %v: corrupted content (hash = %v)", id, sum)
	}
	return objType, nil
}

// Close shuts down the cache's database connection
// and reports any error from doing so.
func (c *Cache) Close() error {
	closeErr := c.conn.Close()
	return closeErr
}

// dropAllTables removes every user-defined view and table from the database.
// It runs inside a savepoint that is ended according to the final error,
// and any failure is wrapped with the prefix "drop all tables".
//
// Objects whose names start with "sqlite_" are skipped: that prefix is
// reserved for SQLite's internal tables (for example sqlite_sequence),
// and SQLite refuses to drop most of them, which would otherwise abort
// the whole operation.
func dropAllTables(conn *sqlite.Conn) (err error) {
	defer sqlitex.Save(conn)(&err)

	// The query returns each name already wrapped in double quotes, with any
	// embedded double quote doubled by printf's %w substitution, so the
	// result can be spliced into a statement as an identifier safely.
	// The LIKE pattern escapes "_" so that it matches a literal underscore.
	const query = `SELECT "type", printf('"%w"', "name") FROM sqlite_schema ` +
		`WHERE "type" IN ('table', 'view') AND "name" NOT LIKE 'sqlite\_%' ESCAPE '\';`

	var tables, views []string
	err = sqlitex.ExecuteTransient(conn, query, &sqlitex.ExecOptions{
		ResultFunc: func(stmt *sqlite.Stmt) error {
			ident := stmt.ColumnText(1)
			switch stmt.ColumnText(0) {
			case "table":
				tables = append(tables, ident)
			case "view":
				views = append(views, ident)
			}
			return nil
		},
	})
	if err != nil {
		return fmt.Errorf("drop all tables: %w", err)
	}

	// Views go first, then the tables.
	// IF EXISTS tolerates objects that disappeared as a side effect of an
	// earlier drop (such as the backing tables of a virtual table).
	for _, ident := range views {
		if err := sqlitex.ExecuteTransient(conn, `DROP VIEW IF EXISTS `+ident+`;`, nil); err != nil {
			return fmt.Errorf("drop all tables: %w", err)
		}
	}
	for _, ident := range tables {
		if err := sqlitex.ExecuteTransient(conn, `DROP TABLE IF EXISTS `+ident+`;`, nil); err != nil {
			return fmt.Errorf("drop all tables: %w", err)
		}
	}
	return nil
}

// userVersion reads the database's PRAGMA user_version.
// If the pragma produces no row, the result is zero.
func userVersion(conn *sqlite.Conn) (int32, error) {
	var got int32
	capture := func(stmt *sqlite.Stmt) error {
		got = stmt.ColumnInt32(0)
		return nil
	}
	opts := &sqlitex.ExecOptions{ResultFunc: capture}
	if err := sqlitex.ExecuteTransient(conn, "PRAGMA user_version;", opts); err != nil {
		return 0, fmt.Errorf("get database user_version: %w", err)
	}
	return got, nil
}

// ensureAppID verifies that the database belongs to this package and
// returns its current user_version.
//
// The database is accepted when its application_id already equals appID,
// or when the application_id is zero and the schema is empty.
// Any other combination is rejected with an error.
// On acceptance, application_id is (re)written to appID.
// The work runs inside a savepoint that is ended according to the final error.
func ensureAppID(conn *sqlite.Conn) (schemaVersion int32, err error) {
	defer sqlitex.Save(conn)(&err)

	// Does the database contain any schema objects at all?
	populated := false
	err = sqlitex.ExecuteTransient(conn, "VALUES ((SELECT COUNT(*) FROM sqlite_master) > 0);", &sqlitex.ExecOptions{
		ResultFunc: func(row *sqlite.Stmt) error {
			populated = row.ColumnInt(0) != 0
			return nil
		},
	})
	if err != nil {
		return 0, err
	}

	var storedID int32
	err = sqlitex.ExecuteTransient(conn, "PRAGMA application_id;", &sqlitex.ExecOptions{
		ResultFunc: func(row *sqlite.Stmt) error {
			storedID = row.ColumnInt32(0)
			return nil
		},
	})
	if err != nil {
		return 0, err
	}

	isOurs := storedID == appID
	isBlank := storedID == 0 && !populated
	if !isOurs && !isBlank {
		return 0, fmt.Errorf("database application_id = %#x (expected %#x)", storedID, appID)
	}

	if schemaVersion, err = userVersion(conn); err != nil {
		return 0, err
	}

	// PRAGMAs do not accept arbitrary expressions, so parameter binding
	// is unavailable and the statement is formatted instead.
	stampID := fmt.Sprintf("PRAGMA application_id = %d;", appID)
	if err = sqlitex.ExecuteTransient(conn, stampID, nil); err != nil {
		return 0, err
	}
	return schemaVersion, nil
}

// ErrObjectNotFound is the sentinel error wrapped by object lookups
// when the requested object has no complete row in the cache.
// Test for it with errors.Is.
var ErrObjectNotFound = errors.New("git object not found")
Loading

0 comments on commit 0a5c431

Please sign in to comment.