hicli/database: refactor media cache

This commit is contained in:
Tulir Asokan 2024-10-20 13:17:30 +03:00
parent 0b59b2c733
commit 2c5738f7f2
7 changed files with 228 additions and 179 deletions

View file

@ -57,7 +57,7 @@ var ErrBadGateway = mautrix.RespError{
StatusCode: http.StatusBadGateway, StatusCode: http.StatusBadGateway,
} }
func (gmx *Gomuks) downloadMediaFromCache(ctx context.Context, w http.ResponseWriter, entry *database.CachedMedia, force bool) bool { func (gmx *Gomuks) downloadMediaFromCache(ctx context.Context, w http.ResponseWriter, entry *database.Media, force bool) bool {
if !entry.UseCache() { if !entry.UseCache() {
if force { if force {
mautrix.MNotFound.WithMessage("Media not found in cache").Write(w) mautrix.MNotFound.WithMessage("Media not found in cache").Write(w)
@ -97,7 +97,7 @@ func (gmx *Gomuks) cacheEntryToPath(hash []byte) string {
return filepath.Join(gmx.CacheDir, "media", hashPath[0:2], hashPath[2:4], hashPath[4:]) return filepath.Join(gmx.CacheDir, "media", hashPath[0:2], hashPath[2:4], hashPath[4:])
} }
func cacheEntryToHeaders(w http.ResponseWriter, entry *database.CachedMedia) { func cacheEntryToHeaders(w http.ResponseWriter, entry *database.Media) {
w.Header().Set("Content-Type", entry.MimeType) w.Header().Set("Content-Type", entry.MimeType)
w.Header().Set("Content-Length", strconv.FormatInt(entry.Size, 10)) w.Header().Set("Content-Length", strconv.FormatInt(entry.Size, 10))
w.Header().Set("Content-Disposition", mime.FormatMediaType(entry.ContentDisposition(), map[string]string{"filename": entry.FileName})) w.Header().Set("Content-Disposition", mime.FormatMediaType(entry.ContentDisposition(), map[string]string{"filename": entry.FileName}))
@ -122,7 +122,7 @@ func (gmx *Gomuks) DownloadMedia(w http.ResponseWriter, r *http.Request) {
Logger() Logger()
log := &logVal log := &logVal
ctx := log.WithContext(r.Context()) ctx := log.WithContext(r.Context())
cacheEntry, err := gmx.Client.DB.CachedMedia.Get(ctx, mxc) cacheEntry, err := gmx.Client.DB.Media.Get(ctx, mxc)
if err != nil { if err != nil {
log.Err(err).Msg("Failed to get cached media entry") log.Err(err).Msg("Failed to get cached media entry")
mautrix.MUnknown.WithMessage(fmt.Sprintf("Failed to get cached media entry: %v", err)).Write(w) mautrix.MUnknown.WithMessage(fmt.Sprintf("Failed to get cached media entry: %v", err)).Write(w)
@ -152,7 +152,7 @@ func (gmx *Gomuks) DownloadMedia(w http.ResponseWriter, r *http.Request) {
log.Err(err).Msg("Failed to download media") log.Err(err).Msg("Failed to download media")
var httpErr mautrix.HTTPError var httpErr mautrix.HTTPError
if cacheEntry == nil { if cacheEntry == nil {
cacheEntry = &database.CachedMedia{ cacheEntry = &database.Media{
MXC: mxc, MXC: mxc,
} }
} }
@ -179,7 +179,7 @@ func (gmx *Gomuks) DownloadMedia(w http.ResponseWriter, r *http.Request) {
cacheEntry.Error.Matrix = ptr.Ptr(ErrBadGateway.WithMessage(err.Error())) cacheEntry.Error.Matrix = ptr.Ptr(ErrBadGateway.WithMessage(err.Error()))
cacheEntry.Error.StatusCode = http.StatusBadGateway cacheEntry.Error.StatusCode = http.StatusBadGateway
} }
err = gmx.Client.DB.CachedMedia.Put(ctx, cacheEntry) err = gmx.Client.DB.Media.Put(ctx, cacheEntry)
if err != nil { if err != nil {
log.Err(err).Msg("Failed to save errored cache entry") log.Err(err).Msg("Failed to save errored cache entry")
} }
@ -190,7 +190,7 @@ func (gmx *Gomuks) DownloadMedia(w http.ResponseWriter, r *http.Request) {
_ = resp.Body.Close() _ = resp.Body.Close()
}() }()
if cacheEntry == nil { if cacheEntry == nil {
cacheEntry = &database.CachedMedia{ cacheEntry = &database.Media{
MXC: mxc, MXC: mxc,
MimeType: resp.Header.Get("Content-Type"), MimeType: resp.Header.Get("Content-Type"),
Size: resp.ContentLength, Size: resp.ContentLength,
@ -231,7 +231,7 @@ func (gmx *Gomuks) DownloadMedia(w http.ResponseWriter, r *http.Request) {
} }
cacheEntry.Hash = (*[32]byte)(fileHasher.Sum(nil)) cacheEntry.Hash = (*[32]byte)(fileHasher.Sum(nil))
cacheEntry.Error = nil cacheEntry.Error = nil
err = gmx.Client.DB.CachedMedia.Put(ctx, cacheEntry) err = gmx.Client.DB.Media.Put(ctx, cacheEntry)
if err != nil { if err != nil {
log.Err(err).Msg("Failed to save cache entry") log.Err(err).Msg("Failed to save cache entry")
mautrix.MUnknown.WithMessage(fmt.Sprintf("Failed to save cache entry: %v", err)).Write(w) mautrix.MUnknown.WithMessage(fmt.Sprintf("Failed to save cache entry: %v", err)).Write(w)
@ -333,7 +333,7 @@ func (gmx *Gomuks) UploadMedia(w http.ResponseWriter, r *http.Request) {
} }
func (gmx *Gomuks) uploadFile(ctx context.Context, checksum []byte, cacheFile *os.File, encrypt bool, fileSize int64, mimeType, fileName string) (*event.EncryptedFileInfo, id.ContentURIString, error) { func (gmx *Gomuks) uploadFile(ctx context.Context, checksum []byte, cacheFile *os.File, encrypt bool, fileSize int64, mimeType, fileName string) (*event.EncryptedFileInfo, id.ContentURIString, error) {
cm := &database.CachedMedia{ cm := &database.Media{
FileName: fileName, FileName: fileName,
MimeType: mimeType, MimeType: mimeType,
Size: fileSize, Size: fileSize,
@ -359,7 +359,7 @@ func (gmx *Gomuks) uploadFile(ctx context.Context, checksum []byte, cacheFile *o
return nil, "", fmt.Errorf("failed to close cache reader: %w", err) return nil, "", fmt.Errorf("failed to close cache reader: %w", err)
} }
cm.MXC = resp.ContentURI cm.MXC = resp.ContentURI
err = gmx.Client.DB.CachedMedia.Put(ctx, cm) err = gmx.Client.DB.Media.Put(ctx, cm)
if err != nil { if err != nil {
zerolog.Ctx(ctx).Err(err). zerolog.Ctx(ctx).Err(err).
Stringer("mxc", cm.MXC). Stringer("mxc", cm.MXC).

View file

@ -1,149 +0,0 @@
// Copyright (c) 2024 Tulir Asokan
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package database
import (
"context"
"database/sql"
"net/http"
"slices"
"time"
"go.mau.fi/util/dbutil"
"go.mau.fi/util/jsontime"
"maunium.net/go/mautrix"
"maunium.net/go/mautrix/crypto/attachment"
"maunium.net/go/mautrix/id"
)
const (
insertCachedMediaQuery = `
INSERT INTO cached_media (mxc, event_rowid, enc_file, file_name, mime_type, size, hash, error)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (mxc) DO NOTHING
`
upsertCachedMediaQuery = `
INSERT INTO cached_media (mxc, event_rowid, enc_file, file_name, mime_type, size, hash, error)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (mxc) DO UPDATE
SET enc_file = excluded.enc_file,
file_name = excluded.file_name,
mime_type = excluded.mime_type,
size = excluded.size,
hash = excluded.hash,
error = excluded.error
WHERE excluded.error IS NULL OR cached_media.hash IS NULL
`
getCachedMediaQuery = `
SELECT mxc, event_rowid, enc_file, file_name, mime_type, size, hash, error
FROM cached_media
WHERE mxc = $1
`
)
type CachedMediaQuery struct {
*dbutil.QueryHelper[*CachedMedia]
}
func (cmq *CachedMediaQuery) Add(ctx context.Context, cm *CachedMedia) error {
return cmq.Exec(ctx, insertCachedMediaQuery, cm.sqlVariables()...)
}
func (cmq *CachedMediaQuery) Put(ctx context.Context, cm *CachedMedia) error {
return cmq.Exec(ctx, upsertCachedMediaQuery, cm.sqlVariables()...)
}
func (cmq *CachedMediaQuery) Get(ctx context.Context, mxc id.ContentURI) (*CachedMedia, error) {
return cmq.QueryOne(ctx, getCachedMediaQuery, &mxc)
}
type MediaError struct {
Matrix *mautrix.RespError `json:"data"`
StatusCode int `json:"status_code"`
ReceivedAt jsontime.UnixMilli `json:"received_at"`
Attempts int `json:"attempts"`
}
const MaxMediaBackoff = 7 * 24 * time.Hour
func (me *MediaError) backoff() time.Duration {
return min(time.Duration(2<<me.Attempts)*time.Second, MaxMediaBackoff)
}
func (me *MediaError) UseCache() bool {
return me != nil && time.Since(me.ReceivedAt.Time) < me.backoff()
}
func (me *MediaError) Write(w http.ResponseWriter) {
if me.Matrix.ExtraData == nil {
me.Matrix.ExtraData = make(map[string]any)
}
me.Matrix.ExtraData["fi.mau.hicli.error_ts"] = me.ReceivedAt.UnixMilli()
me.Matrix.ExtraData["fi.mau.hicli.next_retry_ts"] = me.ReceivedAt.Add(me.backoff()).UnixMilli()
me.Matrix.WithStatus(me.StatusCode).Write(w)
}
type CachedMedia struct {
MXC id.ContentURI
EventRowID EventRowID
EncFile *attachment.EncryptedFile
FileName string
MimeType string
Size int64
Hash *[32]byte
Error *MediaError
}
func (c *CachedMedia) UseCache() bool {
return c != nil && (c.Hash != nil || c.Error.UseCache())
}
func (c *CachedMedia) sqlVariables() []any {
var hash []byte
if c.Hash != nil {
hash = c.Hash[:]
}
return []any{
&c.MXC, dbutil.NumPtr(c.EventRowID), dbutil.JSONPtr(c.EncFile),
dbutil.StrPtr(c.FileName), dbutil.StrPtr(c.MimeType), dbutil.NumPtr(c.Size),
hash, dbutil.JSONPtr(c.Error),
}
}
var safeMimes = []string{
"text/css", "text/plain", "text/csv",
"application/json", "application/ld+json",
"image/jpeg", "image/gif", "image/png", "image/apng", "image/webp", "image/avif",
"video/mp4", "video/webm", "video/ogg", "video/quicktime",
"audio/mp4", "audio/webm", "audio/aac", "audio/mpeg", "audio/ogg", "audio/wave",
"audio/wav", "audio/x-wav", "audio/x-pn-wav", "audio/flac", "audio/x-flac",
}
func (c *CachedMedia) Scan(row dbutil.Scannable) (*CachedMedia, error) {
var mimeType, fileName sql.NullString
var size, eventRowID sql.NullInt64
var hash []byte
err := row.Scan(&c.MXC, &eventRowID, dbutil.JSON{Data: &c.EncFile}, &fileName, &mimeType, &size, &hash, dbutil.JSON{Data: &c.Error})
if err != nil {
return nil, err
}
c.MimeType = mimeType.String
c.FileName = fileName.String
c.EventRowID = EventRowID(eventRowID.Int64)
c.Size = size.Int64
if len(hash) == 32 {
c.Hash = (*[32]byte)(hash)
}
return c, nil
}
func (c *CachedMedia) ContentDisposition() string {
if slices.Contains(safeMimes, c.MimeType) {
return "inline"
}
return "attachment"
}

View file

@ -23,7 +23,7 @@ type Database struct {
Timeline TimelineQuery Timeline TimelineQuery
SessionRequest SessionRequestQuery SessionRequest SessionRequestQuery
Receipt ReceiptQuery Receipt ReceiptQuery
CachedMedia CachedMediaQuery Media MediaQuery
} }
func New(rawDB *dbutil.Database) *Database { func New(rawDB *dbutil.Database) *Database {
@ -40,7 +40,7 @@ func New(rawDB *dbutil.Database) *Database {
Timeline: TimelineQuery{QueryHelper: eventQH}, Timeline: TimelineQuery{QueryHelper: eventQH},
SessionRequest: SessionRequestQuery{QueryHelper: dbutil.MakeQueryHelper(rawDB, newSessionRequest)}, SessionRequest: SessionRequestQuery{QueryHelper: dbutil.MakeQueryHelper(rawDB, newSessionRequest)},
Receipt: ReceiptQuery{QueryHelper: dbutil.MakeQueryHelper(rawDB, newReceipt)}, Receipt: ReceiptQuery{QueryHelper: dbutil.MakeQueryHelper(rawDB, newReceipt)},
CachedMedia: CachedMediaQuery{QueryHelper: dbutil.MakeQueryHelper(rawDB, newCachedMedia)}, Media: MediaQuery{QueryHelper: dbutil.MakeQueryHelper(rawDB, newMedia)},
} }
} }
@ -60,8 +60,8 @@ func newReceipt(_ *dbutil.QueryHelper[*Receipt]) *Receipt {
return &Receipt{} return &Receipt{}
} }
func newCachedMedia(_ *dbutil.QueryHelper[*CachedMedia]) *CachedMedia { func newMedia(_ *dbutil.QueryHelper[*Media]) *Media {
return &CachedMedia{} return &Media{}
} }
func newAccountData(_ *dbutil.QueryHelper[*AccountData]) *AccountData { func newAccountData(_ *dbutil.QueryHelper[*AccountData]) *AccountData {

156
pkg/hicli/database/media.go Normal file
View file

@ -0,0 +1,156 @@
// Copyright (c) 2024 Tulir Asokan
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package database
import (
"context"
"database/sql"
"net/http"
"slices"
"time"
"go.mau.fi/util/dbutil"
"go.mau.fi/util/jsontime"
"maunium.net/go/mautrix"
"maunium.net/go/mautrix/crypto/attachment"
"maunium.net/go/mautrix/id"
)
const (
insertMediaQuery = `
INSERT INTO media (mxc, enc_file, file_name, mime_type, size, hash, error)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (mxc) DO NOTHING
`
upsertMediaQuery = `
INSERT INTO media (mxc, enc_file, file_name, mime_type, size, hash, error)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (mxc) DO UPDATE
SET enc_file = COALESCE(excluded.enc_file, media.enc_file),
file_name = COALESCE(excluded.file_name, media.file_name),
mime_type = COALESCE(excluded.mime_type, media.mime_type),
size = COALESCE(excluded.size, media.size),
hash = COALESCE(excluded.hash, media.hash),
error = excluded.error
WHERE excluded.error IS NULL OR media.hash IS NULL
`
getMediaQuery = `
SELECT mxc, enc_file, file_name, mime_type, size, hash, error
FROM media
WHERE mxc = $1
`
addMediaReferenceQuery = `
INSERT INTO media_reference (event_rowid, media_mxc)
VALUES ($1, $2)
ON CONFLICT (event_rowid, media_mxc) DO NOTHING
`
)
type MediaQuery struct {
*dbutil.QueryHelper[*Media]
}
func (mq *MediaQuery) Add(ctx context.Context, cm *Media) error {
return mq.Exec(ctx, insertMediaQuery, cm.sqlVariables()...)
}
func (mq *MediaQuery) AddReference(ctx context.Context, evtRowID EventRowID, mxc id.ContentURI) error {
return mq.Exec(ctx, addMediaReferenceQuery, evtRowID, &mxc)
}
func (mq *MediaQuery) Put(ctx context.Context, cm *Media) error {
return mq.Exec(ctx, upsertMediaQuery, cm.sqlVariables()...)
}
func (mq *MediaQuery) Get(ctx context.Context, mxc id.ContentURI) (*Media, error) {
return mq.QueryOne(ctx, getMediaQuery, &mxc)
}
type MediaError struct {
Matrix *mautrix.RespError `json:"data"`
StatusCode int `json:"status_code"`
ReceivedAt jsontime.UnixMilli `json:"received_at"`
Attempts int `json:"attempts"`
}
const MaxMediaBackoff = 7 * 24 * time.Hour
func (me *MediaError) backoff() time.Duration {
return min(time.Duration(2<<me.Attempts)*time.Second, MaxMediaBackoff)
}
func (me *MediaError) UseCache() bool {
return me != nil && time.Since(me.ReceivedAt.Time) < me.backoff()
}
func (me *MediaError) Write(w http.ResponseWriter) {
if me.Matrix.ExtraData == nil {
me.Matrix.ExtraData = make(map[string]any)
}
me.Matrix.ExtraData["fi.mau.hicli.error_ts"] = me.ReceivedAt.UnixMilli()
me.Matrix.ExtraData["fi.mau.hicli.next_retry_ts"] = me.ReceivedAt.Add(me.backoff()).UnixMilli()
me.Matrix.WithStatus(me.StatusCode).Write(w)
}
type Media struct {
MXC id.ContentURI
EncFile *attachment.EncryptedFile
FileName string
MimeType string
Size int64
Hash *[32]byte
Error *MediaError
}
func (m *Media) UseCache() bool {
return m != nil && (m.Hash != nil || m.Error.UseCache())
}
func (m *Media) sqlVariables() []any {
var hash []byte
if m.Hash != nil {
hash = m.Hash[:]
}
return []any{
&m.MXC, dbutil.JSONPtr(m.EncFile),
dbutil.StrPtr(m.FileName), dbutil.StrPtr(m.MimeType), dbutil.NumPtr(m.Size),
hash, dbutil.JSONPtr(m.Error),
}
}
var safeMimes = []string{
"text/css", "text/plain", "text/csv",
"application/json", "application/ld+json",
"image/jpeg", "image/gif", "image/png", "image/apng", "image/webp", "image/avif",
"video/mp4", "video/webm", "video/ogg", "video/quicktime",
"audio/mp4", "audio/webm", "audio/aac", "audio/mpeg", "audio/ogg", "audio/wave",
"audio/wav", "audio/x-wav", "audio/x-pn-wav", "audio/flac", "audio/x-flac",
}
func (m *Media) Scan(row dbutil.Scannable) (*Media, error) {
var mimeType, fileName sql.NullString
var size sql.NullInt64
var hash []byte
err := row.Scan(&m.MXC, dbutil.JSON{Data: &m.EncFile}, &fileName, &mimeType, &size, &hash, dbutil.JSON{Data: &m.Error})
if err != nil {
return nil, err
}
m.MimeType = mimeType.String
m.FileName = fileName.String
m.Size = size.Int64
if len(hash) == 32 {
m.Hash = (*[32]byte)(hash)
}
return m, nil
}
func (m *Media) ContentDisposition() string {
if slices.Contains(safeMimes, m.MimeType) {
return "inline"
}
return "attachment"
}

View file

@ -1,4 +1,4 @@
-- v0 -> v4 (compatible with v1+): Latest revision -- v0 -> v5 (compatible with v5+): Latest revision
CREATE TABLE account ( CREATE TABLE account (
user_id TEXT NOT NULL PRIMARY KEY, user_id TEXT NOT NULL PRIMARY KEY,
device_id TEXT NOT NULL, device_id TEXT NOT NULL,
@ -192,17 +192,23 @@ BEGIN
AND reactions IS NOT NULL; AND reactions IS NOT NULL;
END; END;
CREATE TABLE cached_media ( CREATE TABLE media (
mxc TEXT NOT NULL PRIMARY KEY, mxc TEXT NOT NULL PRIMARY KEY,
event_rowid INTEGER, enc_file TEXT,
enc_file TEXT, file_name TEXT,
file_name TEXT, mime_type TEXT,
mime_type TEXT, size INTEGER,
size INTEGER, hash BLOB,
hash BLOB, error TEXT
error TEXT, ) STRICT;
CONSTRAINT cached_media_event_fkey FOREIGN KEY (event_rowid) REFERENCES event (rowid) ON DELETE SET NULL CREATE TABLE media_reference (
event_rowid INTEGER NOT NULL,
media_mxc TEXT NOT NULL,
PRIMARY KEY (event_rowid, media_mxc),
CONSTRAINT media_reference_event_fkey FOREIGN KEY (event_rowid) REFERENCES event (rowid) ON DELETE CASCADE,
CONSTRAINT media_reference_media_fkey FOREIGN KEY (media_mxc) REFERENCES media (mxc) ON DELETE CASCADE
) STRICT; ) STRICT;
CREATE TABLE session_request ( CREATE TABLE session_request (

View file

@ -0,0 +1,29 @@
-- v5: Refactor media cache
CREATE TABLE media (
mxc TEXT NOT NULL PRIMARY KEY,
enc_file TEXT,
file_name TEXT,
mime_type TEXT,
size INTEGER,
hash BLOB,
error TEXT
) STRICT;
INSERT INTO media (mxc, enc_file, file_name, mime_type, size, hash, error)
SELECT mxc, enc_file, file_name, mime_type, size, hash, error
FROM cached_media;
CREATE TABLE media_reference (
event_rowid INTEGER NOT NULL,
media_mxc TEXT NOT NULL,
PRIMARY KEY (event_rowid, media_mxc),
CONSTRAINT media_reference_event_fkey FOREIGN KEY (event_rowid) REFERENCES event (rowid) ON DELETE CASCADE,
CONSTRAINT media_reference_media_fkey FOREIGN KEY (media_mxc) REFERENCES media (mxc) ON DELETE CASCADE
) STRICT;
INSERT INTO media_reference (event_rowid, media_mxc)
SELECT event_rowid, mxc
FROM cached_media WHERE event_rowid IS NOT NULL;
DROP TABLE cached_media;

View file

@ -282,10 +282,9 @@ func (h *HiClient) addMediaCache(
if !parsedMXC.IsValid() { if !parsedMXC.IsValid() {
return return
} }
cm := &database.CachedMedia{ cm := &database.Media{
MXC: parsedMXC, MXC: parsedMXC,
EventRowID: eventRowID, FileName: fileName,
FileName: fileName,
} }
if file != nil { if file != nil {
cm.EncFile = &file.EncryptedFile cm.EncFile = &file.EncryptedFile
@ -293,12 +292,20 @@ func (h *HiClient) addMediaCache(
if info != nil { if info != nil {
cm.MimeType = info.MimeType cm.MimeType = info.MimeType
} }
err := h.DB.CachedMedia.Put(ctx, cm) err := h.DB.Media.Put(ctx, cm)
if err != nil { if err != nil {
zerolog.Ctx(ctx).Warn().Err(err). zerolog.Ctx(ctx).Warn().Err(err).
Stringer("mxc", parsedMXC). Stringer("mxc", parsedMXC).
Int64("event_rowid", int64(eventRowID)). Int64("event_rowid", int64(eventRowID)).
Msg("Failed to add cached media entry") Msg("Failed to add database media entry")
return
}
err = h.DB.Media.AddReference(ctx, eventRowID, parsedMXC)
if err != nil {
zerolog.Ctx(ctx).Warn().Err(err).
Stringer("mxc", parsedMXC).
Int64("event_rowid", int64(eventRowID)).
Msg("Failed to add database media reference")
} }
} }