From 236c645bf16754ca9294545e71014a01a24ccfd8 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Wed, 19 Jul 2023 02:14:47 +0800 Subject: [PATCH] Refactor "Content" for file uploading (#25851) Before: the concept "Content string" is used everywhere. It has some problems: 1. Sometimes it means "base64 encoded content", sometimes it means "raw binary content" 2. It doesn't work with large files, eg: uploading a 1G LFS file would make Gitea process OOM This PR does the refactoring: use "ContentReader" / "ContentBase64" instead of "Content" This PR is not breaking because the key in API JSON is still "content": `` ContentBase64 string `json:"content"` `` --- modules/structs/repo_file.go | 6 +- routers/api/v1/repo/file.go | 61 ++++++++++++------- routers/web/repo/editor.go | 8 +-- services/repository/files/update.go | 23 +++---- templates/swagger/v1_json.tmpl | 6 +- tests/integration/actions_trigger_test.go | 13 ++-- .../integration/api_repo_file_create_test.go | 2 +- tests/integration/api_repo_file_helpers.go | 8 ++- .../integration/api_repo_file_update_test.go | 2 +- .../integration/api_repo_files_change_test.go | 10 +-- tests/integration/empty_repo_test.go | 2 +- tests/integration/gpg_git_test.go | 2 +- tests/integration/pull_merge_test.go | 12 ++-- tests/integration/pull_update_test.go | 13 ++-- tests/integration/repofiles_change_test.go | 15 ++--- 15 files changed, 103 insertions(+), 80 deletions(-) diff --git a/modules/structs/repo_file.go b/modules/structs/repo_file.go index eb4f1c7dc..82bde96ab 100644 --- a/modules/structs/repo_file.go +++ b/modules/structs/repo_file.go @@ -26,7 +26,7 @@ type CreateFileOptions struct { FileOptions // content must be base64 encoded // required: true - Content string `json:"content"` + ContentBase64 string `json:"content"` } // Branch returns branch name @@ -54,7 +54,7 @@ type UpdateFileOptions struct { DeleteFileOptions // content must be base64 encoded // required: true - Content string `json:"content"` + ContentBase64 string `json:"content"` // from_path (optional) is the path of the original file which will be moved/renamed to the path in the URL FromPath string `json:"from_path" binding:"MaxSize(500)"` } @@ -74,7 +74,7 @@ type ChangeFileOperation struct { // required: true Path string `json:"path" binding:"Required;MaxSize(500)"` // new or updated file content, must be base64 encoded - Content string `json:"content"` + ContentBase64 string `json:"content"` // sha is the SHA for the file that already exists, required for update or delete SHA string `json:"sha"` // old path of the file to move diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go index bf37532fc..ffde5855b 100644 --- a/routers/api/v1/repo/file.go +++ b/routers/api/v1/repo/file.go @@ -408,6 +408,14 @@ func canReadFiles(r *context.Repository) bool { return r.Permission.CanRead(unit.TypeCode) } +func base64Reader(s string) (io.Reader, error) { + b, err := base64.StdEncoding.DecodeString(s) + if err != nil { + return nil, err + } + return bytes.NewReader(b), nil +} + // ChangeFiles handles API call for modifying multiple files func ChangeFiles(ctx *context.APIContext) { // swagger:operation POST /repos/{owner}/{repo}/contents repository repoChangeFiles @@ -449,14 +457,19 @@ func ChangeFiles(ctx *context.APIContext) { apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch } - files := []*files_service.ChangeRepoFile{} + var files []*files_service.ChangeRepoFile for _, file := range apiOpts.Files { + contentReader, err := base64Reader(file.ContentBase64) + if err != nil { + ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err) + return + } changeRepoFile := &files_service.ChangeRepoFile{ - Operation: file.Operation, - TreePath: file.Path, - FromTreePath: file.FromPath, - Content: file.Content, - SHA: file.SHA, + Operation: file.Operation, + TreePath: file.Path, + FromTreePath: file.FromPath, + ContentReader: contentReader, + SHA: file.SHA, } files = append(files, changeRepoFile) } @@ -544,12 +557,18 @@ func CreateFile(ctx *context.APIContext) { apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch } + contentReader, err := base64Reader(apiOpts.ContentBase64) + if err != nil { + ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err) + return + } + opts := &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: ctx.Params("*"), - Content: apiOpts.Content, + Operation: "create", + TreePath: ctx.Params("*"), + ContentReader: contentReader, }, }, Message: apiOpts.Message, @@ -636,14 +655,20 @@ func UpdateFile(ctx *context.APIContext) { apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch } + contentReader, err := base64Reader(apiOpts.ContentBase64) + if err != nil { + ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err) + return + } + opts := &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "update", - Content: apiOpts.Content, - SHA: apiOpts.SHA, - FromTreePath: apiOpts.FromPath, - TreePath: ctx.Params("*"), + Operation: "update", + ContentReader: contentReader, + SHA: apiOpts.SHA, + FromTreePath: apiOpts.FromPath, + TreePath: ctx.Params("*"), }, }, Message: apiOpts.Message, @@ -709,14 +734,6 @@ func createOrUpdateFiles(ctx *context.APIContext, opts *files_service.ChangeRepo } } - for _, file := range opts.Files { - content, err := base64.StdEncoding.DecodeString(file.Content) - if err != nil { - return nil, err - } - file.Content = string(content) - } - return files_service.ChangeRepoFiles(ctx, ctx.Repo.Repository, ctx.Doer, opts) } diff --git a/routers/web/repo/editor.go b/routers/web/repo/editor.go index 88f9e42f3..ad2055fd5 100644 --- a/routers/web/repo/editor.go +++ b/routers/web/repo/editor.go @@ -284,10 +284,10 @@ func editFilePost(ctx *context.Context, form forms.EditRepoFileForm, isNewFile b Message: message, Files: []*files_service.ChangeRepoFile{ { - Operation: operation, - FromTreePath: ctx.Repo.TreePath, - TreePath: form.TreePath, - Content: strings.ReplaceAll(form.Content, "\r", ""), + Operation: operation, + FromTreePath: ctx.Repo.TreePath, + TreePath: form.TreePath, + ContentReader: strings.NewReader(strings.ReplaceAll(form.Content, "\r", "")), }, }, Signoff: form.Signoff, diff --git a/services/repository/files/update.go b/services/repository/files/update.go index 1d5f10a3f..cd7713341 100644 --- a/services/repository/files/update.go +++ b/services/repository/files/update.go @@ -6,6 +6,7 @@ package files import ( "context" "fmt" + "io" "path" "strings" "time" @@ -35,12 +36,12 @@ type CommitDateOptions struct { } type ChangeRepoFile struct { - Operation string - TreePath string - FromTreePath string - Content string - SHA string - Options *RepoFileOptions + Operation string + TreePath string + FromTreePath string + ContentReader io.Reader + SHA string + Options *RepoFileOptions } // ChangeRepoFilesOptions holds the repository files update options @@ -387,7 +388,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file } } - treeObjectContent := file.Content + treeObjectContentReader := file.ContentReader var lfsMetaObject *git_model.LFSMetaObject if setting.LFS.StartServer && hasOldBranch { // Check there is no way this can return multiple infos @@ -402,17 +403,17 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" { // OK so we are supposed to LFS this data! - pointer, err := lfs.GeneratePointer(strings.NewReader(file.Content)) + pointer, err := lfs.GeneratePointer(treeObjectContentReader) if err != nil { return err } lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID} - treeObjectContent = pointer.StringContent() + treeObjectContentReader = strings.NewReader(pointer.StringContent()) } } // Add the object to the database - objectHash, err := t.HashObject(strings.NewReader(treeObjectContent)) + objectHash, err := t.HashObject(treeObjectContentReader) if err != nil { return err } @@ -439,7 +440,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file return err } if !exist { - if err := contentStore.Put(lfsMetaObject.Pointer, strings.NewReader(file.Content)); err != nil { + if err := contentStore.Put(lfsMetaObject.Pointer, file.ContentReader); err != nil { if _, err2 := git_model.RemoveLFSMetaObjectByOid(ctx, repoID, lfsMetaObject.Oid); err2 != nil { return fmt.Errorf("unable to remove failed inserted LFS object %s: %v (Prev Error: %w)", lfsMetaObject.Oid, err2, err) } diff --git a/templates/swagger/v1_json.tmpl b/templates/swagger/v1_json.tmpl index 4c6ee55f8..b7620b9e7 100644 --- a/templates/swagger/v1_json.tmpl +++ b/templates/swagger/v1_json.tmpl @@ -16125,7 +16125,7 @@ "content": { "description": "new or updated file content, must be base64 encoded", "type": "string", - "x-go-name": "Content" + "x-go-name": "ContentBase64" }, "from_path": { "description": "old path of the file to move", @@ -16810,7 +16810,7 @@ "content": { "description": "content must be base64 encoded", "type": "string", - "x-go-name": "Content" + "x-go-name": "ContentBase64" }, "dates": { "$ref": "#/definitions/CommitDateOptions" @@ -21687,7 +21687,7 @@ "content": { "description": "content must be base64 encoded", "type": "string", - "x-go-name": "Content" + "x-go-name": "ContentBase64" }, "dates": { "$ref": "#/definitions/CommitDateOptions" diff --git a/tests/integration/actions_trigger_test.go b/tests/integration/actions_trigger_test.go index bacf13e6c..241d6172f 100644 --- a/tests/integration/actions_trigger_test.go +++ b/tests/integration/actions_trigger_test.go @@ -5,6 +5,7 @@ package integration import ( "net/url" + "strings" "testing" "time" @@ -64,9 +65,9 @@ func TestPullRequestTargetEvent(t *testing.T) { addWorkflowToBaseResp, err := files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user2, &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: ".gitea/workflows/pr.yml", - Content: "name: test\non: pull_request_target\njobs:\n test:\n runs-on: ubuntu-latest\n steps:\n - run: echo helloworld\n", + Operation: "create", + TreePath: ".gitea/workflows/pr.yml", + ContentReader: strings.NewReader("name: test\non: pull_request_target\njobs:\n test:\n runs-on: ubuntu-latest\n steps:\n - run: echo helloworld\n"), }, }, Message: "add workflow", @@ -92,9 +93,9 @@ func TestPullRequestTargetEvent(t *testing.T) { addFileToForkedResp, err := files_service.ChangeRepoFiles(git.DefaultContext, forkedRepo, user3, &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: "file_1.txt", - Content: "file1", + Operation: "create", + TreePath: "file_1.txt", + ContentReader: strings.NewReader("file1"), }, }, Message: "add file1", diff --git a/tests/integration/api_repo_file_create_test.go b/tests/integration/api_repo_file_create_test.go index cbc164891..2433a68c3 100644 --- a/tests/integration/api_repo_file_create_test.go +++ b/tests/integration/api_repo_file_create_test.go @@ -46,7 +46,7 @@ func getCreateFileOptions() api.CreateFileOptions { Committer: time.Unix(978307190, 0), }, }, - Content: contentEncoded, + ContentBase64: contentEncoded, } } diff --git a/tests/integration/api_repo_file_helpers.go b/tests/integration/api_repo_file_helpers.go index 8e9b2bfec..27e127fcc 100644 --- a/tests/integration/api_repo_file_helpers.go +++ b/tests/integration/api_repo_file_helpers.go @@ -4,6 +4,8 @@ package integration import ( + "strings" + repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" @@ -15,9 +17,9 @@ func createFileInBranch(user *user_model.User, repo *repo_model.Repository, tree opts := &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: treePath, - Content: content, + Operation: "create", + TreePath: treePath, + ContentReader: strings.NewReader(content), }, }, OldBranch: branchName, diff --git a/tests/integration/api_repo_file_update_test.go b/tests/integration/api_repo_file_update_test.go index 5bcb531fc..9a29ccbf5 100644 --- a/tests/integration/api_repo_file_update_test.go +++ b/tests/integration/api_repo_file_update_test.go @@ -44,7 +44,7 @@ func getUpdateFileOptions() *api.UpdateFileOptions { }, SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885", }, - Content: contentEncoded, + ContentBase64: contentEncoded, } } diff --git a/tests/integration/api_repo_files_change_test.go b/tests/integration/api_repo_files_change_test.go index 8599b12a7..c79764a06 100644 --- a/tests/integration/api_repo_files_change_test.go +++ b/tests/integration/api_repo_files_change_test.go @@ -44,13 +44,13 @@ func getChangeFilesOptions() *api.ChangeFilesOptions { }, Files: []*api.ChangeFileOperation{ { - Operation: "create", - Content: newContentEncoded, + Operation: "create", + ContentBase64: newContentEncoded, }, { - Operation: "update", - Content: updateContentEncoded, - SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885", + Operation: "update", + ContentBase64: updateContentEncoded, + SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885", }, { Operation: "delete", diff --git a/tests/integration/empty_repo_test.go b/tests/integration/empty_repo_test.go index d604391da..78453f28a 100644 --- a/tests/integration/empty_repo_test.go +++ b/tests/integration/empty_repo_test.go @@ -125,7 +125,7 @@ func TestEmptyRepoAddFileByAPI(t *testing.T) { NewBranchName: "new_branch", Message: "init", }, - Content: base64.StdEncoding.EncodeToString([]byte("newly-added-api-file")), + ContentBase64: base64.StdEncoding.EncodeToString([]byte("newly-added-api-file")), }) resp := MakeRequest(t, req, http.StatusCreated) diff --git a/tests/integration/gpg_git_test.go b/tests/integration/gpg_git_test.go index 5b49c91c5..10174b99f 100644 --- a/tests/integration/gpg_git_test.go +++ b/tests/integration/gpg_git_test.go @@ -337,7 +337,7 @@ func crudActionCreateFile(t *testing.T, ctx APITestContext, user *user_model.Use Email: user.Email, }, }, - Content: base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("This is new text for %s", path))), + ContentBase64: base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("This is new text for %s", path))), }, callback...) } diff --git a/tests/integration/pull_merge_test.go b/tests/integration/pull_merge_test.go index 7b7c8864c..f958c890f 100644 --- a/tests/integration/pull_merge_test.go +++ b/tests/integration/pull_merge_test.go @@ -370,9 +370,9 @@ func TestConflictChecking(t *testing.T) { _, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user, &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: "important_file", - Content: "Just a non-important file", + Operation: "create", + TreePath: "important_file", + ContentReader: strings.NewReader("Just a non-important file"), }, }, Message: "Add a important file", @@ -385,9 +385,9 @@ func TestConflictChecking(t *testing.T) { _, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user, &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: "important_file", - Content: "Not the same content :P", + Operation: "create", + TreePath: "important_file", + ContentReader: strings.NewReader("Not the same content :P"), }, }, Message: "Add a important file", diff --git a/tests/integration/pull_update_test.go b/tests/integration/pull_update_test.go index fa56cec48..80c55042d 100644 --- a/tests/integration/pull_update_test.go +++ b/tests/integration/pull_update_test.go @@ -6,6 +6,7 @@ package integration import ( "net/http" "net/url" + "strings" "testing" "time" @@ -104,9 +105,9 @@ func createOutdatedPR(t *testing.T, actor, forkOrg *user_model.User) *issues_mod _, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, actor, &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: "File_A", - Content: "File A", + Operation: "create", + TreePath: "File_A", + ContentReader: strings.NewReader("File A"), }, }, Message: "Add File A", @@ -131,9 +132,9 @@ func createOutdatedPR(t *testing.T, actor, forkOrg *user_model.User) *issues_mod _, err = files_service.ChangeRepoFiles(git.DefaultContext, headRepo, actor, &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: "File_B", - Content: "File B", + Operation: "create", + TreePath: "File_B", + ContentReader: strings.NewReader("File B"), }, }, Message: "Add File on PR branch", diff --git a/tests/integration/repofiles_change_test.go b/tests/integration/repofiles_change_test.go index c273d3818..765a44689 100644 --- a/tests/integration/repofiles_change_test.go +++ b/tests/integration/repofiles_change_test.go @@ -6,6 +6,7 @@ package integration import ( "net/url" "path/filepath" + "strings" "testing" "time" @@ -24,9 +25,9 @@ func getCreateRepoFilesOptions(repo *repo_model.Repository) *files_service.Chang return &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "create", - TreePath: "new/file.txt", - Content: "This is a NEW file", + Operation: "create", + TreePath: "new/file.txt", + ContentReader: strings.NewReader("This is a NEW file"), }, }, OldBranch: repo.DefaultBranch, @@ -41,10 +42,10 @@ func getUpdateRepoFilesOptions(repo *repo_model.Repository) *files_service.Chang return &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { - Operation: "update", - TreePath: "README.md", - SHA: "4b4851ad51df6a7d9f25c979345979eaeb5b349f", - Content: "This is UPDATED content for the README file", + Operation: "update", + TreePath: "README.md", + SHA: "4b4851ad51df6a7d9f25c979345979eaeb5b349f", + ContentReader: strings.NewReader("This is UPDATED content for the README file"), }, }, OldBranch: repo.DefaultBranch,