fix: update repo structure validation logic to disallow false positives (#10)

* refactor: update file structure to reflect new changes

* refactor: start splitting up files

* refactor: more domain splitting

* refactor: remove directory validation from contributors file

* fix: update repo structure checks

* fix: improve check for user namespace subdirectories

* docs: add missing words to comment

* docs: update typo

* refactor: make code easier to read

* fix: update README files

* fix: remove employer field entirely

* fix: make Github field optional

* refactor: rename files
This commit is contained in:
Michael Smith
2025-05-02 11:23:52 -04:00
committed by GitHub
parent 9e18a4e3a8
commit 45dc925f8b
15 changed files with 670 additions and 506 deletions
+340
View File
@@ -0,0 +1,340 @@
package main
import (
"errors"
"fmt"
"log"
"net/url"
"os"
"path"
"slices"
"strings"
"gopkg.in/yaml.v3"
)
var validContributorStatuses = []string{"official", "partner", "community"}
type contributorProfileFrontmatter struct {
DisplayName string `yaml:"display_name"`
Bio string `yaml:"bio"`
// Script assumes that if value is nil, the Registry site build step will
// backfill the value with the user's GitHub avatar URL
AvatarURL *string `yaml:"avatar"`
LinkedinURL *string `yaml:"linkedin"`
WebsiteURL *string `yaml:"website"`
SupportEmail *string `yaml:"support_email"`
ContributorStatus *string `yaml:"status"`
}
type contributorProfile struct {
frontmatter contributorProfileFrontmatter
namespace string
filePath string
}
func validateContributorDisplayName(displayName string) error {
if displayName == "" {
return fmt.Errorf("missing display_name")
}
return nil
}
func validateContributorLinkedinURL(linkedinURL *string) error {
if linkedinURL == nil {
return nil
}
if _, err := url.ParseRequestURI(*linkedinURL); err != nil {
return fmt.Errorf("linkedIn URL %q is not valid: %v", *linkedinURL, err)
}
return nil
}
func validateContributorSupportEmail(email *string) []error {
if email == nil {
return nil
}
errs := []error{}
// Can't 100% validate that this is correct without actually sending
// an email, and especially with some contributors being individual
// developers, we don't want to do that on every single run of the CI
// pipeline. Best we can do is verify the general structure
username, server, ok := strings.Cut(*email, "@")
if !ok {
errs = append(errs, fmt.Errorf("email address %q is missing @ symbol", *email))
return errs
}
if username == "" {
errs = append(errs, fmt.Errorf("email address %q is missing username", *email))
}
domain, tld, ok := strings.Cut(server, ".")
if !ok {
errs = append(errs, fmt.Errorf("email address %q is missing period for server segment", *email))
return errs
}
if domain == "" {
errs = append(errs, fmt.Errorf("email address %q is missing domain", *email))
}
if tld == "" {
errs = append(errs, fmt.Errorf("email address %q is missing top-level domain", *email))
}
if strings.Contains(*email, "?") {
errs = append(errs, errors.New("email is not allowed to contain query parameters"))
}
return errs
}
func validateContributorWebsite(websiteURL *string) error {
if websiteURL == nil {
return nil
}
if _, err := url.ParseRequestURI(*websiteURL); err != nil {
return fmt.Errorf("linkedIn URL %q is not valid: %v", *websiteURL, err)
}
return nil
}
func validateContributorStatus(status *string) error {
if status == nil {
return nil
}
if !slices.Contains(validContributorStatuses, *status) {
return fmt.Errorf("contributor status %q is not valid", *status)
}
return nil
}
// Can't validate the image actually leads to a valid resource in a pure
// function, but can at least catch obvious problems
func validateContributorAvatarURL(avatarURL *string) []error {
if avatarURL == nil {
return nil
}
errs := []error{}
if *avatarURL == "" {
errs = append(errs, errors.New("avatar URL must be omitted or non-empty string"))
return errs
}
// Have to use .Parse instead of .ParseRequestURI because this is the
// one field that's allowed to be a relative URL
if _, err := url.Parse(*avatarURL); err != nil {
errs = append(errs, fmt.Errorf("URL %q is not a valid relative or absolute URL", *avatarURL))
}
if strings.Contains(*avatarURL, "?") {
errs = append(errs, errors.New("avatar URL is not allowed to contain search parameters"))
}
matched := false
for _, ff := range supportedAvatarFileFormats {
matched = strings.HasSuffix(*avatarURL, ff)
if matched {
break
}
}
if !matched {
segments := strings.Split(*avatarURL, ".")
fileExtension := segments[len(segments)-1]
errs = append(errs, fmt.Errorf("avatar URL '.%s' does not end in a supported file format: [%s]", fileExtension, strings.Join(supportedAvatarFileFormats, ", ")))
}
return errs
}
func validateContributorYaml(yml contributorProfile) []error {
allErrs := []error{}
if err := validateContributorDisplayName(yml.frontmatter.DisplayName); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
if err := validateContributorLinkedinURL(yml.frontmatter.LinkedinURL); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
if err := validateContributorWebsite(yml.frontmatter.WebsiteURL); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
if err := validateContributorStatus(yml.frontmatter.ContributorStatus); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
for _, err := range validateContributorSupportEmail(yml.frontmatter.SupportEmail) {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
for _, err := range validateContributorAvatarURL(yml.frontmatter.AvatarURL) {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
return allErrs
}
func parseContributorProfile(rm readme) (contributorProfile, error) {
fm, _, err := separateFrontmatter(rm.rawText)
if err != nil {
return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)
}
yml := contributorProfileFrontmatter{}
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
return contributorProfile{}, fmt.Errorf("%q: failed to parse: %v", rm.filePath, err)
}
return contributorProfile{
filePath: rm.filePath,
frontmatter: yml,
namespace: strings.TrimSuffix(strings.TrimPrefix(rm.filePath, "registry/"), "/README.md"),
}, nil
}
func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfile, error) {
profilesByNamespace := map[string]contributorProfile{}
yamlParsingErrors := []error{}
for _, rm := range readmeEntries {
p, err := parseContributorProfile(rm)
if err != nil {
yamlParsingErrors = append(yamlParsingErrors, err)
continue
}
if prev, alreadyExists := profilesByNamespace[p.namespace]; alreadyExists {
yamlParsingErrors = append(yamlParsingErrors, fmt.Errorf("%q: namespace %q conflicts with namespace from %q", p.filePath, p.namespace, prev.filePath))
continue
}
profilesByNamespace[p.namespace] = p
}
if len(yamlParsingErrors) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadmeParsing,
errors: yamlParsingErrors,
}
}
yamlValidationErrors := []error{}
for _, p := range profilesByNamespace {
errors := validateContributorYaml(p)
if len(errors) > 0 {
yamlValidationErrors = append(yamlValidationErrors, errors...)
continue
}
}
if len(yamlValidationErrors) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadmeParsing,
errors: yamlValidationErrors,
}
}
return profilesByNamespace, nil
}
func aggregateContributorReadmeFiles() ([]readme, error) {
dirEntries, err := os.ReadDir(rootRegistryPath)
if err != nil {
return nil, err
}
allReadmeFiles := []readme{}
errs := []error{}
for _, e := range dirEntries {
dirPath := path.Join(rootRegistryPath, e.Name())
if !e.IsDir() {
continue
}
readmePath := path.Join(dirPath, "README.md")
rmBytes, err := os.ReadFile(readmePath)
if err != nil {
errs = append(errs, err)
continue
}
allReadmeFiles = append(allReadmeFiles, readme{
filePath: readmePath,
rawText: string(rmBytes),
})
}
if len(errs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseFileLoad,
errors: errs,
}
}
return allReadmeFiles, nil
}
func validateContributorRelativeUrls(contributors map[string]contributorProfile) error {
// This function only validates relative avatar URLs for now, but it can be
// beefed up to validate more in the future
errs := []error{}
for _, con := range contributors {
// If the avatar URL is missing, we'll just assume that the Registry
// site build step will take care of filling in the data properly
if con.frontmatter.AvatarURL == nil {
continue
}
isRelativeURL := strings.HasPrefix(*con.frontmatter.AvatarURL, ".") ||
strings.HasPrefix(*con.frontmatter.AvatarURL, "/")
if !isRelativeURL {
continue
}
if strings.HasPrefix(*con.frontmatter.AvatarURL, "..") {
errs = append(errs, fmt.Errorf("%q: relative avatar URLs cannot be placed outside a user's namespaced directory", con.filePath))
continue
}
absolutePath := strings.TrimSuffix(con.filePath, "README.md") +
*con.frontmatter.AvatarURL
_, err := os.ReadFile(absolutePath)
if err != nil {
errs = append(errs, fmt.Errorf("%q: relative avatar path %q does not point to image in file system", con.filePath, *con.frontmatter.AvatarURL))
}
}
if len(errs) == 0 {
return nil
}
return validationPhaseError{
phase: validationPhaseAssetCrossReference,
errors: errs,
}
}
func validateAllContributorFiles() error {
allReadmeFiles, err := aggregateContributorReadmeFiles()
if err != nil {
return err
}
log.Printf("Processing %d README files\n", len(allReadmeFiles))
contributors, err := parseContributorFiles(allReadmeFiles)
if err != nil {
return err
}
log.Printf("Processed %d README files as valid contributor profiles", len(contributors))
err = validateContributorRelativeUrls(contributors)
if err != nil {
return err
}
log.Println("All relative URLs for READMEs are valid")
log.Printf("Processed all READMEs in the %q directory\n", rootRegistryPath)
return nil
}
+28
View File
@@ -0,0 +1,28 @@
package main
import "fmt"
// validationPhaseError represents an error that occurred during a specific
// phase of README validation. It should be used to collect ALL validation
// errors that happened during a specific phase, rather than the first one
// encountered.
type validationPhaseError struct {
phase validationPhase
errors []error
}
var _ error = validationPhaseError{}
func (vpe validationPhaseError) Error() string {
msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase.String())
for _, e := range vpe.errors {
msg += fmt.Sprintf("\n- %v", e)
}
msg += "\n"
return msg
}
func addFilePathToError(filePath string, err error) error {
return fmt.Errorf("%q: %v", filePath, err)
}
+39
View File
@@ -0,0 +1,39 @@
// This package is for validating all contributors within the main Registry
// directory. It validates that it has nothing but sub-directories, and that
// each sub-directory has a README.md file. Each of those files must then
// describe a specific contributor. The contents of these files will be parsed
// by the Registry site build step, to be displayed in the Registry site's UI.
package main
import (
"fmt"
"log"
"os"
)
func main() {
log.Println("Starting README validation")
// If there are fundamental problems with how the repo is structured, we
// can't make any guarantees that any further validations will be relevant
// or accurate
repoErr := validateRepoStructure()
if repoErr != nil {
log.Println(repoErr)
os.Exit(1)
}
errs := []error{}
err := validateAllContributorFiles()
if err != nil {
errs = append(errs, err)
}
if len(errs) == 0 {
os.Exit(0)
}
for _, err := range errs {
fmt.Println(err)
}
os.Exit(1)
}
+113
View File
@@ -0,0 +1,113 @@
package main
import (
"bufio"
"errors"
"fmt"
"strings"
)
const rootRegistryPath = "./registry"
var supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"}
// readme represents a single README file within the repo (usually within the
// top-level "/registry" directory).
type readme struct {
filePath string
rawText string
}
// separateFrontmatter attempts to separate a README file's frontmatter content
// from the main README body, returning both values in that order. It does not
// validate whether the structure of the frontmatter is valid (i.e., that it's
// structured as YAML).
func separateFrontmatter(readmeText string) (string, string, error) {
if readmeText == "" {
return "", "", errors.New("README is empty")
}
const fence = "---"
fm := ""
body := ""
fenceCount := 0
lineScanner := bufio.NewScanner(
strings.NewReader(strings.TrimSpace(readmeText)),
)
for lineScanner.Scan() {
nextLine := lineScanner.Text()
if fenceCount < 2 && nextLine == fence {
fenceCount++
continue
}
// Break early if the very first line wasn't a fence, because then we
// know for certain that the README has problems
if fenceCount == 0 {
break
}
// It should be safe to trim each line of the frontmatter on a per-line
// basis, because there shouldn't be any extra meaning attached to the
// indentation. The same does NOT apply to the README; best we can do is
// gather all the lines, and then trim around it
if inReadmeBody := fenceCount >= 2; inReadmeBody {
body += nextLine + "\n"
} else {
fm += strings.TrimSpace(nextLine) + "\n"
}
}
if fenceCount < 2 {
return "", "", errors.New("README does not have two sets of frontmatter fences")
}
if fm == "" {
return "", "", errors.New("readme has frontmatter fences but no frontmatter content")
}
return fm, strings.TrimSpace(body), nil
}
// validationPhase represents a specific phase during README validation. It is
// expected that each phase is discrete, and errors during one will prevent a
// future phase from starting.
type validationPhase int
const (
// validationPhaseFileStructureValidation indicates when the entire Registry
// directory is being verified for having all files be placed in the file
// system as expected.
validationPhaseFileStructureValidation validationPhase = iota
// validationPhaseFileLoad indicates when README files are being read from
// the file system
validationPhaseFileLoad
// validationPhaseReadmeParsing indicates when a README's frontmatter is
// being parsed as YAML. This phase does not include YAML validation.
validationPhaseReadmeParsing
// validationPhaseReadmeValidation indicates when a README's frontmatter is
// being validated as proper YAML with expected keys.
validationPhaseReadmeValidation
// validationPhaseAssetCrossReference indicates when a README's frontmatter
// is having all its relative URLs be validated for whether they point to
// valid resources.
validationPhaseAssetCrossReference
)
func (p validationPhase) String() string {
switch p {
case validationPhaseFileStructureValidation:
return "File structure validation"
case validationPhaseFileLoad:
return "Filesystem reading"
case validationPhaseReadmeParsing:
return "README parsing"
case validationPhaseReadmeValidation:
return "README validation"
case validationPhaseAssetCrossReference:
return "Cross-referencing relative asset URLs"
default:
return fmt.Sprintf("Unknown validation phase: %d", p)
}
}
+145
View File
@@ -0,0 +1,145 @@
package main
import (
"errors"
"fmt"
"os"
"path"
"slices"
"strings"
)
var (
supportedResourceTypes = []string{"modules", "templates"}
supportedUserNameSpaceDirectories = append(supportedResourceTypes[:], ".icons", ".images")
)
func validateCoderResourceSubdirectory(dirPath string) []error {
errs := []error{}
subDir, err := os.Stat(dirPath)
if err != nil {
// It's valid for a specific resource directory not to exist. It's just
// that if it does exist, it must follow specific rules
if !errors.Is(err, os.ErrNotExist) {
errs = append(errs, addFilePathToError(dirPath, err))
}
return errs
}
if !subDir.IsDir() {
errs = append(errs, fmt.Errorf("%q: path is not a directory", dirPath))
return errs
}
files, err := os.ReadDir(dirPath)
if err != nil {
errs = append(errs, addFilePathToError(dirPath, err))
return errs
}
for _, f := range files {
// The .coder subdirectories are sometimes generated as part of Bun
// tests. These subdirectories will never be committed to the repo, but
// in the off chance that they don't get cleaned up properly, we want to
// skip over them
if !f.IsDir() || f.Name() == ".coder" {
continue
}
resourceReadmePath := path.Join(dirPath, f.Name(), "README.md")
_, err := os.Stat(resourceReadmePath)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
errs = append(errs, fmt.Errorf("%q: 'README.md' does not exist", resourceReadmePath))
} else {
errs = append(errs, addFilePathToError(resourceReadmePath, err))
}
}
mainTerraformPath := path.Join(dirPath, f.Name(), "main.tf")
_, err = os.Stat(mainTerraformPath)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
errs = append(errs, fmt.Errorf("%q: 'main.tf' file does not exist", mainTerraformPath))
} else {
errs = append(errs, addFilePathToError(mainTerraformPath, err))
}
}
}
return errs
}
func validateRegistryDirectory() []error {
userDirs, err := os.ReadDir(rootRegistryPath)
if err != nil {
return []error{err}
}
allErrs := []error{}
for _, d := range userDirs {
dirPath := path.Join(rootRegistryPath, d.Name())
if !d.IsDir() {
allErrs = append(allErrs, fmt.Errorf("detected non-directory file %q at base of main Registry directory", dirPath))
continue
}
contributorReadmePath := path.Join(dirPath, "README.md")
_, err := os.Stat(contributorReadmePath)
if err != nil {
allErrs = append(allErrs, err)
}
files, err := os.ReadDir(dirPath)
if err != nil {
allErrs = append(allErrs, err)
continue
}
for _, f := range files {
// Todo: Decide if there's anything more formal that we want to
// ensure about non-directories scoped to user namespaces
if !f.IsDir() {
continue
}
segment := f.Name()
filePath := path.Join(dirPath, segment)
if !slices.Contains(supportedUserNameSpaceDirectories, segment) {
allErrs = append(allErrs, fmt.Errorf("%q: only these sub-directories are allowed at top of user namespace: [%s]", filePath, strings.Join(supportedUserNameSpaceDirectories, ", ")))
continue
}
if slices.Contains(supportedResourceTypes, segment) {
errs := validateCoderResourceSubdirectory(filePath)
if len(errs) != 0 {
allErrs = append(allErrs, errs...)
}
}
}
}
return allErrs
}
func validateRepoStructure() error {
var problems []error
if errs := validateRegistryDirectory(); len(errs) != 0 {
problems = append(problems, errs...)
}
_, err := os.Stat("./.icons")
if err != nil {
problems = append(problems, errors.New("missing top-level .icons directory (used for storing reusable Coder resource icons)"))
}
if len(problems) != 0 {
return validationPhaseError{
phase: validationPhaseFileStructureValidation,
errors: problems,
}
}
return nil
}