fix: update repo structure validation logic to disallow false positives (#10)

* refactor: update file structure to reflect new changes

* refactor: start splitting up files

* refactor: more domain splitting

* refactor: remove directory validation from contributors file

* fix: update repo structure checks

* fix: improve check for user namespace subdirectories

* docs: add missing words to comment

* docs: update typo

* refactor: make code easier to read

* fix: update README files

* fix: remove employer field entirely

* fix: make Github field optional

* refactor: rename files
This commit is contained in:
Michael Smith
2025-05-02 11:23:52 -04:00
committed by GitHub
parent 9e18a4e3a8
commit 45dc925f8b
15 changed files with 670 additions and 506 deletions
+2 -2
View File
@@ -65,6 +65,6 @@ jobs:
with:
go-version: "1.23.2"
- name: Validate contributors
run: go build ./scripts/contributors && ./contributors
run: go build ./cmd/readmevalidation && ./readmevalidation
- name: Remove build file artifact
run: rm ./contributors
run: rm ./readmevalidation
+2 -2
View File
@@ -135,8 +135,8 @@ dist
.yarn/install-state.gz
.pnp.*
# Script output
/contributors
# Things needed for CI
/readmevalidation
# Terraform files generated during testing
.terraform*
+340
View File
@@ -0,0 +1,340 @@
package main
import (
"errors"
"fmt"
"log"
"net/url"
"os"
"path"
"slices"
"strings"
"gopkg.in/yaml.v3"
)
// validContributorStatuses lists every value accepted for the optional
// "status" frontmatter field of a contributor profile.
var validContributorStatuses = []string{"official", "partner", "community"}
// contributorProfileFrontmatter mirrors the YAML frontmatter found at the top
// of a contributor's README.md. Pointer fields are treated as optional by the
// validators in this file: a nil pointer means validation for that field is
// skipped.
type contributorProfileFrontmatter struct {
	DisplayName string `yaml:"display_name"`
	Bio         string `yaml:"bio"`
	// Script assumes that if value is nil, the Registry site build step will
	// backfill the value with the user's GitHub avatar URL
	AvatarURL         *string `yaml:"avatar"`
	LinkedinURL       *string `yaml:"linkedin"`
	WebsiteURL        *string `yaml:"website"`
	SupportEmail      *string `yaml:"support_email"`
	ContributorStatus *string `yaml:"status"`
}
// contributorProfile pairs the decoded frontmatter of one contributor README
// with information about where it was read from.
type contributorProfile struct {
	// frontmatter holds the values decoded from the README's YAML header.
	frontmatter contributorProfileFrontmatter
	// namespace is the README's file path with a leading "registry/" and a
	// trailing "/README.md" removed; profiles are keyed by this value.
	namespace string
	// filePath is the path of the README the profile was parsed from. It is
	// used as the prefix of validation error messages.
	filePath string
}
// validateContributorDisplayName reports an error when the display_name
// frontmatter field is missing. A value made up solely of whitespace is
// treated as missing too, because it would render as an empty name.
func validateContributorDisplayName(displayName string) error {
	if strings.TrimSpace(displayName) == "" {
		return errors.New("missing display_name")
	}
	return nil
}
// validateContributorLinkedinURL checks the optional linkedin frontmatter
// field. A nil pointer means the field was omitted and is always valid.
//
// url.ParseRequestURI also accepts bare absolute paths ("/company/x") and
// non-web schemes, neither of which can be a link to a LinkedIn page, so the
// parsed value must additionally be an http(s) URL with a host.
func validateContributorLinkedinURL(linkedinURL *string) error {
	if linkedinURL == nil {
		return nil
	}

	parsed, err := url.ParseRequestURI(*linkedinURL)
	if err != nil {
		return fmt.Errorf("linkedIn URL %q is not valid: %w", *linkedinURL, err)
	}
	isWebScheme := parsed.Scheme == "http" || parsed.Scheme == "https"
	if !isWebScheme || parsed.Host == "" {
		return fmt.Errorf("linkedIn URL %q is not valid: must be an absolute http(s) URL", *linkedinURL)
	}

	return nil
}
// validateContributorSupportEmail performs structural checks on the optional
// support_email frontmatter field and returns every problem it finds. A nil
// pointer means the field was omitted; nil is returned in that case.
//
// Beyond the username/domain/top-level-domain checks, addresses containing
// more than one "@" or any whitespace are rejected, since such values can
// never be a deliverable address.
func validateContributorSupportEmail(email *string) []error {
	if email == nil {
		return nil
	}

	errs := []error{}

	// Can't 100% validate that this is correct without actually sending
	// an email, and especially with some contributors being individual
	// developers, we don't want to do that on every single run of the CI
	// pipeline. Best we can do is verify the general structure
	username, server, ok := strings.Cut(*email, "@")
	if !ok {
		errs = append(errs, fmt.Errorf("email address %q is missing @ symbol", *email))
		return errs
	}

	if username == "" {
		errs = append(errs, fmt.Errorf("email address %q is missing username", *email))
	}
	if strings.Contains(server, "@") {
		errs = append(errs, fmt.Errorf("email address %q contains more than one @ symbol", *email))
	}
	if strings.ContainsAny(*email, " \t\r\n") {
		errs = append(errs, fmt.Errorf("email address %q is not allowed to contain whitespace", *email))
	}

	domain, tld, ok := strings.Cut(server, ".")
	if !ok {
		errs = append(errs, fmt.Errorf("email address %q is missing period for server segment", *email))
		return errs
	}

	if domain == "" {
		errs = append(errs, fmt.Errorf("email address %q is missing domain", *email))
	}
	if tld == "" {
		errs = append(errs, fmt.Errorf("email address %q is missing top-level domain", *email))
	}
	if strings.Contains(*email, "?") {
		errs = append(errs, errors.New("email is not allowed to contain query parameters"))
	}

	return errs
}
// validateContributorWebsite checks the optional website frontmatter field.
// A nil pointer means the field was omitted and is always valid.
//
// The value must be an absolute http(s) URL with a host; url.ParseRequestURI
// alone would also accept bare paths such as "/about".
func validateContributorWebsite(websiteURL *string) error {
	if websiteURL == nil {
		return nil
	}

	parsed, err := url.ParseRequestURI(*websiteURL)
	if err != nil {
		return fmt.Errorf("website URL %q is not valid: %w", *websiteURL, err)
	}
	isWebScheme := parsed.Scheme == "http" || parsed.Scheme == "https"
	if !isWebScheme || parsed.Host == "" {
		return fmt.Errorf("website URL %q is not valid: must be an absolute http(s) URL", *websiteURL)
	}

	return nil
}
// validateContributorStatus checks the optional status frontmatter field. An
// omitted field (nil) is valid; otherwise the value has to be one of the
// entries in validContributorStatuses.
func validateContributorStatus(status *string) error {
	if status == nil || slices.Contains(validContributorStatuses, *status) {
		return nil
	}
	return fmt.Errorf("contributor status %q is not valid", *status)
}
// validateContributorAvatarURL performs structural checks on the optional
// avatar frontmatter field and returns every problem found. A nil pointer
// means the field was omitted; nil is returned in that case.
//
// Can't validate the image actually leads to a valid resource in a pure
// function, but can at least catch obvious problems
func validateContributorAvatarURL(avatarURL *string) []error {
	if avatarURL == nil {
		return nil
	}

	errs := []error{}
	if *avatarURL == "" {
		errs = append(errs, errors.New("avatar URL must be omitted or non-empty string"))
		return errs
	}

	// Have to use .Parse instead of .ParseRequestURI because this is the
	// one field that's allowed to be a relative URL
	if _, err := url.Parse(*avatarURL); err != nil {
		errs = append(errs, fmt.Errorf("URL %q is not a valid relative or absolute URL", *avatarURL))
	}
	if strings.Contains(*avatarURL, "?") {
		errs = append(errs, errors.New("avatar URL is not allowed to contain search parameters"))
	}

	hasSupportedFormat := slices.ContainsFunc(supportedAvatarFileFormats, func(format string) bool {
		return strings.HasSuffix(*avatarURL, format)
	})
	if !hasSupportedFormat {
		// Report the whole URL rather than "everything after the last dot":
		// for a URL whose last path segment has no extension that substring
		// is a piece of the host or a directory name, not a file extension.
		errs = append(errs, fmt.Errorf("avatar URL %q does not end in a supported file format: [%s]", *avatarURL, strings.Join(supportedAvatarFileFormats, ", ")))
	}

	return errs
}
// validateContributorYaml runs every per-field validator against a parsed
// contributor profile and returns all problems found, each one prefixed with
// the path of the README it came from. The result is empty (but non-nil)
// when the profile is valid.
func validateContributorYaml(yml contributorProfile) []error {
	fields := yml.frontmatter
	found := []error{}

	// Validators that report at most one problem each.
	singleResults := []error{
		validateContributorDisplayName(fields.DisplayName),
		validateContributorLinkedinURL(fields.LinkedinURL),
		validateContributorWebsite(fields.WebsiteURL),
		validateContributorStatus(fields.ContributorStatus),
	}
	for _, problem := range singleResults {
		if problem != nil {
			found = append(found, addFilePathToError(yml.filePath, problem))
		}
	}

	// Validators that can report several problems at once.
	multiResults := [][]error{
		validateContributorSupportEmail(fields.SupportEmail),
		validateContributorAvatarURL(fields.AvatarURL),
	}
	for _, group := range multiResults {
		for _, problem := range group {
			found = append(found, addFilePathToError(yml.filePath, problem))
		}
	}

	return found
}
// parseContributorProfile extracts the frontmatter from a README, decodes it
// as YAML, and derives the contributor's namespace from the README's path.
// Errors are prefixed with the README's path.
func parseContributorProfile(rm readme) (contributorProfile, error) {
	rawFrontmatter, _, splitErr := separateFrontmatter(rm.rawText)
	if splitErr != nil {
		return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, splitErr)
	}

	var decoded contributorProfileFrontmatter
	if decodeErr := yaml.Unmarshal([]byte(rawFrontmatter), &decoded); decodeErr != nil {
		return contributorProfile{}, fmt.Errorf("%q: failed to parse: %v", rm.filePath, decodeErr)
	}

	// The namespace is whatever sits between "registry/" and "/README.md".
	namespace := strings.TrimPrefix(rm.filePath, "registry/")
	namespace = strings.TrimSuffix(namespace, "/README.md")

	profile := contributorProfile{
		frontmatter: decoded,
		namespace:   namespace,
		filePath:    rm.filePath,
	}
	return profile, nil
}
// parseContributorFiles turns raw README files into contributor profiles
// keyed by namespace. It works in two stages, each collecting every error
// before giving up:
//
//  1. Parsing: each README is decoded; a namespace that appears twice is an
//     error. Failures are returned as a validationPhaseReadmeParsing error.
//  2. Validation: each decoded profile is checked field by field. Failures
//     are returned as a validationPhaseReadmeValidation error.
//
// Profiles are validated in sorted namespace order so the resulting error
// list is stable from run to run.
func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfile, error) {
	profilesByNamespace := map[string]contributorProfile{}
	yamlParsingErrors := []error{}
	for _, rm := range readmeEntries {
		p, err := parseContributorProfile(rm)
		if err != nil {
			yamlParsingErrors = append(yamlParsingErrors, err)
			continue
		}

		if prev, alreadyExists := profilesByNamespace[p.namespace]; alreadyExists {
			yamlParsingErrors = append(yamlParsingErrors, fmt.Errorf("%q: namespace %q conflicts with namespace from %q", p.filePath, p.namespace, prev.filePath))
			continue
		}
		profilesByNamespace[p.namespace] = p
	}
	if len(yamlParsingErrors) != 0 {
		return nil, validationPhaseError{
			phase:  validationPhaseReadmeParsing,
			errors: yamlParsingErrors,
		}
	}

	// Map iteration order is random; sort the keys for reproducible output.
	namespaces := make([]string, 0, len(profilesByNamespace))
	for namespace := range profilesByNamespace {
		namespaces = append(namespaces, namespace)
	}
	slices.Sort(namespaces)

	yamlValidationErrors := []error{}
	for _, namespace := range namespaces {
		yamlValidationErrors = append(yamlValidationErrors, validateContributorYaml(profilesByNamespace[namespace])...)
	}
	if len(yamlValidationErrors) != 0 {
		return nil, validationPhaseError{
			phase:  validationPhaseReadmeValidation,
			errors: yamlValidationErrors,
		}
	}

	return profilesByNamespace, nil
}
// aggregateContributorReadmeFiles reads the README.md of every directory
// directly under rootRegistryPath. Entries that are not directories are
// ignored. If any README cannot be read, all read failures are returned
// together as a validationPhaseFileLoad error and no READMEs are returned.
func aggregateContributorReadmeFiles() ([]readme, error) {
	entries, err := os.ReadDir(rootRegistryPath)
	if err != nil {
		return nil, err
	}

	loaded := []readme{}
	loadErrs := []error{}
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}

		readmePath := path.Join(rootRegistryPath, entry.Name(), "README.md")
		contents, readErr := os.ReadFile(readmePath)
		if readErr != nil {
			loadErrs = append(loadErrs, readErr)
			continue
		}
		loaded = append(loaded, readme{
			filePath: readmePath,
			rawText:  string(contents),
		})
	}

	if len(loadErrs) > 0 {
		return nil, validationPhaseError{
			phase:  validationPhaseFileLoad,
			errors: loadErrs,
		}
	}
	return loaded, nil
}
// validateContributorRelativeUrls verifies that every relative avatar URL
// points at a file inside the contributor's own namespace directory. Avatar
// values that do not start with "." or "/" are treated as absolute URLs and
// skipped. All problems are returned together as a
// validationPhaseAssetCrossReference error.
//
// The avatar path is normalized before the "outside the namespace" check so
// that values such as "./../other/avatar.png" cannot slip past it, and the
// file is inspected with os.Stat rather than being read in full. Profiles
// are visited in sorted key order to keep the error list stable.
func validateContributorRelativeUrls(contributors map[string]contributorProfile) error {
	// This function only validates relative avatar URLs for now, but it can be
	// beefed up to validate more in the future
	errs := []error{}

	keys := make([]string, 0, len(contributors))
	for key := range contributors {
		keys = append(keys, key)
	}
	slices.Sort(keys)

	for _, key := range keys {
		con := contributors[key]

		// If the avatar URL is missing, we'll just assume that the Registry
		// site build step will take care of filling in the data properly
		if con.frontmatter.AvatarURL == nil {
			continue
		}
		avatar := *con.frontmatter.AvatarURL

		isRelativeURL := strings.HasPrefix(avatar, ".") || strings.HasPrefix(avatar, "/")
		if !isRelativeURL {
			continue
		}

		// A leading slash is interpreted relative to the namespace directory,
		// so drop it and collapse any "." / ".." segments before checking
		// whether the path climbs out of that directory.
		relative := path.Clean(strings.TrimLeft(avatar, "/"))
		if relative == ".." || strings.HasPrefix(relative, "../") {
			errs = append(errs, fmt.Errorf("%q: relative avatar URLs cannot be placed outside a user's namespaced directory", con.filePath))
			continue
		}

		resolved := path.Join(path.Dir(con.filePath), relative)
		info, err := os.Stat(resolved)
		if err != nil || info.IsDir() {
			errs = append(errs, fmt.Errorf("%q: relative avatar path %q does not point to image in file system", con.filePath, avatar))
		}
	}

	if len(errs) == 0 {
		return nil
	}
	return validationPhaseError{
		phase:  validationPhaseAssetCrossReference,
		errors: errs,
	}
}
// validateAllContributorFiles runs the contributor README pipeline: load the
// README files, parse and validate them, then cross-check relative URLs. It
// stops at the first stage that fails and returns that stage's error,
// logging progress along the way.
func validateAllContributorFiles() error {
	readmes, loadErr := aggregateContributorReadmeFiles()
	if loadErr != nil {
		return loadErr
	}
	log.Printf("Processing %d README files\n", len(readmes))

	profiles, parseErr := parseContributorFiles(readmes)
	if parseErr != nil {
		return parseErr
	}
	log.Printf("Processed %d README files as valid contributor profiles", len(profiles))

	if urlErr := validateContributorRelativeUrls(profiles); urlErr != nil {
		return urlErr
	}
	log.Println("All relative URLs for READMEs are valid")

	log.Printf("Processed all READMEs in the %q directory\n", rootRegistryPath)
	return nil
}
+28
View File
@@ -0,0 +1,28 @@
package main
import "fmt"
// validationPhaseError represents an error that occurred during a specific
// phase of README validation. It should be used to collect ALL validation
// errors that happened during a specific phase, rather than the first one
// encountered.
type validationPhaseError struct {
	// phase identifies the validation phase the errors belong to.
	phase validationPhase
	// errors holds every individual problem found during that phase.
	errors []error
}

// Compile-time check that validationPhaseError satisfies the error interface.
var _ error = validationPhaseError{}
// Error renders the phase name on a header line followed by one "- ..."
// bullet line per collected error, in the order they were collected. The
// result always ends with a newline.
func (vpe validationPhaseError) Error() string {
	out := fmt.Appendf(nil, "Error during %q phase of README validation:", vpe.phase.String())
	for _, problem := range vpe.errors {
		out = fmt.Appendf(out, "\n- %v", problem)
	}
	out = append(out, '\n')
	return string(out)
}
// addFilePathToError prefixes err's message with the quoted file path. The
// original error is wrapped, so errors.Is and errors.As keep working on the
// result. A nil err yields nil, so a success value is never turned into an
// error by accident.
func addFilePathToError(filePath string, err error) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%q: %w", filePath, err)
}
+39
View File
@@ -0,0 +1,39 @@
// This package is for validating all contributors within the main Registry
// directory. It validates that it has nothing but sub-directories, and that
// each sub-directory has a README.md file. Each of those files must then
// describe a specific contributor. The contents of these files will be parsed
// by the Registry site build step, to be displayed in the Registry site's UI.
package main
import (
"fmt"
"log"
"os"
)
// main validates the repo's directory layout first and, only if that
// succeeds, validates the contributor README files. The process exits with
// status 1 when any validation fails and 0 otherwise.
func main() {
	log.Println("Starting README validation")

	// If there are fundamental problems with how the repo is structured, we
	// can't make any guarantees that any further validations will be relevant
	// or accurate
	if repoErr := validateRepoStructure(); repoErr != nil {
		log.Println(repoErr)
		os.Exit(1)
	}

	failures := []error{}
	if contributorErr := validateAllContributorFiles(); contributorErr != nil {
		failures = append(failures, contributorErr)
	}

	for _, failure := range failures {
		fmt.Println(failure)
	}
	if len(failures) > 0 {
		os.Exit(1)
	}
	os.Exit(0)
}
+113
View File
@@ -0,0 +1,113 @@
package main
import (
"bufio"
"errors"
"fmt"
"strings"
)
// rootRegistryPath is the directory whose immediate sub-directories are
// treated as contributor namespaces. It is a relative path, so it is resolved
// against the process's working directory.
const rootRegistryPath = "./registry"

// supportedAvatarFileFormats lists the suffixes an avatar URL must end with
// to pass validation.
var supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"}
// readme represents a single README file within the repo (usually within the
// top-level "/registry" directory).
type readme struct {
	// filePath is the path the README was read from.
	filePath string
	// rawText is the complete, unparsed content of the file.
	rawText string
}
// separateFrontmatter attempts to separate a README file's frontmatter content
// from the main README body, returning both values in that order. It does not
// validate whether the structure of the frontmatter is valid (i.e., that it's
// structured as YAML).
//
// The README (after trimming surrounding whitespace) must start with a "---"
// fence line and contain a second one; the lines in between are the
// frontmatter and everything after the second fence is the body. An error is
// returned when the input is empty, fewer than two fences are found, nothing
// sits between the fences, or the text cannot be scanned.
func separateFrontmatter(readmeText string) (string, string, error) {
	if readmeText == "" {
		return "", "", errors.New("README is empty")
	}

	const fence = "---"
	trimmed := strings.TrimSpace(readmeText)

	var fm, body strings.Builder
	fenceCount := 0

	lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
	// By default a Scanner gives up on any line longer than 64 KiB, which
	// would silently cut the README short. Let a single line grow as large as
	// the whole input; the +1 is needed because the limit is exclusive.
	lineScanner.Buffer(make([]byte, 0, 4096), len(trimmed)+1)

	for lineScanner.Scan() {
		nextLine := lineScanner.Text()
		if fenceCount < 2 && nextLine == fence {
			fenceCount++
			continue
		}
		// Break early if the very first line wasn't a fence, because then we
		// know for certain that the README has problems
		if fenceCount == 0 {
			break
		}

		// It should be safe to trim each line of the frontmatter on a per-line
		// basis, because there shouldn't be any extra meaning attached to the
		// indentation. The same does NOT apply to the README; best we can do is
		// gather all the lines, and then trim around it
		if inReadmeBody := fenceCount >= 2; inReadmeBody {
			body.WriteString(nextLine)
			body.WriteByte('\n')
		} else {
			fm.WriteString(strings.TrimSpace(nextLine))
			fm.WriteByte('\n')
		}
	}
	if err := lineScanner.Err(); err != nil {
		return "", "", fmt.Errorf("unable to scan README: %w", err)
	}

	if fenceCount < 2 {
		return "", "", errors.New("README does not have two sets of frontmatter fences")
	}
	if fm.Len() == 0 {
		return "", "", errors.New("readme has frontmatter fences but no frontmatter content")
	}

	return fm.String(), strings.TrimSpace(body.String()), nil
}
// validationPhase represents a specific phase during README validation. It is
// expected that each phase is discrete, and errors during one will prevent a
// future phase from starting.
type validationPhase int

// The phases are numbered consecutively from zero via iota, in the order
// they are declared below.
const (
	// validationPhaseFileStructureValidation indicates when the entire Registry
	// directory is being verified for having all files be placed in the file
	// system as expected.
	validationPhaseFileStructureValidation validationPhase = iota

	// validationPhaseFileLoad indicates when README files are being read from
	// the file system
	validationPhaseFileLoad

	// validationPhaseReadmeParsing indicates when a README's frontmatter is
	// being parsed as YAML. This phase does not include YAML validation.
	validationPhaseReadmeParsing

	// validationPhaseReadmeValidation indicates when a README's frontmatter is
	// being validated as proper YAML with expected keys.
	validationPhaseReadmeValidation

	// validationPhaseAssetCrossReference indicates when a README's frontmatter
	// is having all its relative URLs be validated for whether they point to
	// valid resources.
	validationPhaseAssetCrossReference
)
// String returns the human-readable label for a validation phase. Values
// that do not correspond to a declared phase produce a label containing the
// raw number.
func (p validationPhase) String() string {
	labels := map[validationPhase]string{
		validationPhaseFileStructureValidation: "File structure validation",
		validationPhaseFileLoad:                "Filesystem reading",
		validationPhaseReadmeParsing:           "README parsing",
		validationPhaseReadmeValidation:        "README validation",
		validationPhaseAssetCrossReference:     "Cross-referencing relative asset URLs",
	}
	if label, known := labels[p]; known {
		return label
	}
	return fmt.Sprintf("Unknown validation phase: %d", p)
}
+145
View File
@@ -0,0 +1,145 @@
package main
import (
"errors"
"fmt"
"os"
"path"
"slices"
"strings"
)
var (
	// supportedResourceTypes lists the sub-directories of a user namespace
	// that hold Coder resources.
	supportedResourceTypes = []string{"modules", "templates"}

	// supportedUserNameSpaceDirectories lists every sub-directory allowed at
	// the top of a user namespace: the resource types plus the asset
	// directories. The resource list is cloned explicitly because appending
	// to a plain re-slice only avoids sharing its backing array while that
	// slice happens to have no spare capacity.
	supportedUserNameSpaceDirectories = append(slices.Clone(supportedResourceTypes), ".icons", ".images")
)
// validateCoderResourceSubdirectory checks one resource directory (for
// example a namespace's "modules" directory). The directory is allowed to be
// absent; when it exists it must be a directory, and each of its
// sub-directories (other than ".coder") must contain both a README.md and a
// main.tf. Every problem found is returned; the result is empty when there
// are none.
func validateCoderResourceSubdirectory(dirPath string) []error {
	info, statErr := os.Stat(dirPath)
	switch {
	case errors.Is(statErr, os.ErrNotExist):
		// It's valid for a specific resource directory not to exist. It's just
		// that if it does exist, it must follow specific rules
		return []error{}
	case statErr != nil:
		return []error{addFilePathToError(dirPath, statErr)}
	case !info.IsDir():
		return []error{fmt.Errorf("%q: path is not a directory", dirPath)}
	}

	entries, readErr := os.ReadDir(dirPath)
	if readErr != nil {
		return []error{addFilePathToError(dirPath, readErr)}
	}

	// Files every resource must ship, with the error to report when absent.
	requiredFiles := []struct {
		name       string
		missingErr func(filePath string) error
	}{
		{"README.md", func(p string) error { return fmt.Errorf("%q: 'README.md' does not exist", p) }},
		{"main.tf", func(p string) error { return fmt.Errorf("%q: 'main.tf' file does not exist", p) }},
	}

	found := []error{}
	for _, entry := range entries {
		// The .coder subdirectories are sometimes generated as part of Bun
		// tests. These subdirectories will never be committed to the repo, but
		// in the off chance that they don't get cleaned up properly, we want to
		// skip over them
		if !entry.IsDir() || entry.Name() == ".coder" {
			continue
		}

		for _, required := range requiredFiles {
			target := path.Join(dirPath, entry.Name(), required.name)
			_, err := os.Stat(target)
			switch {
			case err == nil:
				// File is present; nothing to report.
			case errors.Is(err, os.ErrNotExist):
				found = append(found, required.missingErr(target))
			default:
				found = append(found, addFilePathToError(target, err))
			}
		}
	}
	return found
}
// validateRegistryDirectory walks the top level of rootRegistryPath and
// collects every structural problem: entries that are not directories,
// namespaces whose README.md cannot be stat'ed, sub-directories that are not
// in supportedUserNameSpaceDirectories, and problems inside resource
// directories. Regular files inside a namespace are not checked.
func validateRegistryDirectory() []error {
	namespaceDirs, err := os.ReadDir(rootRegistryPath)
	if err != nil {
		return []error{err}
	}

	allowedList := strings.Join(supportedUserNameSpaceDirectories, ", ")
	found := []error{}
	for _, namespaceDir := range namespaceDirs {
		namespacePath := path.Join(rootRegistryPath, namespaceDir.Name())
		if !namespaceDir.IsDir() {
			found = append(found, fmt.Errorf("detected non-directory file %q at base of main Registry directory", namespacePath))
			continue
		}

		if _, statErr := os.Stat(path.Join(namespacePath, "README.md")); statErr != nil {
			found = append(found, statErr)
		}

		children, readErr := os.ReadDir(namespacePath)
		if readErr != nil {
			found = append(found, readErr)
			continue
		}

		for _, child := range children {
			// Todo: Decide if there's anything more formal that we want to
			// ensure about non-directories scoped to user namespaces
			if !child.IsDir() {
				continue
			}

			name := child.Name()
			childPath := path.Join(namespacePath, name)
			switch {
			case !slices.Contains(supportedUserNameSpaceDirectories, name):
				found = append(found, fmt.Errorf("%q: only these sub-directories are allowed at top of user namespace: [%s]", childPath, allowedList))
			case slices.Contains(supportedResourceTypes, name):
				found = append(found, validateCoderResourceSubdirectory(childPath)...)
			}
		}
	}

	return found
}
// validateRepoStructure checks the overall layout of the repo: the registry
// directory tree and the top-level .icons directory. All problems are
// returned together as a validationPhaseFileStructureValidation error; nil is
// returned when the layout is sound.
//
// The .icons check distinguishes three failure cases so the report is not
// misleading: the path is absent, it cannot be inspected for some other
// reason, or it exists but is not a directory.
func validateRepoStructure() error {
	var problems []error
	problems = append(problems, validateRegistryDirectory()...)

	info, err := os.Stat("./.icons")
	switch {
	case errors.Is(err, os.ErrNotExist):
		problems = append(problems, errors.New("missing top-level .icons directory (used for storing reusable Coder resource icons)"))
	case err != nil:
		problems = append(problems, fmt.Errorf("unable to inspect top-level .icons directory: %w", err))
	case !info.IsDir():
		problems = append(problems, errors.New("top-level .icons exists but is not a directory (it is used for storing reusable Coder resource icons)"))
	}

	if len(problems) != 0 {
		return validationPhaseError{
			phase:  validationPhaseFileStructureValidation,
			errors: problems,
		}
	}
	return nil
}
-8
View File
@@ -1,8 +0,0 @@
---
display_name: HashiCorp
bio: HashiCorp, an IBM company, empowers organizations to automate and secure multi-cloud and hybrid environments with The Infrastructure Cloud™. Our suite of Infrastructure Lifecycle Management and Security Lifecycle Management solutions are built on projects with source code freely available at their core. The HashiCorp suite underpins the world's most critical applications, helping enterprises achieve efficiency, security, and scalability at any stage of their cloud journey.
github: hashicorp
linkedin: https://www.linkedin.com/company/hashicorp
website: https://www.hashicorp.com/
status: partner
---
-8
View File
@@ -1,8 +0,0 @@
---
display_name: Jfrog
bio: At JFrog, we are making endless software versions a thing of the past, with liquid software that flows continuously and automatically from build all the way through to production.
github: jfrog
linkedin: https://www.linkedin.com/company/jfrog-ltd
website: https://jfrog.com/
status: partner
---
+1 -1
View File
@@ -3,5 +3,5 @@ display_name: Nataindata
bio: Data engineer
github: nataindata
website: https://www.nataindata.com
status: community
status: partner
---
-446
View File
@@ -1,446 +0,0 @@
package main
import (
"bufio"
"errors"
"fmt"
"net/url"
"os"
"path"
"slices"
"strings"
"gopkg.in/yaml.v3"
)
// rootRegistryPath is the directory whose immediate sub-directories are
// treated as contributor directories. It is a relative path, so it is
// resolved against the process's working directory.
const rootRegistryPath = "./registry"

var (
	// validContributorStatuses lists every value accepted for the optional
	// "status" frontmatter field.
	validContributorStatuses = []string{"official", "partner", "community"}
	// supportedAvatarFileFormats lists the suffixes an avatar URL must end
	// with to pass validation.
	supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"}
)
// readme holds one README file that was read from disk.
type readme struct {
	// filePath is the path the README was read from.
	filePath string
	// rawText is the complete, unparsed content of the file.
	rawText string
}
// contributorProfileFrontmatter mirrors the YAML frontmatter found at the top
// of a contributor's README.md. Pointer fields are treated as optional by the
// validators in this file: a nil pointer means validation for that field is
// skipped.
type contributorProfileFrontmatter struct {
	DisplayName    string `yaml:"display_name"`
	Bio            string `yaml:"bio"`
	GithubUsername string `yaml:"github"`
	// Script assumes that if value is nil, the Registry site build step will
	// backfill the value with the user's GitHub avatar URL
	AvatarURL              *string `yaml:"avatar"`
	LinkedinURL            *string `yaml:"linkedin"`
	WebsiteURL             *string `yaml:"website"`
	SupportEmail           *string `yaml:"support_email"`
	EmployerGithubUsername *string `yaml:"employer_github"`
	ContributorStatus      *string `yaml:"status"`
}
// contributorProfile pairs the decoded frontmatter of one contributor README
// with the path of the file it was read from.
type contributorProfile struct {
	// frontmatter holds the values decoded from the README's YAML header.
	frontmatter contributorProfileFrontmatter
	// filePath is the path of the README the profile was parsed from. It is
	// used as the prefix of validation error messages.
	filePath string
}
// Compile-time check that validationPhaseError satisfies the error interface.
var _ error = validationPhaseError{}

// validationPhaseError collects every error that occurred during one named
// phase of README validation.
type validationPhaseError struct {
	// phase is the human-readable name of the validation phase.
	phase string
	// errors holds every individual problem found during that phase.
	errors []error
}

// Error renders the phase name on a header line followed by one "- ..."
// bullet per collected error, each on its own line. Bullets are sorted so
// the output is stable regardless of the order errors were collected in.
// The result always ends with a newline.
func (vpe validationPhaseError) Error() string {
	validationStrs := make([]string, 0, len(vpe.errors))
	for _, e := range vpe.errors {
		validationStrs = append(validationStrs, fmt.Sprintf("- %v", e))
	}
	slices.Sort(validationStrs)

	var msg strings.Builder
	// The header needs its own line break; without it the first bullet is
	// glued to the end of the header line.
	fmt.Fprintf(&msg, "Error during %q phase of README validation:\n", vpe.phase)
	for _, line := range validationStrs {
		msg.WriteString(line)
		msg.WriteByte('\n')
	}
	return msg.String()
}
// extractFrontmatter returns the lines found between the first two "---"
// fence lines of a README. Surrounding whitespace is trimmed from the README
// first, and the very first remaining line must be a fence.
//
// An error is returned when the README is empty or consists of whitespace
// only, does not start with a fence, has no closing fence, or cannot be
// scanned.
func extractFrontmatter(readmeText string) (string, error) {
	if readmeText == "" {
		return "", errors.New("README is empty")
	}

	const fence = "---"
	trimmed := strings.TrimSpace(readmeText)

	var fm strings.Builder
	fenceCount := 0

	lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
	// By default a Scanner gives up on any line longer than 64 KiB. Let a
	// single line grow as large as the whole input; the +1 is needed because
	// the limit is exclusive.
	lineScanner.Buffer(make([]byte, 0, 4096), len(trimmed)+1)

	for lineScanner.Scan() {
		nextLine := lineScanner.Text()
		if fenceCount == 0 && nextLine != fence {
			return "", errors.New("README does not start with frontmatter fence")
		}

		if nextLine != fence {
			fm.WriteString(nextLine)
			fm.WriteByte('\n')
			continue
		}

		fenceCount++
		if fenceCount >= 2 {
			break
		}
	}
	if err := lineScanner.Err(); err != nil {
		return "", fmt.Errorf("unable to scan README: %w", err)
	}

	// Covers both a missing closing fence and input that had no lines at all
	// once trimmed (zero fences), which must not be reported as success.
	if fenceCount < 2 {
		return "", errors.New("README does not have two sets of frontmatter fences")
	}
	return fm.String(), nil
}
// validateContributorGithubUsername requires the github frontmatter field to
// be present and, once lower-cased, to be unchanged by URL path escaping.
func validateContributorGithubUsername(githubUsername string) error {
	normalized := strings.ToLower(githubUsername)
	switch {
	case githubUsername == "":
		return errors.New("missing GitHub username")
	case url.PathEscape(normalized) != normalized:
		return fmt.Errorf("gitHub username %q is not a valid URL path segment", githubUsername)
	default:
		return nil
	}
}
// validateContributorEmployerGithubUsername checks the optional
// employer_github frontmatter field and returns every problem found. A nil
// pointer means the field was omitted; nil is returned in that case.
//
// The value must be non-empty, must be unchanged by URL path escaping once
// lower-cased, and must not name the contributor themselves. That last
// comparison ignores letter case so that "OctoCat" and "octocat" are
// recognised as the same account.
func validateContributorEmployerGithubUsername(
	employerGithubUsername *string,
	githubUsername string,
) []error {
	if employerGithubUsername == nil {
		return nil
	}

	problems := []error{}
	if *employerGithubUsername == "" {
		problems = append(problems, errors.New("employer_github field is defined but has empty value"))
		return problems
	}

	lower := strings.ToLower(*employerGithubUsername)
	if uriSafe := url.PathEscape(lower); uriSafe != lower {
		problems = append(problems, fmt.Errorf("gitHub company username %q is not a valid URL path segment", *employerGithubUsername))
	}

	if strings.EqualFold(*employerGithubUsername, githubUsername) {
		problems = append(problems, fmt.Errorf("cannot list own GitHub name (%q) as employer", githubUsername))
	}

	return problems
}
// validateContributorDisplayName reports an error when the display_name
// frontmatter field is missing. A value made up solely of whitespace is
// treated as missing too, because it would render as an empty name.
func validateContributorDisplayName(displayName string) error {
	if strings.TrimSpace(displayName) == "" {
		return errors.New("missing display_name")
	}
	return nil
}
// validateContributorLinkedinURL checks the optional linkedin frontmatter
// field. A nil pointer means the field was omitted and is always valid.
//
// url.ParseRequestURI also accepts bare absolute paths ("/company/x") and
// non-web schemes, neither of which can be a link to a LinkedIn page, so the
// parsed value must additionally be an http(s) URL with a host.
func validateContributorLinkedinURL(linkedinURL *string) error {
	if linkedinURL == nil {
		return nil
	}

	parsed, err := url.ParseRequestURI(*linkedinURL)
	if err != nil {
		return fmt.Errorf("linkedIn URL %q is not valid: %w", *linkedinURL, err)
	}
	isWebScheme := parsed.Scheme == "http" || parsed.Scheme == "https"
	if !isWebScheme || parsed.Host == "" {
		return fmt.Errorf("linkedIn URL %q is not valid: must be an absolute http(s) URL", *linkedinURL)
	}

	return nil
}
// validateContributorSupportEmail performs structural checks on the optional
// support_email frontmatter field and returns every problem it finds. A nil
// pointer means the field was omitted; nil is returned in that case.
//
// Beyond the username/domain/top-level-domain checks, addresses containing
// more than one "@" or any whitespace are rejected, since such values can
// never be a deliverable address.
func validateContributorSupportEmail(email *string) []error {
	if email == nil {
		return nil
	}

	problems := []error{}

	// Can't 100% validate that this is correct without actually sending
	// an email, and especially with some contributors being individual
	// developers, we don't want to do that on every single run of the CI
	// pipeline. Best we can do is verify the general structure
	username, server, ok := strings.Cut(*email, "@")
	if !ok {
		problems = append(problems, fmt.Errorf("email address %q is missing @ symbol", *email))
		return problems
	}

	if username == "" {
		problems = append(problems, fmt.Errorf("email address %q is missing username", *email))
	}
	if strings.Contains(server, "@") {
		problems = append(problems, fmt.Errorf("email address %q contains more than one @ symbol", *email))
	}
	if strings.ContainsAny(*email, " \t\r\n") {
		problems = append(problems, fmt.Errorf("email address %q is not allowed to contain whitespace", *email))
	}

	domain, tld, ok := strings.Cut(server, ".")
	if !ok {
		problems = append(problems, fmt.Errorf("email address %q is missing period for server segment", *email))
		return problems
	}

	if domain == "" {
		problems = append(problems, fmt.Errorf("email address %q is missing domain", *email))
	}
	if tld == "" {
		problems = append(problems, fmt.Errorf("email address %q is missing top-level domain", *email))
	}
	if strings.Contains(*email, "?") {
		problems = append(problems, errors.New("email is not allowed to contain query parameters"))
	}

	return problems
}
// validateContributorWebsite checks the optional website frontmatter field.
// A nil pointer means the field was omitted and is always valid.
//
// The value must be an absolute http(s) URL with a host; url.ParseRequestURI
// alone would also accept bare paths such as "/about".
func validateContributorWebsite(websiteURL *string) error {
	if websiteURL == nil {
		return nil
	}

	parsed, err := url.ParseRequestURI(*websiteURL)
	if err != nil {
		return fmt.Errorf("website URL %q is not valid: %w", *websiteURL, err)
	}
	isWebScheme := parsed.Scheme == "http" || parsed.Scheme == "https"
	if !isWebScheme || parsed.Host == "" {
		return fmt.Errorf("website URL %q is not valid: must be an absolute http(s) URL", *websiteURL)
	}

	return nil
}
// validateContributorStatus checks the optional status frontmatter field. An
// omitted field (nil) is valid; otherwise the value has to be one of the
// entries in validContributorStatuses.
func validateContributorStatus(status *string) error {
	if status == nil || slices.Contains(validContributorStatuses, *status) {
		return nil
	}
	return fmt.Errorf("contributor status %q is not valid", *status)
}
// validateContributorAvatarURL performs structural checks on the optional
// avatar frontmatter field and returns every problem found. A nil pointer
// means the field was omitted; nil is returned in that case.
//
// Can't validate the image actually leads to a valid resource in a pure
// function, but can at least catch obvious problems
func validateContributorAvatarURL(avatarURL *string) []error {
	if avatarURL == nil {
		return nil
	}

	problems := []error{}
	if *avatarURL == "" {
		problems = append(problems, errors.New("avatar URL must be omitted or non-empty string"))
		return problems
	}

	// Have to use .Parse instead of .ParseRequestURI because this is the
	// one field that's allowed to be a relative URL
	if _, err := url.Parse(*avatarURL); err != nil {
		problems = append(problems, fmt.Errorf("URL %q is not a valid relative or absolute URL", *avatarURL))
	}
	if strings.Contains(*avatarURL, "?") {
		problems = append(problems, errors.New("avatar URL is not allowed to contain search parameters"))
	}

	hasSupportedFormat := slices.ContainsFunc(supportedAvatarFileFormats, func(format string) bool {
		return strings.HasSuffix(*avatarURL, format)
	})
	if !hasSupportedFormat {
		// Report the whole URL rather than "everything after the last dot":
		// for a URL whose last path segment has no extension that substring
		// is a piece of the host or a directory name, not a file extension.
		problems = append(problems, fmt.Errorf("avatar URL %q does not end in a supported file format: [%s]", *avatarURL, strings.Join(supportedAvatarFileFormats, ", ")))
	}

	return problems
}
// addFilePathToError prefixes err's message with the quoted file path. The
// original error is wrapped, so errors.Is and errors.As keep working on the
// result. A nil err yields nil, so a success value is never turned into an
// error by accident.
func addFilePathToError(filePath string, err error) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%q: %w", filePath, err)
}
// validateContributorYaml runs every per-field validator against a parsed
// contributor profile and returns all problems found, each one prefixed with
// the path of the README it came from. The result is empty (but non-nil)
// when the profile is valid.
func validateContributorYaml(yml contributorProfile) []error {
	fields := yml.frontmatter
	found := []error{}

	// Validators that report at most one problem each.
	singleResults := []error{
		validateContributorGithubUsername(fields.GithubUsername),
		validateContributorDisplayName(fields.DisplayName),
		validateContributorLinkedinURL(fields.LinkedinURL),
		validateContributorWebsite(fields.WebsiteURL),
		validateContributorStatus(fields.ContributorStatus),
	}
	for _, problem := range singleResults {
		if problem != nil {
			found = append(found, addFilePathToError(yml.filePath, problem))
		}
	}

	// Validators that can report several problems at once.
	multiResults := [][]error{
		validateContributorEmployerGithubUsername(fields.EmployerGithubUsername, fields.GithubUsername),
		validateContributorSupportEmail(fields.SupportEmail),
		validateContributorAvatarURL(fields.AvatarURL),
	}
	for _, group := range multiResults {
		for _, problem := range group {
			found = append(found, addFilePathToError(yml.filePath, problem))
		}
	}

	return found
}
// parseContributorProfile extracts the frontmatter from a README and decodes
// it as YAML. Errors are prefixed with the README's path.
func parseContributorProfile(rm readme) (contributorProfile, error) {
	rawFrontmatter, extractErr := extractFrontmatter(rm.rawText)
	if extractErr != nil {
		return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, extractErr)
	}

	var decoded contributorProfileFrontmatter
	if decodeErr := yaml.Unmarshal([]byte(rawFrontmatter), &decoded); decodeErr != nil {
		return contributorProfile{}, fmt.Errorf("%q: failed to parse: %v", rm.filePath, decodeErr)
	}

	profile := contributorProfile{
		frontmatter: decoded,
		filePath:    rm.filePath,
	}
	return profile, nil
}
// parseContributorFiles turns raw README files into contributor profiles
// keyed by GitHub username. It works in two stages, each collecting every
// error before giving up:
//
//  1. Parsing: each README is decoded; a GitHub username that appears twice
//     is an error. Failures are returned under the "YAML parsing" phase.
//  2. Validation: each profile is checked field by field, and every employer
//     referenced by a valid profile must itself have a profile. Failures are
//     returned under the "Raw YAML Validation" phase.
//
// Profiles and employers are visited in sorted order so that the employee
// lists embedded in error messages are identical from run to run.
func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfile, error) {
	profilesByUsername := map[string]contributorProfile{}
	yamlParsingErrors := []error{}
	for _, rm := range readmeEntries {
		p, err := parseContributorProfile(rm)
		if err != nil {
			yamlParsingErrors = append(yamlParsingErrors, err)
			continue
		}

		if prev, alreadyExists := profilesByUsername[p.frontmatter.GithubUsername]; alreadyExists {
			yamlParsingErrors = append(yamlParsingErrors, fmt.Errorf("%q: GitHub name %s conflicts with field defined in %q", p.filePath, p.frontmatter.GithubUsername, prev.filePath))
			continue
		}
		profilesByUsername[p.frontmatter.GithubUsername] = p
	}
	if len(yamlParsingErrors) != 0 {
		return nil, validationPhaseError{
			phase:  "YAML parsing",
			errors: yamlParsingErrors,
		}
	}

	// Map iteration order is random; sort the keys for reproducible output.
	usernames := make([]string, 0, len(profilesByUsername))
	for username := range profilesByUsername {
		usernames = append(usernames, username)
	}
	slices.Sort(usernames)

	employeeGithubGroups := map[string][]string{}
	yamlValidationErrors := []error{}
	for _, username := range usernames {
		p := profilesByUsername[username]
		if problems := validateContributorYaml(p); len(problems) > 0 {
			yamlValidationErrors = append(yamlValidationErrors, problems...)
			continue
		}

		if employer := p.frontmatter.EmployerGithubUsername; employer != nil {
			employeeGithubGroups[*employer] = append(employeeGithubGroups[*employer], p.frontmatter.GithubUsername)
		}
	}

	companyNames := make([]string, 0, len(employeeGithubGroups))
	for companyName := range employeeGithubGroups {
		companyNames = append(companyNames, companyName)
	}
	slices.Sort(companyNames)

	for _, companyName := range companyNames {
		if _, found := profilesByUsername[companyName]; found {
			continue
		}
		yamlValidationErrors = append(yamlValidationErrors, fmt.Errorf("company %q does not exist in %q directory but is referenced by these profiles: [%s]", companyName, rootRegistryPath, strings.Join(employeeGithubGroups[companyName], ", ")))
	}
	if len(yamlValidationErrors) != 0 {
		return nil, validationPhaseError{
			phase:  "Raw YAML Validation",
			errors: yamlValidationErrors,
		}
	}

	return profilesByUsername, nil
}
// aggregateContributorReadmeFiles reads the README.md of every directory
// directly under rootRegistryPath. An entry that is not a directory, or a
// README that cannot be read, is recorded as a problem; if any problems were
// recorded they are returned together under the "FileSystem reading" phase
// and no READMEs are returned.
func aggregateContributorReadmeFiles() ([]readme, error) {
	entries, err := os.ReadDir(rootRegistryPath)
	if err != nil {
		return nil, err
	}

	loaded := []readme{}
	found := []error{}
	for _, entry := range entries {
		entryPath := path.Join(rootRegistryPath, entry.Name())
		if !entry.IsDir() {
			found = append(found, fmt.Errorf("detected non-directory file %q at base of main Registry directory", entryPath))
			continue
		}

		readmePath := path.Join(entryPath, "README.md")
		contents, readErr := os.ReadFile(readmePath)
		if readErr != nil {
			found = append(found, readErr)
			continue
		}
		loaded = append(loaded, readme{
			filePath: readmePath,
			rawText:  string(contents),
		})
	}

	if len(found) > 0 {
		return nil, validationPhaseError{
			phase:  "FileSystem reading",
			errors: found,
		}
	}
	return loaded, nil
}
// validateRelativeUrls verifies that every relative avatar URL points at a
// file inside the contributor's own directory. Avatar values that do not
// start with "." or "/" are treated as absolute URLs and skipped. All
// problems are returned together under the "Relative URL validation" phase.
//
// The avatar path is normalized before the "outside the directory" check so
// that values such as "./../other/avatar.png" cannot slip past it, and the
// file is inspected with os.Stat rather than being read in full. Profiles
// are visited in sorted key order to keep the error list stable.
func validateRelativeUrls(
	contributors map[string]contributorProfile,
) error {
	// This function only validates relative avatar URLs for now, but it can be
	// beefed up to validate more in the future
	problems := []error{}

	keys := make([]string, 0, len(contributors))
	for key := range contributors {
		keys = append(keys, key)
	}
	slices.Sort(keys)

	for _, key := range keys {
		con := contributors[key]

		// If the avatar URL is missing, we'll just assume that the Registry
		// site build step will take care of filling in the data properly
		if con.frontmatter.AvatarURL == nil {
			continue
		}
		avatar := *con.frontmatter.AvatarURL

		isRelativeURL := strings.HasPrefix(avatar, ".") || strings.HasPrefix(avatar, "/")
		if !isRelativeURL {
			continue
		}

		// A leading slash is interpreted relative to the contributor's
		// directory, so drop it and collapse any "." / ".." segments before
		// checking whether the path climbs out of that directory.
		relative := path.Clean(strings.TrimLeft(avatar, "/"))
		if relative == ".." || strings.HasPrefix(relative, "../") {
			problems = append(problems, fmt.Errorf("%q: relative avatar URLs cannot be placed outside a user's namespaced directory", con.filePath))
			continue
		}

		resolved := path.Join(path.Dir(con.filePath), relative)
		info, err := os.Stat(resolved)
		if err != nil || info.IsDir() {
			problems = append(problems, fmt.Errorf("%q: relative avatar path %q does not point to image in file system", con.filePath, avatar))
		}
	}

	if len(problems) == 0 {
		return nil
	}
	return validationPhaseError{
		phase:  "Relative URL validation",
		errors: problems,
	}
}
-39
View File
@@ -1,39 +0,0 @@
// This package is for validating all contributors within the main Registry
// directory. It validates that it has nothing but sub-directories, and that
// each sub-directory has a README.md file. Each of those files must then
// describe a specific contributor. The contents of these files will be parsed
// by the Registry site build step, to be displayed in the Registry site's UI.
package main
import (
"log"
)
// main loads every contributor README, parses and validates them, and then
// checks their relative URLs. Any failure aborts the run via log.Panic.
//
// The "Processed N README files as valid contributor profiles" line is only
// logged once parsing is known to have succeeded, so a failed run never
// claims that profiles were valid.
func main() {
	log.Println("Starting README validation")
	allReadmeFiles, err := aggregateContributorReadmeFiles()
	if err != nil {
		log.Panic(err)
	}

	log.Printf("Processing %d README files\n", len(allReadmeFiles))
	contributors, err := parseContributorFiles(allReadmeFiles)
	if err != nil {
		log.Panic(err)
	}
	log.Printf(
		"Processed %d README files as valid contributor profiles",
		len(contributors),
	)

	err = validateRelativeUrls(contributors)
	if err != nil {
		log.Panic(err)
	}
	log.Println("All relative URLs for READMEs are valid")

	log.Printf(
		"Processed all READMEs in the %q directory\n",
		rootRegistryPath,
	)
}