Skip to content
This repository was archived by the owner on Feb 20, 2020. It is now read-only.

Implement 1459203 (remove NSSM), 1436274 (use eventlog) #214

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 15 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,28 +112,25 @@ with automatic quarantining of workers, waiting for custom events, etc).
and the public key will be written to standard out. Keep a copy of the
public key if you wish to validate artifact signatures.

3. Download NSSM 2.24 from https://nssm.cc/release/nssm-2.24.zip and extract it
under `C:\`.

4. Install generic-worker as a Windows service running under the `LocalSystem`
3. Install generic-worker as a Windows service running under the `LocalSystem`
account, by running the following command as an `Administrator`:

* `generic-worker.exe install service` (see `generic-worker.exe --help` to
apply non-default configuration settings)

5. Download livelog from https://github.com/taskcluster/livelog/releases and
4. Download livelog from https://github.com/taskcluster/livelog/releases and
place it in `C:\generic-worker\livelog.exe`.

6. Download taskcluster proxy from
5. Download taskcluster proxy from
https://github.com/taskcluster/taskcluster-proxy/releases and place it in
`C:\generic-worker\taskcluster-proxy.exe`.

7. Create `C:\generic-worker\generic-worker.config` with appropriate values.
6. Create `C:\generic-worker\generic-worker.config` with appropriate values.

8. Edit file `C:\generic-worker\generic-worker.config` with appropriate
7. Edit file `C:\generic-worker\generic-worker.config` with appropriate
settings (see `generic-worker.exe --help` for information).

9. Reboot the machine, and the worker should be running. Check logs under
8. Reboot the machine, and the worker should be running. Check logs under
`C:\generic-worker\generic-worker.log`.


Expand Down Expand Up @@ -351,6 +348,9 @@ and reports back results to the queue.
Usage:
generic-worker run [--config CONFIG-FILE]
[--configure-for-aws | --configure-for-gcp]
generic-worker install service [--service-name SERVICE-NAME]
[--config CONFIG-FILE]
[--configure-for-aws | --configure-for-gcp]
generic-worker show-payload-schema
generic-worker new-ed25519-keypair --file ED25519-PRIVATE-KEY-FILE
generic-worker --help
Expand Down Expand Up @@ -693,6 +693,12 @@ Only used in a single __Windows-specific__ test - if you don't have a Z: drive
setup on your computer, or you do but you also run tests from the Z: drive, you
can set this env var to a non-empty string to skip this test.

### `GW_SKIP_ADMIN_REQUIRED_TESTS`

When this environment variable is set, Windows tests that rely on administrative
privileges to pass are skipped. This includes tests that install and remove
Generic Worker as a service.

### `GW_TESTS_RUN_AS_CURRENT_USER`

This environment variable applies only to the __multiuser__ engine.
Expand Down
4 changes: 2 additions & 2 deletions build.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ cd gw-codegen
go install -v || exit /b %ERRORLEVEL%
cd ..
go generate || exit /b %ERRORLEVEL%
go install -v ./... || exit /b %ERRORLEVEL%
go install -v -tags multiuser ./... || exit /b %ERRORLEVEL%

:: this counts the number of lines returned by git status
:: dump temp file a directory higher, otherwise git status reports the tmp1.txt file!
Expand All @@ -23,5 +23,5 @@ git rev-parse HEAD > revision.txt
set /p REVISION=< revision.txt
del revision.txt
set GORACE=history_size=7
go test -ldflags "-X github.com/taskcluster/generic-worker.revision=%REVISION%" ./... || exit /b %ERRORLEVEL%
go test -tags multiuser -ldflags "-X github.com/taskcluster/generic-worker.revision=%REVISION%" ./... || exit /b %ERRORLEVEL%
ineffassign . || exit /b %ERRORLEVEL%
3 changes: 2 additions & 1 deletion helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ func execute(t *testing.T, expectedExitCode ExitCode) {
if err != nil {
t.Fatalf("Test setup failure - could not write to tasks-resolved-count.txt file: %v", err)
}
exitCode := RunWorker()
interruptChan := make(chan os.Signal, 1)
exitCode := RunWorker(interruptChan)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this allow us to Ctrl-C out of tests now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it does, yes. Were we not able to before?

I changed this to match the signature of the function, which I had changed before running the tests (yikes!) :P


if exitCode != expectedExitCode {
t.Fatalf("Something went wrong executing worker - got exit code %v but was expecting exit code %v", exitCode, expectedExitCode)
Expand Down
6 changes: 6 additions & 0 deletions helper_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,9 @@ func copyTestdataFileTo(src, dest string) []string {
// singleCommandNoArgs wraps a bare command name in a one-element string
// slice, i.e. a command line consisting of the command and no arguments.
func singleCommandNoArgs(command string) []string {
	commandLine := make([]string, 1)
	commandLine[0] = command
	return commandLine
}

// shouldRunAdminTests reports whether tests that need administrative
// privileges should run.
//
// This is opt-out so that we don't skip tests by default: the tests run
// unless GW_SKIP_ADMIN_REQUIRED_TESTS (the name documented in the README) is
// present in the environment. The previously used name
// GW_SKIP_SERVICE_INSTALLATION_TESTS is still honoured so that existing
// setups keep working. Only the presence of the variable matters; its value
// (even an empty one) is ignored.
func shouldRunAdminTests() bool {
	skipVars := []string{
		"GW_SKIP_ADMIN_REQUIRED_TESTS",
		"GW_SKIP_SERVICE_INSTALLATION_TESTS",
	}
	for _, name := range skipVars {
		if _, isSet := os.LookupEnv(name); isSet {
			return false
		}
	}
	return true
}
150 changes: 84 additions & 66 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func main() {
if revision != "" {
versionName += " [ revision: https://github.com/taskcluster/generic-worker/commits/" + revision + " ]"
}

arguments, err := docopt.Parse(usage(versionName), nil, true, versionName, false, true)
if err != nil {
log.Println("Error parsing command line arguments!")
Expand All @@ -108,70 +109,12 @@ func main() {
switch {
case arguments["show-payload-schema"]:
fmt.Println(taskPayloadSchema())

case arguments["run"]:
configureForAWS = arguments["--configure-for-aws"].(bool)
configureForGCP = arguments["--configure-for-gcp"].(bool)
configFile = arguments["--config"].(string)
config, err = loadConfig(configFile, configureForAWS, configureForGCP)

// We need to persist the generic-worker config file if we fetched it
// over the network, for example if the config is fetched from the AWS
// Provisioner (--configure-for-aws) or from the Google Cloud service
// (--configure-for-gcp). We delete taskcluster credentials from the
// AWS provisioner as soon as we've fetched them, so unless we persist
// the config on the first run, the worker will not work after reboots.
//
// We persist the config _before_ checking for an error from the
// loadConfig function call, so that if there was an error, we can see
// what the processed config looked like before the error occurred.
//
// Note, we only persist the config file if the file doesn't already
// exist. We don't want to overwrite an existing user-provided config.
// The full config is logged (with secrets obfuscated) in the server
// logs, so this should provide a reliable way to inspect what config
// was in the case of an unexpected failure, including default values
// for config settings not provided in the user-supplied config file.
if _, statError := os.Stat(configFile); os.IsNotExist(statError) && config != nil {
err = config.Persist(configFile)
exitOnError(CANT_SAVE_CONFIG, err, "Not able to persist config file %v", configFile)
}
exitOnError(CANT_LOAD_CONFIG, err, "Error loading configuration file %v", configFile)

// Config known to be loaded successfully at this point...

// * If running tasks as dedicated OS users, we should take ownership
// of generic-worker config file, and block access to task users, so
// that tasks can't read from or write to it.
// * If running tasks under the same user account as the generic-worker
// process, then we can't avoid that tasks can read the config file,
// we can just hope that the config file is at least not writable by
// the current user. In this case we won't change file permissions.
secureConfigFile()

exitCode := RunWorker()
handleConfig(arguments)
interruptChan := make(chan os.Signal, 1)
exitCode := RunWorker(interruptChan)
log.Printf("Exiting worker with exit code %v", exitCode)
switch exitCode {
case REBOOT_REQUIRED:
if !config.DisableReboots {
immediateReboot()
}
case IDLE_TIMEOUT:
if config.ShutdownMachineOnIdle {
immediateShutdown("generic-worker idle timeout")
}
case INTERNAL_ERROR:
if config.ShutdownMachineOnInternalError {
immediateShutdown("generic-worker internal error")
}
case NONCURRENT_DEPLOYMENT_ID:
immediateShutdown("generic-worker deploymentId is not latest")
}
os.Exit(int(exitCode))
case arguments["install"]:
// platform specific...
err := install(arguments)
exitOnError(CANT_INSTALL_GENERIC_WORKER, err, "Error installing generic worker")
handleExitCode(exitCode)
case arguments["new-ed25519-keypair"]:
err := generateEd25519Keypair(arguments["--file"].(string))
exitOnError(CANT_CREATE_ED25519_KEYPAIR, err, "Error generating ed25519 keypair %v for worker", arguments["--file"].(string))
Expand All @@ -181,6 +124,68 @@ func main() {
}
}

// handleConfig reads the configuration-related command line arguments
// (--configure-for-aws, --configure-for-gcp and --config), loads the worker
// config into the config variable, persists it to disk if no config file
// exists yet, and finally secures the config file.
//
// The process exits via exitOnError if the config cannot be persisted
// (CANT_SAVE_CONFIG) or could not be loaded (CANT_LOAD_CONFIG).
func handleConfig(arguments map[string]interface{}) {
	configureForAWS = arguments["--configure-for-aws"].(bool)
	configureForGCP = arguments["--configure-for-gcp"].(bool)
	configFile = arguments["--config"].(string)

	// Declare the error separately and assign with `=` rather than `:=`, so
	// that config is assigned to the existing variable instead of being
	// shadowed by a new local one.
	var loadErr error
	config, loadErr = loadConfig(configFile, configureForAWS, configureForGCP)

	// We need to persist the generic-worker config file if we fetched it
	// over the network, for example if the config is fetched from the AWS
	// Provisioner (--configure-for-aws) or from the Google Cloud service
	// (--configure-for-gcp). We delete taskcluster credentials from the
	// AWS provisioner as soon as we've fetched them, so unless we persist
	// the config on the first run, the worker will not work after reboots.
	//
	// We persist the config _before_ checking for an error from the
	// loadConfig function call, so that if there was an error, we can see
	// what the processed config looked like before the error occurred.
	//
	// Note, we only persist the config file if the file doesn't already
	// exist. We don't want to overwrite an existing user-provided config.
	// The full config is logged (with secrets obfuscated) in the server
	// logs, so this should provide a reliable way to inspect what config
	// was in the case of an unexpected failure, including default values
	// for config settings not provided in the user-supplied config file.
	if _, statError := os.Stat(configFile); os.IsNotExist(statError) && config != nil {
		// The persist error is kept in its own variable so that a
		// successful persist cannot mask a failure from loadConfig.
		persistErr := config.Persist(configFile)
		exitOnError(CANT_SAVE_CONFIG, persistErr, "Not able to persist config file %v", configFile)
	}
	exitOnError(CANT_LOAD_CONFIG, loadErr, "Error loading configuration file %v", configFile)

	// Config known to be loaded successfully at this point...

	// * If running tasks as dedicated OS users, we should take ownership
	//   of generic-worker config file, and block access to task users, so
	//   that tasks can't read from or write to it.
	// * If running tasks under the same user account as the generic-worker
	//   process, then we can't avoid that tasks can read the config file,
	//   we can just hope that the config file is at least not writable by
	//   the current user. In this case we won't change file permissions.
	secureConfigFile()
}

// handleExitCode performs the machine-level action that corresponds to the
// worker's exit code and then terminates the process with that exit code:
//
//   - REBOOT_REQUIRED: reboot, unless config.DisableReboots is set
//   - IDLE_TIMEOUT: shut down if config.ShutdownMachineOnIdle is set
//   - INTERNAL_ERROR: shut down if config.ShutdownMachineOnInternalError is set
//   - NONCURRENT_DEPLOYMENT_ID: always shut down
//
// Any other exit code triggers no machine-level action.
func handleExitCode(exitCode ExitCode) {
	// An empty reason means that no shutdown is requested.
	shutdownReason := ""

	if exitCode == REBOOT_REQUIRED {
		if !config.DisableReboots {
			immediateReboot()
		}
	} else if exitCode == IDLE_TIMEOUT {
		if config.ShutdownMachineOnIdle {
			shutdownReason = "generic-worker idle timeout"
		}
	} else if exitCode == INTERNAL_ERROR {
		if config.ShutdownMachineOnInternalError {
			shutdownReason = "generic-worker internal error"
		}
	} else if exitCode == NONCURRENT_DEPLOYMENT_ID {
		shutdownReason = "generic-worker deploymentId is not latest"
	}

	if shutdownReason != "" {
		immediateShutdown(shutdownReason)
	}
	os.Exit(int(exitCode))
}

func loadConfig(filename string, queryAWSUserData bool, queryGCPMetaData bool) (*gwconfig.Config, error) {
// TODO: would be better to have a json schema, and also define defaults in
// only one place if possible (defaults also declared in `usage`)
Expand Down Expand Up @@ -358,7 +363,7 @@ func CwdOrPanic() string {
return cwd
}

func RunWorker() (exitCode ExitCode) {
func RunWorker(interruptChan chan os.Signal) (exitCode ExitCode) {
defer func() {
if r := recover(); r != nil {
HandleCrash(r)
Expand Down Expand Up @@ -415,8 +420,7 @@ func RunWorker() (exitCode ExitCode) {
// use zero value, to be sure that a check is made before first task runs
lastQueriedProvisioner := time.Time{}
lastReportedNoTasks := time.Now()
sigInterrupt := make(chan os.Signal, 1)
signal.Notify(sigInterrupt, os.Interrupt)
signal.Notify(interruptChan, os.Interrupt)
if RotateTaskEnvironment() {
return REBOOT_REQUIRED
}
Expand Down Expand Up @@ -514,7 +518,8 @@ func RunWorker() (exitCode ExitCode) {
// since a task could complete in less than that amount of time.
select {
case <-wait5Seconds.C:
case <-sigInterrupt:
case <-interruptChan:
log.Printf("RunWorker received SIGINT")
return WORKER_STOPPED
}
}
Expand Down Expand Up @@ -1184,10 +1189,23 @@ func RotateTaskEnvironment() (reboot bool) {
return false
}

// writeCrashFile writes details of a fatal error to a file named
// generic-worker-crash-exit-<exit code>-<unix timestamp>.log in the
// directory of os.Args[0]. This is useful for debugging broken logging.
//
// The file contains the exit code, the log message format string, its
// arguments and, on a second line, the error text. A nil err is recorded as
// "<nil>" rather than causing a panic. If the file cannot be written, the
// failure is only logged; it is never returned to the caller.
func writeCrashFile(exitCode ExitCode, err error, logMessage string, args ...interface{}) {
	filename := filepath.Join(
		filepath.Dir(os.Args[0]),
		fmt.Sprintf("generic-worker-crash-exit-%d-%d.log", exitCode, time.Now().Unix()),
	)

	// Guard against a nil error so that this diagnostic helper can never
	// itself crash the process with a nil dereference.
	errText := "<nil>"
	if err != nil {
		errText = err.Error()
	}
	content := fmt.Sprintf("exited with %d, %q, %#v", exitCode, logMessage, args) + "\n" + errText

	// A separate variable is used for the write result, so the error being
	// reported is not confused with a failure to report it.
	if writeErr := ioutil.WriteFile(filename, []byte(content), 0666); writeErr != nil {
		log.Printf("Could not open crash file %q: %v", filename, writeErr)
	}
}

func exitOnError(exitCode ExitCode, err error, logMessage string, args ...interface{}) {
if err == nil {
return
}
writeCrashFile(exitCode, err, logMessage, args...)
log.Printf(logMessage, args...)
log.Printf("%v", err)
os.Exit(int(exitCode))
Expand Down
3 changes: 2 additions & 1 deletion main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ func TestIdleWithoutCrash(t *testing.T) {
testutil.RequireTaskclusterCredentials(t)
start := time.Now()
config.IdleTimeoutSecs = 7
exitCode := RunWorker()
interruptChan := make(chan os.Signal, 1)
exitCode := RunWorker(interruptChan)
end := time.Now()
if exitCode != IDLE_TIMEOUT {
t.Fatalf("Was expecting exit code %v, but got exit code %v", IDLE_TIMEOUT, exitCode)
Expand Down
Loading