Skip to content

Commit

Permalink
Merge branch 'release/0.13.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
sb10 committed May 21, 2018
2 parents 05e071e + 4e36873 commit 6de8da6
Show file tree
Hide file tree
Showing 35 changed files with 2,963 additions and 863 deletions.
44 changes: 44 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,50 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this
project adheres to [Semantic Versioning](http://semver.org/).


## [0.13.0] - 2018-05-21
### Added
- Minimal LSF client (bsub, bjobs, bkill) emulation, for using wr as the
backend scheduler and runner for legacy or alternate workflow systems, such as
Nextflow or Martian.
- `wr add` has new --monitor_docker option to get accurate memory and cpu usage
stats for jobs that run docker, and to kill those dockers when you kill the
job.
- `wr add` has new --cloud_shared option, for turning on a simple NFS shared
disk when using the OpenStack scheduler with Ubuntu.
- `wr status`, `retry`, `kill` and `remove` take new -z and -y modifiers to
treat -i as a repgroup substring (show status of jobs in multiple repgroups) or
as an internal job identifier (which are now displayed by `status`).
- `wr status` has new -o option to define the output format, including new json
and summary formats (shows mean resource usage across a repgroup).

### Changed
- Jobs are now only killed if they both use more than expected memory and more
than 90% of total physical memory.
- Local scheduler (and by extension some behaviour of the OpenStack scheduler)
now does bin packing, trying to run as many jobs as possible in parallel by
filling in "gaps" in resource usage. Commands that use more resources will be
scheduled to run before other commands. Job priority only decides the order
that jobs of equal resource usage run in.
- Trying to start the manager in OpenStack mode outside of OpenStack now
immediately returns an error.
- `wr manager start` now shows error/crit lines from the log, on failure to
start.
- Backwards incompatible changes to cloud API.

### Fixed
- `wr manager start` no longer logs its authentication token.
- Race condition where an OpenStack server could be destroyed yet be considered
usable.
- `wr` client commands now obey managerhost config option when not running on
the same host as the manager.
- OpenStack scheduler no longer ignores requested job memory when a non-default
OS disk is set.
- Reported peak memory usage of jobs fixed to consider usage of all child
processes of the initial command (even if they change their process group and
fork).
- Reported CPU time of jobs fixed to include user time, not just system time.


## [0.12.0] - 2018-04-27
### Added
- All communications to the manager are now via TLS, and authentication is
Expand Down
32 changes: 23 additions & 9 deletions cloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -573,10 +573,10 @@ func (p *Provider) Spawn(os string, osUser string, flavorID string, diskGB int,
// non-blank it will CopyOver the specified files (after the server is ready,
// before any postCreationScript is run).
//
// postCreationScript is the optional []byte content of a script that will be
// run on the server (as the user supplied to Spawn()) once it is ready, and it
// will complete before this function returns; empty slice means do nothing.
func (s *Server) WaitUntilReady(files string, postCreationScript ...[]byte) error {
// postCreationScript is the []byte content of a script that will be run on the
// server (as the user supplied to Spawn()) once it is ready, and it will
// complete before this function returns; empty slice means do nothing.
func (s *Server) WaitUntilReady(files string, postCreationScript []byte) error {
// wait for ssh to come up
_, err := s.SSHClient()
if err != nil {
Expand Down Expand Up @@ -619,9 +619,9 @@ SENTINEL:
}

// run the postCreationScript
if len(postCreationScript[0]) > 0 {
if len(postCreationScript) > 0 {
pcsPath := "/tmp/.postCreationScript"
err = s.CreateFile(string(postCreationScript[0]), pcsPath)
err = s.CreateFile(string(postCreationScript), pcsPath)
if err != nil {
return fmt.Errorf("cloud server start up script failed to upload: %s", err)
}
Expand All @@ -647,7 +647,7 @@ SENTINEL:
s.logger.Warn("failed to remove post creation script", "path", pcsPath, "err", rmErr)
}

s.Script = postCreationScript[0]
s.Script = postCreationScript

// because the postCreationScript may have altered PATH and other things
// that subsequent RunCmd may rely on, clear the client
Expand Down Expand Up @@ -727,15 +727,29 @@ func (p *Provider) HeadNode() *Server {

// LocalhostServer returns a Server object with details of the host we are
// currently running on. No cloud API calls are made to construct this.
func (p *Provider) LocalhostServer(os string, postCreationScript []byte, configFiles string) (*Server, error) {
func (p *Provider) LocalhostServer(os string, postCreationScript []byte, configFiles string, cidr ...string) (*Server, error) {
maxRAM, err := internal.ProcMeminfoMBs()
if err != nil {
return nil, err
}

diskSize := internal.DiskSize()

ip, err := internal.CurrentIP(cidr[0])
if err != nil {
return nil, err
}

user, err := internal.Username()
if err != nil {
return nil, err
}

return &Server{
IP: "127.0.0.1",
Name: "localhost",
IP: ip,
OS: os,
UserName: user,
Script: postCreationScript,
ConfigFiles: configFiles,
Flavor: &Flavor{
Expand Down
34 changes: 15 additions & 19 deletions cloud/openstack.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func (p *openstackp) requiredEnv() []string {
return openstackReqEnvs[:]
}

// maybedEnv returns envs that might be required.
// maybeEnv returns envs that might be required.
func (p *openstackp) maybeEnv() []string {
return openstackMaybeEnvs[:]
}
Expand Down Expand Up @@ -174,25 +174,10 @@ func (p *openstackp) initialize(logger log15.Logger) error {
return err
}

// get the external network id
p.externalNetworkID, err = networks.IDFromName(p.networkClient, p.poolName)
if err != nil {
return err
}

// get the details of all the possible server flavors
// flavors and images are retrieved on-demand via caching methods that store
// in these maps
p.fmap = make(map[string]*Flavor)
err = p.cacheFlavors()
if err != nil {
return err
}

// get the details of all active images
p.imap = make(map[string]*images.Image)
err = p.cacheImages()
if err != nil {
return err
}

// to get a reasonable new server timeout we'll keep track of how long it
// takes to spawn them using an exponentially weighted moving average. We
Expand Down Expand Up @@ -515,6 +500,14 @@ func (p *openstackp) deploy(resources *Resources, requiredPorts []int, gatewayIP
return err
}
if routerID == "" {
// get the external network id
if p.externalNetworkID == "" {
p.externalNetworkID, err = networks.IDFromName(p.networkClient, p.poolName)
if err != nil {
return err
}
}

var router *routers.Router
router, err = routers.Create(p.networkClient, routers.CreateOpts{
Name: resources.ResourceName,
Expand Down Expand Up @@ -617,7 +610,10 @@ func (p *openstackp) flavors() map[string]*Flavor {
}
p.fmapMutex.RLock()
}
fmap := p.fmap
fmap := make(map[string]*Flavor)
for key, val := range p.fmap {
fmap[key] = val
}
p.fmapMutex.RUnlock()
return fmap
}
Expand Down
Loading

0 comments on commit 6de8da6

Please sign in to comment.