Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Network: support down NIC #130

Merged
merged 23 commits into from
Mar 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cmd/attack/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func NewNetworkAttackCommand(uid *string) *cobra.Command {
NetworkDNSCommand(dep, options),
NewNetworkPortOccupiedCommand(dep, options),
NewNetworkBandwidthCommand(dep, options),
NewNICDownCommand(dep, options),
)

return cmd
Expand Down Expand Up @@ -268,3 +269,20 @@ func NewNetworkPortOccupiedCommand(dep fx.Option, options *core.NetworkCommand)
cmd.Flags().StringVarP(&options.Port, "port", "p", "", "this specified port is to occupied")
return cmd
}

func NewNICDownCommand(dep fx.Option, options *core.NetworkCommand) *cobra.Command {
cmd := &cobra.Command{
Use: "down",
Short: "down network interface card",

Run: func(cmd *cobra.Command, args []string) {
options.Action = core.NetworkNICDownAction
options.CompleteDefaults()
utils.FxNewAppWithoutLog(dep, fx.Invoke(commonNetworkAttackFunc)).Run()
},
}

cmd.Flags().StringVarP(&options.Device, "device", "d", "", "the network interface to impact")
SetScheduleFlags(cmd, &options.SchedulerConfig)
return cmd
}
15 changes: 15 additions & 0 deletions pkg/core/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ const (
NetworkPartitionAction = "partition"
NetworkBandwidthAction = "bandwidth"
NetworkPortOccupiedAction = "occupied"
NetworkNICDownAction = "down"
)

func (n *NetworkCommand) Validate() error {
Expand All @@ -89,6 +90,8 @@ func (n *NetworkCommand) Validate() error {
return n.validNetworkOccupied()
case NetworkBandwidthAction:
return n.validNetworkBandwidth()
case NetworkNICDownAction:
return n.validNetworkNICDown()
default:
return errors.Errorf("network action %s not supported", n.Action)
}
Expand Down Expand Up @@ -203,6 +206,18 @@ func (n *NetworkCommand) validNetworkOccupied() error {
return nil
}

func (n *NetworkCommand) validNetworkNICDown() error {
if len(n.Duration) == 0 {
return errors.New("duration is required")
}

if len(n.Device) == 0 {
return errors.New("device is required")
}

return nil
}

func (n *NetworkCommand) CompleteDefaults() {
switch n.Action {
case NetworkDelayAction:
Expand Down
71 changes: 71 additions & 0 deletions pkg/server/chaosd/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,24 @@ func (networkAttack) Attack(options core.AttackConfig, env Environment) (err err
return errors.WithStack(err)
}
}

case core.NetworkNICDownAction:
if err := env.Chaos.getNICIP(attack); err != nil {
return errors.WithStack(err)
}

NICDownCommand := fmt.Sprintf("ifconfig %s down", attack.Device)

cmd := exec.Command("bash", "-c", NICDownCommand)
_, err := cmd.CombinedOutput()
if err != nil {
return errors.WithStack(err)
}

if attack.Duration != "-1" {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this should be len(attack.Duration) != 0?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

len(attack.Duration) has been checked before this function, -1 means attack will run continuously. It will be marked in doc.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not check this field before executing this attack?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field has been checked for necessity before executing, if len(n.Duration) == 0 { return errors.New("duration is required") }, the judgement here is only to decide whether to run continuously.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember you add a note that the attack.Duration is required, but if users set it to -1, this attack won't be recovered automatically. This behavior is very dangerous. Can we forbid users to set it to -1?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, this -1 option is added on purpose, because I think some user may need this option to let the experiment run continuously, especially those who run NIC down locally rather than remotely. And attack.Duration is hardly ever set to negative numbers, I think -1 is enough to be a warning sign (maybe -2333 or -32768 is better).

err := env.Chaos.recoverNICDownScheduled(attack)
return errors.WithStack(err)
}
}

return nil
Expand Down Expand Up @@ -348,6 +366,8 @@ func (networkAttack) Recover(exp core.Experiment, env Environment) error {
return errors.WithStack(err)
}
}
case core.NetworkNICDownAction:
return env.Chaos.recoverNICDown(attack)
}
return nil
}
Expand Down Expand Up @@ -511,3 +531,54 @@ func (s *Server) recoverEtcHosts(attack *core.NetworkCommand, uid string) error
}
return nil
}

func (s *Server) recoverNICDown(attack *core.NetworkCommand) error {
NICUpCommand := fmt.Sprintf("ifconfig %s %s up", attack.Device, attack.IPAddress)

recoverCmd := exec.Command("bash", "-c", NICUpCommand)
_, err := recoverCmd.CombinedOutput()
if err != nil {
return errors.WithStack(err)
}

return nil
}

func (s *Server) recoverNICDownScheduled(attack *core.NetworkCommand) error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this way, we need to set the experiment's status to Destroyed.

I think it's better to refine the schedule framework, and let it recover automatically by the schedule framework.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can refine the schedule framework in next pr

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this solution is temporary, real schedule is need, especially for some dangerous experiments.

NICUpCommand := fmt.Sprintf("sleep %s && ifconfig %s %s up", attack.Duration, attack.Device, attack.IPAddress)

recoverCmd := exec.Command("bash", "-c", NICUpCommand)
_, err := recoverCmd.CombinedOutput()
if err != nil {
return errors.WithStack(err)
}
return nil
}

// getNICIP() uses `ifconfig` to get interfaces' IP. The reason for
// not using net.Interfaces() is that net.Interfaces() can't get
// sub interfaces.
func (s *Server) getNICIP(attack *core.NetworkCommand) error {
getIPCommand := fmt.Sprintf("ifconfig %s | awk '/inet\\>/ {print $2}'", attack.Device)

cmd := exec.Command("bash", "-c", getIPCommand)
stdout, err := cmd.StdoutPipe()
if err != nil {
return errors.WithStack(err)
}

if err = cmd.Start(); err != nil {
return errors.WithStack(err)
}

stdoutBytes := make([]byte, 1024)
_, err = stdout.Read(stdoutBytes)
if err != nil {
return errors.WithStack(err)
}
// When stdoutBytes is converted to string, the string will be IPAddress with a few unnecessary
// zeros, which makes IPAddress' format wrong, so the trailing zeros needs to be trimmed.
attack.IPAddress = strings.TrimRight(string(stdoutBytes), "\n\x00")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add some comments to explain why need to trim \x00

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DONE IT!


return nil
}
53 changes: 53 additions & 0 deletions test/integration_test/network/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash

# Copyright 2022 Chaos Mesh Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.

set -u

cur=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
cd $cur

bin_path=../../../bin

# test nic down
nic=$(cat /proc/net/dev | awk '{i++; if(i>2){print $1}}' | sed 's/^[\t]*//g' | sed 's/[:]*$//g' | head -n 1)

ifconfig ${nic}:0 192.168.10.28 up

test_nic=$(ifconfig | grep ${nic}:0 | sed 's/:0:.*/:0/g')
if [[ test_nic == "" ]]; then
echo "create test nic failed"
exit 1
fi

${bin_path}/chaosd attack network down -d ${test_nic} --duration 1s
stat=$(ifconfig | grep ${test_nic} | sed 's/:0:.*/:0/g')

if [[ -n ${stat} ]]; then
echo "fail to disable the test nic"
ifconfig ${test_nic} down
exit 1
fi

sleep 1s

stat=$(ifconfig | grep ${test_nic} | sed 's/:0:.*/:0/g')
if [[ ${stat} != ${test_nic} ]]; then
echo "fail to enable the test nic"
exit 1
fi

ifconfig ${test_nic} down

exit 0