PBS是一个批处理作业和计算机系统资源管理软件包。它可以接受批处理作业、shell脚本和控制属性,在作业运行前对其进行储存和保护,然后运行作业,并把输出转发回提交者。PBS可以安装并配置在单机系统或多个系统组上运行,以支持作业处理。由于PBS的灵活性,多个系统可以以多种方式组合。
pbsnodes: Server has no node list MSG=node list is empty - check 'server_priv/nodes' file
./configure --prefix=/usr/local/torque-4.2.3.1
make
make install
make packages
#Added by myself
export TORQUE=/usr/local/torque-4.2.3.1
export LD_LIBRARY_PATH=$TORQUE/lib
export PATH=$TORQUE/bin:$TORQUE/sbin:$PATH
cp trqauthd pbs_sched pbs_server /etc/init.d/
cd /etc/init.d
chkconfig --add trqauthd
chkconfig --add pbs_sched
chkconfig --add pbs_server
vi /etc/hosts
192.168.1.100 sdu.master
当然,IP要根据你自己的实际环境修改;这里应该使用集群的内网IP(此点尚待验证)。
PBS_Server: LOG_ERROR::pbsd_main, unable to determine local server hostname - getaddrinfo(sdu.master) failed, h_errno=1
cd /usr/local/src/torque-4.2.3.1
./torque.setup root
qterm
service pbs_server start
service pbs_sched restart
qmgr -c "set server auto_node_np = True"
sdu.01
sdu.02
sdu.03
sdu.04
sdu.11
sdu.12
sdu.13
sdu.14
./torque-package-clients-linux-x86_64.sh --install
./torque-package-mom-linux-x86_64.sh --install
scp contrib/init.d/pbs_mom root@sdu.01:/etc/init.d/pbs_mom
chkconfig --add pbs_mom
vi /var/spool/torque/server_name
sdu.master
#当然,主机名称要改成你自己的
service trqauthd start
service pbs_mom start
chmod 1777 /var/spool/torque/spool
mkdir sched_jobs
chmod 1777 sched_jobs/
pbsnodes -a
sdu.01
state = free
np = 32
ntype = cluster
status = rectime=1372722492,varattr=,jobs=,state=free,netload=415055,gres=,loadave=0.00,ncpus=32,physmem=32858136kb,availmem=66923828kb,totmem=67952656kb,idletime=1673,nusers=1,nsessions=6,sessions=3516 3520 3522 3548 3550 3575,uname=Linux sdu.01 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
sdu.02
state = free
np = 32
ntype = cluster
status = rectime=1372722471,varattr=,jobs=,state=free,netload=1004261,gres=,loadave=0.00,ncpus=32,physmem=32858136kb,availmem=66920092kb,totmem=67952656kb,idletime=1651,nusers=1,nsessions=6,sessions=3521 3525 3527 3554 3556 3582,uname=Linux sdu.02 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
sdu.03
state = free
np = 32
ntype = cluster
status = rectime=1372722480,varattr=,jobs=,state=free,netload=1033615,gres=,loadave=0.00,ncpus=32,physmem=32858140kb,availmem=66919464kb,totmem=67952660kb,idletime=1637,nusers=1,nsessions=6,sessions=3482 3486 3488 3513 3516 3539,uname=Linux sdu.03 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
sdu.04
state = free
np = 32
ntype = cluster
status = rectime=1372722499,varattr=,jobs=,state=free,netload=1075188,gres=,loadave=0.00,ncpus=32,physmem=32858136kb,availmem=66916928kb,totmem=67952656kb,idletime=1679,nusers=1,nsessions=6,sessions=3487 3491 3493 3519 3521 3546,uname=Linux sdu.04 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
sdu.11
state = free
np = 32
ntype = cluster
status = rectime=1372722498,varattr=,jobs=,state=free,netload=1052504,gres=,loadave=0.00,ncpus=32,physmem=32858136kb,availmem=66920564kb,totmem=67952656kb,idletime=1679,nusers=1,nsessions=6,sessions=3481 3485 3487 3514 3516 3541,uname=Linux sdu.11 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
sdu.12
state = free
np = 32
ntype = cluster
status = rectime=1372722459,varattr=,jobs=,state=free,netload=977840,gres=,loadave=0.00,ncpus=32,physmem=32858136kb,availmem=66916144kb,totmem=67952656kb,idletime=1637,nusers=1,nsessions=6,sessions=3617 3621 3623 3648 3651 3677,uname=Linux sdu.12 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
sdu.13
state = free
np = 32
ntype = cluster
status = rectime=1372722501,varattr=,jobs=,state=free,netload=1083084,gres=,loadave=0.00,ncpus=32,physmem=32858136kb,availmem=66922812kb,totmem=67952656kb,idletime=1682,nusers=1,nsessions=6,sessions=3501 3505 3507 3533 3538 3560,uname=Linux sdu.13 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
sdu.14
state = free
np = 32
ntype = cluster
status = rectime=1372722502,varattr=,jobs=,state=free,netload=1115089,gres=,loadave=0.00,ncpus=32,physmem=32858136kb,availmem=66922072kb,totmem=67952656kb,idletime=1682,nusers=1,nsessions=6,sessions=3496 3500 3502 3527 3530 3555,uname=Linux sdu.14 2.6.32-131.0.15.el6.x86_64 #1 SMP Tue May 10 15:42:40 EDT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003