diff --git a/docs/source/AdministratorGuide/Systems/MonitoringSystem/index.rst b/docs/source/AdministratorGuide/Systems/MonitoringSystem/index.rst index a5091e9e2c6..4e4b5fb5374 100644 --- a/docs/source/AdministratorGuide/Systems/MonitoringSystem/index.rst +++ b/docs/source/AdministratorGuide/Systems/MonitoringSystem/index.rst @@ -23,7 +23,8 @@ Install Elasticsearch ====================== This is not covered here, as installation and administration of ES are not part of DIRAC guide. -Just a note on the ES versions supported: ES7 and ES6 are supported, the support for ES5 is not assured. +Just a note on the ES versions supported: ES7 and ES6 are supported, the support for ES5 is not assured, +and the one for ES6 will be dropped in a future release. Configure the MonitoringSystem =============================== @@ -35,7 +36,7 @@ You can run your Elastic cluster even without authentication, or using User name - Host - Port -The User name and Password must be added to the local cfg file while the other can be added to the CS using the Configuration web application. +The *User* name and *Password* must be added to the local cfg file while the other can be added to the CS using the Configuration web application. You have to handle the ES secret information in a similar way to what is done for the other supported SQL databases, e.g. MySQL @@ -54,7 +55,7 @@ For example:: The following option can be set in `Systems/Monitoring//Databases/MonitoringDB`: *IndexPrefix*: Prefix used to prepend to indexes created in the ES instance. If this - is not present in the CS, the indexes are prefixed with the setup name. + is not present in the CS, the indices are prefixed with the setup name. For each monitoring types managed, the Period (how often a new index is created) can be defined with:: @@ -84,9 +85,9 @@ The given periods above are also the default periods in the code. 
Enable WMSHistory monitoring ============================ -You have to install the WorkloadManagemet/StatesMonitoringAgent. -This agent is used to collect information using the JobDB and send it to the Elasticsearch database. -If you install this agent, you can stop the StatesAccounting agent, that was reporting to the MySQL backend of the Accounting system. +You have to add ``Monitoring`` to the ``Backends`` option of WorkloadManagement/StatesAccountingAgent. +If you do so, this agent will collect information using the JobDB and send it to the Elasticsearch database. +This same agent can also report to the MySQL backend of the Accounting system (which is in fact the default). You can use RabbitMQ for failover. This is optional as the agent already has a failover mechanism. You can configure RabbitMQ in the local dirac.cfg file where the agent is running:: diff --git a/src/DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py b/src/DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py index 9349e479cf7..bfb4d0c01c9 100644 --- a/src/DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py +++ b/src/DIRAC/WorkloadManagementSystem/Agent/StatesAccountingAgent.py @@ -35,12 +35,12 @@ class StatesAccountingAgent(AgentModule): 'UserGroup', 'JobGroup', 'JobType', - 'ApplicationStatus', - 'MinorStatus'] + 'ApplicationStatus', + 'MinorStatus'] __summaryDefinedFields = [('ApplicationStatus', 'unset'), - ('MinorStatus', 'unset')] + ('MinorStatus', 'unset')] __summaryValueFieldsMapping = ['Jobs', - 'Reschedules'] + 'Reschedules'] __renameFieldsMapping = {'JobType': 'JobSplitType'} def initialize(self): @@ -58,8 +58,8 @@ def initialize(self): self.datastores['Accounting'] = DataStoreClient(retryGraceTime=900) if 'Monitoring' in self.backends: self.datastores['Monitoring'] = MonitoringReporter( - monitoringType="WMSHistory", - failoverQueueName=messageQueue) + monitoringType="WMSHistory", + failoverQueueName=messageQueue) self.__jobDBFields = [] for field in 
self.__summaryKeyFieldsMapping: @@ -88,36 +88,36 @@ def execute(self): record = record[1:] rD = {} for fV in self.__summaryDefinedFields: - rD[fV[0]] = fV[1] + rD[fV[0]] = fV[1] for iP in range(len(self.__summaryKeyFieldsMapping)): - fieldName = self.__summaryKeyFieldsMapping[iP] - rD[self.__renameFieldsMapping.get(fieldName, fieldName)] = record[iP] + fieldName = self.__summaryKeyFieldsMapping[iP] + rD[self.__renameFieldsMapping.get(fieldName, fieldName)] = record[iP] record = record[len(self.__summaryKeyFieldsMapping):] for iP in range(len(self.__summaryValueFieldsMapping)): - rD[self.__summaryValueFieldsMapping[iP]] = int(record[iP]) + rD[self.__summaryValueFieldsMapping[iP]] = int(record[iP]) for backend in self.datastores: - if backend.lower() == 'monitoring': - rD['timestamp'] = int(Time.toEpoch(now)) - self.datastores['Monitoring'].addRecord(rD) - - elif backend.lower() == 'accounting': - acWMS = WMSHistory() - acWMS.setStartTime(now) - acWMS.setEndTime(now) - acWMS.setValuesFromDict(rD) - retVal = acWMS.checkValues() + if backend.lower() == 'monitoring': + rD['timestamp'] = int(Time.toEpoch(now)) + self.datastores['Monitoring'].addRecord(rD) + + elif backend.lower() == 'accounting': + acWMS = WMSHistory() + acWMS.setStartTime(now) + acWMS.setEndTime(now) + acWMS.setValuesFromDict(rD) + retVal = acWMS.checkValues() if not retVal['OK']: - self.log.error("Invalid accounting record ", "%s -> %s" % (retVal['Message'], rD)) + self.log.error("Invalid accounting record ", "%s -> %s" % (retVal['Message'], rD)) else: - self.datastores['Accounting'].addRegister(acWMS) + self.datastores['Accounting'].addRegister(acWMS) for backend, datastore in self.datastores.items(): self.log.info("Committing to %s backend" % backend) result = datastore.commit() if not result['OK']: - self.log.error("Couldn't commit WMS history to %s" % backend, result['Message']) - return S_ERROR() + self.log.error("Couldn't commit WMS history to %s" % backend, result['Message']) + return S_ERROR() 
self.log.verbose("Done committing to %s backend" % backend) return S_OK() diff --git a/src/DIRAC/WorkloadManagementSystem/Agent/StatesMonitoringAgent.py b/src/DIRAC/WorkloadManagementSystem/Agent/StatesMonitoringAgent.py index f7ddad71bb3..e756bb7eae8 100644 --- a/src/DIRAC/WorkloadManagementSystem/Agent/StatesMonitoringAgent.py +++ b/src/DIRAC/WorkloadManagementSystem/Agent/StatesMonitoringAgent.py @@ -40,7 +40,7 @@ class StatesMonitoringAgent(AgentModule): 'ApplicationStatus', 'MinorStatus'] __summaryDefinedFields = [('ApplicationStatus', 'unset'), - ('MinorStatus', 'unset')] + ('MinorStatus', 'unset')] __summaryValueFieldsMapping = ['Jobs', 'Reschedules'] __renameFieldsMapping = {'JobType': 'JobSplitType'} @@ -60,8 +60,8 @@ def initialize(self): self.messageQueue = self.am_getOption('MessageQueue', 'dirac.wmshistory') self.monitoringReporter = MonitoringReporter( - monitoringType="WMSHistory", - failoverQueueName=self.messageQueue) + monitoringType="WMSHistory", + failoverQueueName=self.messageQueue) for field in self.__summaryKeyFieldsMapping: if field == 'User': @@ -100,8 +100,8 @@ def execute(self): retVal = self.monitoringReporter.commit() if retVal['OK']: - self.log.info("Records sent", "(%s)" % result['Value']) + self.log.info("Records sent", "(%s)" % result['Value']) else: - self.log.error("Failed to insert the records, it will be retried in the next iteration", retVal['Message']) + self.log.error("Failed to insert the records, it will be retried in the next iteration", retVal['Message']) return S_OK() diff --git a/tests/Jenkins/utilities.sh b/tests/Jenkins/utilities.sh index 8e1bca3218f..b1d3731fa95 100644 --- a/tests/Jenkins/utilities.sh +++ b/tests/Jenkins/utilities.sh @@ -486,12 +486,12 @@ generateCA() { # Generate the CA certificate openssl req -config openssl_config_ca.cnf \ - -key ca.key.pem \ - -new -x509 \ - -days 7300 \ - -sha256 \ - -extensions v3_ca \ - -out ca.cert.pem + -key ca.key.pem \ + -new -x509 \ + -days 7300 \ + -sha256 \ + 
-extensions v3_ca \ + -out ca.cert.pem # Copy the CA to the list of trusted CA cp ca.cert.pem "${SERVERINSTALLDIR}/etc/grid-security/certificates/" @@ -864,7 +864,7 @@ diracAgents(){ python "${TESTCODE}/DIRAC/tests/Jenkins/dirac-cfg-add-option.py" "agent" "$agent" echo "==> calling dirac-agent $agent -o MaxCycles=1 ${DEBUG}" if ! dirac-agent "$agent" -o MaxCycles=1 "${DEBUG}"; then - echo 'ERROR: dirac-agent failed' >&2 + echo 'ERROR: dirac-agent failed' >&2 exit 1 fi fi diff --git a/tests/System/random_files_creator.sh b/tests/System/random_files_creator.sh index dbae937b4ee..02f42413eef 100755 --- a/tests/System/random_files_creator.sh +++ b/tests/System/random_files_creator.sh @@ -53,14 +53,14 @@ if [[ "${#}" -gt 0 ]]; then -p=*|--Path=*) temporaryPath="${i#*=}" - if [[ ! -d "${temporaryPath}" ]]; then - mkdir -p "${temporaryPath}" + if [[ ! -d "${temporaryPath}" ]]; then + mkdir -p "${temporaryPath}" fi shift # past argument=value ;; *) - echo -e "${helpmessage}" + echo -e "${helpmessage}" exit 0 # unknown option ;;