#Aggregation Function
#The common aggregate functions
#AVG / SUM only applies to fields of numeric types (variables)
SELECT AVG(salary),SUM(salary),AVG(salary)*107
FROM employees;
#The following operations are meaningless
SELECT SUM(last_name),AVG(last_name),SUM(hire_date)
FROM employees;
#MAX / MIN fields (variables) for numeric types, string types, and date types
SELECT MAX(salary),MIN(salary)
FROM employees;
SELECT MAX(last_name),MAX(hire_date)
FROM employees;
#COUNT
#1. Function: Calculate the number of specified fields appearing in the query result (not including null values)
SELECT COUNT(employee_id),COUNT(salary),COUNT(2*salary),COUNT(1),COUNT(2)
FROM employees;
SELECT *
FROM employees;
#How to implement it if there are many records in the calculation table
#count(*)
#count(1)
#count(specific field): Not necessarily correct!
#2. Note: When calculating the number of occurrences of the specified field, the NULL value is not calculated.
SELECT COUNT(commission_pct)
FROM employees;
SELECT commission_pct
FROM employees
WHERE commission_pct IS NOT NULL;
#3. Formula: AVG=SUM/COUNT
SELECT AVG(salary),SUM(salary)/COUNT(salary),
AVG(commission_pct),SUM(commission_pct)/COUNT(commission_pct),
SUM(commission_pct)/107
FROM employees;
#Requirements: Check the average bonus rate in the company
SELECT AVG(commission_pct)#Error de
FROM employees;
#Real
SELECT SUM(commission_pct) / COUNT(IFNULL(commission_pct,0)),AVG(IFNULL(commission_pct,0))
FROM employees;
#How to count the number of records in the table, use count(*), count(1), count(specific fields)
#If you are using MyISAM storage engine, the third party's efficiency is the same, all O(1)
#If you are using the InnoDB storage engine, third-party efficiency: count(*)=count(1)>count(field)
#Variance, Standard Deviation, Median
Use of #GROUP BY
#Requirements: Check the average salary of each department, maximum salary
SELECT department_id,AVG(salary),MAX(salary)
FROM employees
GROUP BY department_id;
#Requirements: Check the average salary of each job_id, maximum salary
SELECT job_id,AVG(salary)
FROM employees
GROUP BY job_id;
#Requirements: Query the average salary of each department_id, job_id, maximum salary
SELECT department_id,job_id,AVG(salary)
FROM employees
GROUP BY department_id,job_id;
#Conclusion 1: The non-group functions that appear in SELECT must be declared in GROUP BY.
# On the contrary, the fields declared in GROUP BY may not appear in the select
#Conclusion 2: GROUP BY statement is behind from, behind WHERE, before ORDER BY, before LIMIT
#Conclusion 3: GROUP BY uses WITH ROLLUP in MySQL
SELECT department_id,AVG(salary)
FROM employees
GROUP BY department_id WITH ROLLUP;
#Requirements: Check the average salary of each department and arrange it in order of average salary
SELECT department_id,AVG(salary) avg_sal
FROM employees
GROUP BY department_id
ORDER BY avg_sal ASC;
#Explanation When using ROLLUP, the ORDER BY clause cannot be used to sort the result at the same time, that is, ROLLUP and ORDER BY are mutually exclusive.
#HAVING (used to filter data)
#Exercise: Check the information of departments with a maximum salary of 10,000 in each department
#Requirement 1: If an aggregate function is used in the filter function, HAVING must be used to replace where. Otherwise, an error is reported
#Requirement 2: HAVING must be declared behind GROUP By
SELECT department_id,MAX(salary)
FROM employees
GROUP BY department_id
HAVING MAX(salary) >10000;
#In development, the premise of using HAVING is that GROUP BY is used in SQL
##Exercise: Query the department information with a maximum salary of 10,000 among the four departments with a department id of 10,20,30,40
#Method 1 Recommended High execution efficiency
SELECT department_id,MAX(salary)
FROM employees
WHERE department_id IN (10,20,30,40)
GROUP BY department_id
HAVING MAX(salary) >10000;
#Method 2
SELECT department_id,MAX(salary)
FROM employees
GROUP BY department_id
HAVING MAX(salary) >10000 AND department_id IN (10,20,30,40);
#Conclusion: When there is an aggregate function in the filter condition, this filter condition must be declared in HAVING
# When there is no aggregate function in the filtering function, this filter condition declaration is OK in WHERE or HAVING. It is recommended to declare it in WHERE.
/*
Comparison between WHERE and HAVING
1. In terms of scope of application, HAVING is more extensive
2. If there is an aggregate function in the filtering condition, WHERE is more efficient
*/
#The underlying execution principle of SQL
#1. The complete structure of SELECT statements
/*
SELECT ......,...,...(There is an aggregate function)
FROM ......,...
SQL92 syntax:
SELECT ....,...,...(The aggregate function exists)
FROM ......,.........
WHERE Join conditions for multi-table AND filter conditions for not containing aggregate functions
GROUP BY ...,...
HAVING Filter conditions containing aggregate functions
ORDER BY ...,...(ASC/ DESC)
SQL99 syntax:
SELECT ......,...,...(There is an aggregate function)
FROM ......,...
SQL92 syntax:
SELECT ....,...,...(The aggregate function exists)
FROM ....JOIN(LEFT/RIGHT)..ON Multi-table connection conditions
(LEFT/RIGHT)JOIN... ON...
WHERE does not contain filtering conditions for aggregate functions
GROUP BY ...,...
HAVING Filter conditions containing aggregate functions
ORDER BY ...,...(ASC/ DESC)
LIMIT ...,...
*/
#2. The execution process of SQL statements
#FROM ...,...-> ON ->(LEFT / RIGHT JOIN) -> WHERE -> GROUP BY -> HAVING -> SELECT -> DISTINCT
# ->ORDER BY -> LIMIT