oracle-analysis 痛定思痛。 2023-10-18 16:02 97阅读 0赞 #### oracle-analysis #### CREATE TABLE EMPLOY ( NAME VARCHAR2(10), --姓名 DEPT VARCHAR2(10), --部门 SALARY NUMBER --工资 ); INSERT INTO EMPLOY VALUES ('张三','市场部',4000); INSERT INTO EMPLOY VALUES ('赵红','技术部',2000); INSERT INTO EMPLOY VALUES ('李四','市场部',5000); INSERT INTO EMPLOY VALUES ('李白','技术部',5000); INSERT INTO EMPLOY VALUES ('王五','市场部',NULL); INSERT INTO EMPLOY VALUES ('王蓝','技术部',4000); SELECT ROW_NUMBER() OVER(ORDER BY SALARY) AS 序号, NAME AS 姓名, DEPT AS 部门, SALARY AS 工资 FROM EMPLOY; /* 查询结果如下: 序号 姓名 部门 工资 1 赵红 技术部 2000 2 张三 市场部 4000 3 王蓝 技术部 4000 4 李四 市场部 5000 5 李白 技术部 5000 6 王五 市场部 (null) */ SELECT ROW_NUMBER() OVER( ORDER BY DEPT) AS 序号, NAME AS 姓名, DEPT AS 部门, SALARY AS 工资 FROM EMPLOY; /* 如果不在over中加入限制分组/作用域,那就是默认整个集合 ROW_NUMBER() 从下面的序号一列中可以看出 1 赵红 技术部 2000 2 李白 技术部 5000 3 王蓝 技术部 4000 4 王五 市场部 5 张三 市场部 4000 6 李四 市场部 5000 */ SELECT ROW_NUMBER() OVER(PARTITION BY dept ORDER BY DEPT) AS 序号, NAME AS 姓名, DEPT AS 部门, SALARY AS 工资 FROM EMPLOY; /* 加上PARTITION BY dept 根据部门重新排序 1 赵红 技术部 2000 2 李白 技术部 5000 3 王蓝 技术部 4000 1 王五 市场部 2 张三 市场部 4000 3 李四 市场部 5000 */ SELECT ROW_NUMBER() OVER(ORDER BY dept,SALARY NULLS first) AS 序号, NAME AS 姓名, DEPT AS 部门, SALARY AS 工资, round(AVG(SALARY) OVER(PARTITION BY dept),2) AS 部门平均工资, AVG(SALARY) OVER() AS 公司平均工资 FROM EMPLOY; /* 加上nulls first后可以使得null的数据排在前面, 因为这里加上了(AVG(SALARY) OVER(PARTITION BY dept),2) 但也是排在部门的前面 1 赵红 技术部 2000 3666.67 4000 2 王蓝 技术部 4000 3666.67 4000 3 李白 技术部 5000 3666.67 4000 4 王五 市场部 4500 4000 5 张三 市场部 4000 4500 4000 6 李四 市场部 5000 4500 4000 */ SELECT ROW_NUMBER() OVER(ORDER BY SALARY DESC NULLS FIRST) AS RN, RANK() OVER(ORDER BY SALARY DESC NULLS FIRST) AS RK, DENSE_RANK() OVER(ORDER BY SALARY DESC NULLS FIRST) AS D_RK, NAME AS 姓名, DEPT AS 部门, SALARY AS 工资 FROM EMPLOY; /* rank函数的作用是排名,如果有并列的名次那么会直接跳到下一名,但是DENSE_RANK就不会了,并列的名次,下面的名次依旧只是加一 查询结果如下: RN RK D_RK 姓名 部门 工资 1 1 1 王五 市场部 (null) 2 2 2 李四 市场部 5000 3 2 2 李白 技术部 5000 4 4 3 张三 市场部 4000 5 4 3 王蓝 技术部 4000 6 6 4 赵红 技术部 2000 */ SELECT NAME AS 姓名, SALARY AS 工资, SUM(SALARY) OVER(ORDER BY SALARY NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS 小于本人工资的总额, SUM(SALARY) OVER(ORDER BY SALARY NULLS FIRST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS 大于本人工资的总额, SUM(SALARY) OVER(ORDER BY SALARY NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS 工资总额1, SUM(SALARY) OVER() AS 工资总额2 FROM EMPLOY; DELETE FROM EMPLOY; INSERT INTO EMPLOY VALUES ('张三','市场部',2000); INSERT INTO EMPLOY VALUES ('赵红','技术部',2400); INSERT INTO EMPLOY VALUES ('李四','市场部',3000); INSERT INTO EMPLOY VALUES ('李白','技术部',3200); INSERT INTO EMPLOY VALUES ('王五','市场部',4000); INSERT INTO EMPLOY VALUES ('王蓝','技术部',5000); SELECT NAME AS 姓名, DEPT AS 部门, SALARY AS 工资, FIRST_VALUE(SALARY IGNORE NULLS) OVER(PARTITION BY DEPT) AS 部门最低工资, NTH_VALUE(SALARY, 2) OVER(PARTITION BY DEPT) AS 部门倒数第二工资, LAST_VALUE(SALARY RESPECT NULLS) OVER(PARTITION BY DEPT) AS 部门最高工资, SUM(SALARY) OVER(ORDER BY SALARY ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS "ROWS", SUM(SALARY) OVER(ORDER BY SALARY RANGE BETWEEN 500 PRECEDING AND 500 FOLLOWING) AS "RANGE" FROM EMPLOY; /* rows查询结果 查询结果如下: 姓名 工资 小于本人工资的总额 大于本人工资的总额 工资总额1 工资总额2 王五 (null) (null) 20000 20000 20000 赵红 2000 2000 20000 20000 20000 张三 4000 6000 18000 20000 20000 王蓝 4000 10000 14000 20000 20000 李四 5000 15000 10000 20000 20000 李白 5000 20000 5000 20000 20000 ROWS BETWEEN <上限条件> AND <下限条件> 其中“上限条件”可以是如下关键字: UNBOUNDED PRECEDING <number> PRECEDING CURRENT ROW “下线条件”可以是如下关键字: CURRENT ROW <number> FOLLOWING UNBOUNDED FOLLOWING 以上关键字都是相对当前行的,UNBOUNDED PRECEDING表示当前行前面的所有行,也就是说没有上限;<number> PRECEDING表示从当前行开始到它前面的<number>行为止,例如,number=2,表示的是当前行前面的2行;CURRENT ROW表示当前行 range查询结果 查询结果如下: 姓名 部门 工资 部门最低工资 部门倒数第二工资 部门最高工资 ROWS RANGE 张三 市场部 2000 2000 3000 4000 4400 4400 赵红 技术部 2400 3200 5000 2400 7400 4400 李四 市场部 3000 2000 3000 4000 8600 6200 李白 技术部 3200 3200 5000 2400 10200 6200 王五 市场部 4000 2000 3000 4000 12200 4000 王蓝 技术部 5000 3200 5000 2400 9000 5000 上面SQL的RANGE子句的作用是定义一个工资范围,这个范围的上限是当前行的工资-500,下限是当前行工资+500。例如:李四的工资是3000,所以上限是3000-500=2500,下限是3000+500=3500,那么有谁的工资在2500-3500这个范围呢?只有李四和李白,所以RANGE列的值就是3000(李四)+3200(李白)=6200。以上就是ROWS和RANGE得区别。 上面的 SQL 还用到了FIRST_VALUE,NTH_VALUE 和 LAST_VALUE 三个函数,它们的作用也非常简单,用来求OVER定义集合的最小值,第 n 行的值和最大值。值得注意的是这两个函数有个关键字,IGNORE NULLS 或 RESPECT NULLS,它们的作用正如它们的名字一样,用来忽略NULL值和考虑NULL值。 */ SELECT NAME AS 姓名, SALARY AS 工资, LAG(SALARY,0) OVER(ORDER BY SALARY) AS LAG0, LAG(SALARY) OVER(ORDER BY SALARY) AS LAG1, LAG(SALARY,2) OVER(ORDER BY SALARY) AS LAG2, LAG(SALARY,3 ,0) IGNORE NULLS OVER(ORDER BY SALARY) AS LAG3, LAG(SALARY,4, -1) RESPECT NULLS OVER(ORDER BY SALARY) AS LAG4, LEAD(SALARY) OVER(ORDER BY SALARY) AS LEAD FROM EMPLOY; /* 姓名 工资 LAG0 LAG1 LAG2 LAG3 LAG4 LEAD 张三 2000 2000 (null) (null) 0 -1 2400 赵红 2400 2400 2000 (null) 0 -1 3000 李四 3000 3000 2400 2000 0 -1 3200 李白 3200 3200 3000 2400 2000 -1 4000 王五 4000 4000 3200 3000 2400 2000 5000 王蓝 5000 5000 4000 3200 3000 2400 (null) 解释: LAG(表达式或字段,偏移量, 默认值) IGNORE NULLS或RESPECT NULLS LAG是向下偏移,LEAD是向上偏移 */ CREATE TABLE price_history ( security_id NUMBER(10, 0), price NUMBER(10, 4), price_date DATE, rank NUMBER(2, 0) /* 存放股票历史价格 */ ); CREATE TABLE price ( security_id NUMBER(10, 0), price NUMBER(10, 4) /* 存放最新价格 */ ); insert all into price_history(security_id,price,price_date,rank) values(1,10,CURRENT_DATE,1) into price_history(security_id,price,price_date,rank) values(1,8,CURRENT_DATE-1,3) into price_history(security_id,price,price_date,rank) values(1,9,CURRENT_DATE+1,2) into price_history(security_id,price,price_date,rank) values(2,10,CURRENT_DATE,2) into price_history(security_id,price,price_date,rank) values(2,11,CURRENT_DATE-1,1) into price_history(security_id,price,price_date,rank) values(3,13,CURRENT_DATE+2,3) into price_history(security_id,price,price_date,rank) values(3,10,CURRENT_DATE-1,1) into price_history(security_id,price,price_date,rank) values(3,10,CURRENT_DATE+2,2) into price_history(security_id,price,price_date,rank) values(2,10,CURRENT_DATE-2,3) into price_history(security_id,price,price_date,rank) values(4,10,CURRENT_DATE,2) into price_history(security_id,price,price_date,rank) values(4,10,CURRENT_DATE+3,3) into price_history(security_id,price,price_date,rank) values(4,10,CURRENT_DATE-1,1) select CURRENT_DATE from dual; select * from price_history; MERGE INTO price p USING ( SELECT security_id, price FROM ( SELECT ROW_NUMBER() OVER(PARTITION BY security_id ORDER BY price_date DESC, rank DESC) AS ROW_ID, security_id, price FROM price_history ) WHERE ROW_ID = 1 ) ph ON (p.security_id = ph.security_id) WHEN MATCHED THEN UPDATE SET p.price = ph.price WHEN NOT MATCHED THEN INSERT VALUES (ph.security_id, ph.price); /* 以下是对上述语句的解释 */ /*下面的集合按照security_id编号,在相同的security_id里面会从1开始编码*/ SELECT ROW_NUMBER() OVER(PARTITION BY security_id ORDER BY price_date DESC, rank DESC) AS ROW_ID, security_id, price FROM price_history /*然后选出相同的security_id,也就是row_id为1*/ SELECT security_id, price FROM ( SELECT ROW_NUMBER() OVER(PARTITION BY security_id ORDER BY price_date DESC, rank DESC) AS ROW_ID, security_id, price FROM price_history ) WHERE ROW_ID = 1 /*最后使用merge语句,如果满足条件也就是在最新价格表中存在记录,我们就更新,没有就插入*/ MERGE INTO price p USING ( SELECT security_id, price FROM ( SELECT ROW_NUMBER() OVER(PARTITION BY security_id ORDER BY price_date DESC, rank DESC) AS ROW_ID, security_id, price FROM price_history ) WHERE ROW_ID = 1 ) ph ON (p.security_id = ph.security_id) WHEN MATCHED THEN UPDATE SET p.price = ph.price WHEN NOT MATCHED THEN INSERT VALUES (ph.security_id, ph.price); SELECT security_id, price_date, price, price - NVL(LAG(price) OVER(PARTITION BY security_id ORDER BY price_date), 0) FROM price_history; /* 假设现在我们有下面的表用来存放交易记录,现在我们要做一个报表,查询一下每相邻两次交易记录的 price 的差值,也就是第二条记录的 price 减去第一条的 price,第三条减去第二条 我们可以利用lag进行向下偏移,然后使用当前价格减去上一条记录的价格 1 2019-03-23 12:02:57 8 8 1 2019-03-24 12:02:57 10 2 1 2019-03-25 12:02:57 9 -1 2 2019-03-22 12:02:57 10 10 2 2019-03-23 12:02:57 11 1 2 2019-03-24 12:02:57 10 -1 3 2019-03-23 12:02:57 10 10 3 2019-03-26 12:02:57 10 0 3 2019-03-26 12:02:57 13 3 4 2019-03-23 12:02:57 10 10 4 2019-03-24 12:02:57 10 0 4 2019-03-27 12:02:57 10 0 */
还没有评论,来说两句吧...