IT 개발/데이터 엔지니어(Data Engineer)
BigQuery
OKKY
2021. 7. 2. 10:37
출력
SELECT
CAST(42 AS STRING) => '42'
, CAST('42' AS INT64) => 42
, FORMAT('%03d', 42) => 042
, FORMAT('%5.3f', 32.457842) => 32.458
, FORMAT('%5.3f', 32.4) => 32.400
, FORMAT('**%s**', 'H') => **H**
, FORMAT('%s-%03d', 'Agent', 7) => Agent-007
문자열 조작 함수
SELECT
ENDS_WITH('Hello', 'o') -- true
, ENDS_WITH('Hello', 'h') -- false
, STARTS_WITH('Hello', 'h') -- false
, STARTS_WITH('Hello', 'H') -- true
, STARTS_WITH('Hello', 'Hel') -- true
, STRPOS('Hello', 'e') -- 2
, STRPOS('Hello', 'l') -- 3
, STRPOS('Hello', 'll') -- 3
, STRPOS('Hello', 'f') -- 0 (for not-found)
, SUBSTR('Hello', 2, 4) -- ello
, CONCAT('Hello', ' World') -- Hello World
변환 함수
SELECT
LPAD('Hello', 10, '*') -- 왼쪽에 *가 추가된다
, RPAD('Hello', 10, '*') -- 오른쪽에 *가 추가된다
, LPAD('Hello', 10) -- 왼쪽에 공백이 추가된다
, LTRIM(' Hello ') -- 왼쪽의 공백이 제거된다
, RTRIM(' Hello ') -- 오른쪽의 공백이 제거된다
, TRIM (' Hello ') -- 양쪽의 공백이 제거된다
, TRIM ('***Hello***', '*') -- 양쪽의 *이 제거된다
, REVERSE('Hello') -- olleH
정규 표현식
SELECT
column
, REGEXP_CONTAINS(column, r'\d{5}(?:[-\s]\d{4})?') has_zipcode
, REGEXP_CONTAINS(column, r'^\d{5}(?:[-\s]\d{4})?$') is_zipcode
, REGEXP_EXTRACT(column, r'\d{5}(?:[-\s]\d{4})?') the_zipcode
, REGEXP_EXTRACT_ALL(column, r'\d{5}(?:[-\s]\d{4})?') all_zipcodes
, REGEXP_REPLACE(column, r'\d{5}(?:[-\s]\d{4})?', '*****') masked
FROM (
SELECT * FROM UNNEST([
'12345', '1234', '12345-9876',
'abc 12345 def', 'abcde-fghi',
'12345 ab 34567', '12345 9876'
]) AS column
)
행 column has_zipcode is_zipcode the_zipcode all_zipcodes masked
1 | 12345 | true | true | 12345 | 12345 | ***** | |
2 | 1234 | false | false | null | 1234 | ||
3 | 12345-9876 | true | true | 12345-9876 | 12345-9876 | ***** | |
4 | abc 12345 def | true | false | 12345 | 12345 | abc ***** def | |
5 | abcde-fghi | false | false | null | abcde-fghi | ||
6 | 12345 ab 34567 | true | false | 12345 | 12345 | ***** ab ***** | |
34567 | |||||||
7 | 12345 9876 | true | true | 12345 9876 | 12345 9876 | ***** |
정규식 관련 문서 : https://github.com/google/re2/wiki/Syntax
google/re2
RE2 is a fast, safe, thread-friendly alternative to backtracking regular expression engines like those used in PCRE, Perl, and Python. It is a C++ library. - google/re2
github.com
타임 스탬프 처리
SELECT t1, t2, TIMESTAMP_DIFF(t1, t2, MICROSECOND)
FROM (SELECT
TIMESTAMP "2017-09-27 12:30:00.45" AS t1,
TIMESTAMP "2017-09-27 13:30:00.45+1" AS t2
)
결과 동일
SELECT
fmt, input, zone
, PARSE_TIMESTAMP(fmt, input, zone) AS ts
FROM (
SELECT '%Y%m%d-%H%M%S' AS fmt, '20181118-220800' AS input, '+0' AS zone
UNION ALL SELECT '%c', 'Sat Nov 24 21:26:00 2018', 'America/Los_Angeles'
UNION ALL SELECT '%x %X', '11/18/18 22:08:00', 'UTC'
)
1 | %Y%m%d-%H%M%S | 20181118-220800 | +0 | 2018-11-18 22:08:00 UTC | |
2 | %c | Sat Nov 24 21:26:00 2018 | America/Los_Angeles | 2018-11-25 05:26:00 UTC | |
3 | %x %X | 11/18/18 22:08:00 | UTC | 2018-11-18 22:08:00 UTC |
SELECT
ts, fmt
, FORMAT_TIMESTAMP(fmt, ts, '+6') AS ts_output
FROM (
SELECT CURRENT_TIMESTAMP() AS ts, '%Y%m%d-%H%M%S' AS fmt
UNION ALL SELECT CURRENT_TIMESTAMP() AS ts, '%c' AS fmt
UNION ALL SELECT CURRENT_TIMESTAMP() AS ts, '%x %X' AS fmt
)
1 | 2021-07-02 02:18:59.790695 UTC | %Y%m%d-%H%M%S | 20210702-081859 | |
2 | 2021-07-02 02:18:59.790695 UTC | %c | Fri Jul 2 08:18:59 2021 | |
3 | 2021-07-02 02:18:59.790695 UTC | %x %X | 07/02/21 08:18:59 |