Import dữ liệu RDB vào HDFS qua Sqoop


Cài đặt Postgres
sudo yum -y install https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm
sudo yum search postgresql13
sudo yum -y install postgresql13 postgresql13-server
sudo /usr/pgsql-13/bin/postgresql-13-setup initdb
sudo systemctl start postgresql-13
systemctl status postgresql-13
sudo systemctl enable postgresql-13

Install pgAdmin 4
sudo yum install https://ftp.postgresql.org/pub/pgadmin/pgadmin4/yum/pgadmin4-redhat-repo-2-1.noarch.rpm
sudo yum install pgadmin4-web
sudo systemctl start httpd && sudo systemctl enable httpd
sudo /usr/pgadmin4/bin/setup-web.sh
sudo firewall-cmd --permanent --add-service=http
sudo firewall-cmd --reload

Cấu hình Postgres
sudo su - postgres
psql
alter user postgres with password 'Nuce@1234';
sudo vi /var/lib/pgsql/13/data/postgresql.conf
  # line 59
  listen_addresses = '172.20.11.11'
sudo vi /var/lib/pgsql/13/data/pg_hba.conf
  # Accept from anywhere (not recommended)
  host all all 0.0.0.0/0 md5
sudo systemctl restart postgresql-13

psql -U postgres -h localhost -p 5432 dataas
psql -U postgres -h 10.20.0.27 -p 5432 dataas

Tao Database
create database dataas;

Tạo User
create user quangtv with encrypted password 'Nuce@1234';

Phân quyền cho user có Full quyền của 1 Database
grant ALL on DATABASE dataas to quangtv;

Kiểm tra lại phân quyền
\l

Tham khảo: https://gist.github.com/oNguyenNgocTrung/aafb157fde607476987463c0e2f9d148



sqoop  list-databases \
  --connect jdbc:mysql://10.20.0.49:3306 \
  --username root \
  --password Huce@4321Huce@4321

sqoop import-all-tables \
    -m 1 \
    --connect jdbc:mysql://quickstart:3306/retail_db \
    --username=retail_dba \
    --password=cloudera \
    --compression-codec=snappy \
    --as-parquetfile \
    --warehouse-dir=/user/hive/warehouse \
    --hive-import

sqoop import-all-tables \
  --m 1 \
  --connect jdbc:mysql://10.20.0.49:3306/stce \
  --username root \
  --password Huce@4321Huce@4321 \
  --compression-codec=snappy \
  --as-parquetfile \
  --warehouse-dir=/user/hive/warehouse/sendy \
  --hive-import

# hive
beeline> !connect jdbc:hive2://10.20.0.27:10000/stce

# impala-shell
> connect ip-ser-impalad

[Not connected] > connect 10.20.0.28;
[10.20.0.28:21000] default>