1. Using Jindo to Accelerate OSS Directly
Export the environment variables used throughout this section (OSS endpoint, bucket, credentials, and the Dataset/PVC name):

```bash
export ENDPOINT=oss-cn-beijing-internal.aliyuncs.com
export BUCKET=
export AK=
export SK=
export NAMESPACE=default
export PVC=myoss-jindo
export URI=datacenter
```
Create a Secret holding the OSS AccessKey pair:

```bash
kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: ${PVC}
  namespace: ${NAMESPACE}
type: Opaque
stringData:
  fs.oss.accessKeyId: ${AK}
  fs.oss.accessKeySecret: ${SK}
EOF
```
Create the Dataset, mounting the OSS path and referencing the Secret for credentials:

```bash
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: Dataset
metadata:
  name: ${PVC}
  namespace: ${NAMESPACE}
spec:
  mounts:
    - mountPoint: oss://${BUCKET}/${URI}/
      options:
        fs.oss.endpoint: ${ENDPOINT}
      name: default
      path: "/"
      encryptOptions:
        - name: fs.oss.accessKeyId
          valueFrom:
            secretKeyRef:
              name: ${PVC}
              key: fs.oss.accessKeyId
        - name: fs.oss.accessKeySecret
          valueFrom:
            secretKeyRef:
              name: ${PVC}
              key: fs.oss.accessKeySecret
  accessModes:
    - ReadWriteMany
EOF
```
Create the JindoRuntime, with two workers and an SSD cache tier:

```bash
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: JindoRuntime
metadata:
  name: ${PVC}
  namespace: ${NAMESPACE}
spec:
  replicas: 2
  fuse:
    image: registry.cn-shanghai.aliyuncs.com/jindofs/jindo-fuse
    imageTag: 6.2.0
    args:
      - -oro
      - -ometrics_port=0
      - -okernel_cache
      - -oattr_timeout=7200
      - -oentry_timeout=7200
      - -onegative_timeout=7200
      - -opread
  tieredstore:
    levels:
      - mediumtype: SSD
        path: /cache
        quota: 40960
EOF
```
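Before creating the test workload, it helps to confirm that the Dataset has bound to the runtime and that Fluid has created the matching PVC. A minimal check, assuming the environment variables exported above are still set:

```bash
# The Dataset should report a Bound phase, and a PVC with the same name should exist
kubectl -n ${NAMESPACE} get dataset ${PVC}
kubectl -n ${NAMESPACE} get jindoruntime ${PVC}
kubectl -n ${NAMESPACE} get pvc ${PVC}
```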
```bash
export IMAGE=shaowenchen/demo:ubuntu
```
Create a Deployment that mounts the Fluid PVC at /data:

```bash
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${PVC}-deploy
  namespace: ${NAMESPACE}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ${PVC}-demo
  template:
    metadata:
      labels:
        app: ${PVC}-demo
    spec:
      containers:
        - name: demo
          image: ${IMAGE}
          volumeMounts:
            - mountPath: /data
              name: data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: ${PVC}
EOF
```
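Once the Pod is running, the bucket content should be visible under /data. A quick verification sketch, selecting the Pod by the app=${PVC}-demo label defined in the Deployment above:

```bash
# Pick the first Pod created by the Deployment and list the mounted OSS data
POD=$(kubectl -n ${NAMESPACE} get pod -l app=${PVC}-demo -o jsonpath='{.items[0].metadata.name}')
kubectl -n ${NAMESPACE} exec -it ${POD} -- ls -lh /data
```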
1.1 juicefs performance test
```bash
juicefs bench --block-size 4096 --big-file-size 1024 --threads 30 ./
BlockSize: 4096 MiB, BigFileSize: 1024 MiB, SmallFileSize: 128 KiB, SmallFileCount: 100, NumThreads: 30
+------------------+---------------+-----------------+
|       ITEM       |     VALUE     |       COST      |
+------------------+---------------+-----------------+
|   Write big file | 1520.22 MiB/s |    20.21 s/file |
|    Read big file | 1595.94 MiB/s |    19.25 s/file |
| Write small file |   8.9 files/s | 3373.29 ms/file |
|  Read small file | 289.1 files/s |  103.79 ms/file |
|        Stat file | 496.8 files/s |   60.39 ms/file |
+------------------+---------------+-----------------+
```
1.2 DD performance test
Write:

```bash
time dd if=/dev/zero of=./dd.txt bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 24.8855 s, 421 MB/s

real    0m25.047s
user    0m0.004s
sys     0m2.857s
```
First read:

```bash
time dd if=./dd.txt of=/dev/null bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 21.6259 s, 485 MB/s

real    0m21.683s
user    0m0.000s
sys     0m1.451s
```
Second read:

```bash
time dd if=./dd.txt of=/dev/null bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 19.6688 s, 533 MB/s

real    0m19.692s
user    0m0.004s
sys     0m1.284s
```
1.3 Clean up resources
Delete the workload and the Fluid objects created above (the Secret was created with the same name as the Dataset):

```bash
kubectl -n ${NAMESPACE} delete deployment ${PVC}-deploy
kubectl -n ${NAMESPACE} delete jindoruntime ${PVC}
kubectl -n ${NAMESPACE} delete dataset ${PVC}
kubectl -n ${NAMESPACE} delete secret ${PVC}
```
2. JuiceFS Community Edition with OSS
2.1 Configure environment variables
Export the Redis metadata connection and the OSS credentials:

```bash
export REDIS_IP=x.x.x.x
export REDIS_PORT=6379
export REDIS_USER=default
export REDIS_PASSWORD=mypassword
export REDIS_DIRECTSERVER=redis://${REDIS_USER}:${REDIS_PASSWORD}@${REDIS_IP}:${REDIS_PORT}/1
export ACCESS_KEY=xxx
export SECRET_KEY=xxx
export BUCKET=xxx
export ENDPOINT=oss-cn-beijing-internal.aliyuncs.com
export BUCKET_ENDPOINT=$BUCKET.$ENDPOINT
```
2.2 Initialize the filesystem
Install the JuiceFS client:

```bash
curl -sSL https://d.juicefs.com/install | sh -
```
Format the filesystem, using Redis for metadata and OSS for data. The client picks up the ACCESS_KEY and SECRET_KEY environment variables exported above:

```bash
juicefs format \
    --storage oss \
    --bucket ${BUCKET_ENDPOINT} \
    ${REDIS_DIRECTSERVER} \
    oss-direct
```
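As a sanity check (not part of the original procedure), `juicefs status` can be pointed at the same metadata URL to confirm that the filesystem settings were written:

```bash
# Prints the filesystem name, storage type, and bucket recorded in Redis
juicefs status ${REDIS_DIRECTSERVER}
```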
3. Mounting JuiceFS Directly on the Host
3.1 juicefs performance test
Mount the filesystem on the host with a local cache directory:

```bash
juicefs mount -d --buffer-size 2000 --max-uploads 150 ${REDIS_DIRECTSERVER} ./oss-direct --cache-dir=/data/jfs-oss-direct
```

Running juicefs bench (same parameters as in section 1.1) inside the mount directory gives:

```
+------------------+-------------------+---------------+
|       ITEM       |       VALUE       |      COST     |
+------------------+-------------------+---------------+
|   Write big file |     2348.54 MiB/s |  13.08 s/file |
|    Read big file |     5988.49 MiB/s |   5.13 s/file |
| Write small file |     867.1 files/s | 34.60 ms/file |
|  Read small file |   35705.2 files/s |  0.84 ms/file |
|        Stat file |  103844.2 files/s |  0.29 ms/file |
|   FUSE operation | 534217 operations |    0.91 ms/op |
|      Update meta |   9543 operations |    0.10 ms/op |
|       Put object |  10680 operations |  154.07 ms/op |
|       Get object |   7680 operations |   71.21 ms/op |
|    Delete object |      0 operations |    0.00 ms/op |
| Write into cache |   4314 operations |    1.10 ms/op |
|  Read from cache |   3000 operations |    0.16 ms/op |
+------------------+-------------------+---------------+
```
3.2 DD performance test
Write:

```bash
time dd if=/dev/zero of=./dd.txt bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 5.99897 s, 1.7 GB/s

real    0m6.001s
user    0m0.004s
sys     0m3.112s
```
First read:

```bash
time dd if=./dd.txt of=/dev/null bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 28.4491 s, 369 MB/s

real    0m29.033s
user    0m0.000s
sys     0m3.808s
```
Second read:

```bash
time dd if=./dd.txt of=/dev/null bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 1.65887 s, 6.3 GB/s

real    0m1.660s
user    0m0.000s
sys     0m1.659s
```
4. Mounting JuiceFS in a Pod
4.1 Create a test workload
Create a Secret with the JuiceFS metadata URL and the OSS AccessKey pair:

```bash
kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: juicefs-direct-secret
type: Opaque
stringData:
  metaurl: redis://${REDIS_USER}:${REDIS_PASSWORD}@${REDIS_IP}:6379/1
  access-key: ${ACCESS_KEY}
  secret-key: ${SECRET_KEY}
EOF
```
Create the Dataset, referencing the Secret for the metadata URL and credentials:

```bash
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: Dataset
metadata:
  name: juicefs-direct-demo
spec:
  accessModes:
    - ReadWriteMany
  mounts:
    - name: oss-direct
      mountPoint: "juicefs:///"
      options:
        bucket: ${BUCKET_ENDPOINT}
        storage: oss
      encryptOptions:
        - name: metaurl
          valueFrom:
            secretKeyRef:
              name: juicefs-direct-secret
              key: metaurl
        - name: access-key
          valueFrom:
            secretKeyRef:
              name: juicefs-direct-secret
              key: access-key
        - name: secret-key
          valueFrom:
            secretKeyRef:
              name: juicefs-direct-secret
              key: secret-key
EOF
```
Note that the mount `name` here must match the filesystem name passed to `juicefs format` (oss-direct in this example).
Create the JuiceFSRuntime with an SSD cache tier:

```bash
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: JuiceFSRuntime
metadata:
  name: juicefs-direct-demo
spec:
  replicas: 1
  tieredstore:
    levels:
      - mediumtype: SSD
        path: /cache
        quota: 40960
EOF
```
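As with the JindoRuntime earlier, it is worth confirming that the Dataset is bound and the PVC exists before creating the workload. A minimal check using the resource names from the manifests above:

```bash
kubectl get dataset juicefs-direct-demo
kubectl get juicefsruntime juicefs-direct-demo
kubectl get pvc juicefs-direct-demo
```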
Create a Deployment that mounts the Fluid PVC at /data/jfs:

```bash
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: juicefs-direct-demo
spec:
  replicas: 1
  selector:
    matchLabels:
      app: juicefs-direct-demo
  template:
    metadata:
      labels:
        app: juicefs-direct-demo
    spec:
      containers:
        - name: demo
          image: shaowenchen/demo:ubuntu
          volumeMounts:
            - mountPath: /data/jfs
              name: data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: juicefs-direct-demo
EOF
```
4.2 juicefs performance test
Exec into the Pod and run `curl -sSL https://d.juicefs.com/install | sh -` to install the JuiceFS client.
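A minimal sketch of those steps, assuming the Deployment above produced a single Pod carrying the app=juicefs-direct-demo label and that the image ships bash and curl:

```bash
# Install the JuiceFS client inside the Pod, then run the bench in the mounted directory
POD=$(kubectl get pod -l app=juicefs-direct-demo -o jsonpath='{.items[0].metadata.name}')
kubectl exec -it ${POD} -- bash -c 'curl -sSL https://d.juicefs.com/install | sh -'
kubectl exec -it ${POD} -- bash -c 'cd /data/jfs && juicefs bench --block-size 4096 --big-file-size 1024 --threads 30 ./'
```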
```bash
juicefs bench --block-size 4096 --big-file-size 1024 --threads 30 ./
+------------------+-------------------+---------------+
|       ITEM       |       VALUE       |      COST     |
+------------------+-------------------+---------------+
|   Write big file |      754.37 MiB/s |  40.72 s/file |
|    Read big file |     1808.45 MiB/s |  16.99 s/file |
| Write small file |     628.6 files/s | 47.72 ms/file |
|  Read small file |    1129.1 files/s | 26.57 ms/file |
|        Stat file |  120037.9 files/s |  0.25 ms/file |
|   FUSE operation | 536005 operations |    3.39 ms/op |
|      Update meta |   9547 operations |    0.32 ms/op |
|       Put object |  10680 operations |   80.47 ms/op |
|       Get object |  15152 operations |   50.53 ms/op |
|    Delete object |      0 operations |    0.00 ms/op |
| Write into cache |      0 operations |    0.00 ms/op |
|  Read from cache |      0 operations |    0.00 ms/op |
+------------------+-------------------+---------------+
```
4.3 DD performance test
Write:

```bash
time dd if=/dev/zero of=./dd.txt bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 13.198 s, 794 MB/s

real    0m13.199s
user    0m0.004s
sys     0m2.860s
```
First read:

```bash
time dd if=./dd.txt of=/dev/null bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 34.8118 s, 301 MB/s

real    0m35.162s
user    0m0.004s
sys     0m3.222s
```
Second read:

```bash
time dd if=./dd.txt of=/dev/null bs=4M count=2500
10485760000 bytes (10 GB, 9.8 GiB) copied, 1.48848 s, 7.0 GB/s

real    0m1.490s
user    0m0.000s
sys     0m1.489s
```
5. Issues Encountered in Practice
5.1 JindoRuntime-mounted OSS directory shows no data
Symptom: after running for a while, the previously mounted OSS directory no longer shows any data.
Fix: delete the instance's jindofs-master-0 Pod a few times; after it is automatically recreated, the data comes back without downtime.
Cause: the jindofs-master-0 logs show no errors; the root cause is still under investigation.
- Delete the jindofs-master-0 instances in a specific namespace
```bash
NS="atms-model-cache"
kubectl -n "$NS" get pod | grep jindofs-master | awk '{print $1}' | xargs kubectl -n "$NS" delete pod
```
- Delete the jindofs-master-0 instances in all namespaces
```bash
kubectl get pod --all-namespaces | grep jindofs-master | awk '{print $1, $2}' | xargs -n2 sh -c 'kubectl -n $0 delete pod $1'
```
5.2 jindofs-master keeps crashing
Symptom: jindofs-master-0 reports the following error:
```
4# JfsxMainBase::checkAndSetExitCode(int, bool) at /root/workspace/code/jindocache/jfsx-common/include/JfsxMainBase.hpp:97
5# JfsxMainBase::runImpl() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:57
6# JfsxMainBase::run() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:22
7# main at /root/workspace/code/jindocache/jfsx-nsmain/main_entry.cpp:37
8# __libc_start_main at ../csu/libc-start.c:342
9# 0x00000000005D75BE in /smartdata/sbin/jindocache-server
Error: signal 11 Segmentation fault, PID 1, TID 7f10f9f9f3c0
Stacktrace with Glog:
    @     0x7f10f9fc5611  abort
    @           0x8b6b92  JfsxMainBase::checkAndSetExitCode()
    @           0x429831  _ZN12JfsxMainBase7runImplEv.cold
    @           0x8b9710  JfsxMainBase::run()
    @           0x4f572e  main
    @     0x7f10f9fc709b  __libc_start_main
    @           0x5d75be  (unknown)
    @              (nil)  (unknown)
Stacktrace with Boost:
0# 0x0000000000FD2B83 in /smartdata/sbin/jindocache-server
1# 0x00007F10F9FDA970 in /lib/x86_64-linux-gnu/libc.so.6
2# __GI_abort at /build/glibc-6iIyft/glibc-2.28/stdlib/abort.c:107
3# JfsxMainBase::checkAndSetExitCode(int, bool) at /root/workspace/code/jindocache/jfsx-common/include/JfsxMainBase.hpp:97
4# JfsxMainBase::runImpl() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:57
5# JfsxMainBase::run() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:22
6# main at /root/workspace/code/jindocache/jfsx-nsmain/main_entry.cpp:37
7# __libc_start_main at ../csu/libc-start.c:342
8# 0x00000000005D75BE in /smartdata/sbin/jindocache-server
```
Fix: check the jindoruntime-controller logs. Some Jindo instances had corrupted data, which prevented the controller from working properly; the abnormal data has to be repaired.
Cause: jindoruntime-controller processes Jindo instances serially. One abnormal instance blocks configuration updates for the instances behind it, at which point smartdata throws errors.
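A hedged example of inspecting the controller; the fluid-system namespace and the jindoruntime-controller Deployment name are the Fluid defaults and may differ in your installation:

```bash
# Locate the controller and scan its recent logs for failures
kubectl -n fluid-system get deploy | grep jindoruntime
kubectl -n fluid-system logs deploy/jindoruntime-controller --tail=200 | grep -iE 'error|fail'
```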
5.3 Storage is slow to become ready
Symptom: after a Dataset is created, it is slow to become ready, and configuration updates are slow.
Fix: increase the resource limits of jindoruntime-controller. The default CPU limit is 100m; raising it to 2000m helps.
Cause: jindoruntime-controller is being CPU-throttled.
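A minimal sketch of raising the limit, again assuming the default fluid-system namespace and Deployment name (if Fluid was installed with Helm, adjusting the chart values is the more durable fix):

```bash
# Raise the controller's CPU limit from the default 100m to 2000m
kubectl -n fluid-system set resources deployment jindoruntime-controller --limits=cpu=2000m
```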
5.4 Hot-fixing a mount point
Exec into jindofs-master and check whether the mount exists. If it does not, try mounting it manually:
```bash
jindocache -mount / oss://my-bucket/path-dir
```
After exiting the jindofs-master Pod, the FUSE Pods used by the business applications must be restarted for the fix to take effect.
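A hedged example of restarting the FUSE Pods for one dataset, using the same label selectors as the repair script in section 5.5; replace the namespace and dataset name with your own values:

```bash
# Deleting the FUSE Pods forces them to be recreated against the repaired mount
kubectl -n <namespace> delete pod -l release=<dataset>,role=jindofs-fuse
```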
5.5 Scripts for repairing mounts
- Check the mount status of every jindo Dataset in a namespace:

```bash
NAMESPACE="atms-model-cache"
datasets=$(kubectl -n "$NAMESPACE" get dataset -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^jindo')

for dataset in $datasets; do
  echo "🔍 Dataset: $dataset"
  master_pods=$(kubectl -n "$NAMESPACE" get pod -l release="$dataset",role=jindofs-master -o name)
  if [[ -z "$master_pods" ]]; then
    echo "❌ No master pod found, skipping $dataset"
    echo "---------------------------------------"
    continue
  fi
  for pod in $master_pods; do
    pod_name=${pod##*/}
    echo "📦 Master Pod: $pod_name"
    echo "   Listing mounted paths..."
    output=$(timeout 3s kubectl -n "$NAMESPACE" exec "$pod_name" -- jindo fs -ls jindo:/// 2>/dev/null)
    if [[ -n "$output" ]]; then
      echo "$output" | sed 's/^/    /'
    else
      echo "⚠️ Not mounted or no output"
    fi
    echo "---------------------------------------"
  done
done
```
- Repair the mount points of all abnormal jindo instances in a namespace; the business application Pods must be restarted afterwards for the fix to take effect.
```bash
NAMESPACE="atms-model-cache"
datasets=$(kubectl -n "$NAMESPACE" get dataset -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^jindo')

for dataset in $datasets; do
  echo "Processing Dataset: $dataset"
  oss_path=$(kubectl -n "$NAMESPACE" get dataset "$dataset" -o jsonpath='{.spec.mounts[0].mountPoint}')
  if [[ -z "$oss_path" ]]; then
    echo "No mountPoint found, skipping $dataset"
    continue
  fi
  echo "OSS path: $oss_path"
  master_pods=$(kubectl -n "$NAMESPACE" get pod -l release="$dataset",role=jindofs-master -o name)
  if [[ -z "$master_pods" ]]; then
    echo "No master pod found, skipping $dataset"
    continue
  fi
  restart_needed=false
  for pod in $master_pods; do
    pod_name=${pod##*/}
    echo "Checking whether master pod $pod_name is mounted..."
    ls_output=$(timeout 2s kubectl -n "$NAMESPACE" exec "$pod_name" -- jindo fs -ls jindo:/// 2>/dev/null)
    if [[ -n "$ls_output" ]]; then
      echo "Already mounted, skipping"
    else
      echo "Mounting: jindocache -mount / $oss_path"
      kubectl -n "$NAMESPACE" exec "$pod_name" -- jindocache -umount /
      kubectl -n "$NAMESPACE" exec "$pod_name" -- jindocache -mount / "$oss_path"
      restart_needed=true
    fi
  done
  if [ "$restart_needed" = true ]; then
    echo "Mount repaired, restarting fuse pods (release=$dataset, role=jindofs-fuse)"
    fuse_pods=$(kubectl -n "$NAMESPACE" get pod -l release="$dataset",role=jindofs-fuse -o name)
    for fpod in $fuse_pods; do
      fpod_name=${fpod##*/}
      echo "Deleting pod: $fpod_name"
      kubectl -n "$NAMESPACE" delete pod "$fpod_name"
    done
  else
    echo "No mount performed, skipping fuse pod restart"
  fi
  echo "Dataset $dataset done"
  echo "---------------------------------------"
done
```
6. Summary
| Test scenario | Big file write | Big file read | Small file write | Small file read |
|---|---|---|---|---|
| Jindo accelerating OSS | 1520.22 MiB/s | 1595.94 MiB/s | 8.9 files/s | 289.1 files/s |
| JuiceFS + OSS on the host | 2348.54 MiB/s | 5988.49 MiB/s | 867.1 files/s | 35705.2 files/s |
| JuiceFS + OSS in a Pod | 754.37 MiB/s | 1808.45 MiB/s | 628.6 files/s | 1129.1 files/s |
| Test scenario | Write speed | First read speed | Second read speed |
|---|---|---|---|
| Jindo accelerating OSS | 421 MB/s | 485 MB/s | 533 MB/s |
| JuiceFS + OSS on the host | 1.7 GB/s | 369 MB/s | 6.3 GB/s |
| JuiceFS + OSS in a Pod | 794 MB/s | 301 MB/s | 7.0 GB/s |
Based on these results, on Alibaba Cloud simply using JindoRuntime to mount OSS into Pods as a PVC is already sufficient for model-inference workloads.
Using Fluid to accelerate object storage directly is a highly recommended way to load models for inference: it avoids deploying a metadata store for JuiceFS and supports two-way synchronization between the PVC and OSS, which greatly simplifies operations.
7. References