Please enable Javascript to view the contents

使用 Fluid 对接 OSS 存储及性能测试

 ·  ☕ 7 分钟

1. Jindo 直接加速 OSS

  • 配置环境变量
1
2
3
4
5
6
7
# Environment for the Jindo-accelerated OSS demo.
# ENDPOINT is the VPC-internal OSS endpoint (no public traffic).
export ENDPOINT=oss-cn-beijing-internal.aliyuncs.com
# BUCKET / AK / SK are intentionally left blank — fill in before running.
export BUCKET=
export AK=
export SK=
export NAMESPACE=default
# PVC doubles as the name of the Secret, Dataset and JindoRuntime below;
# Fluid creates a PVC with this same name for the workload to mount.
export PVC=myoss-jindo
# URI: path prefix inside the bucket to mount.
export URI=datacenter
  • 创建凭证
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Create an Opaque Secret holding the OSS credentials. The Dataset below
# references these two keys via encryptOptions, so the AK/SK never appear
# in the Dataset spec. The here-doc delimiter is unquoted, so ${PVC},
# ${NAMESPACE}, ${AK} and ${SK} are expanded by the shell before apply.
kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: ${PVC}
  namespace: ${NAMESPACE}
type: Opaque
stringData:
  fs.oss.accessKeyId: ${AK}
  fs.oss.accessKeySecret: ${SK}
EOF
  • 创建 Dataset
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Create a Fluid Dataset that mounts oss://${BUCKET}/${URI}/ at path "/".
# The endpoint is a plain option; the credential pair is pulled from the
# Secret of the same name (${PVC}) via encryptOptions.
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: Dataset
metadata:
  name: ${PVC}
  namespace: ${NAMESPACE}
spec:
  mounts:
    - mountPoint: oss://${BUCKET}/${URI}/
      options:
        fs.oss.endpoint: ${ENDPOINT}
      name: default
      path: "/"
      encryptOptions:
        - name: fs.oss.accessKeyId
          valueFrom:
            secretKeyRef:
              name: ${PVC}
              key: fs.oss.accessKeyId
        - name: fs.oss.accessKeySecret
          valueFrom:
            secretKeyRef:
              name: ${PVC}
              key: fs.oss.accessKeySecret
  accessModes:
    - ReadWriteMany
EOF
  • 创建 JindoRuntime
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Create the JindoRuntime (2 cache workers) backing the Dataset above.
# FUSE flags: read-only (-oro), metrics disabled (-ometrics_port=0),
# kernel page cache enabled, attr/entry/negative metadata timeouts of
# 2 hours, pread mode. Tiered store: one SSD level at /cache with quota
# 40960 — presumably MiB (≈40 GiB); TODO confirm unit against Fluid docs.
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: JindoRuntime
metadata:
  name: ${PVC}
  namespace: ${NAMESPACE}
spec:
  replicas: 2
  fuse:
    image: registry.cn-shanghai.aliyuncs.com/jindofs/jindo-fuse
    imageTag: 6.2.0
    args:
      - -oro
      - -ometrics_port=0
      - -okernel_cache
      - -oattr_timeout=7200
      - -oentry_timeout=7200
      - -onegative_timeout=7200
      - -opread
  tieredstore:
    levels:
      - mediumtype: SSD
        path: /cache
        quota: 40960
EOF
  • 创建 Pod 负载
1
# Demo workload image used by the Deployment below.
export IMAGE=shaowenchen/demo:ubuntu
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Deploy a single-replica demo workload that mounts the Fluid-created PVC
# (named after the Dataset, i.e. ${PVC}) at /data for the benchmarks below.
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${PVC}-deploy
  namespace: ${NAMESPACE}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ${PVC}-demo
  template:
    metadata:
      labels:
        app: ${PVC}-demo
    spec:
      containers:
        - name: demo
          image: ${IMAGE}
          volumeMounts:
            - mountPath: /data
              name: data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: ${PVC}
EOF

1.1 juicefs 性能测试

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
juicefs bench --block-size 4096 --big-file-size 1024 --threads 30 ./

BlockSize: 4096 MiB, BigFileSize: 1024 MiB, SmallFileSize: 128 KiB, SmallFileCount: 100, NumThreads: 30
+------------------+---------------+-----------------+
|       ITEM       |     VALUE     |       COST      |
+------------------+---------------+-----------------+
|   Write big file | 1520.22 MiB/s |    20.21 s/file |
|    Read big file | 1595.94 MiB/s |    19.25 s/file |
| Write small file |   8.9 files/s | 3373.29 ms/file |
|  Read small file | 289.1 files/s |  103.79 ms/file |
|        Stat file | 496.8 files/s |   60.39 ms/file |
+------------------+---------------+-----------------+

1.2 DD 性能测试

  • dd 写入,421 MB/s
1
2
3
4
5
6
7
time dd if=/dev/zero of=./dd.txt bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 24.8855 s, 421 MB/s

real	0m25.047s
user	0m0.004s
sys	0m2.857s
  • dd 首次读,485 MB/s
1
2
3
4
5
6
7
time dd if=./dd.txt of=/dev/null bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 21.6259 s, 485 MB/s

real	0m21.683s
user	0m0.000s
sys	0m1.451s
  • dd 第二次读,533 MB/s
1
2
3
4
5
6
7
time dd if=./dd.txt of=/dev/null bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 19.6688 s, 533 MB/s

real	0m19.692s
user	0m0.004s
sys	0m1.284s

1.3 清理资源

1
2
3
4
# Clean up everything created above, in reverse order of creation.
# Fixes vs. the original commands: the workload is a Deployment named
# ${PVC}-deploy (not a bare Pod "myoss-jindo"), and the Secret was created
# with name ${PVC} ("myoss-jindo"), not "myosssecret".
kubectl -n "${NAMESPACE}" delete deployment "${PVC}-deploy"
kubectl -n "${NAMESPACE}" delete jindoruntime "${PVC}"
kubectl -n "${NAMESPACE}" delete dataset "${PVC}"
kubectl -n "${NAMESPACE}" delete secret "${PVC}"

2. JuiceFS 社区版对接 OSS

2.1 配置环境变量

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Redis is the JuiceFS metadata engine; database index 1 is used (the /1
# suffix in the connection URL).
export REDIS_IP=x.x.x.x
export REDIS_PORT=6379
export REDIS_USER=default
export REDIS_PASSWORD=mypassword
export REDIS_DIRECTSERVER=redis://${REDIS_USER}:${REDIS_PASSWORD}@${REDIS_IP}:${REDIS_PORT}/1

# OSS object-storage credentials and bucket for the JuiceFS data blocks.
export ACCESS_KEY=xxx
export SECRET_KEY=xxx
export BUCKET=xxx
export ENDPOINT=oss-cn-beijing-internal.aliyuncs.com
# NOTE(review): "ENPOINT" is a typo for "ENDPOINT"; kept as-is because the
# format command and the Dataset below reference this exact spelling.
export BUCKET_ENPOINT=$BUCKET.$ENDPOINT

2.2 初始化文件系统

  • 安装 JuiceFS
1
# Install the JuiceFS client via the official one-line installer.
curl -sSL https://d.juicefs.com/install | sh -
  • 初始化文件系统
1
2
3
4
5
# Create ("format") a JuiceFS volume named oss-direct: data blocks go to
# the OSS bucket, metadata goes to Redis.
# Fixes vs. the original: a space before the line-continuation backslash
# after the bucket argument (the original had "…}\", which is fragile),
# and quoting — REDIS_DIRECTSERVER embeds a password that may contain
# characters subject to word-splitting or globbing.
juicefs format \
        --storage oss \
        --bucket "${BUCKET_ENPOINT}" \
        "${REDIS_DIRECTSERVER}" \
        oss-direct

3. 主机直接挂载 JuiceFS

3.1 juicefs 性能测试

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
juicefs mount -d --buffer-size 2000 --max-uploads 150 ${REDIS_DIRECTSERVER} ./oss-direct --cache-dir=/data/jfs-oss-direct

+------------------+-------------------+---------------+
|       ITEM       |       VALUE       |      COST     |
+------------------+-------------------+---------------+
|   Write big file |     2348.54 MiB/s |  13.08 s/file |
|    Read big file |     5988.49 MiB/s |   5.13 s/file |
| Write small file |     867.1 files/s | 34.60 ms/file |
|  Read small file |   35705.2 files/s |  0.84 ms/file |
|        Stat file |  103844.2 files/s |  0.29 ms/file |
|   FUSE operation | 534217 operations |    0.91 ms/op |
|      Update meta |   9543 operations |    0.10 ms/op |
|       Put object |  10680 operations |  154.07 ms/op |
|       Get object |   7680 operations |   71.21 ms/op |
|    Delete object |      0 operations |    0.00 ms/op |
| Write into cache |   4314 operations |    1.10 ms/op |
|  Read from cache |   3000 operations |    0.16 ms/op |
+------------------+-------------------+---------------+

3.2 DD 性能测试

  • dd 写入,1.7 GB/s
1
2
3
4
5
6
7
time dd if=/dev/zero of=./dd.txt bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 5.99897 s, 1.7 GB/s

real	0m6.001s
user	0m0.004s
sys	0m3.112s
  • dd 首次读,369 MB/s
1
2
3
4
5
6
7
time dd if=./dd.txt of=/dev/null bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 28.4491 s, 369 MB/s

real	0m29.033s
user	0m0.000s
sys	0m3.808s
  • dd 第二次读,6.3 GB/s
1
2
3
4
5
6
7
time dd if=./dd.txt of=/dev/null bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 1.65887 s, 6.3 GB/s

real	0m1.660s
user	0m0.000s
sys	0m1.659s

4. Pod 挂载 JuiceFS

4.1 创建测试负载

  • 创建密钥
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Secret consumed by the JuiceFS Dataset below via encryptOptions.
# Fix: use ${REDIS_PORT} in metaurl instead of the hard-coded 6379 so the
# port configured in the environment section is honored consistently.
kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: juicefs-direct-secret
type: Opaque
stringData:
  metaurl: redis://${REDIS_USER}:${REDIS_PASSWORD}@${REDIS_IP}:${REDIS_PORT}/1
  access-key: ${ACCESS_KEY}
  secret-key: ${SECRET_KEY}
EOF
  • 创建 Dataset
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Fluid Dataset for the pre-formatted JuiceFS volume. mountPoint
# "juicefs:///" means "mount the whole JuiceFS filesystem"; the mount name
# (oss-direct) must match the volume name given to "juicefs format".
# bucket/storage are plain options; metaurl and the OSS key pair are
# pulled from juicefs-direct-secret via encryptOptions.
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: Dataset
metadata:
  name: juicefs-direct-demo
spec:
  accessModes:
    - ReadWriteMany
  mounts:
    - name: oss-direct
      mountPoint: "juicefs:///"
      options:
        bucket: ${BUCKET_ENPOINT}
        storage: oss
      encryptOptions:
        - name: metaurl
          valueFrom:
            secretKeyRef:
              name: juicefs-direct-secret
              key: metaurl
        - name: access-key
          valueFrom:
            secretKeyRef:
              name: juicefs-direct-secret
              key: access-key
        - name: secret-key
          valueFrom:
            secretKeyRef:
              name: juicefs-direct-secret
              key: secret-key
EOF

注意:这里 mounts 中的 name 需要与 juicefs format 时指定的文件系统名称(oss-direct)保持一致。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# JuiceFSRuntime for the Dataset above: one worker, one SSD cache tier at
# /cache with quota 40960 — presumably MiB (≈40 GiB); TODO confirm unit
# against the Fluid JuiceFSRuntime docs.
kubectl apply -f - <<EOF
apiVersion: data.fluid.io/v1alpha1
kind: JuiceFSRuntime
metadata:
  name: juicefs-direct-demo
spec:
  replicas: 1
  tieredstore:
    levels:
      - mediumtype: SSD
        path: /cache
        quota: 40960
EOF
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Demo Deployment mounting the JuiceFS-backed PVC (named after the
# Dataset) at /data/jfs for the in-Pod benchmarks below.
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: juicefs-direct-demo
spec:
  replicas: 1
  selector:
    matchLabels:
      app: juicefs-direct-demo
  template:
    metadata:
      labels:
        app: juicefs-direct-demo
    spec:
      containers:
        - name: demo
          image: shaowenchen/demo:ubuntu
          volumeMounts:
            - mountPath: /data/jfs
              name: data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: juicefs-direct-demo
EOF

4.2 juicefs 性能测试

进入 Pod 并执行 curl -sSL https://d.juicefs.com/install | sh - 安装 JuiceFS 客户端。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
juicefs bench --block-size 4096 --big-file-size 1024 --threads 30 ./

+------------------+-------------------+---------------+
|       ITEM       |       VALUE       |      COST     |
+------------------+-------------------+---------------+
|   Write big file |      754.37 MiB/s |  40.72 s/file |
|    Read big file |     1808.45 MiB/s |  16.99 s/file |
| Write small file |     628.6 files/s | 47.72 ms/file |
|  Read small file |    1129.1 files/s | 26.57 ms/file |
|        Stat file |  120037.9 files/s |  0.25 ms/file |
|   FUSE operation | 536005 operations |    3.39 ms/op |
|      Update meta |   9547 operations |    0.32 ms/op |
|       Put object |  10680 operations |   80.47 ms/op |
|       Get object |  15152 operations |   50.53 ms/op |
|    Delete object |      0 operations |    0.00 ms/op |
| Write into cache |      0 operations |    0.00 ms/op |
|  Read from cache |      0 operations |    0.00 ms/op |
+------------------+-------------------+---------------+

4.3 DD 性能测试

  • dd 写入,794 MB/s
1
2
3
4
5
6
7
time dd if=/dev/zero of=./dd.txt bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 13.198 s, 794 MB/s

real	0m13.199s
user	0m0.004s
sys	0m2.860s
  • dd 首次读,301 MB/s
1
2
3
4
5
6
7
time dd if=./dd.txt of=/dev/null bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 34.8118 s, 301 MB/s

real	0m35.162s
user	0m0.004s
sys	0m3.222s
  • dd 第二次读,7.0 GB/s
1
2
3
4
5
6
7
time dd if=./dd.txt of=/dev/null bs=4M count=2500

10485760000 bytes (10 GB, 9.8 GiB) copied, 1.48848 s, 7.0 GB/s

real	0m1.490s
user	0m0.000s
sys	0m1.489s

5. 使用过程遇到的问题

5.1 JindoRuntime 挂载 OSS 目录没有数据

现象: 运行一段时间之后,发现之前挂载的 OSS 目录查看不到数据。

解决: 多次删除 jindofs-master-0 实例,待其自动重建之后,即可热恢复。

原因:jindofs-master-0 日志无异常,原因待查。

  • 删除某个命名空间下的 jindofs-master-0 实例
1
2
# Delete every jindofs-master pod in one namespace; the controller
# recreates them. Fix: "xargs -r" (GNU) skips the delete entirely when the
# grep matches nothing, instead of running "kubectl delete pod" with no
# arguments and erroring out.
NS="atms-model-cache"
kubectl -n "$NS" get pod | grep jindofs-master | awk '{print $1}' | xargs -r kubectl -n "$NS" delete pod
  • 删除所有命名空间下的 jindofs-master-0 实例
1
# Same, across all namespaces. awk emits "NAMESPACE POD" pairs; xargs -n2
# hands each pair to sh as $0/$1. Fixes: -r avoids a spurious run on empty
# input, and $0/$1 are quoted inside sh -c against odd characters.
kubectl get pod --all-namespaces | grep jindofs-master | awk '{print $1, $2}' | xargs -r -n2 sh -c 'kubectl -n "$0" delete pod "$1"'

5.2 jindofs-master 一直 Crash

现象: jindofs-master-0 报错

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
4# JfsxMainBase::checkAndSetExitCode(int, bool) at /root/workspace/code/jindocache/jfsx-common/include/JfsxMainBase.hpp:97
 5# JfsxMainBase::runImpl() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:57
 6# JfsxMainBase::run() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:22
 7# main at /root/workspace/code/jindocache/jfsx-nsmain/main_entry.cpp:37
 8# __libc_start_main at ../csu/libc-start.c:342
 9# 0x00000000005D75BE in /smartdata/sbin/jindocache-server


Error: signal 11 Segmentation fault, PID 1, TID 7f10f9f9f3c0
Stacktrace with Glog:
    @     0x7f10f9fc5611  abort
    @           0x8b6b92  JfsxMainBase::checkAndSetExitCode()
    @           0x429831  _ZN12JfsxMainBase7runImplEv.cold
    @           0x8b9710  JfsxMainBase::run()
    @           0x4f572e  main
    @     0x7f10f9fc709b  __libc_start_main
    @           0x5d75be  (unknown)
    @              (nil)  (unknown)


Stacktrace with Boost:
 0# 0x0000000000FD2B83 in /smartdata/sbin/jindocache-server
 1# 0x00007F10F9FDA970 in /lib/x86_64-linux-gnu/libc.so.6
 2# __GI_abort at /build/glibc-6iIyft/glibc-2.28/stdlib/abort.c:107
 3# JfsxMainBase::checkAndSetExitCode(int, bool) at /root/workspace/code/jindocache/jfsx-common/include/JfsxMainBase.hpp:97
 4# JfsxMainBase::runImpl() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:57
 5# JfsxMainBase::run() at /root/workspace/code/jindocache/jfsx-common/src/JfsxMainBase.cpp:22
 6# main at /root/workspace/code/jindocache/jfsx-nsmain/main_entry.cpp:37
 7# __libc_start_main at ../csu/libc-start.c:342
 8# 0x00000000005D75BE in /smartdata/sbin/jindocache-server

解决: 检查 jindoruntime-controller 的日志,发现是有些 Jindo 实例数据异常,导致 Controller 无法正常工作,需要修复异常数据。

原因:jindoruntime-controller 是串行处理 Jindo 实例,某个 Jindo 实例异常会导致后续的实例配置没有得到更新,此时 smartdata 会报异常。

5.3 存储就绪慢

现象: 创建 Dataset 之后就绪慢、更新配置慢等

解决: 增加 jindoruntime-controller 的资源限制。默认 CPU limits 为 100m,可以增加到 2000m 。

原因:jindoruntime-controller 使用 CPU 被限流。

5.4 热修挂载点

进入 jindo-master 查看,查看是否有挂载

1
jindo fs -ls jindo:///

如果没有挂载,可以尝试挂载

1
jindocache -mount / oss://my-bucket/path-dir

退出 jindo-master 节点之后,需要重启一下业务应用使用的 Fuse Pod 才能生效。

5.5 修复挂载的脚本

  • 查看命名空间下的 jindo 挂载点
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Print the Jindo mount listing for every "jindo*" Dataset in a namespace:
# find each Dataset's jindofs-master pod(s) and run "jindo fs -ls jindo:///"
# inside them, with a 3-second timeout per pod.
NAMESPACE="atms-model-cache"

ds_list=$(kubectl -n "$NAMESPACE" get dataset -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^jindo')

for ds in $ds_list; do
  echo "🔍 Dataset: $ds"

  # Master pods carry labels release=<dataset>, role=jindofs-master.
  masters=$(kubectl -n "$NAMESPACE" get pod -l release="$ds",role=jindofs-master -o name)
  if [[ -z "$masters" ]]; then
    echo "❌ 未找到 master pod,跳过 $ds"
    echo "---------------------------------------"
    continue
  fi

  for master in $masters; do
    # "-o name" yields "pod/<name>"; keep only the pod name.
    name=${master##*/}
    echo "📦 Master Pod: $name"
    echo "   正在列出挂载路径..."

    listing=$(timeout 3s kubectl -n "$NAMESPACE" exec "$name" -- jindo fs -ls jindo:/// 2>/dev/null)

    if [[ -n "$listing" ]]; then
      # Indent the listing for readability.
      sed 's/^/   /' <<<"$listing"
    else
      echo "⚠️  未挂载或无返回"
    fi

    echo "---------------------------------------"
  done
done
  • 修复命名空间下异常的全部 jindo 实例的挂载点,需要重启业务应用的 Pod 才能生效。
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Repair missing Jindo mount points for every "jindo*" Dataset in one
# namespace: read the OSS mountPoint from each Dataset's spec, check whether
# the jindofs-master pod(s) still have it mounted, remount if not, and
# finally delete the matching jindofs-fuse pods so the repaired mount takes
# effect (business pods must still be restarted afterwards — see note above
# in the article).
NAMESPACE="atms-model-cache"

# All Dataset names in the namespace, one per line, filtered to jindo*.
datasets=$(kubectl -n "$NAMESPACE" get dataset -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^jindo')

for dataset in $datasets; do
  echo "处理 Dataset: $dataset"

  # Only the first mount entry (spec.mounts[0]) is considered.
  oss_path=$(kubectl -n "$NAMESPACE" get dataset "$dataset" -o jsonpath='{.spec.mounts[0].mountPoint}')
  if [[ -z "$oss_path" ]]; then
    echo "未找到 mountPoint,跳过 $dataset"
    continue
  fi
  echo "OSS 路径: $oss_path"

  # Master pods carry labels release=<dataset>, role=jindofs-master.
  master_pods=$(kubectl -n "$NAMESPACE" get pod -l release="$dataset",role=jindofs-master -o name)
  if [[ -z "$master_pods" ]]; then
    echo "未找到 master pod,跳过 $dataset"
    continue
  fi

  restart_needed=false

  for pod in $master_pods; do
    # "-o name" yields "pod/<name>"; keep only the pod name.
    pod_name=${pod##*/}
    echo "检查 master pod $pod_name 是否已挂载..."
    # Any non-empty listing within 2s is taken to mean "already mounted".
    ls_output=$(timeout 2s kubectl -n "$NAMESPACE" exec "$pod_name" -- jindo fs -ls jindo:/// 2>/dev/null)

    if [[ -n "$ls_output" ]]; then
      echo "已挂载,跳过"
    else
      echo "执行挂载: jindocache -mount / $oss_path"
      # Unmount first so a stale/broken mount does not block the remount.
      kubectl -n "$NAMESPACE" exec "$pod_name" -- jindocache -umount /
      kubectl -n "$NAMESPACE" exec "$pod_name" -- jindocache -mount / "$oss_path"
      restart_needed=true
    fi
  done

  # Only bounce the FUSE pods when a remount actually happened.
  if [ "$restart_needed" = true ]; then
    echo "挂载完成,重启 fuse pod(release=$dataset, role=jindofs-fuse)"
    fuse_pods=$(kubectl -n "$NAMESPACE" get pod -l release="$dataset",role=jindofs-fuse -o name)
    for fpod in $fuse_pods; do
      fpod_name=${fpod##*/}
      echo "删除 pod: $fpod_name"
      kubectl -n "$NAMESPACE" delete pod "$fpod_name"
    done
  else
    echo "未执行挂载操作,跳过 fuse pod 重启"
  fi

  echo "Dataset $dataset 处理完成"
  echo "---------------------------------------"
done

6. 总结

  • juicefs bench 性能测试结果如下(第一行是在 Jindo 挂载目录上运行 juicefs bench 的结果)

| 测试场景 | 写入大文件速度 | 读取大文件速度 | 写入小文件速度 | 读取小文件速度 |
| --- | --- | --- | --- | --- |
| 使用 Jindo 加速 OSS | 1520.22 MiB/s | 1595.94 MiB/s | 8.9 files/s | 289.1 files/s |
| 主机上 JuiceFS + OSS | 2348.54 MiB/s | 5988.49 MiB/s | 867.1 files/s | 35705.2 files/s |
| Pod 上 JuiceFS + OSS | 754.37 MiB/s | 1808.45 MiB/s | 628.6 files/s | 1129.1 files/s |
  • DD 性能测试结果如下

| 测试场景 | 写入速度 | 首次读取速度 | 第二次读取速度 |
| --- | --- | --- | --- |
| 使用 Jindo 加速 OSS | 421 MB/s | 485 MB/s | 533 MB/s |
| 主机上 JuiceFS + OSS | 1.7 GB/s | 369 MB/s | 6.3 GB/s |
| Pod 上 JuiceFS + OSS | 794 MB/s | 301 MB/s | 7.0 GB/s |

基于以上的测试结果,在阿里云上直接使用 JindoRuntime 将 OSS 以 PVC 挂载到 Pod 中使用即可满足模型推理需求。

使用 Fluid 直接加速对象存储的方式,用于推理时模型的加载,是非常推荐的一种方式。不仅免去部署 JuiceFS 的元数据存储服务,还能够实现 PVC 与 OSS 之间的双向同步,在运维上提供了极大便利。

7. 参考


微信公众号
作者
微信公众号