Skip to content

第61天:云服务部署

学习目标

  • 理解云服务概述
  • 掌握主流云平台对比
  • 学习容器化部署
  • 掌握Kubernetes部署
  • 了解无服务器部署

云服务概述

什么是云服务

云服务是通过互联网提供的计算资源、存储和应用程序服务。

核心概念

本地部署 → 云端部署 → 混合部署

云服务类型

  1. IaaS(基础设施即服务):AWS EC2, Google Compute Engine, Azure VM
  2. PaaS(平台即服务):Google App Engine, Azure App Service, Heroku
  3. SaaS(软件即服务):Google Workspace, Microsoft 365, Salesforce
  4. FaaS(函数即服务):AWS Lambda, Google Cloud Functions, Azure Functions

云服务优势

1. 可扩展性

python
class CloudScalability:
    """Demonstrates a simple threshold-based auto-scaling decision.

    Loads above 80% of target trigger a scale-up; loads below 30%
    trigger a scale-down; anything in between leaves capacity alone.
    """

    def __init__(self, cloud_provider: str):
        # Name of the provider this scaler would talk to (informational).
        self.cloud_provider = cloud_provider

    def auto_scale(self, current_load: int,
                   target_load: int) -> int:
        """Return the instance delta: positive adds, negative removes, 0 holds."""
        if current_load > target_load * 0.8:
            return self._scale_up()
        if current_load < target_load * 0.3:
            return self._scale_down()
        return 0

    def _scale_up(self) -> int:
        """Provision additional instances; return how many were added."""
        added = self._calculate_needed_instances()
        self._provision_instances(added)
        return added

    def _scale_down(self) -> int:
        """Terminate surplus instances; return the (negative) delta."""
        removed = self._calculate_excess_instances()
        self._terminate_instances(removed)
        return -removed

    def _calculate_needed_instances(self) -> int:
        # Placeholder: a real implementation would size from metrics.
        return 2

    def _calculate_excess_instances(self) -> int:
        # Placeholder: a real implementation would size from metrics.
        return 1

    def _provision_instances(self, n: int):
        # Placeholder for a provider SDK call.
        print(f"Provisioning {n} instances...")

    def _terminate_instances(self, n: int):
        # Placeholder for a provider SDK call.
        print(f"Terminating {n} instances...")

2. 高可用性

python
class CloudHighAvailability:
    """Demonstrates a multi-region, load-balanced, failover-ready setup."""

    def __init__(self, cloud_provider: str):
        # Name of the provider this helper would talk to (informational).
        self.cloud_provider = cloud_provider

    def setup_multi_region(self, regions: List[str]):
        """Deploy to each region, then wire up load balancing and failover."""
        for target_region in regions:
            self._deploy_to_region(target_region)

        self._setup_load_balancer(regions)
        self._setup_failover()

    def _deploy_to_region(self, region: str):
        # Placeholder for a provider SDK call.
        print(f"Deploying to {region}...")

    def _setup_load_balancer(self, regions: List[str]):
        # Placeholder for a provider SDK call.
        print(f"Setting up load balancer across {regions}...")

    def _setup_failover(self):
        # Placeholder for a provider SDK call.
        print("Setting up automatic failover...")

主流云平台对比

AWS

python
class AWSProvider:
    """Deploys models to AWS: uploads the artifact to S3, then creates a
    SageMaker endpoint.

    NOTE(review): embedding long-lived access keys is discouraged; prefer
    IAM roles or environment-based credentials in production.
    """

    def __init__(self, access_key: str, secret_key: str, 
                 region: str = "us-east-1"):
        self.access_key = access_key
        self.secret_key = secret_key
        self.region = region

    def _client(self, service: str):
        """Build a boto3 client for *service* with the stored credentials.

        Raises ImportError with install instructions when boto3 is missing.
        """
        try:
            import boto3
        except ImportError:
            raise ImportError("Install boto3: pip install boto3")
        return boto3.client(
            service,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region
        )

    def deploy_model(self, model_path: str, 
                     config: Dict) -> str:
        """Upload *model_path* and create an endpoint; return its name."""
        endpoint_name = self._create_endpoint(model_path, config)
        return endpoint_name

    def _create_endpoint(self, model_path: str, 
                          config: Dict) -> str:
        """Upload the model to S3 and create a SageMaker endpoint.

        NOTE(review): create_endpoint requires the endpoint configuration
        named by config["endpoint_config"] to already exist.
        """
        s3_client = self._client('s3')

        bucket_name = config.get("bucket", "my-model-bucket")
        s3_key = f"models/{os.path.basename(model_path)}"

        s3_client.upload_file(model_path, bucket_name, s3_key)

        sm_client = self._client('sagemaker')

        # Timestamp suffix keeps endpoint names unique across deploys.
        endpoint_name = f"model-endpoint-{int(time.time())}"

        sm_client.create_endpoint(
            EndpointName=endpoint_name,
            EndpointConfigName=config.get("endpoint_config", "default")
        )

        return endpoint_name

    def get_endpoint_info(self, endpoint_name: str) -> Dict:
        """Return name / status / ARN / creation time for an endpoint.

        BUG FIX: describe_endpoint responses contain "EndpointArn", not
        "EndpointUrl" — the original raised KeyError on every call.
        """
        sm_client = self._client('sagemaker')

        response = sm_client.describe_endpoint(
            EndpointName=endpoint_name
        )

        return {
            "endpoint_name": response["EndpointName"],
            "endpoint_status": response["EndpointStatus"],
            "endpoint_arn": response["EndpointArn"],
            "creation_time": response["CreationTime"]
        }

Google Cloud

python
class GCPProvider:
    """Deploys models to Google Cloud via Cloud Storage + Vertex AI.

    NOTE(review): `credentials_path` is forwarded as the `credentials=`
    argument to `aiplatform.init`, which expects a google.auth Credentials
    object rather than a file path — confirm against the Vertex AI SDK.
    """

    def __init__(self, project_id: str, 
                 credentials_path: str):
        # GCP project that will own the model and endpoint.
        self.project_id = project_id
        # Path to a service-account JSON key file.
        self.credentials_path = credentials_path
    
    def deploy_model(self, model_path: str, 
                     config: Dict) -> str:
        """Upload the model artifact, then create a serving endpoint.

        Returns the endpoint's display name.
        """
        model_name = self._upload_model(model_path, config)
        endpoint_name = self._create_endpoint(model_name, config)
        
        return endpoint_name
    
    def _upload_model(self, model_path: str, 
                       config: Dict) -> str:
        """Upload the model file to Cloud Storage; return its blob name."""
        try:
            from google.cloud import aiplatform
            from google.cloud import storage
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        
        storage_client = storage.Client.from_service_account_json(
            self.credentials_path
        )
        
        # Falls back to a demo bucket name when none is configured.
        bucket_name = config.get("bucket", "my-model-bucket")
        bucket = storage_client.bucket(bucket_name)
        
        model_name = f"models/{os.path.basename(model_path)}"
        blob = bucket.blob(model_name)
        
        blob.upload_from_filename(model_path)
        
        return model_name
    
    def _create_endpoint(self, model_name: str, 
                          config: Dict) -> str:
        """Create a Vertex AI endpoint; return its display name.

        NOTE(review): *model_name* is accepted but never used — no model is
        deployed to the endpoint here; confirm a deploy step exists elsewhere.
        """
        try:
            from google.cloud import aiplatform
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        
        aiplatform.init(
            project=self.project_id,
            location=config.get("region", "us-central1"),
            # NOTE(review): expects a Credentials object — see class docstring.
            credentials=self.credentials_path
        )
        
        endpoint = aiplatform.Endpoint.create(
            display_name=f"model-endpoint-{int(time.time())}"
        )
        
        # Display names are not guaranteed unique; endpoint.resource_name
        # would be the unambiguous identifier.
        return endpoint.display_name
    
    def get_endpoint_info(self, endpoint_name: str) -> Dict:
        """Look up an endpoint and return a small summary dict.

        NOTE(review): aiplatform.Endpoint() expects an endpoint ID or full
        resource name, not a display name — confirm what callers pass in.
        """
        try:
            from google.cloud import aiplatform
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        
        aiplatform.init(
            project=self.project_id,
            # NOTE(review): same credentials-object concern as above.
            credentials=self.credentials_path
        )
        
        endpoint = aiplatform.Endpoint(endpoint_name)
        
        return {
            "endpoint_name": endpoint.display_name,
            # Hard-coded status: the SDK does not expose a simple flag here.
            "endpoint_status": "SERVING",
            "create_time": endpoint.create_time
        }

Azure

python
class AzureProvider:
    """Deploys models to Azure ML: registers the model, then creates a
    managed online endpoint.

    NOTE(review): MLClient usually also needs a workspace_name; confirm
    the target workspace is supplied via environment/defaults.
    """

    def __init__(self, subscription_id: str, 
                 resource_group: str):
        self.subscription_id = subscription_id
        self.resource_group = resource_group

    def deploy_model(self, model_path: str, 
                     config: Dict) -> str:
        """Register *model_path*, create an endpoint, return its name."""
        model_name = self._register_model(model_path, config)
        endpoint_name = self._create_endpoint(model_name, config)

        return endpoint_name

    def _register_model(self, model_path: str, 
                         config: Dict) -> str:
        """Register the model artifact in the Azure ML registry."""
        try:
            from azure.ai.ml import MLClient
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise ImportError("Install azure-ai-ml: pip install azure-ai-ml")

        credential = DefaultAzureCredential()

        ml_client = MLClient(
            credential=credential,
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group
        )

        # Timestamp suffix keeps registrations unique across runs.
        model_name = f"model-{int(time.time())}"

        ml_client.models.create_or_update(
            name=model_name,
            path=model_path,
            description=config.get("description", "")
        )

        return model_name

    def _create_endpoint(self, model_name: str, 
                          config: Dict) -> str:
        """Create a managed online endpoint and wait for provisioning.

        BUG FIX: begin_create_or_update takes an endpoint *entity*, not
        name=/description= keyword arguments — the original raised a
        TypeError. NOTE(review): *model_name* is accepted but unused; no
        deployment is attached to the endpoint here.
        """
        try:
            from azure.ai.ml import MLClient
            from azure.ai.ml.entities import ManagedOnlineEndpoint
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise ImportError("Install azure-ai-ml: pip install azure-ai-ml")

        credential = DefaultAzureCredential()

        ml_client = MLClient(
            credential=credential,
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group
        )

        endpoint_name = f"endpoint-{int(time.time())}"

        endpoint = ManagedOnlineEndpoint(
            name=endpoint_name,
            description=config.get("description", "")
        )
        # .result() blocks until the long-running operation completes.
        ml_client.online_endpoints.begin_create_or_update(endpoint).result()

        return endpoint_name

    def get_endpoint_info(self, endpoint_name: str) -> Dict:
        """Return name / provisioning state / scoring URI for an endpoint."""
        try:
            from azure.ai.ml import MLClient
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise ImportError("Install azure-ai-ml: pip install azure-ai-ml")

        credential = DefaultAzureCredential()

        ml_client = MLClient(
            credential=credential,
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group
        )

        endpoint = ml_client.online_endpoints.get(endpoint_name)

        return {
            "endpoint_name": endpoint.name,
            "endpoint_status": endpoint.provisioning_state,
            "endpoint_url": endpoint.scoring_uri,
            # NOTE(review): confirm the SDK entity exposes creation_time.
            "create_time": endpoint.creation_time
        }

容器化部署

Docker镜像构建

python
class DockerBuilder:
    """Generates a Dockerfile inside a project and builds an image from it."""

    def __init__(self, project_path: str):
        # Root directory of the project to containerize (build context).
        self.project_path = project_path

    def build_image(self, image_name: str,
                    dockerfile_path: str = "Dockerfile") -> str:
        """Write a Dockerfile into the project, build it, return the tag."""
        content = self._create_dockerfile(dockerfile_path)
        self._write_dockerfile(content, dockerfile_path)
        return self._build_docker_image(image_name, dockerfile_path)

    def _create_dockerfile(self, dockerfile_path: str) -> str:
        """Return the Dockerfile text (static template; arg kept for parity)."""
        return """
FROM python:3.10-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8000

CMD ["python", "server.py"]
"""

    def _write_dockerfile(self, content: str,
                           dockerfile_path: str):
        """Persist the Dockerfile under the project root."""
        target = os.path.join(self.project_path, dockerfile_path)
        with open(target, 'w') as handle:
            handle.write(content)

    def _build_docker_image(self, image_name: str,
                             dockerfile_path: str) -> str:
        """Run `docker build`; raise RuntimeError on a non-zero exit."""
        import subprocess

        build_cmd = [
            "docker", "build", "-t", image_name,
            "-f", dockerfile_path, "."
        ]
        completed = subprocess.run(
            build_cmd,
            cwd=self.project_path,
            capture_output=True,
            text=True
        )
        if completed.returncode != 0:
            raise RuntimeError(f"Docker build failed: {completed.stderr}")
        return image_name

Docker Compose部署

python
class DockerComposeDeployer:
    """Generates a docker-compose file for a GPU model server and starts it."""

    def __init__(self, project_path: str):
        # Root directory where the compose file is written and run from.
        self.project_path = project_path

    def deploy(self, compose_file: str = "docker-compose.yml"):
        """Write the compose file into the project and bring the stack up."""
        compose_config = self._create_compose_config()

        self._write_compose_file(compose_config, compose_file)

        self._deploy_compose(compose_file)

    def _create_compose_config(self) -> str:
        """Return the compose YAML (static template, one GPU reserved)."""
        return """
version: '3.8'

services:
  model-server:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
    environment:
      - MODEL_PATH=models/model.pt
      - DEVICE=cuda
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
"""

    def _write_compose_file(self, content: str,
                             compose_file: str):
        """Persist the compose YAML under the project root."""
        with open(os.path.join(self.project_path, compose_file), 'w') as f:
            f.write(content)

    def _deploy_compose(self, compose_file: str):
        """Start the stack detached.

        BUG FIX: the original ignored the subprocess exit status, so a
        failed `docker-compose up` went unnoticed; now raises RuntimeError
        (consistent with DockerBuilder's build error handling).
        """
        import subprocess

        result = subprocess.run(
            ["docker-compose", "-f", compose_file, "up", "-d"],
            cwd=self.project_path,
            capture_output=True,
            text=True
        )
        if result.returncode != 0:
            raise RuntimeError(f"docker-compose up failed: {result.stderr}")

Kubernetes部署

K8s配置

python
class KubernetesDeployer:
    """Deploys a model-serving workload (Deployment + Service) to Kubernetes."""

    def __init__(self, namespace: str = "default"):
        # Target namespace for all created resources.
        self.namespace = namespace

    def deploy(self, config: Dict):
        """Create the namespace (if needed), then the Deployment and Service."""
        self._create_namespace()
        self._deploy_deployment(config)
        self._deploy_service(config)

    def _create_namespace(self):
        """Create the target namespace, ignoring 'already exists' (HTTP 409)."""
        # The kubernetes config module is aliased so it can never collide
        # with the `config` dict parameter used elsewhere in this class.
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        v1 = client.CoreV1Api()

        namespace = client.V1Namespace(
            metadata=client.V1ObjectMeta(name=self.namespace)
        )

        try:
            v1.create_namespace(namespace)
        except client.exceptions.ApiException as e:
            if e.status != 409:  # 409 Conflict: namespace already exists
                raise

    def _deploy_deployment(self, config: Dict):
        """Create a Deployment from the settings in *config*.

        BUG FIX: the original `from kubernetes import client, config`
        shadowed the *config* dict parameter with the kubernetes.config
        module, so every later `config.get(...)` raised AttributeError.
        The module is now imported under an alias.
        """
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        apps_v1 = client.AppsV1Api()

        app_name = config.get("name", "model-server")

        deployment = client.V1Deployment(
            metadata=client.V1ObjectMeta(name=app_name),
            spec=client.V1DeploymentSpec(
                replicas=config.get("replicas", 1),
                selector=client.V1LabelSelector(
                    match_labels={"app": app_name}
                ),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(
                        labels={"app": app_name}
                    ),
                    spec=client.V1PodSpec(
                        containers=[
                            client.V1Container(
                                name="model-server",
                                image=config.get("image", "model-server:latest"),
                                ports=[
                                    client.V1ContainerPort(
                                        container_port=config.get("port", 8000)
                                    )
                                ],
                                resources=client.V1ResourceRequirements(
                                    requests={
                                        "cpu": config.get("cpu_request", "500m"),
                                        "memory": config.get("memory_request", "1Gi")
                                    },
                                    limits={
                                        "cpu": config.get("cpu_limit", "2"),
                                        "memory": config.get("memory_limit", "4Gi")
                                    }
                                )
                            )
                        ]
                    )
                )
            )
        )

        apps_v1.create_namespaced_deployment(
            namespace=self.namespace,
            body=deployment
        )

    def _deploy_service(self, config: Dict):
        """Expose the Deployment through a Service.

        Same shadowing BUG FIX as _deploy_deployment: the kubernetes
        config module is aliased away from the *config* dict parameter.
        """
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        v1 = client.CoreV1Api()

        app_name = config.get("name", "model-server")
        port = config.get("port", 8000)

        service = client.V1Service(
            metadata=client.V1ObjectMeta(name=app_name),
            spec=client.V1ServiceSpec(
                selector={"app": app_name},
                ports=[
                    client.V1ServicePort(
                        port=port,
                        target_port=port
                    )
                ],
                type=config.get("service_type", "LoadBalancer")
            )
        )

        v1.create_namespaced_service(
            namespace=self.namespace,
            body=service
        )

K8s自动缩放

python
class KubernetesAutoScaler:
    """Configures a HorizontalPodAutoscaler for an existing Deployment."""

    def __init__(self, namespace: str = "default"):
        # Namespace containing the target Deployment.
        self.namespace = namespace

    def setup_hpa(self, deployment_name: str, config: Dict):
        """Create a v2 HPA scaling on average CPU utilization.

        BUG FIX: the original `from kubernetes import client, config`
        shadowed the *config* dict parameter with the kubernetes.config
        module, so `config.get("min_replicas", ...)` etc. raised
        AttributeError. The module is now imported under an alias.
        """
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        autoscaling_v2 = client.AutoscalingV2Api()

        hpa = client.V2HorizontalPodAutoscaler(
            metadata=client.V1ObjectMeta(
                name=f"{deployment_name}-hpa"
            ),
            spec=client.V2HorizontalPodAutoscalerSpec(
                scale_target_ref=client.V2CrossVersionObjectReference(
                    api_version="apps/v1",
                    kind="Deployment",
                    name=deployment_name
                ),
                min_replicas=config.get("min_replicas", 1),
                max_replicas=config.get("max_replicas", 10),
                metrics=[
                    client.V2MetricSpec(
                        type="Resource",
                        resource=client.V2ResourceMetricSource(
                            name="cpu",
                            target=client.V2MetricTarget(
                                type="Utilization",
                                # Default: scale to hold ~70% average CPU.
                                average_utilization=config.get("cpu_target", 70)
                            )
                        )
                    )
                ]
            )
        )

        autoscaling_v2.create_namespaced_horizontal_pod_autoscaler(
            namespace=self.namespace,
            body=hpa
        )

无服务器部署

AWS Lambda部署

python
class AWSLambdaDeployer:
    """Creates and invokes AWS Lambda functions for model inference."""

    def __init__(self, access_key: str, secret_key: str, 
                 region: str = "us-east-1"):
        self.access_key = access_key
        self.secret_key = secret_key
        self.region = region

    def _lambda_client(self):
        """Return a boto3 Lambda client built from the stored credentials.

        Raises ImportError with install instructions when boto3 is missing.
        """
        try:
            import boto3
        except ImportError:
            raise ImportError("Install boto3: pip install boto3")
        return boto3.client(
            'lambda',
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region
        )

    def deploy_function(self, function_code: str, 
                         config: Dict) -> str:
        """Create a Lambda function from *function_code*; return its name."""
        function_name = self._create_function(function_code, config)

        return function_name

    def _create_function(self, function_code: str, 
                          config: Dict) -> str:
        """Create the function via the Lambda API.

        BUG FIX: Role is a mandatory field; the original passed
        config.get("role_arn") straight through, so a missing role
        surfaced as an opaque API error instead of a clear ValueError.
        """
        role_arn = config.get("role_arn")
        if not role_arn:
            raise ValueError(
                "config['role_arn'] is required to create a Lambda function"
            )

        lambda_client = self._lambda_client()

        function_name = config.get("name", "model-inference")

        lambda_client.create_function(
            FunctionName=function_name,
            Runtime=config.get("runtime", "python3.10"),
            Role=role_arn,
            Handler=config.get("handler", "lambda_function.lambda_handler"),
            # NOTE(review): ZipFile expects zip-archive bytes; encoding raw
            # source text will be rejected — confirm callers pass a zip.
            Code={
                'ZipFile': function_code.encode()
            },
            Timeout=config.get("timeout", 30),
            MemorySize=config.get("memory", 512)
        )

        return function_name

    def invoke_function(self, function_name: str, 
                         payload: Dict) -> Dict:
        """Synchronously invoke the function and return its decoded result."""
        lambda_client = self._lambda_client()

        response = lambda_client.invoke(
            FunctionName=function_name,
            InvocationType='RequestResponse',
            Payload=json.dumps(payload)
        )

        return json.loads(response['Payload'].read())

Google Cloud Functions部署

python
class GCPFunctionsDeployer:
    """Deploys Google Cloud Functions (2nd gen) from an uploaded archive.

    NOTE(review): `credentials_path` is passed directly as `credentials=`
    to FunctionsServiceClient, which expects a google.auth Credentials
    object rather than a file path — confirm against the SDK.
    """

    def __init__(self, project_id: str, 
                 credentials_path: str):
        # GCP project that will own the function.
        self.project_id = project_id
        # Path to a service-account JSON key file.
        self.credentials_path = credentials_path
    
    def deploy_function(self, function_code: str, 
                         config: Dict) -> str:
        """Upload and create the function; return its short name."""
        function_name = self._create_function(function_code, config)
        
        return function_name
    
    def _create_function(self, function_code: str, 
                          config: Dict) -> str:
        """Upload the source archive and create the Cloud Function.

        NOTE(review): `upload_from_filename(function_code)` treats
        *function_code* as a local file path (presumably a zip), despite
        the name suggesting source text — confirm what callers pass.
        The created Function also references no source archive; verify the
        upload is actually wired to the function spec.
        """
        try:
            from google.cloud import functions_v2
            from google.cloud import storage
        except ImportError:
            raise ImportError("Install google-cloud-functions: pip install google-cloud-functions")
        
        storage_client = storage.Client.from_service_account_json(
            self.credentials_path
        )
        
        # Falls back to a demo bucket name when none is configured.
        bucket_name = config.get("bucket", "my-functions-bucket")
        bucket = storage_client.bucket(bucket_name)
        
        function_name = config.get("name", "model-inference")
        blob_name = f"functions/{function_name}.zip"
        blob = bucket.blob(blob_name)
        
        blob.upload_from_filename(function_code)
        
        functions_client = functions_v2.FunctionsServiceClient(
            # NOTE(review): expects a Credentials object — see class docstring.
            credentials=self.credentials_path
        )
        
        # Fully-qualified resource name: projects/{p}/locations/{l}/functions/{f}.
        function = functions_v2.Function()
        function.name = f"projects/{self.project_id}/locations/{config.get('region', 'us-central1')}/functions/{function_name}"
        
        functions_client.create_function(
            parent=f"projects/{self.project_id}/locations/{config.get('region', 'us-central1')}",
            function=function
        )
        
        return function_name

实践练习

练习1:构建Docker镜像

python
def build_docker_image(project_path: str, image_name: str):
    """Build a Docker image from *project_path*, tagged *image_name*.

    BUG FIX: the original ignored the subprocess exit status, so a failed
    build returned the tag as if it had succeeded; now raises RuntimeError
    on a non-zero exit (consistent with DockerBuilder).

    Returns the image tag on success.
    """
    import subprocess

    result = subprocess.run(
        ["docker", "build", "-t", image_name, "."],
        cwd=project_path,
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        raise RuntimeError(f"Docker build failed: {result.stderr}")

    return image_name

练习2:部署到Kubernetes

python
def deploy_to_kubernetes(deployment_name: str, image: str):
    """Create a single-replica Deployment running *image* in the
    'default' namespace, exposing container port 8000."""
    from kubernetes import client, config

    config.load_kube_config()

    labels = {"app": deployment_name}

    # Build the object graph bottom-up: container -> pod template -> spec.
    container = client.V1Container(
        name="model-server",
        image=image,
        ports=[client.V1ContainerPort(container_port=8000)]
    )
    pod_template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels),
        spec=client.V1PodSpec(containers=[container])
    )
    deployment_spec = client.V1DeploymentSpec(
        replicas=1,
        selector=client.V1LabelSelector(match_labels=labels),
        template=pod_template
    )
    deployment = client.V1Deployment(
        metadata=client.V1ObjectMeta(name=deployment_name),
        spec=deployment_spec
    )

    client.AppsV1Api().create_namespaced_deployment(
        namespace="default",
        body=deployment
    )

总结

本节我们学习了云服务部署:

  1. 云服务概述和优势
  2. 主流云平台对比(AWS、GCP、Azure)
  3. 容器化部署(Docker、Docker Compose)
  4. Kubernetes部署
  5. 无服务器部署(AWS Lambda、GCP Functions)

云服务部署是AI应用规模化的重要途径。

参考资源