Skip to content

第61天:云服务部署

学习目标

  • 理解云服务概述
  • 掌握主流云平台对比
  • 学习容器化部署
  • 掌握Kubernetes部署
  • 了解无服务器部署

云服务概述

什么是云服务

云服务是通过互联网提供的计算资源、存储和应用程序服务。

核心概念

本地部署 → 云端部署 → 混合部署

云服务类型

  1. IaaS(基础设施即服务):AWS EC2, Google Compute Engine, Azure VM
  2. PaaS(平台即服务):Google App Engine, Azure App Service, Heroku
  3. SaaS(软件即服务):Google Workspace, Microsoft 365, Salesforce
  4. FaaS(函数即服务):AWS Lambda, Google Cloud Functions, Azure Functions

云服务优势

1. 可扩展性

python
class CloudScalability:
    """Demonstrates a simple threshold-based auto-scaling decision.

    Loads above 80% of target trigger a scale-up; loads below 30%
    trigger a scale-down; anything in between leaves capacity alone.
    """

    def __init__(self, cloud_provider: str):
        # Name of the provider this scaler would talk to (informational).
        self.cloud_provider = cloud_provider

    def auto_scale(self, current_load: int,
                   target_load: int) -> int:
        """Return the instance delta: positive adds, negative removes, 0 holds."""
        if current_load > target_load * 0.8:
            return self._scale_up()
        if current_load < target_load * 0.3:
            return self._scale_down()
        return 0

    def _scale_up(self) -> int:
        """Provision additional instances; return how many were added."""
        added = self._calculate_needed_instances()
        self._provision_instances(added)
        return added

    def _scale_down(self) -> int:
        """Terminate surplus instances; return the (negative) delta."""
        removed = self._calculate_excess_instances()
        self._terminate_instances(removed)
        return -removed

    def _calculate_needed_instances(self) -> int:
        # Placeholder: a real implementation would size from metrics.
        return 2

    def _calculate_excess_instances(self) -> int:
        # Placeholder: a real implementation would size from metrics.
        return 1

    def _provision_instances(self, n: int):
        # Placeholder for a provider SDK call.
        print(f"Provisioning {n} instances...")

    def _terminate_instances(self, n: int):
        # Placeholder for a provider SDK call.
        print(f"Terminating {n} instances...")

2. 高可用性

python
class CloudHighAvailability:
    """Demonstrates a multi-region, load-balanced, failover-ready setup."""

    def __init__(self, cloud_provider: str):
        # Name of the provider this helper would talk to (informational).
        self.cloud_provider = cloud_provider

    def setup_multi_region(self, regions: List[str]):
        """Deploy to each region, then wire up load balancing and failover."""
        for target_region in regions:
            self._deploy_to_region(target_region)

        self._setup_load_balancer(regions)
        self._setup_failover()

    def _deploy_to_region(self, region: str):
        # Placeholder for a provider SDK call.
        print(f"Deploying to {region}...")

    def _setup_load_balancer(self, regions: List[str]):
        # Placeholder for a provider SDK call.
        print(f"Setting up load balancer across {regions}...")

    def _setup_failover(self):
        # Placeholder for a provider SDK call.
        print("Setting up automatic failover...")

主流云平台对比

AWS

python
class AWSProvider:
    """Deploys models to AWS: uploads the artifact to S3, then creates a
    SageMaker endpoint.

    NOTE(review): embedding long-lived access keys is discouraged; prefer
    IAM roles or environment-based credentials in production.
    """

    def __init__(self, access_key: str, secret_key: str, 
                 region: str = "us-east-1"):
        self.access_key = access_key
        self.secret_key = secret_key
        self.region = region

    def _client(self, service: str):
        """Build a boto3 client for *service* with the stored credentials.

        Raises ImportError with install instructions when boto3 is missing.
        """
        try:
            import boto3
        except ImportError:
            raise ImportError("Install boto3: pip install boto3")
        return boto3.client(
            service,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region
        )

    def deploy_model(self, model_path: str, 
                     config: Dict) -> str:
        """Upload *model_path* and create an endpoint; return its name."""
        endpoint_name = self._create_endpoint(model_path, config)
        return endpoint_name

    def _create_endpoint(self, model_path: str, 
                          config: Dict) -> str:
        """Upload the model to S3 and create a SageMaker endpoint.

        NOTE(review): create_endpoint requires the endpoint configuration
        named by config["endpoint_config"] to already exist.
        """
        s3_client = self._client('s3')

        bucket_name = config.get("bucket", "my-model-bucket")
        s3_key = f"models/{os.path.basename(model_path)}"

        s3_client.upload_file(model_path, bucket_name, s3_key)

        sm_client = self._client('sagemaker')

        # Timestamp suffix keeps endpoint names unique across deploys.
        endpoint_name = f"model-endpoint-{int(time.time())}"

        sm_client.create_endpoint(
            EndpointName=endpoint_name,
            EndpointConfigName=config.get("endpoint_config", "default")
        )

        return endpoint_name

    def get_endpoint_info(self, endpoint_name: str) -> Dict:
        """Return name / status / ARN / creation time for an endpoint.

        BUG FIX: describe_endpoint responses contain "EndpointArn", not
        "EndpointUrl" — the original raised KeyError on every call.
        """
        sm_client = self._client('sagemaker')

        response = sm_client.describe_endpoint(
            EndpointName=endpoint_name
        )

        return {
            "endpoint_name": response["EndpointName"],
            "endpoint_status": response["EndpointStatus"],
            "endpoint_arn": response["EndpointArn"],
            "creation_time": response["CreationTime"]
        }

Google Cloud

python
class GCPProvider:
    """Deploys models to Google Cloud via Cloud Storage + Vertex AI.

    NOTE(review): `credentials_path` is forwarded as the `credentials=`
    argument to `aiplatform.init`, which expects a google.auth Credentials
    object rather than a file path — confirm against the Vertex AI SDK.
    """

    def __init__(self, project_id: str, 
                 credentials_path: str):
        # GCP project that will own the model and endpoint.
        self.project_id = project_id
        # Path to a service-account JSON key file.
        self.credentials_path = credentials_path
    
    def deploy_model(self, model_path: str, 
                     config: Dict) -> str:
        """Upload the model artifact, then create a serving endpoint.

        Returns the endpoint's display name.
        """
        model_name = self._upload_model(model_path, config)
        endpoint_name = self._create_endpoint(model_name, config)
        
        return endpoint_name
    
    def _upload_model(self, model_path: str, 
                       config: Dict) -> str:
        """Upload the model file to Cloud Storage; return its blob name."""
        try:
            from google.cloud import aiplatform
            from google.cloud import storage
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        
        storage_client = storage.Client.from_service_account_json(
            self.credentials_path
        )
        
        # Falls back to a demo bucket name when none is configured.
        bucket_name = config.get("bucket", "my-model-bucket")
        bucket = storage_client.bucket(bucket_name)
        
        model_name = f"models/{os.path.basename(model_path)}"
        blob = bucket.blob(model_name)
        
        blob.upload_from_filename(model_path)
        
        return model_name
    
    def _create_endpoint(self, model_name: str, 
                          config: Dict) -> str:
        """Create a Vertex AI endpoint; return its display name.

        NOTE(review): *model_name* is accepted but never used — no model is
        deployed to the endpoint here; confirm a deploy step exists elsewhere.
        """
        try:
            from google.cloud import aiplatform
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        
        aiplatform.init(
            project=self.project_id,
            location=config.get("region", "us-central1"),
            # NOTE(review): expects a Credentials object — see class docstring.
            credentials=self.credentials_path
        )
        
        endpoint = aiplatform.Endpoint.create(
            display_name=f"model-endpoint-{int(time.time())}"
        )
        
        # Display names are not guaranteed unique; endpoint.resource_name
        # would be the unambiguous identifier.
        return endpoint.display_name
    
    def get_endpoint_info(self, endpoint_name: str) -> Dict:
        """Look up an endpoint and return a small summary dict.

        NOTE(review): aiplatform.Endpoint() expects an endpoint ID or full
        resource name, not a display name — confirm what callers pass in.
        """
        try:
            from google.cloud import aiplatform
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        
        aiplatform.init(
            project=self.project_id,
            # NOTE(review): same credentials-object concern as above.
            credentials=self.credentials_path
        )
        
        endpoint = aiplatform.Endpoint(endpoint_name)
        
        return {
            "endpoint_name": endpoint.display_name,
            # Hard-coded status: the SDK does not expose a simple flag here.
            "endpoint_status": "SERVING",
            "create_time": endpoint.create_time
        }

Azure

python
class AzureProvider:
    """Deploys models to Azure ML: registers the model, then creates a
    managed online endpoint.

    NOTE(review): MLClient usually also needs a workspace_name; confirm
    the target workspace is supplied via environment/defaults.
    """

    def __init__(self, subscription_id: str, 
                 resource_group: str):
        self.subscription_id = subscription_id
        self.resource_group = resource_group

    def deploy_model(self, model_path: str, 
                     config: Dict) -> str:
        """Register *model_path*, create an endpoint, return its name."""
        model_name = self._register_model(model_path, config)
        endpoint_name = self._create_endpoint(model_name, config)

        return endpoint_name

    def _register_model(self, model_path: str, 
                         config: Dict) -> str:
        """Register the model artifact in the Azure ML registry."""
        try:
            from azure.ai.ml import MLClient
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise ImportError("Install azure-ai-ml: pip install azure-ai-ml")

        credential = DefaultAzureCredential()

        ml_client = MLClient(
            credential=credential,
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group
        )

        # Timestamp suffix keeps registrations unique across runs.
        model_name = f"model-{int(time.time())}"

        ml_client.models.create_or_update(
            name=model_name,
            path=model_path,
            description=config.get("description", "")
        )

        return model_name

    def _create_endpoint(self, model_name: str, 
                          config: Dict) -> str:
        """Create a managed online endpoint and wait for provisioning.

        BUG FIX: begin_create_or_update takes an endpoint *entity*, not
        name=/description= keyword arguments — the original raised a
        TypeError. NOTE(review): *model_name* is accepted but unused; no
        deployment is attached to the endpoint here.
        """
        try:
            from azure.ai.ml import MLClient
            from azure.ai.ml.entities import ManagedOnlineEndpoint
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise ImportError("Install azure-ai-ml: pip install azure-ai-ml")

        credential = DefaultAzureCredential()

        ml_client = MLClient(
            credential=credential,
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group
        )

        endpoint_name = f"endpoint-{int(time.time())}"

        endpoint = ManagedOnlineEndpoint(
            name=endpoint_name,
            description=config.get("description", "")
        )
        # .result() blocks until the long-running operation completes.
        ml_client.online_endpoints.begin_create_or_update(endpoint).result()

        return endpoint_name

    def get_endpoint_info(self, endpoint_name: str) -> Dict:
        """Return name / provisioning state / scoring URI for an endpoint."""
        try:
            from azure.ai.ml import MLClient
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise ImportError("Install azure-ai-ml: pip install azure-ai-ml")

        credential = DefaultAzureCredential()

        ml_client = MLClient(
            credential=credential,
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group
        )

        endpoint = ml_client.online_endpoints.get(endpoint_name)

        return {
            "endpoint_name": endpoint.name,
            "endpoint_status": endpoint.provisioning_state,
            "endpoint_url": endpoint.scoring_uri,
            # NOTE(review): confirm the SDK entity exposes creation_time.
            "create_time": endpoint.creation_time
        }

容器化部署

Docker镜像构建

python
class DockerBuilder:
    """Generates a Dockerfile inside a project and builds an image from it."""

    def __init__(self, project_path: str):
        # Root directory of the project to containerize (build context).
        self.project_path = project_path

    def build_image(self, image_name: str,
                    dockerfile_path: str = "Dockerfile") -> str:
        """Write a Dockerfile into the project, build it, return the tag."""
        content = self._create_dockerfile(dockerfile_path)
        self._write_dockerfile(content, dockerfile_path)
        return self._build_docker_image(image_name, dockerfile_path)

    def _create_dockerfile(self, dockerfile_path: str) -> str:
        """Return the Dockerfile text (static template; arg kept for parity)."""
        return """
FROM python:3.10-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8000

CMD ["python", "server.py"]
"""

    def _write_dockerfile(self, content: str,
                           dockerfile_path: str):
        """Persist the Dockerfile under the project root."""
        target = os.path.join(self.project_path, dockerfile_path)
        with open(target, 'w') as handle:
            handle.write(content)

    def _build_docker_image(self, image_name: str,
                             dockerfile_path: str) -> str:
        """Run `docker build`; raise RuntimeError on a non-zero exit."""
        import subprocess

        build_cmd = [
            "docker", "build", "-t", image_name,
            "-f", dockerfile_path, "."
        ]
        completed = subprocess.run(
            build_cmd,
            cwd=self.project_path,
            capture_output=True,
            text=True
        )
        if completed.returncode != 0:
            raise RuntimeError(f"Docker build failed: {completed.stderr}")
        return image_name

Docker Compose部署

python
class DockerComposeDeployer:
    """Generates a docker-compose file for a GPU model server and starts it."""

    def __init__(self, project_path: str):
        # Root directory where the compose file is written and run from.
        self.project_path = project_path

    def deploy(self, compose_file: str = "docker-compose.yml"):
        """Write the compose file into the project and bring the stack up."""
        compose_config = self._create_compose_config()

        self._write_compose_file(compose_config, compose_file)

        self._deploy_compose(compose_file)

    def _create_compose_config(self) -> str:
        """Return the compose YAML (static template, one GPU reserved)."""
        return """
version: '3.8'

services:
  model-server:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
    environment:
      - MODEL_PATH=models/model.pt
      - DEVICE=cuda
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
"""

    def _write_compose_file(self, content: str,
                             compose_file: str):
        """Persist the compose YAML under the project root."""
        with open(os.path.join(self.project_path, compose_file), 'w') as f:
            f.write(content)

    def _deploy_compose(self, compose_file: str):
        """Start the stack detached.

        BUG FIX: the original ignored the subprocess exit status, so a
        failed `docker-compose up` went unnoticed; now raises RuntimeError
        (consistent with DockerBuilder's build error handling).
        """
        import subprocess

        result = subprocess.run(
            ["docker-compose", "-f", compose_file, "up", "-d"],
            cwd=self.project_path,
            capture_output=True,
            text=True
        )
        if result.returncode != 0:
            raise RuntimeError(f"docker-compose up failed: {result.stderr}")

Kubernetes部署

K8s配置

python
class KubernetesDeployer:
    """Deploys a model-serving workload (Deployment + Service) to Kubernetes."""

    def __init__(self, namespace: str = "default"):
        # Target namespace for all created resources.
        self.namespace = namespace

    def deploy(self, config: Dict):
        """Create the namespace (if needed), then the Deployment and Service."""
        self._create_namespace()
        self._deploy_deployment(config)
        self._deploy_service(config)

    def _create_namespace(self):
        """Create the target namespace, ignoring 'already exists' (HTTP 409)."""
        # The kubernetes config module is aliased so it can never collide
        # with the `config` dict parameter used elsewhere in this class.
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        v1 = client.CoreV1Api()

        namespace = client.V1Namespace(
            metadata=client.V1ObjectMeta(name=self.namespace)
        )

        try:
            v1.create_namespace(namespace)
        except client.exceptions.ApiException as e:
            if e.status != 409:  # 409 Conflict: namespace already exists
                raise

    def _deploy_deployment(self, config: Dict):
        """Create a Deployment from the settings in *config*.

        BUG FIX: the original `from kubernetes import client, config`
        shadowed the *config* dict parameter with the kubernetes.config
        module, so every later `config.get(...)` raised AttributeError.
        The module is now imported under an alias.
        """
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        apps_v1 = client.AppsV1Api()

        app_name = config.get("name", "model-server")

        deployment = client.V1Deployment(
            metadata=client.V1ObjectMeta(name=app_name),
            spec=client.V1DeploymentSpec(
                replicas=config.get("replicas", 1),
                selector=client.V1LabelSelector(
                    match_labels={"app": app_name}
                ),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(
                        labels={"app": app_name}
                    ),
                    spec=client.V1PodSpec(
                        containers=[
                            client.V1Container(
                                name="model-server",
                                image=config.get("image", "model-server:latest"),
                                ports=[
                                    client.V1ContainerPort(
                                        container_port=config.get("port", 8000)
                                    )
                                ],
                                resources=client.V1ResourceRequirements(
                                    requests={
                                        "cpu": config.get("cpu_request", "500m"),
                                        "memory": config.get("memory_request", "1Gi")
                                    },
                                    limits={
                                        "cpu": config.get("cpu_limit", "2"),
                                        "memory": config.get("memory_limit", "4Gi")
                                    }
                                )
                            )
                        ]
                    )
                )
            )
        )

        apps_v1.create_namespaced_deployment(
            namespace=self.namespace,
            body=deployment
        )

    def _deploy_service(self, config: Dict):
        """Expose the Deployment through a Service.

        Same shadowing BUG FIX as _deploy_deployment: the kubernetes
        config module is aliased away from the *config* dict parameter.
        """
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        v1 = client.CoreV1Api()

        app_name = config.get("name", "model-server")
        port = config.get("port", 8000)

        service = client.V1Service(
            metadata=client.V1ObjectMeta(name=app_name),
            spec=client.V1ServiceSpec(
                selector={"app": app_name},
                ports=[
                    client.V1ServicePort(
                        port=port,
                        target_port=port
                    )
                ],
                type=config.get("service_type", "LoadBalancer")
            )
        )

        v1.create_namespaced_service(
            namespace=self.namespace,
            body=service
        )

K8s自动缩放

python
class KubernetesAutoScaler:
    """Configures a HorizontalPodAutoscaler for an existing Deployment."""

    def __init__(self, namespace: str = "default"):
        # Namespace containing the target Deployment.
        self.namespace = namespace

    def setup_hpa(self, deployment_name: str, config: Dict):
        """Create a v2 HPA scaling on average CPU utilization.

        BUG FIX: the original `from kubernetes import client, config`
        shadowed the *config* dict parameter with the kubernetes.config
        module, so `config.get("min_replicas", ...)` etc. raised
        AttributeError. The module is now imported under an alias.
        """
        from kubernetes import client, config as kube_config

        kube_config.load_kube_config()

        autoscaling_v2 = client.AutoscalingV2Api()

        hpa = client.V2HorizontalPodAutoscaler(
            metadata=client.V1ObjectMeta(
                name=f"{deployment_name}-hpa"
            ),
            spec=client.V2HorizontalPodAutoscalerSpec(
                scale_target_ref=client.V2CrossVersionObjectReference(
                    api_version="apps/v1",
                    kind="Deployment",
                    name=deployment_name
                ),
                min_replicas=config.get("min_replicas", 1),
                max_replicas=config.get("max_replicas", 10),
                metrics=[
                    client.V2MetricSpec(
                        type="Resource",
                        resource=client.V2ResourceMetricSource(
                            name="cpu",
                            target=client.V2MetricTarget(
                                type="Utilization",
                                # Default: scale to hold ~70% average CPU.
                                average_utilization=config.get("cpu_target", 70)
                            )
                        )
                    )
                ]
            )
        )

        autoscaling_v2.create_namespaced_horizontal_pod_autoscaler(
            namespace=self.namespace,
            body=hpa
        )

无服务器部署

AWS Lambda部署

python
class AWSLambdaDeployer:
    """Creates and invokes AWS Lambda functions for model inference."""

    def __init__(self, access_key: str, secret_key: str, 
                 region: str = "us-east-1"):
        self.access_key = access_key
        self.secret_key = secret_key
        self.region = region

    def _lambda_client(self):
        """Return a boto3 Lambda client built from the stored credentials.

        Raises ImportError with install instructions when boto3 is missing.
        """
        try:
            import boto3
        except ImportError:
            raise ImportError("Install boto3: pip install boto3")
        return boto3.client(
            'lambda',
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region
        )

    def deploy_function(self, function_code: str, 
                         config: Dict) -> str:
        """Create a Lambda function from *function_code*; return its name."""
        function_name = self._create_function(function_code, config)

        return function_name

    def _create_function(self, function_code: str, 
                          config: Dict) -> str:
        """Create the function via the Lambda API.

        BUG FIX: Role is a mandatory field; the original passed
        config.get("role_arn") straight through, so a missing role
        surfaced as an opaque API error instead of a clear ValueError.
        """
        role_arn = config.get("role_arn")
        if not role_arn:
            raise ValueError(
                "config['role_arn'] is required to create a Lambda function"
            )

        lambda_client = self._lambda_client()

        function_name = config.get("name", "model-inference")

        lambda_client.create_function(
            FunctionName=function_name,
            Runtime=config.get("runtime", "python3.10"),
            Role=role_arn,
            Handler=config.get("handler", "lambda_function.lambda_handler"),
            # NOTE(review): ZipFile expects zip-archive bytes; encoding raw
            # source text will be rejected — confirm callers pass a zip.
            Code={
                'ZipFile': function_code.encode()
            },
            Timeout=config.get("timeout", 30),
            MemorySize=config.get("memory", 512)
        )

        return function_name

    def invoke_function(self, function_name: str, 
                         payload: Dict) -> Dict:
        """Synchronously invoke the function and return its decoded result."""
        lambda_client = self._lambda_client()

        response = lambda_client.invoke(
            FunctionName=function_name,
            InvocationType='RequestResponse',
            Payload=json.dumps(payload)
        )

        return json.loads(response['Payload'].read())

Google Cloud Functions部署

python
class GCPFunctionsDeployer:
    """Deploys Google Cloud Functions (2nd gen) from an uploaded archive.

    NOTE(review): `credentials_path` is passed directly as `credentials=`
    to FunctionsServiceClient, which expects a google.auth Credentials
    object rather than a file path — confirm against the SDK.
    """

    def __init__(self, project_id: str, 
                 credentials_path: str):
        # GCP project that will own the function.
        self.project_id = project_id
        # Path to a service-account JSON key file.
        self.credentials_path = credentials_path
    
    def deploy_function(self, function_code: str, 
                         config: Dict) -> str:
        """Upload and create the function; return its short name."""
        function_name = self._create_function(function_code, config)
        
        return function_name
    
    def _create_function(self, function_code: str, 
                          config: Dict) -> str:
        """Upload the source archive and create the Cloud Function.

        NOTE(review): `upload_from_filename(function_code)` treats
        *function_code* as a local file path (presumably a zip), despite
        the name suggesting source text — confirm what callers pass.
        The created Function also references no source archive; verify the
        upload is actually wired to the function spec.
        """
        try:
            from google.cloud import functions_v2
            from google.cloud import storage
        except ImportError:
            raise ImportError("Install google-cloud-functions: pip install google-cloud-functions")
        
        storage_client = storage.Client.from_service_account_json(
            self.credentials_path
        )
        
        # Falls back to a demo bucket name when none is configured.
        bucket_name = config.get("bucket", "my-functions-bucket")
        bucket = storage_client.bucket(bucket_name)
        
        function_name = config.get("name", "model-inference")
        blob_name = f"functions/{function_name}.zip"
        blob = bucket.blob(blob_name)
        
        blob.upload_from_filename(function_code)
        
        functions_client = functions_v2.FunctionsServiceClient(
            # NOTE(review): expects a Credentials object — see class docstring.
            credentials=self.credentials_path
        )
        
        # Fully-qualified resource name: projects/{p}/locations/{l}/functions/{f}.
        function = functions_v2.Function()
        function.name = f"projects/{self.project_id}/locations/{config.get('region', 'us-central1')}/functions/{function_name}"
        
        functions_client.create_function(
            parent=f"projects/{self.project_id}/locations/{config.get('region', 'us-central1')}",
            function=function
        )
        
        return function_name

实践练习

练习1:构建Docker镜像

python
def build_docker_image(project_path: str, image_name: str):
    """Build a Docker image from *project_path*, tagged *image_name*.

    BUG FIX: the original ignored the subprocess exit status, so a failed
    build returned the tag as if it had succeeded; now raises RuntimeError
    on a non-zero exit (consistent with DockerBuilder).

    Returns the image tag on success.
    """
    import subprocess

    result = subprocess.run(
        ["docker", "build", "-t", image_name, "."],
        cwd=project_path,
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        raise RuntimeError(f"Docker build failed: {result.stderr}")

    return image_name

练习2:部署到Kubernetes

python
def deploy_to_kubernetes(deployment_name: str, image: str):
    """Create a single-replica Deployment running *image* in the
    'default' namespace, exposing container port 8000."""
    from kubernetes import client, config

    config.load_kube_config()

    labels = {"app": deployment_name}

    # Build the object graph bottom-up: container -> pod template -> spec.
    container = client.V1Container(
        name="model-server",
        image=image,
        ports=[client.V1ContainerPort(container_port=8000)]
    )
    pod_template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels),
        spec=client.V1PodSpec(containers=[container])
    )
    deployment_spec = client.V1DeploymentSpec(
        replicas=1,
        selector=client.V1LabelSelector(match_labels=labels),
        template=pod_template
    )
    deployment = client.V1Deployment(
        metadata=client.V1ObjectMeta(name=deployment_name),
        spec=deployment_spec
    )

    client.AppsV1Api().create_namespaced_deployment(
        namespace="default",
        body=deployment
    )

总结

本节我们学习了云服务部署:

  1. 云服务概述和优势
  2. 主流云平台对比(AWS、GCP、Azure)
  3. 容器化部署(Docker、Docker Compose)
  4. Kubernetes部署
  5. 无服务器部署(AWS Lambda、GCP Functions)

云服务部署是AI应用规模化的重要途径。

参考资源