Appearance
第61天:云服务部署
学习目标
- 理解云服务概述
- 掌握主流云平台对比
- 学习容器化部署
- 掌握Kubernetes部署
- 了解无服务器部署
云服务概述
什么是云服务
云服务是通过互联网提供的计算资源、存储和应用程序服务。
核心概念:
本地部署 → 云端部署 → 混合部署

云服务类型:
- IaaS(基础设施即服务):AWS EC2, Google Compute Engine, Azure VM
- PaaS(平台即服务):Google App Engine, Azure App Service, Heroku
- SaaS(软件即服务):Google Workspace, Microsoft 365, Salesforce
- FaaS(函数即服务):AWS Lambda, Google Cloud Functions, Azure Functions
云服务优势
1. 可扩展性
python
class CloudScalability:
    """Simulated threshold-based auto-scaler for a cloud provider.

    Demonstrates the scale-up / scale-down decision; the instance counts
    are placeholders rather than real capacity math.
    """

    def __init__(self, cloud_provider: str):
        # Provider name this scaler targets (e.g. "aws", "gcp").
        self.cloud_provider = cloud_provider

    def auto_scale(self, current_load: int, target_load: int) -> int:
        """Return the instance delta: >0 added, <0 removed, 0 no change.

        Scales up above 80% of target load, down below 30%.
        """
        if current_load > target_load * 0.8:
            return self._scale_up()
        if current_load < target_load * 0.3:
            return self._scale_down()
        return 0

    def _scale_up(self) -> int:
        # Provision the computed number of extra instances.
        added = self._calculate_needed_instances()
        self._provision_instances(added)
        return added

    def _scale_down(self) -> int:
        # Terminate excess instances; a negative delta signals removal.
        removed = self._calculate_excess_instances()
        self._terminate_instances(removed)
        return -removed

    def _calculate_needed_instances(self) -> int:
        # Placeholder: a real implementation would inspect load metrics.
        return 2

    def _calculate_excess_instances(self) -> int:
        # Placeholder: a real implementation would inspect load metrics.
        return 1

    def _provision_instances(self, n: int):
        print(f"Provisioning {n} instances...")

    def _terminate_instances(self, n: int):
        print(f"Terminating {n} instances...")

# 2. 高可用性 (High availability)
python
class CloudHighAvailability:
    """Simulated multi-region high-availability setup.

    Fix: the original annotated *regions* with ``List[str]`` without
    importing ``typing.List`` (NameError at class creation); builtin
    generics are used instead.
    """

    def __init__(self, cloud_provider: str):
        # Provider name this HA setup targets.
        self.cloud_provider = cloud_provider

    def setup_multi_region(self, regions: list[str]):
        """Deploy to every region, then add a load balancer and failover."""
        for region in regions:
            self._deploy_to_region(region)
        self._setup_load_balancer(regions)
        self._setup_failover()

    def _deploy_to_region(self, region: str):
        print(f"Deploying to {region}...")

    def _setup_load_balancer(self, regions: list[str]):
        print(f"Setting up load balancer across {regions}...")

    def _setup_failover(self):
        print("Setting up automatic failover...")

# 主流云平台对比 (Comparison of major cloud platforms)
AWS
python
class AWSProvider:
    """Deploys a model to AWS: uploads the artifact to S3, then creates a
    SageMaker endpoint.

    Fixes vs. original: ``os``/``time`` are now imported where used,
    ``Dict`` annotations use the builtin ``dict``, and the duplicated
    boto3 client construction is factored into ``_boto3_client``.
    """

    def __init__(self, access_key: str, secret_key: str,
                 region: str = "us-east-1"):
        # NOTE(review): credentials held as plain attributes; prefer an IAM
        # role / default credential chain in production.
        self.access_key = access_key
        self.secret_key = secret_key
        self.region = region

    def deploy_model(self, model_path: str, config: dict) -> str:
        """Upload *model_path* and create an endpoint; returns the endpoint name."""
        return self._create_endpoint(model_path, config)

    def _boto3_client(self, service: str):
        # Single place to build boto3 clients with the stored credentials.
        try:
            import boto3
        except ImportError:
            raise ImportError("Install boto3: pip install boto3")
        return boto3.client(
            service,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region
        )

    def _create_endpoint(self, model_path: str, config: dict) -> str:
        import os
        import time

        # Upload the model artifact to S3 first.
        s3_client = self._boto3_client('s3')
        bucket_name = config.get("bucket", "my-model-bucket")
        s3_key = f"models/{os.path.basename(model_path)}"
        s3_client.upload_file(model_path, bucket_name, s3_key)

        # Then create a uniquely named SageMaker endpoint.
        sm_client = self._boto3_client('sagemaker')
        endpoint_name = f"model-endpoint-{int(time.time())}"
        sm_client.create_endpoint(
            EndpointName=endpoint_name,
            EndpointConfigName=config.get("endpoint_config", "default")
        )
        return endpoint_name

    def get_endpoint_info(self, endpoint_name: str) -> dict:
        """Return name/status/url/creation-time details for an endpoint."""
        sm_client = self._boto3_client('sagemaker')
        response = sm_client.describe_endpoint(EndpointName=endpoint_name)
        return {
            "endpoint_name": response["EndpointName"],
            "endpoint_status": response["EndpointStatus"],
            # NOTE(review): DescribeEndpoint documents EndpointArn, not
            # EndpointUrl; .get() avoids a KeyError — confirm the intended field.
            "endpoint_url": response.get("EndpointUrl"),
            "creation_time": response["CreationTime"]
        }

# Google Cloud
python
class GCPProvider:
    """Deploys a model to Google Cloud: uploads to a GCS bucket, then
    creates a Vertex AI endpoint.

    Fixes vs. original: ``aiplatform.init`` was given the key-file *path*
    as ``credentials``; the client libraries require a ``Credentials``
    object, now loaded in ``_credentials``. Missing ``os``/``time``
    imports added; ``Dict`` annotations use builtin ``dict``.
    """

    def __init__(self, project_id: str, credentials_path: str):
        self.project_id = project_id
        # Path to a service-account JSON key file.
        self.credentials_path = credentials_path

    def deploy_model(self, model_path: str, config: dict) -> str:
        """Upload the model, create an endpoint, and return its display name."""
        model_name = self._upload_model(model_path, config)
        return self._create_endpoint(model_name, config)

    def _credentials(self):
        # Load a Credentials object from the service-account key file
        # (google-auth ships with the aiplatform client).
        try:
            from google.oauth2 import service_account
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        return service_account.Credentials.from_service_account_file(
            self.credentials_path
        )

    def _upload_model(self, model_path: str, config: dict) -> str:
        """Upload the model file to GCS; returns the blob name."""
        import os
        try:
            from google.cloud import storage
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        storage_client = storage.Client.from_service_account_json(
            self.credentials_path
        )
        bucket = storage_client.bucket(config.get("bucket", "my-model-bucket"))
        model_name = f"models/{os.path.basename(model_path)}"
        bucket.blob(model_name).upload_from_filename(model_path)
        return model_name

    def _create_endpoint(self, model_name: str, config: dict) -> str:
        import time
        try:
            from google.cloud import aiplatform
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        aiplatform.init(
            project=self.project_id,
            location=config.get("region", "us-central1"),
            credentials=self._credentials()
        )
        endpoint = aiplatform.Endpoint.create(
            display_name=f"model-endpoint-{int(time.time())}"
        )
        return endpoint.display_name

    def get_endpoint_info(self, endpoint_name: str) -> dict:
        """Return basic details for an existing Vertex AI endpoint."""
        try:
            from google.cloud import aiplatform
        except ImportError:
            raise ImportError("Install google-cloud-aiplatform: pip install google-cloud-aiplatform")
        aiplatform.init(
            project=self.project_id,
            credentials=self._credentials()
        )
        endpoint = aiplatform.Endpoint(endpoint_name)
        return {
            "endpoint_name": endpoint.display_name,
            # NOTE(review): status is hard-coded; Vertex endpoints expose no
            # simple status string — confirm what callers expect here.
            "endpoint_status": "SERVING",
            "create_time": endpoint.create_time
        }

# Azure
python
class AzureProvider:
    """Deploys a model to Azure ML: registers the model, then creates an
    online endpoint.

    Fixes vs. original: missing ``time`` import added; ``Dict``
    annotations use builtin ``dict``; the MLClient construction that was
    repeated in three methods is factored into ``_ml_client``.
    """

    def __init__(self, subscription_id: str, resource_group: str):
        self.subscription_id = subscription_id
        self.resource_group = resource_group

    def deploy_model(self, model_path: str, config: dict) -> str:
        """Register *model_path* and create an endpoint; returns the endpoint name."""
        model_name = self._register_model(model_path, config)
        return self._create_endpoint(model_name, config)

    def _ml_client(self):
        # Shared MLClient factory; DefaultAzureCredential resolves auth from
        # the environment / CLI / managed identity.
        try:
            from azure.ai.ml import MLClient
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise ImportError("Install azure-ai-ml: pip install azure-ai-ml")
        return MLClient(
            credential=DefaultAzureCredential(),
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group
        )

    def _register_model(self, model_path: str, config: dict) -> str:
        import time
        ml_client = self._ml_client()
        model_name = f"model-{int(time.time())}"
        # NOTE(review): azure-ai-ml's models.create_or_update normally takes
        # a Model entity object; confirm these keyword arguments work.
        ml_client.models.create_or_update(
            name=model_name,
            path=model_path,
            description=config.get("description", "")
        )
        return model_name

    def _create_endpoint(self, model_name: str, config: dict) -> str:
        import time
        ml_client = self._ml_client()
        endpoint_name = f"endpoint-{int(time.time())}"
        # NOTE(review): begin_create_or_update expects a
        # ManagedOnlineEndpoint object in azure-ai-ml; confirm these kwargs.
        ml_client.online_endpoints.begin_create_or_update(
            name=endpoint_name,
            description=config.get("description", "")
        ).result()
        return endpoint_name

    def get_endpoint_info(self, endpoint_name: str) -> dict:
        """Return name/status/url/creation-time details for an online endpoint."""
        ml_client = self._ml_client()
        endpoint = ml_client.online_endpoints.get(endpoint_name)
        return {
            "endpoint_name": endpoint.name,
            "endpoint_status": endpoint.provisioning_state,
            "endpoint_url": endpoint.scoring_uri,
            "create_time": endpoint.creation_time
        }

# 容器化部署 (Containerized deployment)
Docker镜像构建
python
class DockerBuilder:
    """Generates a Dockerfile for a Python service and builds the image.

    Fixes vs. original: the Dockerfile template is emitted via
    ``textwrap.dedent`` so the written file is left-aligned instead of
    inheriting the method's indentation; the pointless ``f`` prefix on a
    placeholder-free string is dropped; ``os`` is imported where used.
    """

    def __init__(self, project_path: str):
        # Directory containing the project (build context).
        self.project_path = project_path

    def build_image(self, image_name: str,
                    dockerfile_path: str = "Dockerfile") -> str:
        """Write a Dockerfile into the project and build it; returns the image tag."""
        dockerfile = self._create_dockerfile(dockerfile_path)
        self._write_dockerfile(dockerfile, dockerfile_path)
        return self._build_docker_image(image_name, dockerfile_path)

    def _create_dockerfile(self, dockerfile_path: str) -> str:
        """Return the Dockerfile content for a slim Python 3.10 service image."""
        import textwrap
        return textwrap.dedent("""\
            FROM python:3.10-slim
            WORKDIR /app
            COPY requirements.txt .
            RUN pip install --no-cache-dir -r requirements.txt
            COPY . .
            EXPOSE 8000
            CMD ["python", "server.py"]
            """)

    def _write_dockerfile(self, content: str, dockerfile_path: str):
        import os
        with open(os.path.join(self.project_path, dockerfile_path), 'w') as f:
            f.write(content)

    def _build_docker_image(self, image_name: str,
                            dockerfile_path: str) -> str:
        import subprocess
        result = subprocess.run(
            ["docker", "build", "-t", image_name,
             "-f", dockerfile_path, "."],
            cwd=self.project_path,
            capture_output=True,
            text=True
        )
        if result.returncode != 0:
            raise RuntimeError(f"Docker build failed: {result.stderr}")
        # `docker build -t` tags the image, so the tag is its identifier.
        return image_name

# Docker Compose部署 (Docker Compose deployment)
python
class DockerComposeDeployer:
    """Writes a docker-compose file (with an NVIDIA GPU reservation) and
    brings the stack up in detached mode.

    Fixes vs. original: YAML is indentation-sensitive, so the compose
    document is dedented to column 0 via ``textwrap.dedent``; a failing
    ``docker-compose up`` exit code is no longer silently ignored.
    """

    def __init__(self, project_path: str):
        # Directory where the compose file is written and run from.
        self.project_path = project_path

    def deploy(self, compose_file: str = "docker-compose.yml"):
        """Generate the compose file, write it, and start the services."""
        compose_config = self._create_compose_config()
        self._write_compose_file(compose_config, compose_file)
        self._deploy_compose(compose_file)

    def _create_compose_config(self) -> str:
        """Return the docker-compose YAML document as a string."""
        import textwrap
        return textwrap.dedent("""\
            version: '3.8'
            services:
              model-server:
                build:
                  context: .
                  dockerfile: Dockerfile
                ports:
                  - "8000:8000"
                environment:
                  - MODEL_PATH=models/model.pt
                  - DEVICE=cuda
                deploy:
                  resources:
                    reservations:
                      devices:
                        - driver: nvidia
                          count: 1
                          capabilities: [gpu]
                restart: unless-stopped
            """)

    def _write_compose_file(self, content: str, compose_file: str):
        import os
        with open(os.path.join(self.project_path, compose_file), 'w') as f:
            f.write(content)

    def _deploy_compose(self, compose_file: str):
        import subprocess
        result = subprocess.run(
            ["docker-compose", "-f", compose_file, "up", "-d"],
            cwd=self.project_path
        )
        # Surface failures instead of returning as if the stack started.
        if result.returncode != 0:
            raise RuntimeError(f"docker-compose up failed with code {result.returncode}")

# Kubernetes部署 (Kubernetes deployment)
K8s配置
python
class KubernetesDeployer:
    """Creates a namespace, Deployment, and Service for a model server.

    Fix: the original did ``from kubernetes import client, config`` inside
    methods whose *parameter* was also named ``config``, so the import
    shadowed the dict and ``config.get(...)`` raised AttributeError. The
    kubernetes config module is now aliased as ``kube_config``.
    """

    def __init__(self, namespace: str = "default"):
        self.namespace = namespace

    def deploy(self, config: dict):
        """Run the full rollout: namespace, then Deployment, then Service."""
        self._create_namespace()
        self._deploy_deployment(config)
        self._deploy_service(config)

    @staticmethod
    def _load_client():
        # Load kubeconfig and return the client module; aliasing keeps the
        # name `config` free for the dict parameters of callers.
        from kubernetes import client, config as kube_config
        kube_config.load_kube_config()
        return client

    def _create_namespace(self):
        client = self._load_client()
        v1 = client.CoreV1Api()
        namespace = client.V1Namespace(
            metadata=client.V1ObjectMeta(name=self.namespace)
        )
        try:
            v1.create_namespace(namespace)
        except client.exceptions.ApiException as e:
            # 409 Conflict = namespace already exists; that's fine.
            if e.status != 409:
                raise

    def _deploy_deployment(self, config: dict):
        client = self._load_client()
        apps_v1 = client.AppsV1Api()
        app_name = config.get("name", "model-server")
        deployment = client.V1Deployment(
            metadata=client.V1ObjectMeta(name=app_name),
            spec=client.V1DeploymentSpec(
                replicas=config.get("replicas", 1),
                selector=client.V1LabelSelector(
                    match_labels={"app": app_name}
                ),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(
                        labels={"app": app_name}
                    ),
                    spec=client.V1PodSpec(
                        containers=[
                            client.V1Container(
                                name="model-server",
                                image=config.get("image", "model-server:latest"),
                                ports=[
                                    client.V1ContainerPort(
                                        container_port=config.get("port", 8000)
                                    )
                                ],
                                resources=client.V1ResourceRequirements(
                                    requests={
                                        "cpu": config.get("cpu_request", "500m"),
                                        "memory": config.get("memory_request", "1Gi")
                                    },
                                    limits={
                                        "cpu": config.get("cpu_limit", "2"),
                                        "memory": config.get("memory_limit", "4Gi")
                                    }
                                )
                            )
                        ]
                    )
                )
            )
        )
        apps_v1.create_namespaced_deployment(
            namespace=self.namespace,
            body=deployment
        )

    def _deploy_service(self, config: dict):
        client = self._load_client()
        v1 = client.CoreV1Api()
        app_name = config.get("name", "model-server")
        service = client.V1Service(
            metadata=client.V1ObjectMeta(name=app_name),
            spec=client.V1ServiceSpec(
                selector={"app": app_name},
                ports=[
                    client.V1ServicePort(
                        port=config.get("port", 8000),
                        target_port=config.get("port", 8000)
                    )
                ],
                type=config.get("service_type", "LoadBalancer")
            )
        )
        v1.create_namespaced_service(
            namespace=self.namespace,
            body=service
        )

# K8s自动缩放 (K8s autoscaling)
python
class KubernetesAutoScaler:
    """Attaches a CPU-utilization HorizontalPodAutoscaler to a Deployment.

    Fix: the original's ``from kubernetes import client, config`` shadowed
    the ``config: Dict`` parameter of ``setup_hpa``, so every subsequent
    ``config.get(...)`` raised AttributeError. The module is now aliased.
    """

    def __init__(self, namespace: str = "default"):
        self.namespace = namespace

    def setup_hpa(self, deployment_name: str, config: dict):
        """Create an autoscaling/v2 HPA targeting *deployment_name*."""
        # Alias the kubernetes config module so it cannot shadow the
        # `config` dict parameter.
        from kubernetes import client, config as kube_config
        kube_config.load_kube_config()
        autoscaling_v2 = client.AutoscalingV2Api()
        hpa = client.V2HorizontalPodAutoscaler(
            metadata=client.V1ObjectMeta(
                name=f"{deployment_name}-hpa"
            ),
            spec=client.V2HorizontalPodAutoscalerSpec(
                scale_target_ref=client.V2CrossVersionObjectReference(
                    api_version="apps/v1",
                    kind="Deployment",
                    name=deployment_name
                ),
                min_replicas=config.get("min_replicas", 1),
                max_replicas=config.get("max_replicas", 10),
                metrics=[
                    client.V2MetricSpec(
                        type="Resource",
                        resource=client.V2ResourceMetricSource(
                            name="cpu",
                            target=client.V2MetricTarget(
                                type="Utilization",
                                average_utilization=config.get("cpu_target", 70)
                            )
                        )
                    )
                ]
            )
        )
        autoscaling_v2.create_namespaced_horizontal_pod_autoscaler(
            namespace=self.namespace,
            body=hpa
        )

# 无服务器部署 (Serverless deployment)
AWS Lambda部署
python
class AWSLambdaDeployer:
    """Creates and invokes AWS Lambda functions from in-memory source code.

    Fixes vs. original: ``create_function``'s ``Code.ZipFile`` field
    requires the bytes of a .zip deployment package, but the original
    passed raw UTF-8 source — ``_zip_source`` now packages the code;
    ``json`` is imported where used and the duplicated boto3 client
    construction is factored into ``_lambda_client``.
    """

    def __init__(self, access_key: str, secret_key: str,
                 region: str = "us-east-1"):
        self.access_key = access_key
        self.secret_key = secret_key
        self.region = region

    def _lambda_client(self):
        # Shared boto3 Lambda client factory.
        try:
            import boto3
        except ImportError:
            raise ImportError("Install boto3: pip install boto3")
        return boto3.client(
            'lambda',
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region
        )

    @staticmethod
    def _zip_source(function_code: str) -> bytes:
        """Package *function_code* as the zip archive Lambda expects.

        The file name matches the default handler module
        (``lambda_function.lambda_handler``).
        """
        import io
        import zipfile
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.writestr("lambda_function.py", function_code)
        return buf.getvalue()

    def deploy_function(self, function_code: str, config: dict) -> str:
        """Create the Lambda function; returns its name."""
        return self._create_function(function_code, config)

    def _create_function(self, function_code: str, config: dict) -> str:
        lambda_client = self._lambda_client()
        function_name = config.get("name", "model-inference")
        lambda_client.create_function(
            FunctionName=function_name,
            Runtime=config.get("runtime", "python3.10"),
            Role=config.get("role_arn"),
            Handler=config.get("handler", "lambda_function.lambda_handler"),
            Code={
                'ZipFile': self._zip_source(function_code)
            },
            Timeout=config.get("timeout", 30),
            MemorySize=config.get("memory", 512)
        )
        return function_name

    def invoke_function(self, function_name: str, payload: dict) -> dict:
        """Synchronously invoke the function and return its decoded JSON result."""
        import json
        lambda_client = self._lambda_client()
        response = lambda_client.invoke(
            FunctionName=function_name,
            InvocationType='RequestResponse',
            Payload=json.dumps(payload)
        )
        return json.loads(response['Payload'].read())

# Google Cloud Functions部署 (Google Cloud Functions deployment)
python
class GCPFunctionsDeployer:
    """Uploads a function package to GCS and creates a Cloud Function (v2).

    Fixes vs. original: the functions client was given the key-file *path*
    as ``credentials`` — a ``Credentials`` object is now loaded from it;
    the client class in ``functions_v2`` is ``FunctionServiceClient``
    (the original's ``FunctionsServiceClient`` does not exist).
    """

    def __init__(self, project_id: str, credentials_path: str):
        self.project_id = project_id
        # Path to a service-account JSON key file.
        self.credentials_path = credentials_path

    def deploy_function(self, function_code: str, config: dict) -> str:
        """Deploy and return the function's short name.

        NOTE(review): despite the name, *function_code* is passed to
        ``upload_from_filename`` and so is treated as a local path to a
        prepared archive — confirm with callers.
        """
        return self._create_function(function_code, config)

    def _create_function(self, function_code: str, config: dict) -> str:
        try:
            from google.cloud import functions_v2
            from google.cloud import storage
            from google.oauth2 import service_account
        except ImportError:
            raise ImportError("Install google-cloud-functions: pip install google-cloud-functions")

        # Upload the packaged source to a GCS bucket.
        storage_client = storage.Client.from_service_account_json(
            self.credentials_path
        )
        bucket = storage_client.bucket(config.get("bucket", "my-functions-bucket"))
        function_name = config.get("name", "model-inference")
        blob = bucket.blob(f"functions/{function_name}.zip")
        blob.upload_from_filename(function_code)

        # Build the client with a real Credentials object.
        credentials = service_account.Credentials.from_service_account_file(
            self.credentials_path
        )
        functions_client = functions_v2.FunctionServiceClient(
            credentials=credentials
        )
        region = config.get('region', 'us-central1')
        parent = f"projects/{self.project_id}/locations/{region}"
        function = functions_v2.Function()
        function.name = f"{parent}/functions/{function_name}"
        functions_client.create_function(
            parent=parent,
            function=function
        )
        return function_name

# 实践练习 (Exercises)
练习1:构建Docker镜像
python
def build_docker_image(project_path: str, image_name: str):
    """Exercise 1: build a Docker image from *project_path* and return its tag.

    Fix: ``check=True`` makes a failed build raise CalledProcessError
    instead of returning a tag that was never actually built.
    """
    import subprocess
    subprocess.run(
        ["docker", "build", "-t", image_name, "."],
        cwd=project_path,
        check=True
    )
    return image_name

# 练习2:部署到Kubernetes (Exercise 2: deploy to Kubernetes)
python
def deploy_to_kubernetes(deployment_name: str, image: str):
    """Exercise 2: create a single-replica Deployment running *image* on port 8000.

    Uses the local kubeconfig; the Deployment goes into the "default"
    namespace with selector/template labels {"app": deployment_name}.
    """
    from kubernetes import client, config
    config.load_kube_config()
    apps_v1 = client.AppsV1Api()
    # One label dict shared by the selector and the pod template so they
    # cannot drift apart.
    labels = {"app": deployment_name}
    deployment = client.V1Deployment(
        metadata=client.V1ObjectMeta(name=deployment_name),
        spec=client.V1DeploymentSpec(
            replicas=1,
            selector=client.V1LabelSelector(match_labels=labels),
            template=client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(labels=labels),
                spec=client.V1PodSpec(
                    containers=[
                        client.V1Container(
                            name="model-server",
                            image=image,
                            ports=[
                                client.V1ContainerPort(container_port=8000)
                            ]
                        )
                    ]
                )
            )
        )
    )
    apps_v1.create_namespaced_deployment(
        namespace="default",
        body=deployment
    )

# 总结 (Summary)
本节我们学习了云服务部署:
- 云服务概述和优势
- 主流云平台对比(AWS、GCP、Azure)
- 容器化部署(Docker、Docker Compose)
- Kubernetes部署
- 无服务器部署(AWS Lambda、GCP Functions)
云服务部署是AI应用规模化的重要途径。
